[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <086d37e1-a366-46a8-eeff-82817317d62b@kernel.dk>
Date: Fri, 5 Aug 2016 15:27:42 -0600
From: Jens Axboe <axboe@...nel.dk>
To: Mike Christie <mchristi@...hat.com>,
Christoph Hellwig <hch@...radead.org>
Cc: ross.zwisler@...ux.intel.com, linux-ext4@...r.kernel.org,
linux-block@...r.kernel.org, linux-fsdevel@...r.kernel.org,
david@...morbit.com, minchan@...nel.org, ngupta@...are.org,
vishal.l.verma@...el.com
Subject: Re: [PATCH 1/1] mm/block: convert rw_page users to bio op use
On 08/05/2016 12:05 PM, Mike Christie wrote:
> On 08/05/2016 09:09 AM, Jens Axboe wrote:
>> On 08/05/2016 07:52 AM, Jens Axboe wrote:
>>> On 08/05/2016 01:57 AM, Christoph Hellwig wrote:
>>>>>
>>>>> The rw_page users were not converted to use bio/req ops. As a result
>>>>> bdev_write_page is not passing down REQ_OP_WRITE and the IOs will
>>>>> be sent down as reads.
>>>>
>>>> Can we just get rid of REQ_OP_WRITE enum for the ->rw_page interface
>>>> and pass a 'bool write'? If not I'd prefer to avoid using op_is_write
>>>> as much as possible - it's a confusing interface if we really want
>>>> to do a switch on read vs write vs invalid.
>>>
>>> That's a really good point, especially since the op_flags was dropped as
>>> well. I'll cook that up.
>>
>> How about something like this? We don't bleed into the non-core bits
>> anymore, so we can move those back under CONFIG_BLOCK too.
>>
>> diff --git a/drivers/block/brd.c b/drivers/block/brd.c
>> index 3439b28cce8b..0c76d4016eeb 100644
>> --- a/drivers/block/brd.c
>> +++ b/drivers/block/brd.c
>> @@ -300,20 +300,20 @@ static void copy_from_brd(void *dst, struct brd_device *brd,
>> * Process a single bvec of a bio.
>> */
>> static int brd_do_bvec(struct brd_device *brd, struct page *page,
>> - unsigned int len, unsigned int off, int op,
>> + unsigned int len, unsigned int off, bool is_write,
>> sector_t sector)
>> {
>> void *mem;
>> int err = 0;
>>
>> - if (op_is_write(op)) {
>> + if (is_write) {
>> err = copy_to_brd_setup(brd, sector, len);
>> if (err)
>> goto out;
>> }
>>
>> mem = kmap_atomic(page);
>> - if (!op_is_write(op)) {
>> + if (!is_write) {
>> copy_from_brd(mem + off, brd, sector, len);
>> flush_dcache_page(page);
>> } else {
>> @@ -350,8 +350,8 @@ static blk_qc_t brd_make_request(struct request_queue *q, struct bio *bio)
>> unsigned int len = bvec.bv_len;
>> int err;
>>
>> - err = brd_do_bvec(brd, bvec.bv_page, len,
>> - bvec.bv_offset, bio_op(bio), sector);
>> + err = brd_do_bvec(brd, bvec.bv_page, len, bvec.bv_offset,
>> + op_is_write(bio_op(bio)), sector);
>> if (err)
>> goto io_error;
>> sector += len >> SECTOR_SHIFT;
>> @@ -366,11 +366,11 @@ io_error:
>> }
>>
>> static int brd_rw_page(struct block_device *bdev, sector_t sector,
>> - struct page *page, int op)
>> + struct page *page, bool is_write)
>> {
>> struct brd_device *brd = bdev->bd_disk->private_data;
>> - int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, op, sector);
>> - page_endio(page, op, err);
>> + int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector);
>> + page_endio(page, is_write, err);
>> return err;
>> }
>>
>> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
>> index ca29649c4b08..04365b17ee67 100644
>> --- a/drivers/block/zram/zram_drv.c
>> +++ b/drivers/block/zram/zram_drv.c
>> @@ -843,15 +843,16 @@ static void zram_bio_discard(struct zram *zram, u32 index,
>> }
>>
>> static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
>> - int offset, int op)
>> + int offset, bool is_write)
>> {
>> unsigned long start_time = jiffies;
>> + int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
>> int ret;
>>
>> - generic_start_io_acct(op, bvec->bv_len >> SECTOR_SHIFT,
>> + generic_start_io_acct(rw_acct, bvec->bv_len >> SECTOR_SHIFT,
>> &zram->disk->part0);
>>
>> - if (!op_is_write(op)) {
>> + if (!is_write) {
>> atomic64_inc(&zram->stats.num_reads);
>> ret = zram_bvec_read(zram, bvec, index, offset);
>> } else {
>> @@ -859,10 +860,10 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
>> ret = zram_bvec_write(zram, bvec, index, offset);
>> }
>>
>> - generic_end_io_acct(op, &zram->disk->part0, start_time);
>> + generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);
>>
>> if (unlikely(ret)) {
>> - if (!op_is_write(op))
>> + if (!is_write)
>> atomic64_inc(&zram->stats.failed_reads);
>> else
>> atomic64_inc(&zram->stats.failed_writes);
>> @@ -903,17 +904,17 @@ static void __zram_make_request(struct zram *zram, struct bio *bio)
>> bv.bv_offset = bvec.bv_offset;
>>
>> if (zram_bvec_rw(zram, &bv, index, offset,
>> - bio_op(bio)) < 0)
>> + op_is_write(bio_op(bio))) < 0)
>> goto out;
>>
>> bv.bv_len = bvec.bv_len - max_transfer_size;
>> bv.bv_offset += max_transfer_size;
>> if (zram_bvec_rw(zram, &bv, index + 1, 0,
>> - bio_op(bio)) < 0)
>> + op_is_write(bio_op(bio))) < 0)
>> goto out;
>> } else
>> if (zram_bvec_rw(zram, &bvec, index, offset,
>> - bio_op(bio)) < 0)
>> + op_is_write(bio_op(bio))) < 0)
>> goto out;
>>
>> update_position(&index, &offset, &bvec);
>> @@ -970,7 +971,7 @@ static void zram_slot_free_notify(struct block_device *bdev,
>> }
>>
>> static int zram_rw_page(struct block_device *bdev, sector_t sector,
>> - struct page *page, int op)
>> + struct page *page, bool is_write)
>> {
>> int offset, err = -EIO;
>> u32 index;
>> @@ -994,7 +995,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector,
>> bv.bv_len = PAGE_SIZE;
>> bv.bv_offset = 0;
>>
>> - err = zram_bvec_rw(zram, &bv, index, offset, op);
>> + err = zram_bvec_rw(zram, &bv, index, offset, is_write);
>> put_zram:
>> zram_meta_put(zram);
>> out:
>> @@ -1007,7 +1008,7 @@ out:
>> * (e.g., SetPageError, set_page_dirty and extra works).
>> */
>> if (err == 0)
>> - page_endio(page, op, 0);
>> + page_endio(page, is_write, 0);
>> return err;
>> }
>>
>> diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
>> index 7cf3bdfaf809..88e91666f145 100644
>> --- a/drivers/nvdimm/btt.c
>> +++ b/drivers/nvdimm/btt.c
>> @@ -1133,11 +1133,11 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
>>
>> static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
>> struct page *page, unsigned int len, unsigned int off,
>> - int op, sector_t sector)
>> + bool is_write, sector_t sector)
>> {
>> int ret;
>>
>> - if (!op_is_write(op)) {
>> + if (!is_write) {
>> ret = btt_read_pg(btt, bip, page, off, sector, len);
>> flush_dcache_page(page);
>> } else {
>> @@ -1180,7 +1180,7 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
>> BUG_ON(len % btt->sector_size);
>>
>> err = btt_do_bvec(btt, bip, bvec.bv_page, len, bvec.bv_offset,
>> - bio_op(bio), iter.bi_sector);
>> + op_is_write(bio_op(bio)), iter.bi_sector);
>> if (err) {
>> dev_info(&btt->nd_btt->dev,
>> "io error in %s sector %lld, len %d,\n",
>> @@ -1200,12 +1200,12 @@ out:
>> }
>>
>> static int btt_rw_page(struct block_device *bdev, sector_t sector,
>> - struct page *page, int op)
>> + struct page *page, bool is_write)
>> {
>> struct btt *btt = bdev->bd_disk->private_data;
>>
>> - btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, op, sector);
>> - page_endio(page, op, 0);
>> + btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector);
>> + page_endio(page, is_write, 0);
>> return 0;
>> }
>>
>> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
>> index d64d92481c1d..20bae50c231d 100644
>> --- a/drivers/nvdimm/pmem.c
>> +++ b/drivers/nvdimm/pmem.c
>> @@ -67,7 +67,7 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
>> }
>>
>> static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
>> - unsigned int len, unsigned int off, int op,
>> + unsigned int len, unsigned int off, bool is_write,
>> sector_t sector)
>> {
>> int rc = 0;
>> @@ -79,7 +79,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
>> if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
>> bad_pmem = true;
>>
>> - if (!op_is_write(op)) {
>> + if (!is_write) {
>> if (unlikely(bad_pmem))
>> rc = -EIO;
>> else {
>> @@ -134,7 +134,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
>> do_acct = nd_iostat_start(bio, &start);
>> bio_for_each_segment(bvec, bio, iter) {
>> rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
>> - bvec.bv_offset, bio_op(bio),
>> + bvec.bv_offset, op_is_write(bio_op(bio)),
>> iter.bi_sector);
>> if (rc) {
>> bio->bi_error = rc;
>> @@ -152,12 +152,12 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
>> }
>>
>> static int pmem_rw_page(struct block_device *bdev, sector_t sector,
>> - struct page *page, int op)
>> + struct page *page, bool is_write)
>> {
>> struct pmem_device *pmem = bdev->bd_queue->queuedata;
>> int rc;
>>
>> - rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, op, sector);
>> + rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector);
>>
>> /*
>> * The ->rw_page interface is subtle and tricky. The core
>> @@ -166,7 +166,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
>> * caused by double completion.
>> */
>> if (rc == 0)
>> - page_endio(page, op, 0);
>> + page_endio(page, is_write, 0);
>>
>> return rc;
>> }
>> diff --git a/fs/block_dev.c b/fs/block_dev.c
>> index d402899ba135..c3cdde87cc8c 100644
>> --- a/fs/block_dev.c
>> +++ b/fs/block_dev.c
>> @@ -416,8 +416,7 @@ int bdev_read_page(struct block_device *bdev, sector_t sector,
>> result = blk_queue_enter(bdev->bd_queue, false);
>> if (result)
>> return result;
>> - result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
>> - REQ_OP_READ);
>> + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, false);
>> blk_queue_exit(bdev->bd_queue);
>> return result;
>> }
>> @@ -455,8 +454,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector,
>> return result;
>>
>> set_page_writeback(page);
>> - result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
>> - REQ_OP_WRITE);
>> + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, true);
>> if (result)
>> end_page_writeback(page);
>> else
>> diff --git a/fs/mpage.c b/fs/mpage.c
>> index 7a09c55b4bd0..d2413af0823a 100644
>> --- a/fs/mpage.c
>> +++ b/fs/mpage.c
>> @@ -50,7 +50,7 @@ static void mpage_end_io(struct bio *bio)
>>
>> bio_for_each_segment_all(bv, bio, i) {
>> struct page *page = bv->bv_page;
>> - page_endio(page, bio_op(bio), bio->bi_error);
>> + page_endio(page, op_is_write(bio_op(bio)), bio->bi_error);
>> }
>>
>> bio_put(bio);
>> diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
>> index 14b28ff2caf8..f254eb264924 100644
>> --- a/include/linux/blk_types.h
>> +++ b/include/linux/blk_types.h
>> @@ -18,17 +18,6 @@ struct cgroup_subsys_state;
>> typedef void (bio_end_io_t) (struct bio *);
>> typedef void (bio_destructor_t) (struct bio *);
>>
>> -enum req_op {
>> - REQ_OP_READ,
>> - REQ_OP_WRITE,
>> - REQ_OP_DISCARD, /* request to discard sectors */
>> - REQ_OP_SECURE_ERASE, /* request to securely erase sectors */
>> - REQ_OP_WRITE_SAME, /* write same block many times */
>> - REQ_OP_FLUSH, /* request for cache flush */
>> -};
>> -
>> -#define REQ_OP_BITS 3
>> -
>> #ifdef CONFIG_BLOCK
>> /*
>> * main unit of I/O for the block layer and lower layers (ie drivers and
>> @@ -239,6 +228,17 @@ enum rq_flag_bits {
>> #define REQ_HASHED (1ULL << __REQ_HASHED)
>> #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT)
>>
>> +enum req_op {
>> + REQ_OP_READ,
>> + REQ_OP_WRITE,
>> + REQ_OP_DISCARD, /* request to discard sectors */
>> + REQ_OP_SECURE_ERASE, /* request to securely erase sectors */
>> + REQ_OP_WRITE_SAME, /* write same block many times */
>> + REQ_OP_FLUSH, /* request for cache flush */
>> +};
>> +
>> +#define REQ_OP_BITS 3
>> +
>> typedef unsigned int blk_qc_t;
>> #define BLK_QC_T_NONE -1U
>> #define BLK_QC_T_SHIFT 16
>> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
>> index ccd68c0d01de..2c210b6a7bcf 100644
>> --- a/include/linux/blkdev.h
>> +++ b/include/linux/blkdev.h
>> @@ -1672,7 +1672,7 @@ struct blk_dax_ctl {
>> struct block_device_operations {
>> int (*open) (struct block_device *, fmode_t);
>> void (*release) (struct gendisk *, fmode_t);
>> - int (*rw_page)(struct block_device *, sector_t, struct page *, int op);
>> + int (*rw_page)(struct block_device *, sector_t, struct page *, bool);
>> int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
>> int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
>> long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *,
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index 498255e6914e..f3f0b4c8e8ac 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -2480,13 +2480,12 @@ extern void init_special_inode(struct inode *, umode_t, dev_t);
>> extern void make_bad_inode(struct inode *);
>> extern bool is_bad_inode(struct inode *);
>>
>> +#ifdef CONFIG_BLOCK
>> static inline bool op_is_write(unsigned int op)
>> {
>> return op == REQ_OP_READ ? false : true;
>> }
>>
>> -#ifdef CONFIG_BLOCK
>> -
>> /*
>> * return data direction, READ or WRITE
>> */
>> diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
>> index 45786374abbd..66a1260b33de 100644
>> --- a/include/linux/pagemap.h
>> +++ b/include/linux/pagemap.h
>> @@ -510,7 +510,7 @@ static inline void wait_on_page_writeback(struct page *page)
>> extern void end_page_writeback(struct page *page);
>> void wait_for_stable_page(struct page *page);
>>
>> -void page_endio(struct page *page, int op, int err);
>> +void page_endio(struct page *page, bool is_write, int err);
>>
>> /*
>> * Add an arbitrary waiter to a page's wait queue
>> diff --git a/mm/filemap.c b/mm/filemap.c
>> index daef091d4c50..8a287dfc5372 100644
>> --- a/mm/filemap.c
>> +++ b/mm/filemap.c
>> @@ -887,9 +887,9 @@ EXPORT_SYMBOL(end_page_writeback);
>> * After completing I/O on a page, call this routine to update the page
>> * flags appropriately
>> */
>> -void page_endio(struct page *page, int op, int err)
>> +void page_endio(struct page *page, bool is_write, int err)
>> {
>> - if (!op_is_write(op)) {
>> + if (!is_write) {
>> if (!err) {
>> SetPageUptodate(page);
>> } else {
>>
>
> Looks ok to me. Also tested all the patches in the linus2 branch
> and verified it fixed the rw_page related hang for me.
>
> Reviewed-by: Mike Christie <mchristi@...hat.com>
Great, thanks Mike. I'll add that, I'm going to rebase it with the
bio_opf branch anyway.
--
Jens Axboe
--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists