lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <YPXEMU8nQdv8n4Dg@google.com>
Date:   Mon, 19 Jul 2021 11:28:01 -0700
From:   Jaegeuk Kim <jaegeuk@...nel.org>
To:     Chao Yu <chao@...nel.org>
Cc:     linux-f2fs-devel@...ts.sourceforge.net,
        linux-kernel@...r.kernel.org, Chao Yu <chao.yu@...ux.dev>
Subject: Re: [PATCH] f2fs: multidevice: support direct IO

On 07/19, Chao Yu wrote:
> Commit 3c62be17d4f5 ("f2fs: support multiple devices") missed
> to support direct IO for multiple device feature, this patch
> adds to support the missing part of multidevice feature.
> 
> In addition, for multiple device image, we should be aware of
> any issued direct write IO rather than just buffered write IO,
> so that fsync and syncfs can issue a preflush command to the
> device where direct write IO goes, to persist user data for
> posix compliant.

Is this aligned to Eric's iomap work?

> 
> Signed-off-by: Chao Yu <chao@...nel.org>
> ---
>  fs/f2fs/data.c    | 52 +++++++++++++++++++++++++++++++++++++++++++++--
>  fs/f2fs/f2fs.h    | 24 +++++++++++++++++++---
>  fs/f2fs/segment.c | 29 +++++++++++++-------------
>  fs/f2fs/super.c   |  7 +++++++
>  4 files changed, 93 insertions(+), 19 deletions(-)
> 
> diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
> index 095350ccf80d..be65dca0de40 100644
> --- a/fs/f2fs/data.c
> +++ b/fs/f2fs/data.c
> @@ -1451,10 +1451,16 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
>  	struct extent_info ei = {0,0,0};
>  	block_t blkaddr;
>  	unsigned int start_pgofs;
> +	int bidx = 0;
> +	bool multidevice_dio;
>  
>  	if (!maxblocks)
>  		return 0;
>  
> +	multidevice_dio = f2fs_allow_multi_device_dio(sbi, flag);
> +	if (multidevice_dio)
> +		map->m_bdev = sbi->sb->s_bdev;
> +
>  	map->m_len = 0;
>  	map->m_flags = 0;
>  
> @@ -1477,6 +1483,16 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
>  		if (flag == F2FS_GET_BLOCK_DIO)
>  			f2fs_wait_on_block_writeback_range(inode,
>  						map->m_pblk, map->m_len);
> +
> +		if (multidevice_dio) {
> +			bidx = f2fs_target_device_index(sbi, map->m_pblk);
> +			if (bidx) {
> +				map->m_bdev = FDEV(bidx).bdev;
> +				map->m_pblk -= FDEV(bidx).start_blk;
> +				map->m_len = min(map->m_len,
> +					FDEV(bidx).end_blk + 1 - map->m_pblk);
> +			}
> +		}
>  		goto out;
>  	}
>  
> @@ -1574,6 +1590,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
>  	if (flag == F2FS_GET_BLOCK_PRE_AIO)
>  		goto skip;
>  
> +	if (multidevice_dio)
> +		bidx = f2fs_target_device_index(sbi, blkaddr);
> +
>  	if (map->m_len == 0) {
>  		/* preallocated unwritten block should be mapped for fiemap. */
>  		if (blkaddr == NEW_ADDR)
> @@ -1582,10 +1601,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
>  
>  		map->m_pblk = blkaddr;
>  		map->m_len = 1;
> +
> +		if (multidevice_dio && map->m_bdev != FDEV(bidx).bdev)
> +			map->m_bdev = FDEV(bidx).bdev;
>  	} else if ((map->m_pblk != NEW_ADDR &&
>  			blkaddr == (map->m_pblk + ofs)) ||
>  			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
>  			flag == F2FS_GET_BLOCK_PRE_DIO) {
> +		if (multidevice_dio && map->m_bdev != FDEV(bidx).bdev)
> +			goto sync_out;
>  		ofs++;
>  		map->m_len++;
>  	} else {
> @@ -1638,11 +1662,27 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
>  
>  sync_out:
>  
> -	/* for hardware encryption, but to avoid potential issue in future */
> -	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
> +	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
> +		/*
> +		 * for hardware encryption, but to avoid potential issue
> +		 * in future
> +		 */
>  		f2fs_wait_on_block_writeback_range(inode,
>  						map->m_pblk, map->m_len);
>  
> +		if (multidevice_dio) {
> +			bidx = f2fs_target_device_index(sbi, map->m_pblk);
> +			if (bidx) {
> +				map->m_bdev = FDEV(bidx).bdev;
> +				map->m_pblk -= FDEV(bidx).start_blk;
> +				f2fs_bug_on(sbi,
> +					map->m_bdev != FDEV(bidx).bdev ||
> +					map->m_pblk + map->m_len >
> +						FDEV(bidx).end_blk + 1);
> +			}
> +		}
> +	}
> +
>  	if (flag == F2FS_GET_BLOCK_PRECACHE) {
>  		if (map->m_flags & F2FS_MAP_MAPPED) {
>  			unsigned int ofs = start_pgofs - map->m_lblk;
> @@ -1720,6 +1760,9 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
>  		map_bh(bh, inode->i_sb, map.m_pblk);
>  		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
>  		bh->b_size = blks_to_bytes(inode, map.m_len);
> +
> +		if (f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag))
> +			bh->b_bdev = map.m_bdev;
>  	}
>  	return err;
>  }
> @@ -3511,6 +3554,7 @@ static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
>  {
>  	struct f2fs_private_dio *dio;
>  	bool write = (bio_op(bio) == REQ_OP_WRITE);
> +	block_t blkaddr = SECTOR_TO_BLOCK(bio->bi_iter.bi_sector);
>  	unsigned int blkcnt = bio_sectors(bio) >> F2FS_LOG_SECTORS_PER_BLOCK;
>  
>  	dio = f2fs_kzalloc(F2FS_I_SB(inode),
> @@ -3527,6 +3571,10 @@ static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
>  	bio->bi_end_io = f2fs_dio_end_io;
>  	bio->bi_private = dio;
>  
> +	if (write)
> +		f2fs_update_device_state(F2FS_I_SB(inode), inode->i_ino,
> +							blkaddr, blkcnt);
> +
>  	inc_page_counts(F2FS_I_SB(inode),
>  			write ? F2FS_DIO_WRITE : F2FS_DIO_READ, dio->blkcnt);
>  
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 7369f8087f64..f3ac3cbad2e3 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -607,6 +607,7 @@ struct extent_tree {
>  				F2FS_MAP_UNWRITTEN)
>  
>  struct f2fs_map_blocks {
> +	struct block_device *m_bdev;	/* for multi-device */
>  	block_t m_pblk;
>  	block_t m_lblk;
>  	unsigned int m_len;
> @@ -1712,12 +1713,15 @@ struct f2fs_sb_info {
>  
>  	/* For shrinker support */
>  	struct list_head s_list;
> +	struct mutex umount_mutex;
> +	unsigned int shrinker_run_no;
> +
> +	/* For multi devices */
>  	int s_ndevs;				/* number of devices */
>  	struct f2fs_dev_info *devs;		/* for device list */
>  	unsigned int dirty_device;		/* for checkpoint data flush */
>  	spinlock_t dev_lock;			/* protect dirty_device */
> -	struct mutex umount_mutex;
> -	unsigned int shrinker_run_no;
> +	bool aligned_blksize;			/* all devices has the same logical blksize */
>  
>  	/* For write statistics */
>  	u64 sectors_written_start;
> @@ -3527,6 +3531,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>  			block_t old_blkaddr, block_t *new_blkaddr,
>  			struct f2fs_summary *sum, int type,
>  			struct f2fs_io_info *fio);
> +void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
> +					block_t blkaddr, unsigned int blkcnt);
>  void f2fs_wait_on_page_writeback(struct page *page,
>  			enum page_type type, bool ordered, bool locked);
>  void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
> @@ -4334,6 +4340,16 @@ static inline int allow_outplace_dio(struct inode *inode,
>  				!block_unaligned_IO(inode, iocb, iter));
>  }
>  
> +static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi,
> +								int flag)
> +{
> +	if (!f2fs_is_multi_device(sbi))
> +		return false;
> +	if (flag != F2FS_GET_BLOCK_DIO)
> +		return false;
> +	return sbi->aligned_blksize;
> +}
> +
>  static inline bool f2fs_force_buffered_io(struct inode *inode,
>  				struct kiocb *iocb, struct iov_iter *iter)
>  {
> @@ -4342,7 +4358,9 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
>  
>  	if (f2fs_post_read_required(inode))
>  		return true;
> -	if (f2fs_is_multi_device(sbi))
> +
> +	/* disallow direct IO if any of devices has unaligned blksize */
> +	if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
>  		return true;
>  	/*
>  	 * for blkzoned device, fallback direct IO to buffered IO, so
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 15cc89eef28d..6fcfd234108a 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -3442,24 +3442,24 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
>  	up_read(&SM_I(sbi)->curseg_lock);
>  }
>  
> -static void update_device_state(struct f2fs_io_info *fio)
> +void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
> +					block_t blkaddr, unsigned int blkcnt)
>  {
> -	struct f2fs_sb_info *sbi = fio->sbi;
> -	unsigned int devidx;
> -
>  	if (!f2fs_is_multi_device(sbi))
>  		return;
>  
> -	devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
> +	for (; blkcnt > 0; blkcnt--, blkaddr++) {
> +		unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
>  
> -	/* update device state for fsync */
> -	f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
> +		/* update device state for fsync */
> +		f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
>  
> -	/* update device state for checkpoint */
> -	if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
> -		spin_lock(&sbi->dev_lock);
> -		f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
> -		spin_unlock(&sbi->dev_lock);
> +		/* update device state for checkpoint */
> +		if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
> +			spin_lock(&sbi->dev_lock);
> +			f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
> +			spin_unlock(&sbi->dev_lock);
> +		}
>  	}
>  }
>  
> @@ -3486,7 +3486,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
>  		goto reallocate;
>  	}
>  
> -	update_device_state(fio);
> +	f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
>  
>  	if (keep_order)
>  		up_read(&fio->sbi->io_order_lock);
> @@ -3575,7 +3575,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
>  	else
>  		err = f2fs_submit_page_bio(fio);
>  	if (!err) {
> -		update_device_state(fio);
> +		f2fs_update_device_state(fio->sbi, fio->ino,
> +						fio->new_blkaddr, 1);
>  		f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
>  	}
>  
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 72eb9d70969f..dced7778d530 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -3646,6 +3646,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
>  {
>  	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
>  	unsigned int max_devices = MAX_DEVICES;
> +	unsigned int logical_blksize;
>  	int i;
>  
>  	/* Initialize single device information */
> @@ -3666,6 +3667,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
>  	if (!sbi->devs)
>  		return -ENOMEM;
>  
> +	logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev);
> +	sbi->aligned_blksize = true;
> +
>  	for (i = 0; i < max_devices; i++) {
>  
>  		if (i > 0 && !RDEV(i).path[0])
> @@ -3702,6 +3706,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
>  		/* to release errored devices */
>  		sbi->s_ndevs = i + 1;
>  
> +		if (logical_blksize != bdev_logical_block_size(FDEV(i).bdev))
> +			sbi->aligned_blksize = false;
> +
>  #ifdef CONFIG_BLK_DEV_ZONED
>  		if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
>  				!f2fs_sb_has_blkzoned(sbi)) {
> -- 
> 2.22.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ