lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <Y50STj+VwgLs6AMi@google.com>
Date:   Fri, 16 Dec 2022 16:50:22 -0800
From:   Jaegeuk Kim <jaegeuk@...nel.org>
To:     qixiaoyu <qxy65535@...il.com>
Cc:     linux-kernel@...r.kernel.org,
        linux-f2fs-devel@...ts.sourceforge.net,
        qixiaoyu1 <qixiaoyu1@...omi.com>,
        xiongping1 <xiongping1@...omi.com>
Subject: Re: [f2fs-dev] [PATCH 6/6] f2fs: add block_age-based extent cache

On 12/16, qixiaoyu wrote:
> On Mon, Dec 05, 2022 at 10:54:33AM -0800, Jaegeuk Kim wrote:
> 
> Hi Jaegeuk,
> 
> Thank you for your patchset!
> 
> >  
> >  static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
> > @@ -544,6 +585,8 @@ static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
> >  
> >  	if (type == EX_READ)
> >  		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
> > +	else if (type == EX_BLOCK_AGE)
> > +		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
> >  	return ret;
> >  }
> >  
> 
> If (!en), ei->age may be uninitialized, which makes the trace output confused.
> 
> > +/* This returns a new age and allocated blocks in ei */
> > +static int __get_new_block_age(struct inode *inode, struct extent_info *ei)
> > +{
> > +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > +	loff_t f_size = i_size_read(inode);
> > +	unsigned long long cur_blocks =
> > +				atomic64_read(&sbi->allocated_data_blocks);
> > +
> > +	/*
> > +	 * When I/O is not aligned to a PAGE_SIZE, update will happen to the last
> > +	 * file block even in seq write. So don't record age for newly last file
> > +	 * block here.
> > +	 */
> > +	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
> > +			ei->blk == NEW_ADDR)
> > +		return -EINVAL;
> > +
> > +	if (__lookup_extent_tree(inode, ei->fofs, ei, EX_BLOCK_AGE)) {
> > +		unsigned long long cur_age;
> > +
> > +		if (cur_blocks >= ei->last_blocks)
> > +			cur_age = cur_blocks - ei->last_blocks;
> > +		else
> > +			/* allocated_data_blocks overflow */
> > +			cur_age = ULLONG_MAX - ei->last_blocks + cur_blocks;
> > +
> > +		if (ei->age)
> > +			ei->age = __calculate_block_age(cur_age, ei->age);
> > +		else
> > +			ei->age = cur_age;
> > +		ei->last_blocks = cur_blocks;
> > +		WARN_ON(ei->age > cur_blocks);
> > +		return 0;
> > +	}
> > +
> > +	f2fs_bug_on(sbi, ei->blk == NULL_ADDR);
> > +
> > +	/* the data block was allocated for the first time */
> > +	if (ei->blk == NEW_ADDR)
> > +		goto out;
> > +
> > +	if (__is_valid_data_blkaddr(ei->blk) &&
> > +			!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE)) {
> > +		f2fs_bug_on(sbi, 1);
> > +		return -EINVAL;
> > +	}
> > +out:
> > +	/*
> > +	 * init block age with zero, this can happen when the block age extent
> > +	 * was reclaimed due to memory constraint or system reboot
> > +	 */
> > +	ei->age = 0;
> > +	ei->last_blocks = cur_blocks;
> > +	return 0;
> > +}
> > +
> >  static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
> >  {
> >  	struct extent_info ei;
> > @@ -823,6 +951,10 @@ static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type typ
> >  			ei.blk = NULL_ADDR;
> >  		else
> >  			ei.blk = dn->data_blkaddr;
> > +	} else if (type == EX_BLOCK_AGE) {
> > +		ei.blk = dn->data_blkaddr;
> > +		if (__get_new_block_age(dn->inode, &ei))
> > +			return;
> >  	}
> >  	__update_extent_tree_range(dn->inode, &ei, type);
> >  }
> 
> Here, the whole extent will be updated to the same new age in __get_new_block_age
> and __update_extent_tree_range.
> 
> For example, if we create a new file with 10 blocks and then update fofs=3, we
> will get:
>   fofs: 0, len: 10, age: 1000
> But in our design, we expect to split the large age extent cache to record the
> updated block:
>   fofs: 0, len: 3, age: 0
>   fofs: 3, len: 1, age: 1000
>   fofs: 4, len: 6, age: 0
> 
> Some blocks in the file can be updated more frequently than others and we want
> distinguish them by the block age extent cache, currently the code breaks it.

Could you please check this patch series?

https://lore.kernel.org/linux-f2fs-devel/20221217004659.2092427-1-jaegeuk@kernel.org/T/#t

> 
> Moreover, in the current code, if we update more than one block of the file at
> once, the age of all extent will soon be close to 0, which is not expected.
> 
> f2fs_lookup_age_extent_tree_end: dev = (254,7), ino = 34547, pgofs = 2, age_ext_info(fofs: 0, len: 325, age: 172156, blocks: 861203)
> f2fs_update_age_extent_tree_range: dev = (254,7), ino = 34547, pgofs = 0, len = 325, age = 51647, blocks = 861204
> f2fs_lookup_age_extent_tree_end: dev = (254,7), ino = 34547, pgofs = 4, age_ext_info(fofs: 0, len: 325, age: 51647, blocks: 861204)
> f2fs_update_age_extent_tree_range: dev = (254,7), ino = 34547, pgofs = 0, len = 325, age = 15494, blocks = 861205
> f2fs_lookup_age_extent_tree_end: dev = (254,7), ino = 34547, pgofs = 6, age_ext_info(fofs: 0, len: 325, age: 15494, blocks: 861205)
> f2fs_update_age_extent_tree_range: dev = (254,7), ino = 34547, pgofs = 0, len = 325, age = 4648, blocks = 861206
> f2fs_lookup_age_extent_tree_end: dev = (254,7), ino = 34547, pgofs = 8, age_ext_info(fofs: 0, len: 325, age: 4648, blocks: 861206)
> f2fs_update_age_extent_tree_range: dev = (254,7), ino = 34547, pgofs = 0, len = 325, age = 1395, blocks = 861207
> 
> > @@ -940,6 +1072,43 @@ unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrin
> >  	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
> >  }
> >  
> > +/* block age extent cache operations */
> > +bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
> > +				struct extent_info *ei)
> > +{
> > +	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
> > +		return false;
> > +
> > +	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
> > +}
> > +
> > +void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
> > +{
> > +	return __update_extent_cache(dn, EX_BLOCK_AGE);
> > +}
> > +
> > +void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
> > +				pgoff_t fofs, unsigned int len)
> > +{
> > +	struct extent_info ei = {
> > +		.fofs = fofs,
> > +		.len = len,
> > +	};
> > +
> > +	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
> > +		return;
> > +
> > +	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
> > +}
> > +
> > +unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
> > +{
> > +	if (!test_opt(sbi, AGE_EXTENT_CACHE))
> > +		return 0;
> > +
> > +	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
> > +}
> > +
> >  static unsigned int __destroy_extent_node(struct inode *inode,
> >  					enum extent_type type)
> >  {
> > @@ -960,6 +1129,7 @@ static unsigned int __destroy_extent_node(struct inode *inode,
> >  void f2fs_destroy_extent_node(struct inode *inode)
> >  {
> >  	__destroy_extent_node(inode, EX_READ);
> > +	__destroy_extent_node(inode, EX_BLOCK_AGE);
> >  }
> >  
> >  static void __drop_extent_tree(struct inode *inode, enum extent_type type)
> > @@ -988,6 +1158,7 @@ static void __drop_extent_tree(struct inode *inode, enum extent_type type)
> >  void f2fs_drop_extent_tree(struct inode *inode)
> >  {
> >  	__drop_extent_tree(inode, EX_READ);
> > +	__drop_extent_tree(inode, EX_BLOCK_AGE);
> >  }
> >  
> >  static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
> > @@ -1028,6 +1199,7 @@ static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
> >  void f2fs_destroy_extent_tree(struct inode *inode)
> >  {
> >  	__destroy_extent_tree(inode, EX_READ);
> > +	__destroy_extent_tree(inode, EX_BLOCK_AGE);
> >  }
> >  
> >  static void __init_extent_tree_info(struct extent_tree_info *eti)
> > @@ -1045,6 +1217,12 @@ static void __init_extent_tree_info(struct extent_tree_info *eti)
> >  void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
> >  {
> >  	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
> > +	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);
> > +
> > +	/* initialize for block age extents */
> > +	atomic64_set(&sbi->allocated_data_blocks, 0);
> > +	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
> > +	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
> >  }
> >  
> >  int __init f2fs_create_extent_cache(void)
> > diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> > index ff940cba4600..eb71edcf70de 100644
> > --- a/fs/f2fs/f2fs.h
> > +++ b/fs/f2fs/f2fs.h
> > @@ -107,6 +107,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
> >  #define F2FS_MOUNT_MERGE_CHECKPOINT	0x10000000
> >  #define	F2FS_MOUNT_GC_MERGE		0x20000000
> >  #define F2FS_MOUNT_COMPRESS_CACHE	0x40000000
> > +#define F2FS_MOUNT_AGE_EXTENT_CACHE	0x80000000
> >  
> >  #define F2FS_OPTION(sbi)	((sbi)->mount_opt)
> >  #define clear_opt(sbi, option)	(F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
> > @@ -607,9 +608,22 @@ enum {
> >  /* number of extent info in extent cache we try to shrink */
> >  #define READ_EXTENT_CACHE_SHRINK_NUMBER	128
> >  
> > +/* number of age extent info in extent cache we try to shrink */
> > +#define AGE_EXTENT_CACHE_SHRINK_NUMBER	128
> > +#define LAST_AGE_WEIGHT			30
> > +#define SAME_AGE_REGION			1024
> > +
> > +/*
> > + * Define data block with age less than 1GB as hot data
> > + * define data block with age less than 10GB but more than 1GB as warm data
> > + */
> > +#define DEF_HOT_DATA_AGE_THRESHOLD	262144
> > +#define DEF_WARM_DATA_AGE_THRESHOLD	2621440
> > +
> >  /* extent cache type */
> >  enum extent_type {
> >  	EX_READ,
> > +	EX_BLOCK_AGE,
> >  	NR_EXTENT_CACHES,
> >  };
> >  
> > @@ -637,6 +651,13 @@ struct extent_info {
> >  			unsigned int c_len;
> >  #endif
> >  		};
> > +		/* block age extent_cache */
> > +		struct {
> > +			/* block age of the extent */
> > +			unsigned long long age;
> > +			/* last total blocks allocated */
> > +			unsigned long long last_blocks;
> > +		};
> >  	};
> >  };
> >  
> > @@ -1653,6 +1674,11 @@ struct f2fs_sb_info {
> >  
> >  	/* for extent tree cache */
> >  	struct extent_tree_info extent_tree[NR_EXTENT_CACHES];
> > +	atomic64_t allocated_data_blocks;	/* for block age extent_cache */
> > +
> > +	/* The threshold used for hot and warm data seperation*/
> > +	unsigned int hot_data_age_threshold;
> > +	unsigned int warm_data_age_threshold;
> >  
> >  	/* basic filesystem units */
> >  	unsigned int log_sectors_per_block;	/* log2 sectors per block */
> > @@ -3857,6 +3883,8 @@ struct f2fs_stat_info {
> >  	unsigned long long ext_mem[NR_EXTENT_CACHES];
> >  	/* for read extent cache */
> >  	unsigned long long hit_largest;
> > +	/* for block age extent cache */
> > +	unsigned long long allocated_data_blocks;
> >  	int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
> >  	int ndirty_data, ndirty_qdata;
> >  	unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
> > @@ -4168,6 +4196,16 @@ void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
> >  unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi,
> >  			int nr_shrink);
> >  
> > +/* block age extent cache ops */
> > +void f2fs_init_age_extent_tree(struct inode *inode);
> > +bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
> > +			struct extent_info *ei);
> > +void f2fs_update_age_extent_cache(struct dnode_of_data *dn);
> > +void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
> > +			pgoff_t fofs, unsigned int len);
> > +unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi,
> > +			int nr_shrink);
> > +
> >  /*
> >   * sysfs.c
> >   */
> > diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
> > index cbe7c24065c7..56c23b5e9d65 100644
> > --- a/fs/f2fs/file.c
> > +++ b/fs/f2fs/file.c
> > @@ -619,6 +619,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
> >  		fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page),
> >  							dn->inode) + ofs;
> >  		f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
> > +		f2fs_update_age_extent_cache_range(dn, fofs, nr_free);
> >  		dec_valid_block_count(sbi, dn->inode, nr_free);
> >  	}
> >  	dn->ofs_in_node = ofs;
> > diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> > index c845c16f97d0..ff6cf66ed46b 100644
> > --- a/fs/f2fs/inode.c
> > +++ b/fs/f2fs/inode.c
> > @@ -480,6 +480,7 @@ static int do_read_inode(struct inode *inode)
> >  
> >  	/* Need all the flag bits */
> >  	f2fs_init_read_extent_tree(inode, node_page);
> > +	f2fs_init_age_extent_tree(inode);
> >  
> >  	f2fs_put_page(node_page, 1);
> >  
> > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > index 07419c3e42a5..dde4c0458704 100644
> > --- a/fs/f2fs/node.c
> > +++ b/fs/f2fs/node.c
> > @@ -60,7 +60,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
> >  	avail_ram = val.totalram - val.totalhigh;
> >  
> >  	/*
> > -	 * give 25%, 25%, 50%, 50%, 50% memory for each components respectively
> > +	 * give 25%, 25%, 50%, 50%, 25%, 25% memory for each components respectively
> >  	 */
> >  	if (type == FREE_NIDS) {
> >  		mem_size = (nm_i->nid_cnt[FREE_NID] *
> > @@ -85,14 +85,16 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
> >  						sizeof(struct ino_entry);
> >  		mem_size >>= PAGE_SHIFT;
> >  		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
> > -	} else if (type == READ_EXTENT_CACHE) {
> > -		struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
> > +	} else if (type == READ_EXTENT_CACHE || type == AGE_EXTENT_CACHE) {
> > +		enum extent_type etype = type == READ_EXTENT_CACHE ?
> > +						EX_READ : EX_BLOCK_AGE;
> > +		struct extent_tree_info *eti = &sbi->extent_tree[etype];
> >  
> >  		mem_size = (atomic_read(&eti->total_ext_tree) *
> >  				sizeof(struct extent_tree) +
> >  				atomic_read(&eti->total_ext_node) *
> >  				sizeof(struct extent_node)) >> PAGE_SHIFT;
> > -		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
> > +		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
> >  	} else if (type == DISCARD_CACHE) {
> >  		mem_size = (atomic_read(&dcc->discard_cmd_cnt) *
> >  				sizeof(struct discard_cmd)) >> PAGE_SHIFT;
> > diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
> > index 0aa48704c77a..99454d46a939 100644
> > --- a/fs/f2fs/node.h
> > +++ b/fs/f2fs/node.h
> > @@ -147,6 +147,7 @@ enum mem_type {
> >  	DIRTY_DENTS,	/* indicates dirty dentry pages */
> >  	INO_ENTRIES,	/* indicates inode entries */
> >  	READ_EXTENT_CACHE,	/* indicates read extent cache */
> > +	AGE_EXTENT_CACHE,	/* indicates age extent cache */
> >  	DISCARD_CACHE,	/* indicates memory of cached discard cmds */
> >  	COMPRESS_PAGE,	/* indicates memory of cached compressed pages */
> >  	BASE_CHECK,	/* check kernel status */
> > diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> > index 8722d1a13c17..dee712f7225f 100644
> > --- a/fs/f2fs/segment.c
> > +++ b/fs/f2fs/segment.c
> > @@ -453,6 +453,11 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
> >  		f2fs_shrink_read_extent_tree(sbi,
> >  				READ_EXTENT_CACHE_SHRINK_NUMBER);
> >  
> > +	/* try to shrink age extent cache when there is no enough memory */
> > +	if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE))
> > +		f2fs_shrink_age_extent_tree(sbi,
> > +				AGE_EXTENT_CACHE_SHRINK_NUMBER);
> > +
> >  	/* check the # of cached NAT entries */
> >  	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
> >  		f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
> > @@ -3151,10 +3156,28 @@ static int __get_segment_type_4(struct f2fs_io_info *fio)
> >  	}
> >  }
> >  
> > +static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
> > +{
> > +	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
> > +	struct extent_info ei;
> > +
> > +	if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
> > +		if (!ei.age)
> > +			return NO_CHECK_TYPE;
> > +		if (ei.age <= sbi->hot_data_age_threshold)
> > +			return CURSEG_HOT_DATA;
> > +		if (ei.age <= sbi->warm_data_age_threshold)
> > +			return CURSEG_WARM_DATA;
> > +		return CURSEG_COLD_DATA;
> > +	}
> > +	return NO_CHECK_TYPE;
> > +}
> > +
> >  static int __get_segment_type_6(struct f2fs_io_info *fio)
> >  {
> >  	if (fio->type == DATA) {
> >  		struct inode *inode = fio->page->mapping->host;
> > +		int type;
> >  
> >  		if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
> >  			return CURSEG_COLD_DATA_PINNED;
> > @@ -3169,6 +3192,11 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
> >  		}
> >  		if (file_is_cold(inode) || f2fs_need_compress_data(inode))
> >  			return CURSEG_COLD_DATA;
> > +
> > +		type = __get_age_segment_type(inode, fio->page->index);
> > +		if (type != NO_CHECK_TYPE)
> > +			return type;
> > +
> >  		if (file_is_hot(inode) ||
> >  				is_inode_flag_set(inode, FI_HOT_DATA) ||
> >  				f2fs_is_cow_file(inode))
> > @@ -3287,6 +3315,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
> >  	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
> >  	locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
> >  
> > +	if (IS_DATASEG(type))
> > +		atomic64_inc(&sbi->allocated_data_blocks);
> > +
> >  	up_write(&sit_i->sentry_lock);
> >  
> >  	if (page && IS_NODESEG(type)) {
> > @@ -3414,6 +3445,8 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn,
> >  	struct f2fs_summary sum;
> >  
> >  	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
> > +	if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO)
> > +		f2fs_update_age_extent_cache(dn);
> >  	set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
> >  	do_write_page(&sum, fio);
> >  	f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
> > diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
> > index 33c490e69ae3..83d6fb97dcae 100644
> > --- a/fs/f2fs/shrinker.c
> > +++ b/fs/f2fs/shrinker.c
> > @@ -59,6 +59,9 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
> >  		/* count read extent cache entries */
> >  		count += __count_extent_cache(sbi, EX_READ);
> >  
> > +		/* count block age extent cache entries */
> > +		count += __count_extent_cache(sbi, EX_BLOCK_AGE);
> > +
> >  		/* count clean nat cache entries */
> >  		count += __count_nat_entries(sbi);
> >  
> > @@ -102,8 +105,11 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
> >  
> >  		sbi->shrinker_run_no = run_no;
> >  
> > +		/* shrink extent cache entries */
> > +		freed += f2fs_shrink_age_extent_tree(sbi, nr >> 2);
> > +
> >  		/* shrink read extent cache entries */
> > -		freed += f2fs_shrink_read_extent_tree(sbi, nr >> 1);
> > +		freed += f2fs_shrink_read_extent_tree(sbi, nr >> 2);
> >  
> >  		/* shrink clean nat cache entries */
> >  		if (freed < nr)
> > @@ -134,6 +140,8 @@ void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
> >  void f2fs_leave_shrinker(struct f2fs_sb_info *sbi)
> >  {
> >  	f2fs_shrink_read_extent_tree(sbi, __count_extent_cache(sbi, EX_READ));
> > +	f2fs_shrink_age_extent_tree(sbi,
> > +				__count_extent_cache(sbi, EX_BLOCK_AGE));
> >  
> >  	spin_lock(&f2fs_list_lock);
> >  	list_del_init(&sbi->s_list);
> > diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> > index 10bd03bbefec..5bdab376b852 100644
> > --- a/fs/f2fs/super.c
> > +++ b/fs/f2fs/super.c
> > @@ -163,6 +163,7 @@ enum {
> >  	Opt_nogc_merge,
> >  	Opt_discard_unit,
> >  	Opt_memory_mode,
> > +	Opt_age_extent_cache,
> >  	Opt_err,
> >  };
> >  
> > @@ -241,6 +242,7 @@ static match_table_t f2fs_tokens = {
> >  	{Opt_nogc_merge, "nogc_merge"},
> >  	{Opt_discard_unit, "discard_unit=%s"},
> >  	{Opt_memory_mode, "memory=%s"},
> > +	{Opt_age_extent_cache, "age_extent_cache"},
> >  	{Opt_err, NULL},
> >  };
> >  
> > @@ -1257,6 +1259,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
> >  			}
> >  			kfree(name);
> >  			break;
> > +		case Opt_age_extent_cache:
> > +			set_opt(sbi, AGE_EXTENT_CACHE);
> > +			break;
> >  		default:
> >  			f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
> >  				 p);
> > @@ -1958,6 +1963,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
> >  		seq_puts(seq, ",read_extent_cache");
> >  	else
> >  		seq_puts(seq, ",no_read_extent_cache");
> > +	if (test_opt(sbi, AGE_EXTENT_CACHE))
> > +		seq_puts(seq, ",age_extent_cache");
> >  	if (test_opt(sbi, DATA_FLUSH))
> >  		seq_puts(seq, ",data_flush");
> >  
> > @@ -2219,6 +2226,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
> >  	bool need_restart_flush = false, need_stop_flush = false;
> >  	bool need_restart_discard = false, need_stop_discard = false;
> >  	bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE);
> > +	bool no_age_extent_cache = !test_opt(sbi, AGE_EXTENT_CACHE);
> >  	bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT);
> >  	bool no_io_align = !F2FS_IO_ALIGNED(sbi);
> >  	bool no_atgc = !test_opt(sbi, ATGC);
> > @@ -2313,6 +2321,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
> >  		f2fs_warn(sbi, "switch extent_cache option is not allowed");
> >  		goto restore_opts;
> >  	}
> > +	/* disallow enable/disable age extent_cache dynamically */
> > +	if (no_age_extent_cache == !!test_opt(sbi, AGE_EXTENT_CACHE)) {
> > +		err = -EINVAL;
> > +		f2fs_warn(sbi, "switch age_extent_cache option is not allowed");
> > +		goto restore_opts;
> > +	}
> >  
> >  	if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) {
> >  		err = -EINVAL;
> > diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
> > index a4745d596310..2ab215110596 100644
> > --- a/fs/f2fs/sysfs.c
> > +++ b/fs/f2fs/sysfs.c
> > @@ -668,6 +668,24 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
> >  		return count;
> >  	}
> >  
> > +	if (!strcmp(a->attr.name, "hot_data_age_threshold")) {
> > +		if (t == 0 || t >= sbi->warm_data_age_threshold)
> > +			return -EINVAL;
> > +		if (t == *ui)
> > +			return count;
> > +		*ui = (unsigned int)t;
> > +		return count;
> > +	}
> > +
> > +	if (!strcmp(a->attr.name, "warm_data_age_threshold")) {
> > +		if (t == 0 || t <= sbi->hot_data_age_threshold)
> > +			return -EINVAL;
> > +		if (t == *ui)
> > +			return count;
> > +		*ui = (unsigned int)t;
> > +		return count;
> > +	}
> > +
> >  	*ui = (unsigned int)t;
> >  
> >  	return count;
> > @@ -923,6 +941,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, peak_atomic_write, peak_atomic_write);
> >  F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, committed_atomic_block, committed_atomic_block);
> >  F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, revoked_atomic_block, revoked_atomic_block);
> >  
> > +/* For block age extent cache */
> > +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, hot_data_age_threshold, hot_data_age_threshold);
> > +F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, warm_data_age_threshold, warm_data_age_threshold);
> > +
> >  #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
> >  static struct attribute *f2fs_attrs[] = {
> >  	ATTR_LIST(gc_urgent_sleep_time),
> > @@ -1018,6 +1040,8 @@ static struct attribute *f2fs_attrs[] = {
> >  	ATTR_LIST(peak_atomic_write),
> >  	ATTR_LIST(committed_atomic_block),
> >  	ATTR_LIST(revoked_atomic_block),
> > +	ATTR_LIST(hot_data_age_threshold),
> > +	ATTR_LIST(warm_data_age_threshold),
> >  	NULL,
> >  };
> >  ATTRIBUTE_GROUPS(f2fs);
> > diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
> > index 2bb37892d2ba..31d994e6b4ca 100644
> > --- a/include/trace/events/f2fs.h
> > +++ b/include/trace/events/f2fs.h
> > @@ -49,6 +49,7 @@ TRACE_DEFINE_ENUM(CP_TRIMMED);
> >  TRACE_DEFINE_ENUM(CP_PAUSE);
> >  TRACE_DEFINE_ENUM(CP_RESIZE);
> >  TRACE_DEFINE_ENUM(EX_READ);
> > +TRACE_DEFINE_ENUM(EX_BLOCK_AGE);
> >  
> >  #define show_block_type(type)						\
> >  	__print_symbolic(type,						\
> > @@ -155,6 +156,11 @@ TRACE_DEFINE_ENUM(EX_READ);
> >  		{ COMPRESS_ZSTD,	"ZSTD" },			\
> >  		{ COMPRESS_LZORLE,	"LZO-RLE" })
> >  
> > +#define show_extent_type(type)						\
> > +	__print_symbolic(type,						\
> > +		{ EX_READ,	"Read" },				\
> > +		{ EX_BLOCK_AGE,	"Block Age" })
> > +
> >  struct f2fs_sb_info;
> >  struct f2fs_io_info;
> >  struct extent_info;
> > @@ -1544,7 +1550,7 @@ TRACE_EVENT(f2fs_lookup_extent_tree_start,
> >  	TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, type = %s",
> >  		show_dev_ino(__entry),
> >  		__entry->pgofs,
> > -		__entry->type == EX_READ ? "Read" : "N/A")
> > +		show_extent_type(__entry->type))
> >  );
> >  
> >  TRACE_EVENT_CONDITION(f2fs_lookup_read_extent_tree_end,
> > @@ -1583,6 +1589,45 @@ TRACE_EVENT_CONDITION(f2fs_lookup_read_extent_tree_end,
> >  		__entry->blk)
> >  );
> >  
> > +TRACE_EVENT_CONDITION(f2fs_lookup_age_extent_tree_end,
> > +
> > +	TP_PROTO(struct inode *inode, unsigned int pgofs,
> > +						struct extent_info *ei),
> > +
> > +	TP_ARGS(inode, pgofs, ei),
> > +
> > +	TP_CONDITION(ei),
> > +
> > +	TP_STRUCT__entry(
> > +		__field(dev_t,	dev)
> > +		__field(ino_t,	ino)
> > +		__field(unsigned int, pgofs)
> > +		__field(unsigned int, fofs)
> > +		__field(unsigned int, len)
> > +		__field(unsigned long long, age)
> > +		__field(unsigned long long, blocks)
> > +	),
> > +
> > +	TP_fast_assign(
> > +		__entry->dev = inode->i_sb->s_dev;
> > +		__entry->ino = inode->i_ino;
> > +		__entry->pgofs = pgofs;
> > +		__entry->fofs = ei->fofs;
> > +		__entry->len = ei->len;
> > +		__entry->age = ei->age;
> > +		__entry->blocks = ei->last_blocks;
> > +	),
> > +
> > +	TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, "
> > +		"age_ext_info(fofs: %u, len: %u, age: %llu, blocks: %llu)",
> > +		show_dev_ino(__entry),
> > +		__entry->pgofs,
> > +		__entry->fofs,
> > +		__entry->len,
> > +		__entry->age,
> > +		__entry->blocks)
> > +);
> > +
> >  TRACE_EVENT(f2fs_update_read_extent_tree_range,
> >  
> >  	TP_PROTO(struct inode *inode, unsigned int pgofs, unsigned int len,
> > @@ -1618,6 +1663,41 @@ TRACE_EVENT(f2fs_update_read_extent_tree_range,
> >  		__entry->c_len)
> >  );
> >  
> > +TRACE_EVENT(f2fs_update_age_extent_tree_range,
> > +
> > +	TP_PROTO(struct inode *inode, unsigned int pgofs, unsigned int len,
> > +					unsigned long long age,
> > +					unsigned long long last_blks),
> > +
> > +	TP_ARGS(inode, pgofs, len, age, last_blks),
> > +
> > +	TP_STRUCT__entry(
> > +		__field(dev_t,	dev)
> > +		__field(ino_t,	ino)
> > +		__field(unsigned int, pgofs)
> > +		__field(unsigned int, len)
> > +		__field(unsigned long long, age)
> > +		__field(unsigned long long, blocks)
> > +	),
> > +
> > +	TP_fast_assign(
> > +		__entry->dev = inode->i_sb->s_dev;
> > +		__entry->ino = inode->i_ino;
> > +		__entry->pgofs = pgofs;
> > +		__entry->len = len;
> > +		__entry->age = age;
> > +		__entry->blocks = last_blks;
> > +	),
> > +
> > +	TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, "
> > +				"len = %u, age = %llu, blocks = %llu",
> > +		show_dev_ino(__entry),
> > +		__entry->pgofs,
> > +		__entry->len,
> > +		__entry->age,
> > +		__entry->blocks)
> > +);
> > +
> >  TRACE_EVENT(f2fs_shrink_extent_tree,
> >  
> >  	TP_PROTO(struct f2fs_sb_info *sbi, unsigned int node_cnt,
> > @@ -1643,7 +1723,7 @@ TRACE_EVENT(f2fs_shrink_extent_tree,
> >  		show_dev(__entry->dev),
> >  		__entry->node_cnt,
> >  		__entry->tree_cnt,
> > -		__entry->type == EX_READ ? "Read" : "N/A")
> > +		show_extent_type(__entry->type))
> >  );
> >  
> >  TRACE_EVENT(f2fs_destroy_extent_tree,
> > @@ -1670,7 +1750,7 @@ TRACE_EVENT(f2fs_destroy_extent_tree,
> >  	TP_printk("dev = (%d,%d), ino = %lu, destroyed: node_cnt = %u, type = %s",
> >  		show_dev_ino(__entry),
> >  		__entry->node_cnt,
> > -		__entry->type == EX_READ ? "Read" : "N/A")
> > +		show_extent_type(__entry->type))
> >  );
> >  
> >  DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes,
> > -- 
> > 2.39.0.rc0.267.gcb52ba06e7-goog
> > 
> > 
> > 
> > _______________________________________________
> > Linux-f2fs-devel mailing list
> > Linux-f2fs-devel@...ts.sourceforge.net
> > https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ