lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180420033752.GH39280@jaegeuk-macbookpro.roam.corp.google.com>
Date:   Thu, 19 Apr 2018 20:37:52 -0700
From:   Jaegeuk Kim <jaegeuk@...nel.org>
To:     Chao Yu <yuchao0@...wei.com>
Cc:     linux-f2fs-devel@...ts.sourceforge.net,
        linux-kernel@...r.kernel.org, chao@...nel.org
Subject: Re: [PATCH] f2fs: sepearte hot/cold in free nid

On 04/20, Chao Yu wrote:
> As most indirect node, dindirect node, and xattr node won't be updated
> after they are created, but inode node and other direct node will change
> more frequently, so store their nat entries mixedly in whole nat table
> will suffer:
> - fragment nat table soon due to different update rate
> - more nat block update due to fragmented nat table
> 
> In order to solve above issue, we're trying to separate whole nat table to
> two part:
> a. Hot free nid area:
>  - range: [nid #0, nid #x)
>  - store node block address for
>    * inode node
>    * other direct node
> b. Cold free nid area:
>  - range: [nid #x, max nid)
>  - store node block address for
>    * indirect node
>    * dindirect node
>    * xattr node
> 
> Allocation strategy example:
> 
> Free nid: '-'
> Used nid: '='
> 
> 1. Initial status:
> Free Nids:	|-----------------------------------------------------------------------|
> 		^		^					^		^
> Alloc Range:	|---------------|					|---------------|
> 		hot_start	hot_end					cold_start	cold_end
> 
> 2. Free nids have ran out:
> Free Nids:	|===============-----------------------------------------===============|
> 		^		^					^		^
> Alloc Range:	|===============|					|===============|
> 		hot_start	hot_end					cold_start	cold_end
> 
> 3. Expand hot/cold area range:
> Free Nids:	|===============-----------------------------------------===============|
> 		^				^	^				^
> Alloc Range:	|===============----------------|	|----------------===============|
> 		hot_start			hot_end	cold_start			cold_end
> 
> 4. Hot free nids have ran out:
> Free Nids:	|===============================-------------------------===============|
> 		^				^	^				^
> Alloc Range:	|===============================|	|----------------===============|
> 		hot_start			hot_end	cold_start			cold_end
> 
> 5. Expand hot area range, hot/cold area boundary has been fixed:
> Free Nids:	|===============================-------------------------===============|
> 		^					^				^
> Alloc Range:	|===============================--------|----------------===============|
> 		hot_start				hot_end(cold_start)		cold_end
> 
> Run xfstests with generic/*:
> 
> before
> node_write:	169660
> cp_count:	60118
> node/cp		2.82
> 
> after:
> node_write:	159145
> cp_count:	84501
> node/cp:	2.64

Nice trial tho, I don't see much benefit on this huge patch. I guess we may be
able to find an efficient way to achieve this issue rather than changing whole
stable codes.

How about getting a free nid in the list from head or tail separately?

> 
> Signed-off-by: Chao Yu <yuchao0@...wei.com>
> ---
>  fs/f2fs/checkpoint.c |   4 -
>  fs/f2fs/debug.c      |   6 +-
>  fs/f2fs/f2fs.h       |  19 +++-
>  fs/f2fs/inode.c      |   2 +-
>  fs/f2fs/namei.c      |   2 +-
>  fs/f2fs/node.c       | 302 ++++++++++++++++++++++++++++++++-------------------
>  fs/f2fs/node.h       |  17 +--
>  fs/f2fs/segment.c    |   8 +-
>  fs/f2fs/shrinker.c   |   3 +-
>  fs/f2fs/xattr.c      |  10 +-
>  10 files changed, 221 insertions(+), 152 deletions(-)
> 
> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
> index 96785ffc6181..c17feec72c74 100644
> --- a/fs/f2fs/checkpoint.c
> +++ b/fs/f2fs/checkpoint.c
> @@ -1029,14 +1029,10 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
>  static void __prepare_cp_block(struct f2fs_sb_info *sbi)
>  {
>  	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
> -	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -	nid_t last_nid = nm_i->next_scan_nid;
>  
> -	next_free_nid(sbi, &last_nid);
>  	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
>  	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
>  	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
> -	ckpt->next_free_nid = cpu_to_le32(last_nid);
>  }
>  
>  /*
> diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
> index 7bb036a3bb81..b13c1d4f110f 100644
> --- a/fs/f2fs/debug.c
> +++ b/fs/f2fs/debug.c
> @@ -100,7 +100,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
>  	si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
>  	si->sits = MAIN_SEGS(sbi);
>  	si->dirty_sits = SIT_I(sbi)->dirty_sentries;
> -	si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID];
> +	si->free_nids = NM_I(sbi)->nid_cnt[FREE_HOT_NID] +
> +				NM_I(sbi)->nid_cnt[FREE_COLD_NID];
>  	si->avail_nids = NM_I(sbi)->available_nids;
>  	si->alloc_nids = NM_I(sbi)->nid_cnt[PREALLOC_NID];
>  	si->bg_gc = sbi->bg_gc;
> @@ -235,7 +236,8 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
>  	}
>  
>  	/* free nids */
> -	si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] +
> +	si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_HOT_NID] +
> +				NM_I(sbi)->nid_cnt[FREE_COLD_NID] +
>  				NM_I(sbi)->nid_cnt[PREALLOC_NID]) *
>  				sizeof(struct free_nid);
>  	si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index de1af8dc19e5..9e2b4f84b1b2 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -739,8 +739,11 @@ static inline void __try_update_largest_extent(struct inode *inode,
>   * For free nid management
>   */
>  enum nid_state {
> -	FREE_NID,		/* newly added to free nid list */
> -	PREALLOC_NID,		/* it is preallocated */
> +	FREE_HOT_NID,			/* list with hot type free nids */
> +	FREE_COLD_NID,			/* list with cold type free nid */
> +	MAX_NID_TYPE,
> +	INVALID_NID_TYPE = MAX_NID_TYPE,
> +	PREALLOC_NID = MAX_NID_TYPE,	/* it is preallocated */
>  	MAX_NID_STATE,
>  };
>  
> @@ -764,8 +767,10 @@ struct f2fs_nm_info {
>  
>  	/* free node ids management */
>  	struct radix_tree_root free_nid_root;/* root of the free_nid cache */
> -	struct list_head free_nid_list;		/* list for free nids excluding preallocated nids */
> +	struct list_head free_nid_list[MAX_NID_TYPE];	/* list for free nids excluding preallocated nids */
>  	unsigned int nid_cnt[MAX_NID_STATE];	/* the number of free node id */
> +	unsigned int nat_block_start[MAX_NID_TYPE];	/* nat block start position */
> +	unsigned int nat_block_end[MAX_NID_TYPE];	/* nat block end position */
>  	spinlock_t nid_list_lock;	/* protect nid lists ops */
>  	struct mutex build_lock;	/* lock for build free nids */
>  	unsigned char **free_nid_bitmap;
> @@ -2796,10 +2801,12 @@ int fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
>  			struct writeback_control *wbc, bool atomic);
>  int sync_node_pages(struct f2fs_sb_info *sbi, struct writeback_control *wbc,
>  			bool do_balance, enum iostat_type io_type);
> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount);
> -bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid);
> +bool build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount,
> +						enum nid_state state);
> +enum nid_state default_free_nid_type(struct f2fs_sb_info *sbi, nid_t nid);
> +int alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid, enum nid_state state);
>  void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid);
> -void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid);
> +void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid, enum nid_state state);
>  int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink);
>  void recover_inline_xattr(struct inode *inode, struct page *page);
>  int recover_xattr_data(struct inode *inode, struct page *page);
> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> index 176f8e84bb6e..e9e1ee6c3ba1 100644
> --- a/fs/f2fs/inode.c
> +++ b/fs/f2fs/inode.c
> @@ -585,7 +585,7 @@ void f2fs_evict_inode(struct inode *inode)
>  			add_ino_entry(sbi, inode->i_ino, UPDATE_INO);
>  	}
>  	if (is_inode_flag_set(inode, FI_FREE_NID)) {
> -		alloc_nid_failed(sbi, inode->i_ino);
> +		alloc_nid_failed(sbi, inode->i_ino, FREE_HOT_NID);
>  		clear_inode_flag(inode, FI_FREE_NID);
>  	} else {
>  		f2fs_bug_on(sbi, err &&
> diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
> index b5f404674cad..c049076c49e2 100644
> --- a/fs/f2fs/namei.c
> +++ b/fs/f2fs/namei.c
> @@ -37,7 +37,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
>  		return ERR_PTR(-ENOMEM);
>  
>  	f2fs_lock_op(sbi);
> -	if (!alloc_nid(sbi, &ino)) {
> +	if (!alloc_nid(sbi, &ino, FREE_HOT_NID)) {
>  		f2fs_unlock_op(sbi);
>  		err = -ENOSPC;
>  		goto fail;
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index bbf2c3441ac0..6b74bff5cdc1 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -46,7 +46,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
>  	 * give 25%, 25%, 50%, 50%, 50% memory for each components respectively
>  	 */
>  	if (type == FREE_NIDS) {
> -		mem_size = (nm_i->nid_cnt[FREE_NID] *
> +		mem_size = ((nm_i->nid_cnt[FREE_HOT_NID] +
> +				nm_i->nid_cnt[FREE_COLD_NID]) *
>  				sizeof(struct free_nid)) >> PAGE_SHIFT;
>  		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
>  	} else if (type == NAT_ENTRIES) {
> @@ -651,10 +652,13 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
>  	/* get indirect or direct nodes */
>  	for (i = 1; i <= level; i++) {
>  		bool done = false;
> +		enum nid_state state;
> +
> +		state = (i == 1) ? FREE_HOT_NID : FREE_COLD_NID;
>  
>  		if (!nids[i] && mode == ALLOC_NODE) {
>  			/* alloc new node */
> -			if (!alloc_nid(sbi, &(nids[i]))) {
> +			if (!alloc_nid(sbi, &(nids[i]), state)) {
>  				err = -ENOSPC;
>  				goto release_pages;
>  			}
> @@ -662,7 +666,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
>  			dn->nid = nids[i];
>  			npage[i] = new_node_page(dn, noffset[i]);
>  			if (IS_ERR(npage[i])) {
> -				alloc_nid_failed(sbi, nids[i]);
> +				alloc_nid_failed(sbi, nids[i], state);
>  				err = PTR_ERR(npage[i]);
>  				goto release_pages;
>  			}
> @@ -1796,10 +1800,9 @@ static int __insert_free_nid(struct f2fs_sb_info *sbi,
>  	if (err)
>  		return err;
>  
> -	f2fs_bug_on(sbi, state != i->state);
>  	nm_i->nid_cnt[state]++;
> -	if (state == FREE_NID)
> -		list_add_tail(&i->list, &nm_i->free_nid_list);
> +	if (state < MAX_NID_TYPE)
> +		list_add_tail(&i->list, &nm_i->free_nid_list[state]);
>  	return 0;
>  }
>  
> @@ -1808,9 +1811,8 @@ static void __remove_free_nid(struct f2fs_sb_info *sbi,
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  
> -	f2fs_bug_on(sbi, state != i->state);
>  	nm_i->nid_cnt[state]--;
> -	if (state == FREE_NID)
> +	if (state < MAX_NID_TYPE)
>  		list_del(&i->list);
>  	radix_tree_delete(&nm_i->free_nid_root, i->nid);
>  }
> @@ -1820,7 +1822,6 @@ static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i,
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  
> -	f2fs_bug_on(sbi, org_state != i->state);
>  	i->state = dst_state;
>  	nm_i->nid_cnt[org_state]--;
>  	nm_i->nid_cnt[dst_state]++;
> @@ -1829,8 +1830,9 @@ static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i,
>  	case PREALLOC_NID:
>  		list_del(&i->list);
>  		break;
> -	case FREE_NID:
> -		list_add_tail(&i->list, &nm_i->free_nid_list);
> +	case FREE_HOT_NID:
> +	case FREE_COLD_NID:
> +		list_add_tail(&i->list, &nm_i->free_nid_list[dst_state]);
>  		break;
>  	default:
>  		BUG_ON(1);
> @@ -1862,8 +1864,8 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
>  }
>  
>  /* return if the nid is recognized as free */
> -static bool add_free_nid(struct f2fs_sb_info *sbi,
> -				nid_t nid, bool build, bool update)
> +static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build,
> +					bool update, enum nid_state state)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	struct free_nid *i, *e;
> @@ -1877,7 +1879,7 @@ static bool add_free_nid(struct f2fs_sb_info *sbi,
>  
>  	i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
>  	i->nid = nid;
> -	i->state = FREE_NID;
> +	i->state = state;
>  
>  	radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
>  
> @@ -1912,13 +1914,14 @@ static bool add_free_nid(struct f2fs_sb_info *sbi,
>  
>  		e = __lookup_free_nid_list(nm_i, nid);
>  		if (e) {
> -			if (e->state == FREE_NID)
> +			if (e->state < MAX_NID_TYPE)
>  				ret = true;
>  			goto err_out;
>  		}
>  	}
>  	ret = true;
> -	err = __insert_free_nid(sbi, i, FREE_NID);
> +	if (state < MAX_NID_TYPE)
> +		err = __insert_free_nid(sbi, i, state);
>  err_out:
>  	if (update) {
>  		update_free_nid_bitmap(sbi, nid, ret, build);
> @@ -1941,8 +1944,8 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
>  
>  	spin_lock(&nm_i->nid_list_lock);
>  	i = __lookup_free_nid_list(nm_i, nid);
> -	if (i && i->state == FREE_NID) {
> -		__remove_free_nid(sbi, i, FREE_NID);
> +	if (i && i->state < MAX_NID_TYPE) {
> +		__remove_free_nid(sbi, i, i->state);
>  		need_free = true;
>  	}
>  	spin_unlock(&nm_i->nid_list_lock);
> @@ -1951,36 +1954,33 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
>  		kmem_cache_free(free_nid_slab, i);
>  }
>  
> -static void scan_nat_page(struct f2fs_sb_info *sbi,
> -			struct page *nat_page, nid_t start_nid)
> +static void scan_nat_page(struct f2fs_sb_info *sbi, struct page *nat_page,
> +				unsigned int nat_ofs, enum nid_state state)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	struct f2fs_nat_block *nat_blk = page_address(nat_page);
> -	block_t blk_addr;
> -	unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
> +	nid_t nid = nat_ofs * NAT_ENTRY_PER_BLOCK;
>  	int i;
>  
>  	__set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
>  
> -	i = start_nid % NAT_ENTRY_PER_BLOCK;
> -
> -	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
> -		if (unlikely(start_nid >= nm_i->max_nid))
> -			break;
> +	for (i = 0; i < NAT_ENTRY_PER_BLOCK; i++, nid++) {
> +		block_t blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
>  
> -		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
>  		f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
> +
>  		if (blk_addr == NULL_ADDR) {
> -			add_free_nid(sbi, start_nid, true, true);
> +			add_free_nid(sbi, nid, true, true, state);
>  		} else {
>  			spin_lock(&NM_I(sbi)->nid_list_lock);
> -			update_free_nid_bitmap(sbi, start_nid, false, true);
> +			update_free_nid_bitmap(sbi, nid, false, true);
>  			spin_unlock(&NM_I(sbi)->nid_list_lock);
>  		}
>  	}
>  }
>  
> -static void scan_curseg_cache(struct f2fs_sb_info *sbi)
> +static void scan_curseg_cache(struct f2fs_sb_info *sbi,
> +						enum nid_state state)
>  {
>  	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>  	struct f2fs_journal *journal = curseg->journal;
> @@ -1994,14 +1994,14 @@ static void scan_curseg_cache(struct f2fs_sb_info *sbi)
>  		addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
>  		nid = le32_to_cpu(nid_in_journal(journal, i));
>  		if (addr == NULL_ADDR)
> -			add_free_nid(sbi, nid, true, false);
> +			add_free_nid(sbi, nid, true, false, state);
>  		else
>  			remove_free_nid(sbi, nid);
>  	}
>  	up_read(&curseg->journal_rwsem);
>  }
>  
> -static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
> +static void scan_free_nid_bits(struct f2fs_sb_info *sbi, enum nid_state state)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	unsigned int i, idx;
> @@ -2010,6 +2010,9 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
>  	down_read(&nm_i->nat_tree_lock);
>  
>  	for (i = 0; i < nm_i->nat_blocks; i++) {
> +		if (i < nm_i->nat_block_start[state] ||
> +				i >= nm_i->nat_block_end[state])
> +			continue;
>  		if (!test_bit_le(i, nm_i->nat_block_bitmap))
>  			continue;
>  		if (!nm_i->free_nid_count[i])
> @@ -2021,90 +2024,124 @@ static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
>  				break;
>  
>  			nid = i * NAT_ENTRY_PER_BLOCK + idx;
> -			add_free_nid(sbi, nid, true, false);
> +			add_free_nid(sbi, nid, true, false, state);
>  
> -			if (nm_i->nid_cnt[FREE_NID] >= MAX_FREE_NIDS)
> +			if (nm_i->nid_cnt[state] >= MAX_FREE_NIDS)
>  				goto out;
>  		}
>  	}
>  out:
> -	scan_curseg_cache(sbi);
> +	scan_curseg_cache(sbi, state);
>  
>  	up_read(&nm_i->nat_tree_lock);
>  }
>  
> -static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> +void load_nat_block(struct f2fs_sb_info *sbi, enum nid_state state)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -	int i = 0;
> -	nid_t nid = nm_i->next_scan_nid;
> +	unsigned int nat_ofs;
> +	unsigned int count = FREE_NID_PAGES;
>  
> -	if (unlikely(nid >= nm_i->max_nid))
> -		nid = 0;
> +	if (state == FREE_HOT_NID) {
> +		nat_ofs = nm_i->nat_block_end[FREE_HOT_NID];
> +		nm_i->nat_block_end[FREE_HOT_NID] += count;
> +	} else {
> +		nm_i->nat_block_start[FREE_COLD_NID] -= count;
> +		nat_ofs = nm_i->nat_block_start[FREE_COLD_NID];
> +	}
>  
> -	/* Enough entries */
> -	if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
> -		return;
> +	f2fs_bug_on(sbi, nm_i->nat_block_end[FREE_HOT_NID] >
> +				nm_i->nat_block_start[FREE_COLD_NID]);
>  
> -	if (!sync && !available_free_memory(sbi, FREE_NIDS))
> -		return;
> +	/* readahead nat pages to be scanned */
> +	ra_meta_pages(sbi, nat_ofs, count, META_NAT, true);
>  
> -	if (!mount) {
> -		/* try to find free nids in free_nid_bitmap */
> -		scan_free_nid_bits(sbi);
> +	for (; count > 0; count--, nat_ofs++) {
> +		struct page *page;
>  
> -		if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
> -			return;
> +		if (test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
> +			continue;
> +
> +		page = get_current_nat_page(sbi, nat_ofs * NAT_ENTRY_PER_BLOCK);
> +		scan_nat_page(sbi, page, nat_ofs, state);
> +		f2fs_put_page(page, 1);
>  	}
> +}
>  
> -	/* readahead nat pages to be scanned */
> -	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
> -							META_NAT, true);
> +static bool __build_free_nids(struct f2fs_sb_info *sbi, bool sync,
> +					bool mount, enum nid_state state)
> +{
> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  
> -	down_read(&nm_i->nat_tree_lock);
> +	if (nm_i->nid_cnt[state])
> +		return false;
>  
> -	while (1) {
> -		if (!test_bit_le(NAT_BLOCK_OFFSET(nid),
> -						nm_i->nat_block_bitmap)) {
> -			struct page *page = get_current_nat_page(sbi, nid);
> +	if (!sync && !available_free_memory(sbi, FREE_NIDS))
> +		return false;
>  
> -			scan_nat_page(sbi, page, nid);
> -			f2fs_put_page(page, 1);
> -		}
> +	if (!mount) {
> +		/* try to find free nids in free_nid_bitmap */
> +		scan_free_nid_bits(sbi, state);
>  
> -		nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
> -		if (unlikely(nid >= nm_i->max_nid))
> -			nid = 0;
> +		if (nm_i->nid_cnt[state])
> +			return false;
>  
> -		if (++i >= FREE_NID_PAGES)
> -			break;
> +		/* all nat blocks have been scanned */
> +		if (nm_i->nat_block_end[FREE_HOT_NID] >=
> +				nm_i->nat_block_start[FREE_COLD_NID])
> +			return true;
>  	}
>  
> -	/* go to the next free nat pages to find free nids abundantly */
> -	nm_i->next_scan_nid = nid;
> +	down_read(&nm_i->nat_tree_lock);
> +
> +	/* find free nids from nat blocks */
> +	load_nat_block(sbi, state);
>  
>  	/* find free nids from current sum_pages */
> -	scan_curseg_cache(sbi);
> +	scan_curseg_cache(sbi, state);
>  
>  	up_read(&nm_i->nat_tree_lock);
>  
> -	ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
> -					nm_i->ra_nid_pages, META_NAT, false);
> +	return false;
>  }
>  
> -void build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
> +bool build_free_nids(struct f2fs_sb_info *sbi, bool sync,
> +					bool mount, enum nid_state state)
>  {
> +	bool exhausted;
> +
>  	mutex_lock(&NM_I(sbi)->build_lock);
> -	__build_free_nids(sbi, sync, mount);
> +	exhausted = __build_free_nids(sbi, sync, mount, state);
>  	mutex_unlock(&NM_I(sbi)->build_lock);
> +
> +	return exhausted;
>  }
>  
> -/*
> - * If this function returns success, caller can obtain a new nid
> - * from second parameter of this function.
> - * The returned nid could be used ino as well as nid when inode is created.
> - */
> -bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
> +enum nid_state default_free_nid_type(struct f2fs_sb_info *sbi, nid_t nid)
> +{
> +	struct f2fs_nm_info *nm_i = NM_I(sbi);
> +	unsigned int hot_start = nm_i->nat_block_start[FREE_HOT_NID];
> +	unsigned int hot_end = nm_i->nat_block_end[FREE_HOT_NID];
> +	unsigned int cold_start = nm_i->nat_block_start[FREE_COLD_NID];
> +	unsigned int cold_end = nm_i->nat_block_end[FREE_COLD_NID];
> +
> +	if (hot_start != hot_end) {
> +		if (nid >= hot_start * NAT_ENTRY_PER_BLOCK &&
> +			nid < hot_end * NAT_ENTRY_PER_BLOCK)
> +			return FREE_HOT_NID;
> +	}
> +
> +	if (cold_start != cold_end) {
> +		if (nid >= cold_start * NAT_ENTRY_PER_BLOCK &&
> +			nid < cold_end * NAT_ENTRY_PER_BLOCK)
> +			return FREE_COLD_NID;
> +	}
> +
> +	return INVALID_NID_TYPE;
> +}
> +
> +static int __alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid,
> +						enum nid_state state)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	struct free_nid *i = NULL;
> @@ -2112,38 +2149,61 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
>  #ifdef CONFIG_F2FS_FAULT_INJECTION
>  	if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
>  		f2fs_show_injection_info(FAULT_ALLOC_NID);
> -		return false;
> +		return 0;
>  	}
>  #endif
>  	spin_lock(&nm_i->nid_list_lock);
>  
>  	if (unlikely(nm_i->available_nids == 0)) {
>  		spin_unlock(&nm_i->nid_list_lock);
> -		return false;
> +		return 0;
>  	}
>  
>  	/* We should not use stale free nids created by build_free_nids */
> -	if (nm_i->nid_cnt[FREE_NID] && !on_build_free_nids(nm_i)) {
> -		f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
> -		i = list_first_entry(&nm_i->free_nid_list,
> +	if (nm_i->nid_cnt[state] && !on_build_free_nids(nm_i)) {
> +		f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list[state]));
> +		i = list_first_entry(&nm_i->free_nid_list[state],
>  					struct free_nid, list);
>  		*nid = i->nid;
>  
> -		__move_free_nid(sbi, i, FREE_NID, PREALLOC_NID);
> +		__move_free_nid(sbi, i, state, PREALLOC_NID);
>  		nm_i->available_nids--;
>  
>  		update_free_nid_bitmap(sbi, *nid, false, false);
>  
>  		spin_unlock(&nm_i->nid_list_lock);
> -		return true;
> +
> +		f2fs_bug_on(sbi, *nid >= nm_i->max_nid);
> +		return 1;
>  	}
>  	spin_unlock(&nm_i->nid_list_lock);
>  
>  	/* Let's scan nat pages and its caches to get free nids */
> -	build_free_nids(sbi, true, false);
> +	if (build_free_nids(sbi, true, false, state))
> +		return -1;
>  	goto retry;
>  }
>  
> +/*
> + * If this function returns success, caller can obtain a new nid
> + * from second parameter of this function.
> + * The returned nid could be used ino as well as nid when inode is created.
> + */
> +int alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid, enum nid_state state)
> +{
> +	int ret;
> +
> +	ret = __alloc_nid(sbi, nid, state);
> +	if (ret >= 0)
> +		return ret;
> +
> +	ret = __alloc_nid(sbi, nid, !state);
> +	if (ret < 0)
> +		ret = 0;
> +
> +	return ret;
> +}
> +
>  /*
>   * alloc_nid() should be called prior to this function.
>   */
> @@ -2164,7 +2224,8 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
>  /*
>   * alloc_nid() should be called prior to this function.
>   */
> -void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
> +void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid,
> +						enum nid_state state)
>  {
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	struct free_nid *i;
> @@ -2181,7 +2242,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
>  		__remove_free_nid(sbi, i, PREALLOC_NID);
>  		need_free = true;
>  	} else {
> -		__move_free_nid(sbi, i, PREALLOC_NID, FREE_NID);
> +		__move_free_nid(sbi, i, PREALLOC_NID, state);
>  	}
>  
>  	nm_i->available_nids++;
> @@ -2199,22 +2260,23 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
>  	struct f2fs_nm_info *nm_i = NM_I(sbi);
>  	struct free_nid *i, *next;
>  	int nr = nr_shrink;
> -
> -	if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
> -		return 0;
> +	enum nid_state state;
>  
>  	if (!mutex_trylock(&nm_i->build_lock))
>  		return 0;
>  
>  	spin_lock(&nm_i->nid_list_lock);
> -	list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
> -		if (nr_shrink <= 0 ||
> -				nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
> -			break;
> +	for (state = FREE_HOT_NID; state < MAX_NID_TYPE; state++) {
> +		list_for_each_entry_safe(i, next,
> +				&nm_i->free_nid_list[state], list) {
> +			if (nr_shrink <= 0 ||
> +				nm_i->nid_cnt[state] <= MAX_FREE_NIDS)
> +				break;
>  
> -		__remove_free_nid(sbi, i, FREE_NID);
> -		kmem_cache_free(free_nid_slab, i);
> -		nr_shrink--;
> +			__remove_free_nid(sbi, i, state);
> +			kmem_cache_free(free_nid_slab, i);
> +			nr_shrink--;
> +		}
>  	}
>  	spin_unlock(&nm_i->nid_list_lock);
>  	mutex_unlock(&nm_i->build_lock);
> @@ -2271,13 +2333,13 @@ int recover_xattr_data(struct inode *inode, struct page *page)
>  
>  recover_xnid:
>  	/* 2: update xattr nid in inode */
> -	if (!alloc_nid(sbi, &new_xnid))
> +	if (!alloc_nid(sbi, &new_xnid, FREE_COLD_NID))
>  		return -ENOSPC;
>  
>  	set_new_dnode(&dn, inode, NULL, NULL, new_xnid);
>  	xpage = new_node_page(&dn, XATTR_NODE_OFFSET);
>  	if (IS_ERR(xpage)) {
> -		alloc_nid_failed(sbi, new_xnid);
> +		alloc_nid_failed(sbi, new_xnid, FREE_COLD_NID);
>  		return PTR_ERR(xpage);
>  	}
>  
> @@ -2532,7 +2594,9 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
>  		nat_reset_flag(ne);
>  		__clear_nat_cache_dirty(NM_I(sbi), set, ne);
>  		if (nat_get_blkaddr(ne) == NULL_ADDR) {
> -			add_free_nid(sbi, nid, false, true);
> +			enum nid_state state = default_free_nid_type(sbi, nid);
> +
> +			add_free_nid(sbi, nid, false, true, state);
>  		} else {
>  			spin_lock(&NM_I(sbi)->nid_list_lock);
>  			update_free_nid_bitmap(sbi, nid, false, false);
> @@ -2691,15 +2755,21 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
>  	/* not used nids: 0, node, meta, (and root counted as valid node) */
>  	nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
>  				sbi->nquota_files - F2FS_RESERVED_NODE_NUM;
> -	nm_i->nid_cnt[FREE_NID] = 0;
> +	nm_i->nid_cnt[FREE_HOT_NID] = 0;
> +	nm_i->nid_cnt[FREE_COLD_NID] = 0;
>  	nm_i->nid_cnt[PREALLOC_NID] = 0;
> +	nm_i->nat_block_start[FREE_HOT_NID] =
> +			nm_i->nat_block_end[FREE_HOT_NID] = 0;
> +	nm_i->nat_block_start[FREE_COLD_NID] =
> +			nm_i->nat_block_end[FREE_COLD_NID] = nm_i->nat_blocks;
>  	nm_i->nat_cnt = 0;
>  	nm_i->ram_thresh = DEF_RAM_THRESHOLD;
>  	nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
>  	nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
>  
>  	INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
> -	INIT_LIST_HEAD(&nm_i->free_nid_list);
> +	INIT_LIST_HEAD(&nm_i->free_nid_list[FREE_HOT_NID]);
> +	INIT_LIST_HEAD(&nm_i->free_nid_list[FREE_COLD_NID]);
>  	INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
>  	INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
>  	INIT_LIST_HEAD(&nm_i->nat_entries);
> @@ -2782,7 +2852,8 @@ int build_node_manager(struct f2fs_sb_info *sbi)
>  	/* load free nid status from nat_bits table */
>  	load_free_nid_bitmap(sbi);
>  
> -	build_free_nids(sbi, true, true);
> +	build_free_nids(sbi, true, true, FREE_HOT_NID);
> +	build_free_nids(sbi, true, true, FREE_COLD_NID);
>  	return 0;
>  }
>  
> @@ -2794,21 +2865,26 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
>  	struct nat_entry_set *setvec[SETVEC_SIZE];
>  	nid_t nid = 0;
>  	unsigned int found;
> +	enum nid_state state;
>  
>  	if (!nm_i)
>  		return;
>  
>  	/* destroy free nid list */
>  	spin_lock(&nm_i->nid_list_lock);
> -	list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
> -		__remove_free_nid(sbi, i, FREE_NID);
> -		spin_unlock(&nm_i->nid_list_lock);
> -		kmem_cache_free(free_nid_slab, i);
> -		spin_lock(&nm_i->nid_list_lock);
> +	for (state = FREE_HOT_NID; state < MAX_NID_TYPE; state++) {
> +		list_for_each_entry_safe(i, next_i,
> +				&nm_i->free_nid_list[state], list) {
> +			__remove_free_nid(sbi, i, state);
> +			spin_unlock(&nm_i->nid_list_lock);
> +			kmem_cache_free(free_nid_slab, i);
> +			spin_lock(&nm_i->nid_list_lock);
> +		}
> +		f2fs_bug_on(sbi, nm_i->nid_cnt[state]);
> +		f2fs_bug_on(sbi, !list_empty(&nm_i->free_nid_list[state]));
>  	}
> -	f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID]);
> +
>  	f2fs_bug_on(sbi, nm_i->nid_cnt[PREALLOC_NID]);
> -	f2fs_bug_on(sbi, !list_empty(&nm_i->free_nid_list));
>  	spin_unlock(&nm_i->nid_list_lock);
>  
>  	/* destroy nat cache */
> diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
> index 2129d63ab3d7..732c8e672fe2 100644
> --- a/fs/f2fs/node.h
> +++ b/fs/f2fs/node.h
> @@ -157,24 +157,9 @@ struct nat_entry_set {
>  struct free_nid {
>  	struct list_head list;	/* for free node id list */
>  	nid_t nid;		/* node id */
> -	int state;		/* in use or not: FREE_NID or PREALLOC_NID */
> +	int state;		/* in use or not */
>  };
>  
> -static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
> -{
> -	struct f2fs_nm_info *nm_i = NM_I(sbi);
> -	struct free_nid *fnid;
> -
> -	spin_lock(&nm_i->nid_list_lock);
> -	if (nm_i->nid_cnt[FREE_NID] <= 0) {
> -		spin_unlock(&nm_i->nid_list_lock);
> -		return;
> -	}
> -	fnid = list_first_entry(&nm_i->free_nid_list, struct free_nid, list);
> -	*nid = fnid->nid;
> -	spin_unlock(&nm_i->nid_list_lock);
> -}
> -
>  /*
>   * inline functions
>   */
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 720b01461adc..2ffb5c2d205e 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -487,10 +487,12 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
>  	if (!available_free_memory(sbi, NAT_ENTRIES))
>  		try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
>  
> -	if (!available_free_memory(sbi, FREE_NIDS))
> +	if (!available_free_memory(sbi, FREE_NIDS)) {
>  		try_to_free_nids(sbi, MAX_FREE_NIDS);
> -	else
> -		build_free_nids(sbi, false, false);
> +	} else {
> +		build_free_nids(sbi, false, false, FREE_HOT_NID);
> +		build_free_nids(sbi, false, false, FREE_COLD_NID);
> +	}
>  
>  	if (!is_idle(sbi) && !excess_dirty_nats(sbi))
>  		return;
> diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
> index 0b5664a1a6cc..7b35746fb615 100644
> --- a/fs/f2fs/shrinker.c
> +++ b/fs/f2fs/shrinker.c
> @@ -28,7 +28,8 @@ static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
>  
>  static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
>  {
> -	long count = NM_I(sbi)->nid_cnt[FREE_NID] - MAX_FREE_NIDS;
> +	long count = NM_I(sbi)->nid_cnt[FREE_HOT_NID] +
> +			NM_I(sbi)->nid_cnt[FREE_COLD_NID] - MAX_FREE_NIDS;
>  
>  	return count > 0 ? count : 0;
>  }
> diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
> index ae2dfa709f5d..c42a670e17f6 100644
> --- a/fs/f2fs/xattr.c
> +++ b/fs/f2fs/xattr.c
> @@ -397,7 +397,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
>  	int err = 0;
>  
>  	if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid)
> -		if (!alloc_nid(sbi, &new_nid))
> +		if (!alloc_nid(sbi, &new_nid, FREE_COLD_NID))
>  			return -ENOSPC;
>  
>  	/* write to inline xattr */
> @@ -407,7 +407,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
>  		} else {
>  			in_page = get_node_page(sbi, inode->i_ino);
>  			if (IS_ERR(in_page)) {
> -				alloc_nid_failed(sbi, new_nid);
> +				alloc_nid_failed(sbi, new_nid, FREE_COLD_NID);
>  				return PTR_ERR(in_page);
>  			}
>  			inline_addr = inline_xattr_addr(inode, in_page);
> @@ -418,7 +418,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
>  		/* no need to use xattr node block */
>  		if (hsize <= inline_size) {
>  			err = truncate_xattr_node(inode);
> -			alloc_nid_failed(sbi, new_nid);
> +			alloc_nid_failed(sbi, new_nid, FREE_COLD_NID);
>  			if (err) {
>  				f2fs_put_page(in_page, 1);
>  				return err;
> @@ -434,7 +434,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
>  		xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid);
>  		if (IS_ERR(xpage)) {
>  			err = PTR_ERR(xpage);
> -			alloc_nid_failed(sbi, new_nid);
> +			alloc_nid_failed(sbi, new_nid, FREE_COLD_NID);
>  			goto in_page_out;
>  		}
>  		f2fs_bug_on(sbi, new_nid);
> @@ -445,7 +445,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
>  		xpage = new_node_page(&dn, XATTR_NODE_OFFSET);
>  		if (IS_ERR(xpage)) {
>  			err = PTR_ERR(xpage);
> -			alloc_nid_failed(sbi, new_nid);
> +			alloc_nid_failed(sbi, new_nid, FREE_COLD_NID);
>  			goto in_page_out;
>  		}
>  		alloc_nid_done(sbi, new_nid);
> -- 
> 2.15.0.55.gc2ece9dc4de6

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ