lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20100510223324.424796921@kvm.kroah.org>
Date:	Mon, 10 May 2010 15:32:49 -0700
From:	Greg KH <gregkh@...e.de>
To:	linux-kernel@...r.kernel.org, stable@...nel.org
Cc:	stable-review@...nel.org, torvalds@...ux-foundation.org,
	akpm@...ux-foundation.org, alan@...rguk.ukuu.org.uk,
	xfs@....sgi.com, Dave Chinner <dchinner@...hat.com>
Subject: [078/117] xfs: add a shrinker to background inode reclaim

2.6.33-stable review patch.  If anyone has any objections, please let us know.

------------------

From: Dave Chinner <dchinner@...hat.com>

commit 9bf729c0af67897ea8498ce17c29b0683f7f2028 upstream

On low memory boxes or those with highmem, kernel can OOM before the
background reclaims inodes via xfssyncd. Add a shrinker to run inode
reclaim so that it inode reclaim is expedited when memory is low.

This is more complex than it needs to be because the VM folk don't
want a context added to the shrinker infrastructure. Hence we need
to add a global list of XFS mount structures so the shrinker can
traverse them.

Signed-off-by: Dave Chinner <dchinner@...hat.com>
Reviewed-by: Christoph Hellwig <hch@....de>
Signed-off-by: Greg Kroah-Hartman <gregkh@...e.de>


---
 fs/xfs/linux-2.6/xfs_super.c   |    5 +
 fs/xfs/linux-2.6/xfs_sync.c    |  107 ++++++++++++++++++++++++++++++++++++++---
 fs/xfs/linux-2.6/xfs_sync.h    |    7 ++
 fs/xfs/quota/xfs_qm_syscalls.c |    3 -
 fs/xfs/xfs_ag.h                |    1 
 fs/xfs/xfs_mount.h             |    1 
 6 files changed, 115 insertions(+), 9 deletions(-)

--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -1160,6 +1160,7 @@ xfs_fs_put_super(
 
 	xfs_unmountfs(mp);
 	xfs_freesb(mp);
+	xfs_inode_shrinker_unregister(mp);
 	xfs_icsb_destroy_counters(mp);
 	xfs_close_devices(mp);
 	xfs_dmops_put(mp);
@@ -1523,6 +1524,8 @@ xfs_fs_fill_super(
 	if (error)
 		goto fail_vnrele;
 
+	xfs_inode_shrinker_register(mp);
+
 	kfree(mtpt);
 	return 0;
 
@@ -1767,6 +1770,7 @@ init_xfs_fs(void)
 		goto out_cleanup_procfs;
 
 	vfs_initquota();
+	xfs_inode_shrinker_init();
 
 	error = register_filesystem(&xfs_fs_type);
 	if (error)
@@ -1794,6 +1798,7 @@ exit_xfs_fs(void)
 {
 	vfs_exitquota();
 	unregister_filesystem(&xfs_fs_type);
+	xfs_inode_shrinker_destroy();
 	xfs_sysctl_unregister();
 	xfs_cleanup_procfs();
 	xfs_buf_terminate();
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -95,7 +95,8 @@ xfs_inode_ag_walk(
 					   struct xfs_perag *pag, int flags),
 	int			flags,
 	int			tag,
-	int			exclusive)
+	int			exclusive,
+	int			*nr_to_scan)
 {
 	struct xfs_perag	*pag = &mp->m_perag[ag];
 	uint32_t		first_index;
@@ -135,7 +136,7 @@ restart:
 		if (error == EFSCORRUPTED)
 			break;
 
-	} while (1);
+	} while ((*nr_to_scan)--);
 
 	if (skipped) {
 		delay(1);
@@ -153,23 +154,30 @@ xfs_inode_ag_iterator(
 					   struct xfs_perag *pag, int flags),
 	int			flags,
 	int			tag,
-	int			exclusive)
+	int			exclusive,
+	int			*nr_to_scan)
 {
 	int			error = 0;
 	int			last_error = 0;
 	xfs_agnumber_t		ag;
+	int			nr;
 
+	nr = nr_to_scan ? *nr_to_scan : INT_MAX;
 	for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
 		if (!mp->m_perag[ag].pag_ici_init)
 			continue;
 		error = xfs_inode_ag_walk(mp, ag, execute, flags, tag,
-						exclusive);
+						exclusive, &nr);
 		if (error) {
 			last_error = error;
 			if (error == EFSCORRUPTED)
 				break;
 		}
+		if (nr <= 0)
+			break;
 	}
+	if (nr_to_scan)
+		*nr_to_scan = nr;
 	return XFS_ERROR(last_error);
 }
 
@@ -289,7 +297,7 @@ xfs_sync_data(
 	ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
 
 	error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
-				      XFS_ICI_NO_TAG, 0);
+				      XFS_ICI_NO_TAG, 0, NULL);
 	if (error)
 		return XFS_ERROR(error);
 
@@ -311,7 +319,7 @@ xfs_sync_attr(
 	ASSERT((flags & ~SYNC_WAIT) == 0);
 
 	return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
-				     XFS_ICI_NO_TAG, 0);
+				     XFS_ICI_NO_TAG, 0, NULL);
 }
 
 STATIC int
@@ -679,6 +687,7 @@ __xfs_inode_set_reclaim_tag(
 	radix_tree_tag_set(&pag->pag_ici_root,
 			   XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
 			   XFS_ICI_RECLAIM_TAG);
+	pag->pag_ici_reclaimable++;
 }
 
 /*
@@ -710,6 +719,7 @@ __xfs_inode_clear_reclaim_tag(
 {
 	radix_tree_tag_clear(&pag->pag_ici_root,
 			XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+	pag->pag_ici_reclaimable--;
 }
 
 STATIC int
@@ -770,5 +780,88 @@ xfs_reclaim_inodes(
 	int		mode)
 {
 	return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
-					XFS_ICI_RECLAIM_TAG, 1);
+					XFS_ICI_RECLAIM_TAG, 1, NULL);
+}
+
+/*
+ * Shrinker infrastructure.
+ *
+ * This is all far more complex than it needs to be. It adds a global list of
+ * mounts because the shrinkers can only call a global context. We need to make
+ * the shrinkers pass a context to avoid the need for global state.
+ */
+static LIST_HEAD(xfs_mount_list);
+static struct rw_semaphore xfs_mount_list_lock;
+
+static int
+xfs_reclaim_inode_shrink(
+	int		nr_to_scan,
+	gfp_t		gfp_mask)
+{
+	struct xfs_mount *mp;
+	xfs_agnumber_t	ag;
+	int		reclaimable = 0;
+
+	if (nr_to_scan) {
+		if (!(gfp_mask & __GFP_FS))
+			return -1;
+
+		down_read(&xfs_mount_list_lock);
+		list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
+			xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
+					XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
+			if (nr_to_scan <= 0)
+				break;
+		}
+		up_read(&xfs_mount_list_lock);
+	}
+
+	down_read(&xfs_mount_list_lock);
+	list_for_each_entry(mp, &xfs_mount_list, m_mplist) {
+		for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
+
+			if (!mp->m_perag[ag].pag_ici_init)
+				continue;
+			reclaimable += mp->m_perag[ag].pag_ici_reclaimable;
+		}
+	}
+	up_read(&xfs_mount_list_lock);
+	return reclaimable;
+}
+
+static struct shrinker xfs_inode_shrinker = {
+	.shrink = xfs_reclaim_inode_shrink,
+	.seeks = DEFAULT_SEEKS,
+};
+
+void __init
+xfs_inode_shrinker_init(void)
+{
+	init_rwsem(&xfs_mount_list_lock);
+	register_shrinker(&xfs_inode_shrinker);
+}
+
+void
+xfs_inode_shrinker_destroy(void)
+{
+	ASSERT(list_empty(&xfs_mount_list));
+	unregister_shrinker(&xfs_inode_shrinker);
+}
+
+void
+xfs_inode_shrinker_register(
+	struct xfs_mount	*mp)
+{
+	down_write(&xfs_mount_list_lock);
+	list_add_tail(&mp->m_mplist, &xfs_mount_list);
+	up_write(&xfs_mount_list_lock);
+}
+
+void
+xfs_inode_shrinker_unregister(
+	struct xfs_mount	*mp)
+{
+	down_write(&xfs_mount_list_lock);
+	list_del(&mp->m_mplist);
+	up_write(&xfs_mount_list_lock);
 }
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -54,6 +54,11 @@ void __xfs_inode_clear_reclaim_tag(struc
 int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
 int xfs_inode_ag_iterator(struct xfs_mount *mp,
 	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
-	int flags, int tag, int write_lock);
+	int flags, int tag, int write_lock, int *nr_to_scan);
+
+void xfs_inode_shrinker_init(void);
+void xfs_inode_shrinker_destroy(void);
+void xfs_inode_shrinker_register(struct xfs_mount *mp);
+void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
 
 #endif
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -891,7 +891,8 @@ xfs_qm_dqrele_all_inodes(
 	uint		 flags)
 {
 	ASSERT(mp->m_quotainfo);
-	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
+	xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags,
+				XFS_ICI_NO_TAG, 0, NULL);
 }
 
 /*------------------------------------------------------------------------*/
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -229,6 +229,7 @@ typedef struct xfs_perag
 	int		pag_ici_init;	/* incore inode cache initialised */
 	rwlock_t	pag_ici_lock;	/* incore inode lock */
 	struct radix_tree_root pag_ici_root;	/* incore inode cache root */
+	int		pag_ici_reclaimable;	/* reclaimable inodes */
 #endif
 } xfs_perag_t;
 
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -257,6 +257,7 @@ typedef struct xfs_mount {
 	wait_queue_head_t	m_wait_single_sync_task;
 	__int64_t		m_update_flags;	/* sb flags we need to update
 						   on the next remount,rw */
+	struct list_head	m_mplist;	/* inode shrinker mount list */
 } xfs_mount_t;
 
 /*


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ