[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <fb3d817d8b6235472e517a9fc9ad0956fb4e8cf2.camel@kernel.org>
Date:   Thu, 26 May 2022 14:39:55 -0400
From:   Jeff Layton <jlayton@...nel.org>
To:     Luís Henriques <lhenriques@...e.de>,
        Xiubo Li <xiubli@...hat.com>, Ilya Dryomov <idryomov@...il.com>
Cc:     ceph-devel@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [RFC PATCH v2] ceph: prevent a client from exceeding the MDS
 maximum xattr size
On Wed, 2022-05-25 at 18:24 +0100, Luís Henriques wrote:
> The MDS tries to enforce a limit on the total key/values in extended
> attributes.  However, this limit is enforced only if doing a synchronous
> operation (MDS_OP_SETXATTR) -- if we're buffering the xattrs, the MDS
> doesn't have a chance to enforce these limits.
> 
> This patch adds support for an extra feature bit that will allow the
> client to get the MDS max_xattr_pairs_size setting in the session message.
> Then, when setting an xattr, the kernel will revert to do a synchronous
> operation if that maximum size is exceeded.
> 
> While there, fix a dout() that would trigger a printk warning:
> 
> [   98.718078] ------------[ cut here ]------------
> [   98.719012] precision 65536 too large
> [   98.719039] WARNING: CPU: 1 PID: 3755 at lib/vsprintf.c:2703 vsnprintf+0x5e3/0x600
> ...
> 
> URL: https://tracker.ceph.com/issues/55725
> Signed-off-by: Luís Henriques <lhenriques@...e.de>
> ---
>  fs/ceph/mds_client.c | 12 ++++++++++++
>  fs/ceph/mds_client.h | 15 ++++++++++++++-
>  fs/ceph/xattr.c      | 12 ++++++++----
>  3 files changed, 34 insertions(+), 5 deletions(-)
> 
> * Changes since v1
> 
> Added support for new feature bit to get the MDS max_xattr_pairs_size
> setting.
> 
> Also note that this patch relies on a patch that hasn't been merged yet
> ("ceph: use correct index when encoding client supported features"),
> otherwise the new feature bit won't be correctly encoded.
> 
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 35597fafb48c..87a25b7cf496 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -3500,6 +3500,7 @@ static void handle_session(struct ceph_mds_session *session,
>  	struct ceph_mds_session_head *h;
>  	u32 op;
>  	u64 seq, features = 0;
> +	u64 max_xattr_pairs_size = 0;
>  	int wake = 0;
>  	bool blocklisted = false;
>  
> @@ -3545,6 +3546,9 @@ static void handle_session(struct ceph_mds_session *session,
>  		}
>  	}
>  
> +	if (msg_version >= 6)
> +		ceph_decode_64_safe(&p, end, max_xattr_pairs_size, bad);
> +
>  	mutex_lock(&mdsc->mutex);
>  	if (op == CEPH_SESSION_CLOSE) {
>  		ceph_get_mds_session(session);
> @@ -3552,6 +3556,12 @@ static void handle_session(struct ceph_mds_session *session,
>  	}
>  	/* FIXME: this ttl calculation is generous */
>  	session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
> +
> +	if (max_xattr_pairs_size && (op == CEPH_SESSION_OPEN)) {
> +		dout("Changing MDS max xattrs pairs size: %llu => %llu\n",
> +		     mdsc->max_xattr_pairs_size, max_xattr_pairs_size);
> +		mdsc->max_xattr_pairs_size = max_xattr_pairs_size;
> +	}
>  	mutex_unlock(&mdsc->mutex);
>  
>  	mutex_lock(&session->s_mutex);
> @@ -4761,6 +4771,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
>  	strscpy(mdsc->nodename, utsname()->nodename,
>  		sizeof(mdsc->nodename));
>  
> +	mdsc->max_xattr_pairs_size = MDS_MAX_XATTR_PAIRS_SIZE;
> +
>  	fsc->mdsc = mdsc;
>  	return 0;
>  
> diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
> index ca32f26f5eed..3db777df6d88 100644
> --- a/fs/ceph/mds_client.h
> +++ b/fs/ceph/mds_client.h
> @@ -29,8 +29,11 @@ enum ceph_feature_type {
>  	CEPHFS_FEATURE_MULTI_RECONNECT,
>  	CEPHFS_FEATURE_DELEG_INO,
>  	CEPHFS_FEATURE_METRIC_COLLECT,
> +	CEPHFS_FEATURE_ALTERNATE_NAME,
> +	CEPHFS_FEATURE_NOTIFY_SESSION_STATE,
> +	CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
Having to make this feature-bit-dependent kind of sucks. I wonder if it
could be avoided...
A question:
How do the MDS's discover this setting? Do they get it from the mons? If
so, I wonder if there is a way for the clients to query the mon for this
instead of having to extend the MDS protocol?
>  
> -	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_METRIC_COLLECT,
> +	CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,
>  };
>  
>  /*
> @@ -45,9 +48,16 @@ enum ceph_feature_type {
>  	CEPHFS_FEATURE_MULTI_RECONNECT,		\
>  	CEPHFS_FEATURE_DELEG_INO,		\
>  	CEPHFS_FEATURE_METRIC_COLLECT,		\
> +	CEPHFS_FEATURE_MAX_XATTR_PAIRS_SIZE,	\
>  }
>  #define CEPHFS_FEATURES_CLIENT_REQUIRED {}
>  
> +/*
> + * Maximum size of xattrs the MDS can handle per inode by default.  This
> + * includes the attribute name and 4+4 bytes for the key/value sizes.
> + */
> +#define MDS_MAX_XATTR_PAIRS_SIZE (1<<16) /* 64K */
> +
>  /*
>   * Some lock dependencies:
>   *
> @@ -404,6 +414,9 @@ struct ceph_mds_client {
>  	struct rb_root		quotarealms_inodes;
>  	struct mutex		quotarealms_inodes_mutex;
>  
> +	/* maximum aggregate size of extended attributes on a file */
> +	u64			max_xattr_pairs_size;
> +
>  	/*
>  	 * snap_rwsem will cover cap linkage into snaprealms, and
>  	 * realm snap contexts.  (later, we can do per-realm snap
> diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
> index 8c2dc2c762a4..175a8c1449aa 100644
> --- a/fs/ceph/xattr.c
> +++ b/fs/ceph/xattr.c
> @@ -1086,7 +1086,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
>  			flags |= CEPH_XATTR_REMOVE;
>  	}
>  
> -	dout("setxattr value=%.*s\n", (int)size, value);
> +	dout("setxattr value size: %ld\n", size);
>  
>  	/* do request */
>  	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
> @@ -1184,8 +1184,14 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>  	spin_lock(&ci->i_ceph_lock);
>  retry:
>  	issued = __ceph_caps_issued(ci, NULL);
> -	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
> +	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
> +	if ((ci->i_xattrs.version == 0) || !(issued & CEPH_CAP_XATTR_EXCL) ||
> +	    (required_blob_size >= mdsc->max_xattr_pairs_size)) {
> +		dout("%s do sync setxattr: version: %llu size: %d max: %llu\n",
> +		     __func__, ci->i_xattrs.version, required_blob_size,
> +		     mdsc->max_xattr_pairs_size);
>  		goto do_sync;
> +	}
>  
>  	if (!lock_snap_rwsem && !ci->i_head_snapc) {
>  		lock_snap_rwsem = true;
> @@ -1201,8 +1207,6 @@ int __ceph_setxattr(struct inode *inode, const char *name,
>  	     ceph_cap_string(issued));
>  	__build_xattrs(inode);
>  
> -	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
> -
>  	if (!ci->i_xattrs.prealloc_blob ||
>  	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
>  		struct ceph_buffer *blob;
-- 
Jeff Layton <jlayton@...nel.org>
Powered by blists - more mailing lists
 
