lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 9 Sep 2022 14:03:44 -0700
From:   Andrew Morton <akpm@...ux-foundation.org>
To:     Jeff Layton <jlayton@...nel.org>
Cc:     hughd@...gle.com, viro@...iv.linux.org.uk,
        linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
        linux-mm@...ck.org, linux-nfs@...r.kernel.org,
        Chuck Lever <chuck.lever@...cle.com>
Subject: Re: [PATCH] tmpfs: add support for an i_version counter

On Fri,  9 Sep 2022 09:00:31 -0400 Jeff Layton <jlayton@...nel.org> wrote:

> NFSv4 mandates a change attribute to avoid problems with timestamp
> granularity, which Linux implements using the i_version counter. This is
> particularly important when the underlying filesystem is fast.
> 
> Give tmpfs an i_version counter. Since it doesn't have to be persistent,
> we can just turn on SB_I_VERSION and sprinkle some inode_inc_iversion
> calls in the right places.
> 
> Also, while there is no formal spec for xattrs, most implementations
> update the ctime on setxattr. Fix shmem_xattr_handler_set to update the
> ctime and bump the i_version appropriately.
> 
> ...
>
> --- a/fs/posix_acl.c
> +++ b/fs/posix_acl.c
> @@ -24,6 +24,7 @@
>  #include <linux/user_namespace.h>
>  #include <linux/namei.h>
>  #include <linux/mnt_idmapping.h>
> +#include <linux/iversion.h>
>  
>  static struct posix_acl **acl_by_type(struct inode *inode, int type)
>  {
> @@ -1073,6 +1074,8 @@ int simple_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
>  	}
>  
>  	inode->i_ctime = current_time(inode);
> +	if (IS_I_VERSION(inode))
> +		inode_inc_iversion(inode);
>  	set_cached_acl(inode, type, acl);
>  	return 0;
>  }

adds a kilobyte of text to shmem.o because the quite large
inode_maybe_inc_iversion() get inlined all over the place.  Why oh why.

Is there any reason not to do the obvious?

--- a/include/linux/iversion.h~a
+++ a/include/linux/iversion.h
@@ -177,56 +177,7 @@ inode_set_iversion_queried(struct inode
 				I_VERSION_QUERIED);
 }
 
-/**
- * inode_maybe_inc_iversion - increments i_version
- * @inode: inode with the i_version that should be updated
- * @force: increment the counter even if it's not necessary?
- *
- * Every time the inode is modified, the i_version field must be seen to have
- * changed by any observer.
- *
- * If "force" is set or the QUERIED flag is set, then ensure that we increment
- * the value, and clear the queried flag.
- *
- * In the common case where neither is set, then we can return "false" without
- * updating i_version.
- *
- * If this function returns false, and no other metadata has changed, then we
- * can avoid logging the metadata.
- */
-static inline bool
-inode_maybe_inc_iversion(struct inode *inode, bool force)
-{
-	u64 cur, old, new;
-
-	/*
-	 * The i_version field is not strictly ordered with any other inode
-	 * information, but the legacy inode_inc_iversion code used a spinlock
-	 * to serialize increments.
-	 *
-	 * Here, we add full memory barriers to ensure that any de-facto
-	 * ordering with other info is preserved.
-	 *
-	 * This barrier pairs with the barrier in inode_query_iversion()
-	 */
-	smp_mb();
-	cur = inode_peek_iversion_raw(inode);
-	for (;;) {
-		/* If flag is clear then we needn't do anything */
-		if (!force && !(cur & I_VERSION_QUERIED))
-			return false;
-
-		/* Since lowest bit is flag, add 2 to avoid it */
-		new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT;
-
-		old = atomic64_cmpxchg(&inode->i_version, cur, new);
-		if (likely(old == cur))
-			break;
-		cur = old;
-	}
-	return true;
-}
-
+bool inode_maybe_inc_iversion(struct inode *inode, bool force);
 
 /**
  * inode_inc_iversion - forcibly increment i_version
--- a/fs/libfs.c~a
+++ a/fs/libfs.c
@@ -15,6 +15,7 @@
 #include <linux/mutex.h>
 #include <linux/namei.h>
 #include <linux/exportfs.h>
+#include <linux/iversion.h>
 #include <linux/writeback.h>
 #include <linux/buffer_head.h> /* sync_mapping_buffers */
 #include <linux/fs_context.h>
@@ -1529,3 +1530,53 @@ void generic_set_encrypted_ci_d_ops(stru
 #endif
 }
 EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops);
+
+/**
+ * inode_maybe_inc_iversion - increments i_version
+ * @inode: inode with the i_version that should be updated
+ * @force: increment the counter even if it's not necessary?
+ *
+ * Every time the inode is modified, the i_version field must be seen to have
+ * changed by any observer.
+ *
+ * If "force" is set or the QUERIED flag is set, then ensure that we increment
+ * the value, and clear the queried flag.
+ *
+ * In the common case where neither is set, then we can return "false" without
+ * updating i_version.
+ *
+ * If this function returns false, and no other metadata has changed, then we
+ * can avoid logging the metadata.
+ */
+bool inode_maybe_inc_iversion(struct inode *inode, bool force)
+{
+	u64 cur, old, new;
+
+	/*
+	 * The i_version field is not strictly ordered with any other inode
+	 * information, but the legacy inode_inc_iversion code used a spinlock
+	 * to serialize increments.
+	 *
+	 * Here, we add full memory barriers to ensure that any de-facto
+	 * ordering with other info is preserved.
+	 *
+	 * This barrier pairs with the barrier in inode_query_iversion()
+	 */
+	smp_mb();
+	cur = inode_peek_iversion_raw(inode);
+	for (;;) {
+		/* If flag is clear then we needn't do anything */
+		if (!force && !(cur & I_VERSION_QUERIED))
+			return false;
+
+		/* Since lowest bit is flag, add 2 to avoid it */
+		new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT;
+
+		old = atomic64_cmpxchg(&inode->i_version, cur, new);
+		if (likely(old == cur))
+			break;
+		cur = old;
+	}
+	return true;
+}
+EXPORT_SYMBOL(inode_maybe_inc_iversion);
_

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ