From 7a2cd74654a53684d545b96c57c9091e420b3add Mon Sep 17 00:00:00 2001 From: Amir Goldstein Date: Tue, 12 Nov 2024 13:46:08 +0100 Subject: [PATCH] fsnotify: opt-in for permission events at file open time Legacy inotify/fanotify listeners can add watches for events on inode, parent or mount and expect to get events (e.g. FS_MODIFY) on files that were already open at the time of setting up the watches. fanotify permission events are typically used by Anti-malware sofware, that is watching the entire mount and it is not common to have more that one Anti-malware engine installed on a system. To reduce the overhead of the fsnotify_file_perm() hooks on every file access, relax the semantics of the legacy FAN_ACCESS_PERM event to generate events only if there were *any* permission event listeners on the filesystem at the time that the file was opened. The new semantic is implemented by extending the FMODE_NONOTIFY bit into two FMODE_NONOTIFY_* bits, that are used to store a mode for which of the events types to report. This is going to apply to the new fanotify pre-content events in order to reduce the cost of the new pre-content event vfs hooks. Suggested-by: Linus Torvalds Link: https://lore.kernel.org/linux-fsdevel/CAHk-=wj8L=mtcRTi=NECHMGfZQgXOp_uix1YVh04fEmrKaMnXA@mail.gmail.com/ Signed-off-by: Amir Goldstein --- fs/open.c | 8 ++++- include/linux/fs.h | 26 ++++++++++++--- include/linux/fsnotify.h | 71 +++++++++++++++++++++++++++++++--------- 3 files changed, 83 insertions(+), 22 deletions(-) diff --git a/fs/open.c b/fs/open.c index 226aca8c7909..194c2c8d8cd4 100644 --- a/fs/open.c +++ b/fs/open.c @@ -901,7 +901,7 @@ static int do_dentry_open(struct file *f, f->f_sb_err = file_sample_sb_err(f); if (unlikely(f->f_flags & O_PATH)) { - f->f_mode = FMODE_PATH | FMODE_OPENED; + f->f_mode = FMODE_PATH | FMODE_OPENED | FMODE_NONOTIFY; f->f_op = &empty_fops; return 0; } @@ -929,6 +929,12 @@ static int do_dentry_open(struct file *f, if (error) goto cleanup_all; + /* + * Set FMODE_NONOTIFY_* bits according to existing permission watches. + * If FMODE_NONOTIFY was already set for an fanotify fd, this doesn't + * change anything. + */ + f->f_mode |= fsnotify_file_mode(f); error = fsnotify_open_perm(f); if (error) goto cleanup_all; diff --git a/include/linux/fs.h b/include/linux/fs.h index 70359dd669ff..dd583ce7dba8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -173,13 +173,14 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define FMODE_NOREUSE ((__force fmode_t)(1 << 23)) -/* FMODE_* bit 24 */ - /* File is embedded in backing_file object */ -#define FMODE_BACKING ((__force fmode_t)(1 << 25)) +#define FMODE_BACKING ((__force fmode_t)(1 << 24)) + +/* File shouldn't generate fanotify pre-content events */ +#define FMODE_NONOTIFY_HSM ((__force fmode_t)(1 << 25)) -/* File was opened by fanotify and shouldn't generate fanotify events */ -#define FMODE_NONOTIFY ((__force fmode_t)(1 << 26)) +/* File shouldn't generate fanotify permission events */ +#define FMODE_NONOTIFY_PERM ((__force fmode_t)(1 << 26)) /* File is capable of returning -EAGAIN if I/O will block */ #define FMODE_NOWAIT ((__force fmode_t)(1 << 27)) @@ -190,6 +191,21 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File does not contribute to nr_files count */ #define FMODE_NOACCOUNT ((__force fmode_t)(1 << 29)) +/* + * The two FMODE_NONOTIFY_ bits used together have a special meaning of + * not reporting any events at all including non-permission events. + * These are the possible values of FMODE_NOTIFY(f->f_mode) and their meaning: + * + * FMODE_NONOTIFY_HSM - suppress only pre-content events. + * FMODE_NONOTIFY_PERM - suppress permission (incl. pre-content) events. + * FMODE_NONOTIFY - suppress all (incl. non-permission) events. + */ +#define FMODE_NONOTIFY_MASK \ + (FMODE_NONOTIFY_HSM | FMODE_NONOTIFY_PERM) +#define FMODE_NONOTIFY FMODE_NONOTIFY_MASK +#define FMODE_NOTIFY(mode) \ + ((mode) & FMODE_NONOTIFY_MASK) + /* * Attribute flags. These should be or-ed together to figure out what * has been changed! diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 278620e063ab..9f13c7c19b74 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -108,38 +108,66 @@ static inline void fsnotify_dentry(struct dentry *dentry, __u32 mask) fsnotify_parent(dentry, mask, dentry, FSNOTIFY_EVENT_DENTRY); } -static inline int fsnotify_file(struct file *file, __u32 mask) +static inline int fsnotify_path(const struct path *path, __u32 mask) { - const struct path *path; + return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); +} +static inline int fsnotify_file(struct file *file, __u32 mask) +{ /* * FMODE_NONOTIFY are fds generated by fanotify itself which should not * generate new events. We also don't want to generate events for * FMODE_PATH fds (involves open & close events) as they are just * handle creation / destruction events and not "real" file events. */ - if (file->f_mode & (FMODE_NONOTIFY | FMODE_PATH)) - return 0; - - path = &file->f_path; - /* Permission events require group prio >= FSNOTIFY_PRIO_CONTENT */ - if (mask & ALL_FSNOTIFY_PERM_EVENTS && - !fsnotify_sb_has_priority_watchers(path->dentry->d_sb, - FSNOTIFY_PRIO_CONTENT)) + if (FMODE_NOTIFY(file->f_mode) == FMODE_NONOTIFY) return 0; - return fsnotify_parent(path->dentry, mask, path, FSNOTIFY_EVENT_PATH); + return fsnotify_path(&file->f_path, mask); } #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS +/* + * At open time we check fsnotify_sb_has_priority_watchers() and set the + * FMODE_NONOTIFY_ mode bits accordignly. + * Later, fsnotify permission hooks do not check if there are permission event + * watches, but that there were permission event watches at open time. + */ +static inline fmode_t fsnotify_file_mode(struct file *file) +{ + struct super_block *sb = file->f_path.dentry->d_sb; + + /* Is it a file opened by fanotify? */ + if (FMODE_NOTIFY(file->f_mode) == FMODE_NONOTIFY) + return FMODE_NONOTIFY; + + /* + * Permission events is a super set of pre-content events, so if there + * are no permission event watchers, there are also no pre-content event + * watchers and this is implied from the single FMODE_NONOTIFY_PERM bit. + */ + if (likely(!fsnotify_sb_has_priority_watchers(sb, + FSNOTIFY_PRIO_CONTENT))) + return FMODE_NONOTIFY_PERM; + + /* + * FMODE_NONOTIFY_HSM bit means there are permission event watchers, but + * no pre-content event watchers. + */ + if (likely(!fsnotify_sb_has_priority_watchers(sb, + FSNOTIFY_PRIO_PRE_CONTENT))) + return FMODE_NONOTIFY_HSM; + + return 0; +} + /* * fsnotify_file_area_perm - permission hook before access to file range */ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, const loff_t *ppos, size_t count) { - __u32 fsnotify_mask = FS_ACCESS_PERM; - /* * filesystem may be modified in the context of permission events * (e.g. by HSM filling a file on access), so sb freeze protection @@ -150,7 +178,10 @@ static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, if (!(perm_mask & MAY_READ)) return 0; - return fsnotify_file(file, fsnotify_mask); + if (likely(file->f_mode & FMODE_NONOTIFY_PERM)) + return 0; + + return fsnotify_path(&file->f_path, FS_ACCESS_PERM); } /* @@ -168,16 +199,24 @@ static inline int fsnotify_open_perm(struct file *file) { int ret; + if (likely(file->f_mode & FMODE_NONOTIFY_PERM)) + return 0; + if (file->f_flags & __FMODE_EXEC) { - ret = fsnotify_file(file, FS_OPEN_EXEC_PERM); + ret = fsnotify_path(&file->f_path, FS_OPEN_EXEC_PERM); if (ret) return ret; } - return fsnotify_file(file, FS_OPEN_PERM); + return fsnotify_path(&file->f_path, FS_OPEN_PERM); } #else +static inline fmode_t fsnotify_file_mode(struct file *file) +{ + return 0; +} + static inline int fsnotify_file_area_perm(struct file *file, int perm_mask, const loff_t *ppos, size_t count) { -- 2.34.1