[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAOQ4uxhjcW2NUzmnboeXhUgfdUc4dfu2gLYWyEEcv5bVTUY-9A@mail.gmail.com>
Date: Fri, 21 May 2021 14:02:34 +0300
From: Amir Goldstein <amir73il@...il.com>
To: Gabriel Krisman Bertazi <krisman@...labora.com>
Cc: kernel@...labora.com, "Darrick J . Wong" <djwong@...nel.org>,
"Theodore Ts'o" <tytso@....edu>,
Dave Chinner <david@...morbit.com>, Jan Kara <jack@...e.com>,
David Howells <dhowells@...hat.com>,
Khazhismel Kumykov <khazhy@...gle.com>,
linux-fsdevel <linux-fsdevel@...r.kernel.org>,
Ext4 <linux-ext4@...r.kernel.org>
Subject: Re: [PATCH 08/11] fanotify: Introduce FAN_ERROR event
On Fri, May 21, 2021 at 5:42 AM Gabriel Krisman Bertazi
<krisman@...labora.com> wrote:
>
> The FAN_ERROR event is used by filesystem wide monitoring tools to
> receive notifications of type FS_ERROR_EVENT, emited by filesystems when
> a problem is detected. The error notification includes a generic error
> descriptor.
>
> Signed-off-by: Gabriel Krisman Bertazi <krisman@...labora.com>
>
> Changes since v1:
> - Pass dentry to fanotify_check_fsid (Amir)
> - FANOTIFY_EVENT_TYPE_ERROR -> FANOTIFY_EVENT_TYPE_FS_ERROR
> - Merge previous patch into it
> - Use a single slot
> ---
> fs/notify/fanotify/fanotify.c | 74 ++++++++++++++++-
> fs/notify/fanotify/fanotify.h | 28 ++++++-
> fs/notify/fanotify/fanotify_user.c | 123 ++++++++++++++++++++++++++---
> include/linux/fanotify.h | 6 +-
> include/uapi/linux/fanotify.h | 10 +++
> 5 files changed, 225 insertions(+), 16 deletions(-)
>
> diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> index 34e2ee759b39..197291a8c41d 100644
> --- a/fs/notify/fanotify/fanotify.c
> +++ b/fs/notify/fanotify/fanotify.c
> @@ -269,7 +269,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
> pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
> __func__, iter_info->report_mask, event_mask, data, data_type);
>
> - if (!fid_mode) {
> + if (!fid_mode && data_type != FSNOTIFY_EVENT_ERROR) {
> /* Do we have path to open a file descriptor? */
> if (!path)
> return 0;
> @@ -657,6 +657,51 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
> return event;
> }
>
> +static void fanotify_init_error_event(struct fanotify_error_event *event,
> + __kernel_fsid_t fsid,
> + const struct fs_error_report *report)
> +{
> + event->fae.type = FANOTIFY_EVENT_TYPE_FS_ERROR;
> + event->err_count = 1;
> + event->fsid = fsid;
> + event->error = report->error;
> + event->ino = (report->inode) ? report->inode->i_ino : 0;
> +}
> +
> +static int fanotify_queue_error_event(struct fsnotify_iter_info *iter_info,
> + struct fsnotify_group *group,
> + __kernel_fsid_t fsid,
> + const struct fs_error_report *report)
> +{
> + struct fanotify_mark *mark;
> + int type;
> + int ret = -ENOMEM;
> +
> + fsnotify_foreach_obj_type(type) {
> + if (!fsnotify_iter_should_report_type(iter_info, type))
> + continue;
> + mark = FANOTIFY_MARK(iter_info->marks[type]);
> + }
Two options.
If FAN_ERROR is allowed on inode marks, need to do the below
for every mark found in mark type iterator.
If FAN_ERROR is only allowed in sb marks use
fsnotify_iter_sb_mark(iter_info) instead of mark iterator.
> +
> + spin_lock(&mark->fsn_mark.lock);
> + if (mark->error_event) {
> + if (list_empty(&mark->error_event->fae.fse.list)) {
event list check should be protected with notification_lock.
err_count++ as well.
I suggest that you use the merge() callback of fsnotify_add_event()
to do err_count++ under notification_lock and the insert() callback
to do fsnotify_get_mark() and fanotify_init_error_event() under
notification_lock.
> + fsnotify_get_mark(&mark->fsn_mark);
> + fanotify_init_error_event(mark->error_event, fsid, report);
> + ret = fsnotify_add_event(group, &mark->error_event->fae.fse,
> + NULL, NULL);
> + if (ret)
> + fsnotify_put_mark(&mark->fsn_mark);
> + } else {
> + mark->error_event->err_count++;
> + ret = 0;
> + }
> + }
> + spin_unlock(&mark->fsn_mark.lock);
> +
> + return ret;
> +}
> +
> /*
> * Get cached fsid of the filesystem containing the object from any connector.
> * All connectors are supposed to have the same fsid, but we do not verify that
> @@ -738,8 +783,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
> BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
> BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC);
> BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
> + BUILD_BUG_ON(FAN_ERROR != FS_ERROR);
>
> - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19);
> + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 20);
>
> mask = fanotify_group_event_mask(group, iter_info, mask, data,
> data_type, dir);
> @@ -757,13 +803,20 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
> return 0;
> }
>
> - if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) {
> + if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS) || fanotify_is_error_event(mask)) {
Jan likes to keep code inside 80-columns.
Please run checkpatch.pl --max-line-length=80 on your patches.
> fsid = fanotify_get_fsid(iter_info);
> /* Racing with mark destruction or creation? */
> if (!fsid.val[0] && !fsid.val[1])
> return 0;
> }
>
> + if (fanotify_is_error_event(mask)) {
> + ret = fanotify_queue_error_event(iter_info, group, fsid, data);
> + if (ret)
> + fsnotify_queue_overflow(group);
fsnotify_add_event() already takes care of overflow event.
> + goto finish;
> + }
> +
> event = fanotify_alloc_event(group, mask, data, data_type, dir,
> file_name, &fsid);
> ret = -ENOMEM;
> @@ -833,6 +886,17 @@ static void fanotify_free_name_event(struct fanotify_event *event)
> kfree(FANOTIFY_NE(event));
> }
>
> +static void fanotify_free_error_event(struct fanotify_event *event)
> +{
> + /*
> + * Just drop the reference acquired by
> + * fanotify_queue_error_event.
> + *
> + * The actual memory is freed with the mark.
> + */
> + fsnotify_put_mark(&(FANOTIFY_EE(event)->mark->fsn_mark));
> +}
> +
> static void fanotify_free_event(struct fsnotify_event *fsn_event)
> {
> struct fanotify_event *event;
> @@ -855,6 +919,9 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event)
> case FANOTIFY_EVENT_TYPE_OVERFLOW:
> kfree(event);
> break;
> + case FANOTIFY_EVENT_TYPE_FS_ERROR:
> + fanotify_free_error_event(event);
> + break;
> default:
> WARN_ON_ONCE(1);
> }
> @@ -871,6 +938,7 @@ static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
> {
> struct fanotify_mark *mark = FANOTIFY_MARK(fsn_mark);
>
> + kfree(mark->error_event);
> kmem_cache_free(fanotify_mark_cache, mark);
> }
>
> diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
> index a399c5e2615d..ebe9e593dfbf 100644
> --- a/fs/notify/fanotify/fanotify.h
> +++ b/fs/notify/fanotify/fanotify.h
> @@ -151,6 +151,7 @@ enum fanotify_event_type {
> FANOTIFY_EVENT_TYPE_PATH,
> FANOTIFY_EVENT_TYPE_PATH_PERM,
> FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
> + FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */
> __FANOTIFY_EVENT_TYPE_NUM
> };
>
> @@ -206,12 +207,31 @@ FANOTIFY_NE(struct fanotify_event *event)
> return container_of(event, struct fanotify_name_event, fae);
> }
>
> +struct fanotify_error_event {
> + struct fanotify_event fae;
> + __kernel_fsid_t fsid;
> + unsigned long ino;
> + int error;
> + u32 err_count;
> +
> + /* Back reference to the mark this error refers to. */
> + struct fanotify_mark *mark;
> +};
> +
> +static inline struct fanotify_error_event *
> +FANOTIFY_EE(struct fanotify_event *event)
> +{
> + return container_of(event, struct fanotify_error_event, fae);
> +}
> +
> static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event)
> {
> if (event->type == FANOTIFY_EVENT_TYPE_FID)
> return &FANOTIFY_FE(event)->fsid;
> else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME)
> return &FANOTIFY_NE(event)->fsid;
> + else if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR)
> + return &FANOTIFY_EE(event)->fsid;
> else
> return NULL;
> }
> @@ -297,6 +317,11 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse)
> return container_of(fse, struct fanotify_event, fse);
> }
>
> +static inline bool fanotify_is_error_event(u32 mask)
> +{
> + return mask & FANOTIFY_ERROR_EVENTS;
> +}
> +
> static inline bool fanotify_event_has_path(struct fanotify_event *event)
> {
> return event->type == FANOTIFY_EVENT_TYPE_PATH ||
> @@ -325,7 +350,8 @@ static inline struct path *fanotify_event_path(struct fanotify_event *event)
> */
> static inline bool fanotify_is_hashed_event(u32 mask)
> {
> - return !fanotify_is_perm_event(mask) && !(mask & FS_Q_OVERFLOW);
> + return (!fanotify_is_perm_event(mask) && !fanotify_is_error_event(mask)
> + && !(mask & FS_Q_OVERFLOW));
Please do us the service off adding helper fanotify_is_overflow_event(mask)
> }
>
> static inline unsigned int fanotify_event_hash_bucket(
> diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
> index 00210535a78e..ea9b9f8f7c21 100644
> --- a/fs/notify/fanotify/fanotify_user.c
> +++ b/fs/notify/fanotify/fanotify_user.c
> @@ -106,6 +106,8 @@ struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
> #define FANOTIFY_EVENT_ALIGN 4
> #define FANOTIFY_INFO_HDR_LEN \
> (sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle))
> +#define FANOTIFY_INFO_ERROR_LEN \
> + (sizeof(struct fanotify_event_info_error))
>
> static int fanotify_fid_info_len(int fh_len, int name_len)
> {
> @@ -126,6 +128,9 @@ static size_t fanotify_event_len(struct fanotify_event *event,
> int fh_len;
> int dot_len = 0;
>
> + if (fanotify_is_error_event(event->mask))
> + return event_len + FANOTIFY_INFO_ERROR_LEN;
> +
> if (!fid_mode)
> return event_len;
>
> @@ -149,6 +154,30 @@ static size_t fanotify_event_len(struct fanotify_event *event,
> return event_len;
> }
>
> +static struct fanotify_event *fanotify_dequeue_error_event(struct fsnotify_group *group,
> + struct fanotify_event *event,
> + struct fanotify_error_event *error_event)
> +{
> + struct fsnotify_mark *mark = &(FANOTIFY_EE(event)->mark->fsn_mark);
> + /*
> + * In order to avoid missing an error count update, the
> + * queued event is de-queued and duplicated to an
> + * in-stack fanotify_error_event while still inside
> + * mark->lock. Once the event is dequeued, it can be
> + * immediately re-used for a new event.
> + *
> + * The ownership of the mark reference is dropped later
> + * by destroy_event.
> + */
> + spin_lock(&mark->lock);
> + memcpy(error_event, FANOTIFY_EE(event), sizeof(*error_event));
> + fsnotify_init_event(&error_event->fae.fse);
> + fsnotify_remove_queued_event(group, &event->fse);
> + spin_unlock(&mark->lock);
> +
> + return &error_event->fae;
> +}
> +
> /*
> * Remove an hashed event from merge hash table.
> */
> @@ -173,7 +202,8 @@ static void fanotify_unhash_event(struct fsnotify_group *group,
> * updated accordingly.
> */
> static struct fanotify_event *get_one_event(struct fsnotify_group *group,
> - size_t count)
> + size_t count,
> + struct fanotify_error_event *error_event)
> {
> size_t event_size;
> struct fanotify_event *event = NULL;
> @@ -197,9 +227,14 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group,
>
> /*
> * Held the notification_lock the whole time, so this is the
> - * same event we peeked above.
> + * same event we peeked above, unless it is copied to
> + * error_event.
> */
> - fsnotify_remove_first_event(group);
> + if (fanotify_is_error_event(event->mask))
> + event = fanotify_dequeue_error_event(group, event, error_event);
> + else
> + fsnotify_remove_first_event(group);
> +
> if (fanotify_is_perm_event(event->mask))
> FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED;
> if (fanotify_is_hashed_event(event->mask))
> @@ -309,6 +344,30 @@ static int process_access_response(struct fsnotify_group *group,
> return -ENOENT;
> }
>
> +static size_t copy_error_info_to_user(struct fanotify_event *event,
> + char __user *buf, int count)
> +{
> + struct fanotify_event_info_error info;
> + struct fanotify_error_event *fee = FANOTIFY_EE(event);
> +
> + info.hdr.info_type = FAN_EVENT_INFO_TYPE_ERROR;
> + info.hdr.pad = 0;
> + info.hdr.len = sizeof(struct fanotify_event_info_error);
> +
> + if (WARN_ON(count < info.hdr.len))
> + return -EFAULT;
> +
> + info.fsid = fee->fsid;
> + info.error = fee->error;
> + info.inode = fee->ino;
> + info.error_count = fee->err_count;
> +
> + if (copy_to_user(buf, &info, sizeof(info)))
> + return -EFAULT;
> +
> + return info.hdr.len;
> +}
> +
> static int copy_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
> int info_type, const char *name, size_t name_len,
> char __user *buf, size_t count)
> @@ -523,6 +582,14 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
> count -= ret;
> }
>
> + if (fanotify_is_error_event(event->mask)) {
> + ret = copy_error_info_to_user(event, buf, count);
> + if (ret < 0)
> + return ret;
> + buf += ret;
> + count -= ret;
> + }
> +
> return metadata.event_len;
>
> out_close_fd:
> @@ -553,6 +620,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
> {
> struct fsnotify_group *group;
> struct fanotify_event *event;
> + struct fanotify_error_event error_event;
> char __user *start;
> int ret;
> DEFINE_WAIT_FUNC(wait, woken_wake_function);
> @@ -569,7 +637,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
> * in case there are lots of available events.
> */
> cond_resched();
> - event = get_one_event(group, count);
> + event = get_one_event(group, count, &error_event);
> if (IS_ERR(event)) {
> ret = PTR_ERR(event);
> break;
> @@ -888,16 +956,33 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
> flags, umask);
> }
>
> -static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
> - __u32 mask,
> - unsigned int flags)
> +static int fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
> + __u32 mask, unsigned int flags,
> + __u32 *modified_mask)
It really means 'added' not 'modified' just like the local variable name.
> {
> + struct fanotify_mark *mark = FANOTIFY_MARK(fsn_mark);
> + struct fanotify_error_event *error_event = NULL;
> + bool addition = !(flags & FAN_MARK_IGNORED_MASK);
'addition' is confusing here. I would call it 'ignored',
but do we want to allow FAN_ERROR in ignored mask?
for what case? on inode mark?
If we do not, let's not allow it.
> __u32 oldmask = -1;
>
> + /* Only pre-alloc error_event if needed. */
> + if (addition && (mask & FAN_ERROR) && !mark->error_event) {
> + error_event = kzalloc(sizeof(*error_event), GFP_KERNEL);
> + if (!error_event)
> + return -ENOMEM;
> + fanotify_init_event(&error_event->fae, 0, FS_ERROR);
> + error_event->mark = mark;
> + }
> +
> spin_lock(&fsn_mark->lock);
> - if (!(flags & FAN_MARK_IGNORED_MASK)) {
> + if (addition) {
> oldmask = fsn_mark->mask;
> fsn_mark->mask |= mask;
> +
> + if (!mark->error_event) {
> + mark->error_event = error_event;
> + error_event = NULL;
> + }
> } else {
> fsn_mark->ignored_mask |= mask;
> if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
> @@ -905,7 +990,11 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
> }
> spin_unlock(&fsn_mark->lock);
>
> - return mask & ~oldmask;
> + kfree(error_event);
> +
> + *modified_mask = mask & ~oldmask;
> + return 0;
> +
> }
>
> static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
> @@ -955,6 +1044,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
> {
> struct fsnotify_mark *fsn_mark;
> __u32 added;
> + int ret = 0;
>
> mutex_lock(&group->mark_mutex);
> fsn_mark = fsnotify_find_mark(connp, group);
> @@ -965,13 +1055,18 @@ static int fanotify_add_mark(struct fsnotify_group *group,
> return PTR_ERR(fsn_mark);
> }
> }
> - added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
> + ret = fanotify_mark_add_to_mask(fsn_mark, mask, flags, &added);
> + if (ret)
> + goto out;
> +
> if (added & ~fsnotify_conn_mask(fsn_mark->connector))
> fsnotify_recalc_mask(fsn_mark->connector);
> +
> +out:
> mutex_unlock(&group->mark_mutex);
>
> fsnotify_put_mark(fsn_mark);
> - return 0;
> + return ret;
> }
>
> static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
> @@ -1377,6 +1472,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
>
> fsid = &__fsid;
> }
> + if (mask & FAN_ERROR) {
> + ret = fanotify_test_fsid(path.dentry, &__fsid);
> + if (ret)
> + goto path_put_and_out;
> + fsid = &__fsid;
> + }
>
> /* inode held in place by reference to path; group by fget on fd */
> if (mark_type == FAN_MARK_INODE)
> diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
> index bad41bcb25df..05c929d588e4 100644
> --- a/include/linux/fanotify.h
> +++ b/include/linux/fanotify.h
> @@ -81,9 +81,12 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
> #define FANOTIFY_INODE_EVENTS (FANOTIFY_DIRENT_EVENTS | \
> FAN_ATTRIB | FAN_MOVE_SELF | FAN_DELETE_SELF)
>
> +#define FANOTIFY_ERROR_EVENTS (FAN_ERROR)
> +
> /* Events that user can request to be notified on */
> #define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \
> - FANOTIFY_INODE_EVENTS)
> + FANOTIFY_INODE_EVENTS | \
> + FANOTIFY_ERROR_EVENTS)
>
> /* Events that require a permission response from user */
> #define FANOTIFY_PERM_EVENTS (FAN_OPEN_PERM | FAN_ACCESS_PERM | \
> @@ -95,6 +98,7 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
> /* Events that may be reported to user */
> #define FANOTIFY_OUTGOING_EVENTS (FANOTIFY_EVENTS | \
> FANOTIFY_PERM_EVENTS | \
> + FANOTIFY_ERROR_EVENTS | \
> FAN_Q_OVERFLOW | FAN_ONDIR)
>
> #define ALL_FANOTIFY_EVENT_BITS (FANOTIFY_OUTGOING_EVENTS | \
> diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
> index fbf9c5c7dd59..e3920597112f 100644
> --- a/include/uapi/linux/fanotify.h
> +++ b/include/uapi/linux/fanotify.h
> @@ -20,6 +20,7 @@
> #define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */
>
> #define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */
> +#define FAN_ERROR 0x00008000 /* Filesystem error */
>
> #define FAN_OPEN_PERM 0x00010000 /* File open in perm check */
> #define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */
> @@ -123,6 +124,7 @@ struct fanotify_event_metadata {
> #define FAN_EVENT_INFO_TYPE_FID 1
> #define FAN_EVENT_INFO_TYPE_DFID_NAME 2
> #define FAN_EVENT_INFO_TYPE_DFID 3
> +#define FAN_EVENT_INFO_TYPE_ERROR 4
>
> /* Variable length info record following event metadata */
> struct fanotify_event_info_header {
> @@ -148,6 +150,14 @@ struct fanotify_event_info_fid {
> unsigned char handle[0];
> };
>
> +struct fanotify_event_info_error {
> + struct fanotify_event_info_header hdr;
> + int error;
> + __kernel_fsid_t fsid;
> + unsigned long inode;
I think for UAPI this should use __u64 and better call this ino and
not inode?
Thanks,
Amir.
Powered by blists - more mailing lists