lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4656fe9c-cacb-1e52-1a3c-b549bac899c8@kernel.org>
Date:   Fri, 21 Apr 2023 09:56:34 +0800
From:   Chao Yu <chao@...nel.org>
To:     Jaegeuk Kim <jaegeuk@...nel.org>
Cc:     linux-f2fs-devel@...ts.sourceforge.net,
        linux-kernel@...r.kernel.org, Yangtao Li <frank.li@...o.com>
Subject: Re: [PATCH v8] f2fs: support errors=remount-ro|continue|panic
 mountoption

On 2023/4/21 0:34, Jaegeuk Kim wrote:
> On 04/20, Chao Yu wrote:
>> On 2023/4/20 4:19, Jaegeuk Kim wrote:
>>> On 04/08, Chao Yu wrote:
>>>> This patch supports errors=remount-ro|continue|panic mount option,
>>>> by default it uses "remount-ro" mode.
>>>>
>>>> Signed-off-by: Chao Yu <chao@...nel.org>
>>>> Signed-off-by: Yangtao Li <frank.li@...o.com>
>>>> ---
>>>> v8:
>>>> - record stop reason synchronously if it's from ioctl shutdown interface
>>>> - fix typo in doc and commit message.
>>>> - use f2fs_hw_is_readonly() instead of bdev_read_only()
>>>>    Documentation/filesystems/f2fs.rst |   5 ++
>>>>    fs/f2fs/checkpoint.c               |   7 +-
>>>>    fs/f2fs/f2fs.h                     |  20 ++++-
>>>>    fs/f2fs/file.c                     |   5 --
>>>>    fs/f2fs/gc.c                       |   2 +-
>>>>    fs/f2fs/super.c                    | 134 ++++++++++++++++++++++++++---
>>>>    6 files changed, 149 insertions(+), 24 deletions(-)
>>>>
>>>> diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
>>>> index 2055e72871fe..ba4c02fad647 100644
>>>> --- a/Documentation/filesystems/f2fs.rst
>>>> +++ b/Documentation/filesystems/f2fs.rst
>>>> @@ -351,6 +351,11 @@ age_extent_cache	 Enable an age extent cache based on rb-tree. It records
>>>>    			 data block update frequency of the extent per inode, in
>>>>    			 order to provide better temperature hints for data block
>>>>    			 allocation.
>>>> +errors=%s		 Specify f2fs behavior on critical errors. This supports modes:
>>>> +			 "panic", "continue" and "remount-ro", respectively, trigger
>>>> +			 panic immediately, continue without doing anything, and remount
>>>> +			 the partition in read-only mode. By default it uses "remount-ro"
>>>> +			 mode.
>>>>    ======================== ============================================================
>>>>    Debugfs Entries
>>>> diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
>>>> index 73ae4e85e70d..e6b266bb9ce0 100644
>>>> --- a/fs/f2fs/checkpoint.c
>>>> +++ b/fs/f2fs/checkpoint.c
>>>> @@ -30,12 +30,9 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
>>>>    						unsigned char reason)
>>>>    {
>>>>    	f2fs_build_fault_attr(sbi, 0, 0);
>>>> -	set_ckpt_flags(sbi, CP_ERROR_FLAG);
>>>> -	if (!end_io) {
>>>> +	if (!end_io)
>>>>    		f2fs_flush_merged_writes(sbi);
>>>> -
>>>> -		f2fs_handle_stop(sbi, reason);
>>>> -	}
>>>> +	f2fs_handle_critical_error(sbi, reason, end_io);
>>>>    }
>>>>    /*
>>>> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
>>>> index ac4942f8e83e..7d9c5a65f974 100644
>>>> --- a/fs/f2fs/f2fs.h
>>>> +++ b/fs/f2fs/f2fs.h
>>>> @@ -162,6 +162,7 @@ struct f2fs_mount_info {
>>>>    	int fs_mode;			/* fs mode: LFS or ADAPTIVE */
>>>>    	int bggc_mode;			/* bggc mode: off, on or sync */
>>>>    	int memory_mode;		/* memory mode */
>>>> +	int errors;			/* errors parameter */
>>>>    	int discard_unit;		/*
>>>>    					 * discard command's offset/size should
>>>>    					 * be aligned to this unit: block,
>>>> @@ -1370,6 +1371,12 @@ enum {
>>>>    	MEMORY_MODE_LOW,	/* memory mode for low memry devices */
>>>>    };
>>>> +enum errors_option {
>>>> +	MOUNT_ERRORS_READONLY,	/* remount fs ro on errors */
>>>> +	MOUNT_ERRORS_CONTINUE,	/* continue on errors */
>>>> +	MOUNT_ERRORS_PANIC,	/* panic on errors */
>>>> +};
>>>> +
>>>>    static inline int f2fs_test_bit(unsigned int nr, char *addr);
>>>>    static inline void f2fs_set_bit(unsigned int nr, char *addr);
>>>>    static inline void f2fs_clear_bit(unsigned int nr, char *addr);
>>>> @@ -1786,8 +1793,14 @@ struct f2fs_sb_info {
>>>>    	struct workqueue_struct *post_read_wq;	/* post read workqueue */
>>>> -	unsigned char errors[MAX_F2FS_ERRORS];	/* error flags */
>>>> -	spinlock_t error_lock;			/* protect errors array */
>>>> +	/*
>>>> +	 * If we are in irq context, let's update error information into
>>>> +	 * on-disk superblock in the work.
>>>> +	 */
>>>> +	struct work_struct s_error_work;
>>>> +	unsigned char errors[MAX_F2FS_ERRORS];		/* error flags */
>>>> +	unsigned char stop_reason[MAX_STOP_REASON];	/* stop reason */
>>>> +	spinlock_t error_lock;			/* protect errors/stop_reason array */
>>>>    	bool error_dirty;			/* errors of sb is dirty */
>>>>    	struct kmem_cache *inline_xattr_slab;	/* inline xattr entry */
>>>> @@ -3522,8 +3535,9 @@ int f2fs_enable_quota_files(struct f2fs_sb_info *sbi, bool rdonly);
>>>>    int f2fs_quota_sync(struct super_block *sb, int type);
>>>>    loff_t max_file_blocks(struct inode *inode);
>>>>    void f2fs_quota_off_umount(struct super_block *sb);
>>>> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason);
>>>>    void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
>>>> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
>>>> +							bool irq_context);
>>>>    void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
>>>>    int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
>>>>    int f2fs_sync_fs(struct super_block *sb, int sync);
>>>> diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
>>>> index 14e9a20e68df..10743e864d57 100644
>>>> --- a/fs/f2fs/file.c
>>>> +++ b/fs/f2fs/file.c
>>>> @@ -2221,7 +2221,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>>    				ret = 0;
>>>>    				f2fs_stop_checkpoint(sbi, false,
>>>>    						STOP_CP_REASON_SHUTDOWN);
>>>> -				set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>    				trace_f2fs_shutdown(sbi, in, ret);
>>>>    			}
>>>>    			return ret;
>>>> @@ -2234,7 +2233,6 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>>    		if (ret)
>>>>    			goto out;
>>>>    		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>    		thaw_bdev(sb->s_bdev);
>>>>    		break;
>>>>    	case F2FS_GOING_DOWN_METASYNC:
>>>> @@ -2243,16 +2241,13 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
>>>>    		if (ret)
>>>>    			goto out;
>>>>    		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>    		break;
>>>>    	case F2FS_GOING_DOWN_NOSYNC:
>>>>    		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>    		break;
>>>>    	case F2FS_GOING_DOWN_METAFLUSH:
>>>>    		f2fs_sync_meta_pages(sbi, META, LONG_MAX, FS_META_IO);
>>>>    		f2fs_stop_checkpoint(sbi, false, STOP_CP_REASON_SHUTDOWN);
>>>> -		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>>    		break;
>>>>    	case F2FS_GOING_DOWN_NEED_FSCK:
>>>>    		set_sbi_flag(sbi, SBI_NEED_FSCK);
>>>> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
>>>> index f1d0dd9c5a6c..6dc2bc710ba8 100644
>>>> --- a/fs/f2fs/gc.c
>>>> +++ b/fs/f2fs/gc.c
>>>> @@ -59,7 +59,7 @@ static int gc_thread_func(void *data)
>>>>    		if (gc_th->gc_wake)
>>>>    			gc_th->gc_wake = false;
>>>> -		if (try_to_freeze()) {
>>>> +		if (try_to_freeze() || f2fs_readonly(sbi->sb)) {
>>>>    			stat_other_skip_bggc_count(sbi);
>>>>    			continue;
>>>>    		}
>>>> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
>>>> index 23574894e170..1573bf123197 100644
>>>> --- a/fs/f2fs/super.c
>>>> +++ b/fs/f2fs/super.c
>>>> @@ -164,6 +164,7 @@ enum {
>>>>    	Opt_discard_unit,
>>>>    	Opt_memory_mode,
>>>>    	Opt_age_extent_cache,
>>>> +	Opt_errors,
>>>>    	Opt_err,
>>>>    };
>>>> @@ -243,6 +244,7 @@ static match_table_t f2fs_tokens = {
>>>>    	{Opt_discard_unit, "discard_unit=%s"},
>>>>    	{Opt_memory_mode, "memory=%s"},
>>>>    	{Opt_age_extent_cache, "age_extent_cache"},
>>>> +	{Opt_errors, "errors=%s"},
>>>>    	{Opt_err, NULL},
>>>>    };
>>>> @@ -1268,6 +1270,25 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
>>>>    		case Opt_age_extent_cache:
>>>>    			set_opt(sbi, AGE_EXTENT_CACHE);
>>>>    			break;
>>>> +		case Opt_errors:
>>>> +			name = match_strdup(&args[0]);
>>>> +			if (!name)
>>>> +				return -ENOMEM;
>>>> +			if (!strcmp(name, "remount-ro")) {
>>>> +				F2FS_OPTION(sbi).errors =
>>>> +						MOUNT_ERRORS_READONLY;
>>>> +			} else if (!strcmp(name, "continue")) {
>>>> +				F2FS_OPTION(sbi).errors =
>>>> +						MOUNT_ERRORS_CONTINUE;
>>>> +			} else if (!strcmp(name, "panic")) {
>>>> +				F2FS_OPTION(sbi).errors =
>>>> +						MOUNT_ERRORS_PANIC;
>>>> +			} else {
>>>> +				kfree(name);
>>>> +				return -EINVAL;
>>>> +			}
>>>> +			kfree(name);
>>>> +			break;
>>>>    		default:
>>>>    			f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
>>>>    				 p);
>>>> @@ -1623,6 +1644,9 @@ static void f2fs_put_super(struct super_block *sb)
>>>>    	f2fs_destroy_node_manager(sbi);
>>>>    	f2fs_destroy_segment_manager(sbi);
>>>> +	/* flush s_error_work before sbi destroy */
>>>> +	flush_work(&sbi->s_error_work);
>>>> +
>>>>    	f2fs_destroy_post_read_wq(sbi);
>>>>    	kvfree(sbi->ckpt);
>>>> @@ -2053,6 +2077,13 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
>>>>    	else if (F2FS_OPTION(sbi).memory_mode == MEMORY_MODE_LOW)
>>>>    		seq_printf(seq, ",memory=%s", "low");
>>>> +	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
>>>> +		seq_printf(seq, ",errors=%s", "remount-ro");
>>>> +	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE)
>>>> +		seq_printf(seq, ",errors=%s", "continue");
>>>> +	else if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC)
>>>> +		seq_printf(seq, ",errors=%s", "panic");
>>>> +
>>>>    	return 0;
>>>>    }
>>>> @@ -2081,6 +2112,7 @@ static void default_options(struct f2fs_sb_info *sbi)
>>>>    	}
>>>>    	F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
>>>>    	F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL;
>>>> +	F2FS_OPTION(sbi).errors = MOUNT_ERRORS_READONLY;
>>>
>>> Was the default MOUNT_ERRORS_CONTINUE?
>>
>> Okay, let me revise in v9.
>>
>> Please note that main difference between continue mode and readonly mode once
>> cp_error is set:
>>
>> 		continue mode		readonly mode
>> access ops:	Normal			Normal
>> modify ops:	-EIO			-EROFS
>> mount option	rw			ro
> 
> Could you please add the difference of all the modes in patch description?
> 
> I think it'd be good to add:
> 1. access ops
> 2. syscall errors
> 3. mount option
> 4. pending data writes
> 5. pending node writes
> 6. pending meta writes

Sure, I'd like to add below description into f2fs.rst, any comments?

			 			continue	remount-ro	panic
			 access ops		normal		noraml		N/A
			 syscall errors		-EIO		-EROFS		N/A
			 mount option		rw		ro		N/A
			 pending dir write	keep		keep		N/A
			 pending nondir write	drop		drop		N/A
			 pending node write	drop		drop		N/A
			 pending meta write	keep		keep		N/A

Thanks,

> 
> 
>>
>> Thanks,
>>
>>>
>>>>    	sbi->sb->s_flags &= ~SB_INLINECRYPT;
>>>> @@ -2282,6 +2314,9 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
>>>>    	if (err)
>>>>    		goto restore_opts;
>>>> +	/* flush outstanding errors before changing fs state */
>>>> +	flush_work(&sbi->s_error_work);
>>>> +
>>>>    	/*
>>>>    	 * Previous and new state of filesystem is RO,
>>>>    	 * so skip checking GC and FLUSH_MERGE conditions.
>>>> @@ -3931,45 +3966,60 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
>>>>    	return err;
>>>>    }
>>>> -void f2fs_handle_stop(struct f2fs_sb_info *sbi, unsigned char reason)
>>>> +static void save_stop_reason(struct f2fs_sb_info *sbi, unsigned char reason)
>>>> +{
>>>> +	unsigned long flags;
>>>> +
>>>> +	spin_lock_irqsave(&sbi->error_lock, flags);
>>>> +	if (sbi->stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
>>>> +		sbi->stop_reason[reason]++;
>>>> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>>>> +}
>>>> +
>>>> +static void f2fs_record_stop_reason(struct f2fs_sb_info *sbi)
>>>>    {
>>>>    	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
>>>> +	unsigned long flags;
>>>>    	int err;
>>>>    	f2fs_down_write(&sbi->sb_lock);
>>>> -	if (raw_super->s_stop_reason[reason] < GENMASK(BITS_PER_BYTE - 1, 0))
>>>> -		raw_super->s_stop_reason[reason]++;
>>>> +	spin_lock_irqsave(&sbi->error_lock, flags);
>>>> +	memcpy(raw_super->s_stop_reason, sbi->stop_reason, MAX_STOP_REASON);
>>>> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>>>>    	err = f2fs_commit_super(sbi, false);
>>>> -	if (err)
>>>> -		f2fs_err(sbi, "f2fs_commit_super fails to record reason:%u err:%d",
>>>> -								reason, err);
>>>> +
>>>>    	f2fs_up_write(&sbi->sb_lock);
>>>> +	if (err)
>>>> +		f2fs_err(sbi, "f2fs_commit_super fails to record err:%d", err);
>>>>    }
>>>>    void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag)
>>>>    {
>>>> -	spin_lock(&sbi->error_lock);
>>>> +	unsigned long flags;
>>>> +
>>>> +	spin_lock_irqsave(&sbi->error_lock, flags);
>>>>    	if (!test_bit(flag, (unsigned long *)sbi->errors)) {
>>>>    		set_bit(flag, (unsigned long *)sbi->errors);
>>>>    		sbi->error_dirty = true;
>>>>    	}
>>>> -	spin_unlock(&sbi->error_lock);
>>>> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>>>>    }
>>>>    static bool f2fs_update_errors(struct f2fs_sb_info *sbi)
>>>>    {
>>>> +	unsigned long flags;
>>>>    	bool need_update = false;
>>>> -	spin_lock(&sbi->error_lock);
>>>> +	spin_lock_irqsave(&sbi->error_lock, flags);
>>>>    	if (sbi->error_dirty) {
>>>>    		memcpy(F2FS_RAW_SUPER(sbi)->s_errors, sbi->errors,
>>>>    							MAX_F2FS_ERRORS);
>>>>    		sbi->error_dirty = false;
>>>>    		need_update = true;
>>>>    	}
>>>> -	spin_unlock(&sbi->error_lock);
>>>> +	spin_unlock_irqrestore(&sbi->error_lock, flags);
>>>>    	return need_update;
>>>>    }
>>>> @@ -3993,6 +4043,66 @@ void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error)
>>>>    	f2fs_up_write(&sbi->sb_lock);
>>>>    }
>>>> +static bool system_going_down(void)
>>>> +{
>>>> +	return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
>>>> +		|| system_state == SYSTEM_RESTART;
>>>> +}
>>>> +
>>>> +void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason,
>>>> +							bool irq_context)
>>>> +{
>>>> +	struct super_block *sb = sbi->sb;
>>>> +	bool shutdown = reason == STOP_CP_REASON_SHUTDOWN;
>>>> +	bool continue_fs = !shutdown &&
>>>> +			F2FS_OPTION(sbi).errors == MOUNT_ERRORS_CONTINUE;
>>>> +
>>>> +	set_ckpt_flags(sbi, CP_ERROR_FLAG);
>>>> +
>>>> +	if (!f2fs_hw_is_readonly(sbi)) {
>>>> +		save_stop_reason(sbi, reason);
>>>> +
>>>> +		if (irq_context && !shutdown)
>>>> +			schedule_work(&sbi->s_error_work);
>>>> +		else
>>>> +			f2fs_record_stop_reason(sbi);
>>>> +	}
>>>> +
>>>> +	/*
>>>> +	 * We force ERRORS_RO behavior when system is rebooting. Otherwise we
>>>> +	 * could panic during 'reboot -f' as the underlying device got already
>>>> +	 * disabled.
>>>> +	 */
>>>> +	if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_PANIC &&
>>>> +				!shutdown && !system_going_down() &&
>>>> +				!is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
>>>> +		panic("F2FS-fs (device %s): panic forced after error\n",
>>>> +							sb->s_id);
>>>> +
>>>> +	if (shutdown)
>>>> +		set_sbi_flag(sbi, SBI_IS_SHUTDOWN);
>>>> +
>>>> +	/* continue filesystem operators if errors=continue */
>>>> +	if (continue_fs || f2fs_readonly(sb))
>>>> +		return;
>>>> +
>>>> +	f2fs_warn(sbi, "Remounting filesystem read-only");
>>>> +	/*
>>>> +	 * Make sure updated value of ->s_mount_flags will be visible before
>>>> +	 * ->s_flags update
>>>> +	 */
>>>> +	smp_wmb();
>>>> +	sb->s_flags |= SB_RDONLY;
>>>> +}
>>>> +
>>>> +static void f2fs_record_error_work(struct work_struct *work)
>>>> +{
>>>> +	struct f2fs_sb_info *sbi = container_of(work,
>>>> +					struct f2fs_sb_info, s_error_work);
>>>> +
>>>> +	f2fs_record_stop_reason(sbi);
>>>> +}
>>>> +
>>>>    static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
>>>>    {
>>>>    	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
>>>> @@ -4223,7 +4333,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>    	sb->s_fs_info = sbi;
>>>>    	sbi->raw_super = raw_super;
>>>> +	INIT_WORK(&sbi->s_error_work, f2fs_record_error_work);
>>>>    	memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
>>>> +	memcpy(sbi->stop_reason, raw_super->s_stop_reason, MAX_STOP_REASON);
>>>>    	/* precompute checksum seed for metadata */
>>>>    	if (f2fs_sb_has_inode_chksum(sbi))
>>>> @@ -4620,6 +4732,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
>>>>    	f2fs_destroy_segment_manager(sbi);
>>>>    stop_ckpt_thread:
>>>>    	f2fs_stop_ckpt_thread(sbi);
>>>> +	/* flush s_error_work before sbi destroy */
>>>> +	flush_work(&sbi->s_error_work);
>>>>    	f2fs_destroy_post_read_wq(sbi);
>>>>    free_devices:
>>>>    	destroy_device_list(sbi);
>>>> -- 
>>>> 2.36.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ