[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <D73FB067-2AC0-4175-AFF1-A9C1365EBBCD@gmail.com>
Date: Sat, 1 Feb 2025 02:12:37 +0800
From: Alan Huang <mmpgouride@...il.com>
To: Jeongjun Park <aha310510@...il.com>
Cc: Kent Overstreet <kent.overstreet@...ux.dev>,
linux-bcachefs@...r.kernel.org,
LKML <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v2] bcachefs: fix deadlock in journal_entry_open()
On Jan 31, 2025, at 20:47, Jeongjun Park <aha310510@...il.com> wrote:
>
> Commit b3d82c2f2761 added code to prevent journal sequence overflow. As part
> of that change, the check added to journal_entry_open() uses the
> bch2_fs_fatal_err_on() function to handle errors.
At the very least, bch2_journal_halt() could be implemented by calling bch2_journal_halt_locked().
Is it possible to release the lock before calling bch2_fs_fatal_err_on()? I'm not familiar with the journal code yet.
>
> However, __journal_res_get() calls journal_entry_open() while holding
> journal->lock, and bch2_fs_fatal_err_on() internally tries to acquire
> journal->lock, which results in a deadlock.
>
> Therefore, we need to add bch2_fs_fatal_err_on_locked() to handle fatal errors
> even when journal->lock is held.
>
> Fixes: b3d82c2f2761 ("bcachefs: Guard against journal seq overflow")
> Signed-off-by: Jeongjun Park <aha310510@...il.com>
> ---
> fs/bcachefs/error.c | 6 ++++++
> fs/bcachefs/error.h | 16 ++++++++++++++++
> fs/bcachefs/journal.c | 10 +++++++++-
> fs/bcachefs/journal.h | 1 +
> fs/bcachefs/super.c | 11 +++++++++++
> fs/bcachefs/super.h | 1 +
> 6 files changed, 44 insertions(+), 1 deletion(-)
>
> diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c
> index 038da6a61f6b..25f51dec732d 100644
> --- a/fs/bcachefs/error.c
> +++ b/fs/bcachefs/error.c
> @@ -50,6 +50,12 @@ void bch2_fatal_error(struct bch_fs *c)
> bch_err(c, "fatal error - emergency read only");
> }
>
> +void bch2_fatal_error_locked(struct bch_fs *c)
> +{
> + if (bch2_fs_emergency_read_only_locked(c))
> + bch_err(c, "fatal error - emergency read only");
> +}
> +
> void bch2_io_error_work(struct work_struct *work)
> {
> struct bch_dev *ca = container_of(work, struct bch_dev, io_error_work);
> diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
> index 7acf2a27ca28..760623c07e67 100644
> --- a/fs/bcachefs/error.h
> +++ b/fs/bcachefs/error.h
> @@ -189,6 +189,7 @@ do { \
> */
>
> void bch2_fatal_error(struct bch_fs *);
> +void bch2_fatal_error_locked(struct bch_fs *);
>
> #define bch2_fs_fatal_error(c, _msg, ...) \
> do { \
> @@ -205,6 +206,21 @@ do { \
> _ret; \
> })
>
> +#define bch2_fs_fatal_error_locked(c, _msg, ...) \
> +do { \
> + bch_err(c, "%s(): fatal error " _msg, __func__, ##__VA_ARGS__); \
> + bch2_fatal_error_locked(c); \
> +} while (0)
> +
> +#define bch2_fs_fatal_err_on_locked(cond, c, ...) \
> +({ \
> + bool _ret = unlikely(!!(cond)); \
> + \
> + if (_ret) \
> + bch2_fs_fatal_error_locked(c, __VA_ARGS__); \
> + _ret; \
> +})
> +
> /*
> * IO errors: either recoverable metadata IO (because we have replicas), or data
> * IO - we need to log it and print out a message, but we don't (necessarily)
> diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
> index 2cd20114b74b..12e3b4024494 100644
> --- a/fs/bcachefs/journal.c
> +++ b/fs/bcachefs/journal.c
> @@ -320,6 +320,14 @@ void bch2_journal_halt(struct journal *j)
> spin_unlock(&j->lock);
> }
>
> +void bch2_journal_halt_locked(struct journal *j)
> +{
> + __journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL, true);
> + if (!j->err_seq)
> + j->err_seq = journal_cur_seq(j);
> + journal_wake(j);
> +}
> +
> static bool journal_entry_want_write(struct journal *j)
> {
> bool ret = !journal_entry_is_open(j) ||
> @@ -382,7 +390,7 @@ static int journal_entry_open(struct journal *j)
> if (nr_unwritten_journal_entries(j) == ARRAY_SIZE(j->buf))
> return JOURNAL_ERR_max_in_flight;
>
> - if (bch2_fs_fatal_err_on(journal_cur_seq(j) >= JOURNAL_SEQ_MAX,
> + if (bch2_fs_fatal_err_on_locked(journal_cur_seq(j) >= JOURNAL_SEQ_MAX,
> c, "cannot start: journal seq overflow"))
> return JOURNAL_ERR_insufficient_devices; /* -EROFS */
>
> diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
> index cb0df0663946..416fbed447de 100644
> --- a/fs/bcachefs/journal.h
> +++ b/fs/bcachefs/journal.h
> @@ -408,6 +408,7 @@ bool bch2_journal_noflush_seq(struct journal *, u64, u64);
> int bch2_journal_meta(struct journal *);
>
> void bch2_journal_halt(struct journal *);
> +void bch2_journal_halt_locked(struct journal *);
>
> static inline int bch2_journal_error(struct journal *j)
> {
> diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
> index d97ea7bd1171..6d97d412fed9 100644
> --- a/fs/bcachefs/super.c
> +++ b/fs/bcachefs/super.c
> @@ -411,6 +411,17 @@ bool bch2_fs_emergency_read_only(struct bch_fs *c)
> return ret;
> }
>
> +bool bch2_fs_emergency_read_only_locked(struct bch_fs *c)
> +{
> + bool ret = !test_and_set_bit(BCH_FS_emergency_ro, &c->flags);
> +
> + bch2_journal_halt_locked(&c->journal);
> + bch2_fs_read_only_async(c);
> +
> + wake_up(&bch2_read_only_wait);
> + return ret;
> +}
> +
> static int bch2_fs_read_write_late(struct bch_fs *c)
> {
> int ret;
> diff --git a/fs/bcachefs/super.h b/fs/bcachefs/super.h
> index fa6d52216510..04f8287eff5c 100644
> --- a/fs/bcachefs/super.h
> +++ b/fs/bcachefs/super.h
> @@ -29,6 +29,7 @@ int bch2_dev_resize(struct bch_fs *, struct bch_dev *, u64);
> struct bch_dev *bch2_dev_lookup(struct bch_fs *, const char *);
>
> bool bch2_fs_emergency_read_only(struct bch_fs *);
> +bool bch2_fs_emergency_read_only_locked(struct bch_fs *);
> void bch2_fs_read_only(struct bch_fs *);
>
> int bch2_fs_read_write(struct bch_fs *);
> --
>
Powered by blists - more mailing lists