[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CACT4Y+Z+HtUttrd+btEWLj5Nut4Gv++gzCOL3aDjvRTNtMDEvg@mail.gmail.com>
Date: Wed, 18 May 2022 10:56:17 +0200
From: Dmitry Vyukov <dvyukov@...gle.com>
To: Congyu Liu <liu3101@...due.edu>
Cc: andreyknvl@...il.com, kasan-dev@...glegroups.com,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH] kcov: fix race caused by unblocked interrupt
On Tue, 17 May 2022 at 23:05, Congyu Liu <liu3101@...due.edu> wrote:
>
> Some code that runs in interrupts cannot be blocked by the `in_task()` check.
> In some unfortunate interleavings, such interrupt is raised during
> serializing trace data and the incoming nested trace function could
> lead to loss of previous trace data. For instance, in
> `__sanitizer_cov_trace_pc`, if such interrupt is raised between
> `area[pos] = ip;` and `WRITE_ONCE(area[0], pos);`, then trace data in
> `area[pos]` could be replaced.
>
> The fix is done by adding a flag indicating if the trace buffer is being
> updated. No modification to trace buffer is allowed when the flag is set.
Hi Congyu,
What is that interrupt code? What interrupt PCs do you see in the trace?
I would assume such early interrupt code should be in asm and/or not
instrumented. The presence of instrumented traced interrupt code is
problematic for other reasons (add random stray coverage to the
trace). So if we make it not traced, it would resolve both problems at
once and without the fast path overhead that this change adds.
> Signed-off-by: Congyu Liu <liu3101@...due.edu>
> ---
> include/linux/sched.h | 3 +++
> kernel/kcov.c | 16 ++++++++++++++++
> 2 files changed, 19 insertions(+)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index a8911b1f35aa..d06cedd9595f 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1408,6 +1408,9 @@ struct task_struct {
>
> /* Collect coverage from softirq context: */
> unsigned int kcov_softirq;
> +
> + /* Flag of if KCOV area is being written: */
> + bool kcov_writing;
> #endif
>
> #ifdef CONFIG_MEMCG
> diff --git a/kernel/kcov.c b/kernel/kcov.c
> index b3732b210593..a595a8ad5d8a 100644
> --- a/kernel/kcov.c
> +++ b/kernel/kcov.c
> @@ -165,6 +165,8 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru
> */
> if (!in_task() && !(in_serving_softirq() && t->kcov_softirq))
> return false;
> + if (READ_ONCE(t->kcov_writing))
> + return false;
> mode = READ_ONCE(t->kcov_mode);
> /*
> * There is some code that runs in interrupts but for which
> @@ -201,12 +203,19 @@ void notrace __sanitizer_cov_trace_pc(void)
> return;
>
> area = t->kcov_area;
> +
> + /* Prevent race from unblocked interrupt. */
> + WRITE_ONCE(t->kcov_writing, true);
> + barrier();
> +
> /* The first 64-bit word is the number of subsequent PCs. */
> pos = READ_ONCE(area[0]) + 1;
> if (likely(pos < t->kcov_size)) {
> area[pos] = ip;
> WRITE_ONCE(area[0], pos);
> }
> + barrier();
> + WRITE_ONCE(t->kcov_writing, false);
> }
> EXPORT_SYMBOL(__sanitizer_cov_trace_pc);
>
> @@ -230,6 +239,10 @@ static void notrace write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip)
> area = (u64 *)t->kcov_area;
> max_pos = t->kcov_size * sizeof(unsigned long);
>
> + /* Prevent race from unblocked interrupt. */
> + WRITE_ONCE(t->kcov_writing, true);
> + barrier();
> +
> count = READ_ONCE(area[0]);
>
> /* Every record is KCOV_WORDS_PER_CMP 64-bit words. */
> @@ -242,6 +255,8 @@ static void notrace write_comp_data(u64 type, u64 arg1, u64 arg2, u64 ip)
> area[start_index + 3] = ip;
> WRITE_ONCE(area[0], count + 1);
> }
> + barrier();
> + WRITE_ONCE(t->kcov_writing, false);
> }
>
> void notrace __sanitizer_cov_trace_cmp1(u8 arg1, u8 arg2)
> @@ -335,6 +350,7 @@ static void kcov_start(struct task_struct *t, struct kcov *kcov,
> t->kcov_size = size;
> t->kcov_area = area;
> t->kcov_sequence = sequence;
> + t->kcov_writing = false;
> /* See comment in check_kcov_mode(). */
> barrier();
> WRITE_ONCE(t->kcov_mode, mode);
> --
> 2.34.1
>
Powered by blists - more mailing lists