Message-ID: <20190125091057.GK17749@hirez.programming.kicks-ass.net>
Date: Fri, 25 Jan 2019 10:10:57 +0100
From: Peter Zijlstra <peterz@...radead.org>
To: Alexei Starovoitov <alexei.starovoitov@...il.com>
Cc: Alexei Starovoitov <ast@...nel.org>, davem@...emloft.net,
daniel@...earbox.net, jakub.kicinski@...ronome.com,
netdev@...r.kernel.org, kernel-team@...com, mingo@...hat.com,
will.deacon@....com, Paul McKenney <paulmck@...ux.vnet.ibm.com>,
jannh@...gle.com
Subject: Re: [PATCH v4 bpf-next 1/9] bpf: introduce bpf_spin_lock
On Thu, Jan 24, 2019 at 03:58:59PM -0800, Alexei Starovoitov wrote:
> On Thu, Jan 24, 2019 at 07:01:09PM +0100, Peter Zijlstra wrote:
> >
> > Thanks for having kernel/locking people on Cc...
> >
> > On Wed, Jan 23, 2019 at 08:13:55PM -0800, Alexei Starovoitov wrote:
> >
> > > Implementation details:
> > > - on !SMP bpf_spin_lock() becomes nop
> >
> > Because no BPF program is preemptible? I don't see any assertions or
> > even a comment that says this code is non-preemptible.
> >
> > AFAICT some of the BPF_PROG_RUN things are under rcu_read_lock() only,
> > which is not sufficient.
>
> nope. all bpf prog types disable preemption. That is a must-have for all
> sorts of things to work properly.
> If there is a prog type that is doing rcu_read_lock only, it's a serious bug.
> About a year or so ago we audited everything specifically to make
> sure everything disables preemption before calling bpf progs.
> I'm pretty sure nothing crept in in the meantime.
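OK; so every call site is assumed to wrap the program invocation
roughly like this (illustrative sketch only, not lifted from any
actual call site; run_bpf_prog is a made-up name):

static unsigned int run_bpf_prog(struct bpf_prog *prog, void *ctx)
{
	unsigned int ret;

	/* BPF programs must run with preemption disabled */
	preempt_disable();
	rcu_read_lock();
	ret = BPF_PROG_RUN(prog, ctx);
	rcu_read_unlock();
	preempt_enable();

	return ret;
}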
Do we want something like the (completely untested) patch below to avoid
having to manually audit this over and over?
---
include/linux/filter.h | 2 +-
include/linux/kernel.h | 9 +++++++--
kernel/sched/core.c | 28 ++++++++++++++++++++++++++++
3 files changed, 36 insertions(+), 3 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index d531d4250bff..4ab51e78da36 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -513,7 +513,7 @@ struct sk_filter {
struct bpf_prog *prog;
};
-#define BPF_PROG_RUN(filter, ctx) (*(filter)->bpf_func)(ctx, (filter)->insnsi)
+#define BPF_PROG_RUN(filter, ctx) ({ cant_sleep(); (*(filter)->bpf_func)(ctx, (filter)->insnsi); })
#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 8f0e68e250a7..f4cea3260a28 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -245,8 +245,10 @@ extern int _cond_resched(void);
#endif
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
- void ___might_sleep(const char *file, int line, int preempt_offset);
- void __might_sleep(const char *file, int line, int preempt_offset);
+extern void ___might_sleep(const char *file, int line, int preempt_offset);
+extern void __might_sleep(const char *file, int line, int preempt_offset);
+extern void __cant_sleep(const char *file, int line, int preempt_offset);
+
/**
* might_sleep - annotation for functions that can sleep
*
@@ -259,6 +261,8 @@ extern int _cond_resched(void);
*/
# define might_sleep() \
do { __might_sleep(__FILE__, __LINE__, 0); might_resched(); } while (0)
+# define cant_sleep() \
+ do { __cant_sleep(__FILE__, __LINE__, 0); } while (0)
# define sched_annotate_sleep() (current->task_state_change = 0)
#else
static inline void ___might_sleep(const char *file, int line,
@@ -266,6 +270,7 @@ extern int _cond_resched(void);
static inline void __might_sleep(const char *file, int line,
int preempt_offset) { }
# define might_sleep() do { might_resched(); } while (0)
+# define cant_sleep() do { } while (0)
# define sched_annotate_sleep() do { } while (0)
#endif
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ee7763641348..799c285f4e0f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6162,6 +6162,34 @@ void ___might_sleep(const char *file, int line, int preempt_offset)
add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
}
EXPORT_SYMBOL(___might_sleep);
+
+void __cant_sleep(const char *file, int line, int preempt_offset)
+{
+ static unsigned long prev_jiffy;
+
+ if (irqs_disabled())
+ return;
+
+ if (!IS_ENABLED(CONFIG_PREEMPT_COUNT))
+ return;
+
+ if (preempt_count() > preempt_offset)
+ return;
+
+ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy)
+ return;
+ prev_jiffy = jiffies;
+
+ printk(KERN_ERR "BUG: assuming atomic context at %s:%d\n", file, line);
+ printk(KERN_ERR "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n",
+ in_atomic(), irqs_disabled(),
+ current->pid, current->comm);
+
+ debug_show_held_locks(current);
+ dump_stack();
+ add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
+}
+EXPORT_SYMBOL_GPL(__cant_sleep);
#endif
#ifdef CONFIG_MAGIC_SYSRQ
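The intended usage (mirroring might_sleep(); the function below is just
a made-up example) would be for code that must only ever run
non-preemptible to annotate itself like so:

static void must_not_preempt(void)
{
	/*
	 * With CONFIG_DEBUG_ATOMIC_SLEEP=y and CONFIG_PREEMPT_COUNT=y
	 * this prints a rate-limited (once a second) "BUG: assuming
	 * atomic context" splat when entered with preemption and IRQs
	 * enabled.
	 */
	cant_sleep();

	/* ... non-sleeping, non-preemptible work ... */
}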