lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 13 Jun 2017 14:52:27 -0400
From:   Steven Rostedt <rostedt@...dmis.org>
To:     Joel Fernandes <joelaf@...gle.com>
Cc:     linux-kernel@...r.kernel.org, mikesart@...il.com,
        kernel-team@...roid.com, Ingo Molnar <mingo@...hat.com>
Subject: Re: [PATCH RFC v3 2/4] tracing: Add support for recording tgid of
 tasks

On Thu,  8 Jun 2017 19:53:25 -0700
Joel Fernandes <joelaf@...gle.com> wrote:

> Inorder to support recording of tgid, the following changes are made:
> 
> * Introduce a new API (tracing_record_taskinfo) to additionally record the tgid
>   along with the task's comm at the same time. The API is flexible to
>   optionally also record for a group of tasks which is useful in recording the
>   info of multiple tasks (such as when recording previous and next task in a
>   sched_switch). Along with simplifying the API and the callers, this approach
>   also has the benefit of not setting trace_cmdline_save before information for
>   multiple tasks are saved.

Hmm, I'm not so sure I like the above.

> * We preserve the old API (tracing_record_cmdline) and create it as a wrapper
>   around the new one. Along with this we add a tracing_record_tgid function.
> * Reuse the existing sched_switch and sched_wakeup probes Add a new option
>   'record-tgid' to enable recording of tgid
> 
> When record-tgid option isn't enabled to begin with, we take care to make sure
> that there's isn't memory or runtime overhead.
> 
> Cc: kernel-team@...roid.com
> Cc: Steven Rostedt <rostedt@...dmis.org>
> Cc: Ingo Molnar <mingo@...hat.com>
> Tested-by: Michael Sartain <mikesart@...il.com>
> Signed-off-by: Joel Fernandes <joelaf@...gle.com>
> ---
>  include/linux/trace_events.h      |  12 ++++-
>  kernel/trace/trace.c              | 102 ++++++++++++++++++++++++++++++++++----
>  kernel/trace/trace.h              |   8 +++
>  kernel/trace/trace_events.c       |  42 +++++++++++++++-
>  kernel/trace/trace_sched_switch.c |  79 +++++++++++++++++++++++------
>  5 files changed, 217 insertions(+), 26 deletions(-)
> 
> diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
> index a556805eff8a..6c9e84b1f8ae 100644
> --- a/include/linux/trace_events.h
> +++ b/include/linux/trace_events.h
> @@ -151,7 +151,14 @@ trace_event_buffer_lock_reserve(struct ring_buffer **current_buffer,
>  				int type, unsigned long len,
>  				unsigned long flags, int pc);
>  
> -void tracing_record_cmdline(struct task_struct *tsk);
> +#define TRACE_RECORD_CMDLINE	BIT(0)
> +#define TRACE_RECORD_TGID	BIT(1)
> +
> +void tracing_record_taskinfo(struct task_struct **tasks, int len, int flags);
> +void tracing_record_taskinfo_single(struct task_struct *task, int flags);
> +
> +void tracing_record_cmdline(struct task_struct *task);
> +void tracing_record_tgid(struct task_struct *task);
>  
>  int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);
>  
> @@ -290,6 +297,7 @@ struct trace_subsystem_dir;
>  enum {
>  	EVENT_FILE_FL_ENABLED_BIT,
>  	EVENT_FILE_FL_RECORDED_CMD_BIT,
> +	EVENT_FILE_FL_RECORDED_TGID_BIT,
>  	EVENT_FILE_FL_FILTERED_BIT,
>  	EVENT_FILE_FL_NO_SET_FILTER_BIT,
>  	EVENT_FILE_FL_SOFT_MODE_BIT,
> @@ -303,6 +311,7 @@ enum {
>   * Event file flags:
>   *  ENABLED	  - The event is enabled
>   *  RECORDED_CMD  - The comms should be recorded at sched_switch
> + *  RECORDED_TGID - The tgids should be recorded at sched_switch
>   *  FILTERED	  - The event has a filter attached
>   *  NO_SET_FILTER - Set when filter has error and is to be ignored
>   *  SOFT_MODE     - The event is enabled/disabled by SOFT_DISABLED
> @@ -315,6 +324,7 @@ enum {
>  enum {
>  	EVENT_FILE_FL_ENABLED		= (1 << EVENT_FILE_FL_ENABLED_BIT),
>  	EVENT_FILE_FL_RECORDED_CMD	= (1 << EVENT_FILE_FL_RECORDED_CMD_BIT),
> +	EVENT_FILE_FL_RECORDED_TGID	= (1 << EVENT_FILE_FL_RECORDED_TGID_BIT),
>  	EVENT_FILE_FL_FILTERED		= (1 << EVENT_FILE_FL_FILTERED_BIT),
>  	EVENT_FILE_FL_NO_SET_FILTER	= (1 << EVENT_FILE_FL_NO_SET_FILTER_BIT),
>  	EVENT_FILE_FL_SOFT_MODE		= (1 << EVENT_FILE_FL_SOFT_MODE_BIT),
> diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
> index 63deff9cdf2c..185a09f8b67e 100644
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -87,7 +87,7 @@ dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
>   * tracing is active, only save the comm when a trace event
>   * occurred.
>   */
> -static DEFINE_PER_CPU(bool, trace_cmdline_save);
> +static DEFINE_PER_CPU(bool, trace_taskinfo_save);
>  
>  /*
>   * Kill all tracing for good (never come back).
> @@ -790,7 +790,7 @@ EXPORT_SYMBOL_GPL(tracing_on);
>  static __always_inline void
>  __buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event)
>  {
> -	__this_cpu_write(trace_cmdline_save, true);
> +	__this_cpu_write(trace_taskinfo_save, true);
>  
>  	/* If this is the temp buffer, we need to commit fully */
>  	if (this_cpu_read(trace_buffered_event) == event) {
> @@ -1709,6 +1709,18 @@ void tracing_reset_all_online_cpus(void)
>  	}
>  }
>  
> +static int *tgid_map;
> +
> +void tracing_alloc_tgid_map(void)
> +{
> +	if (tgid_map)
> +		return;
> +
> +	tgid_map = kzalloc((PID_MAX_DEFAULT + 1) * sizeof(*tgid_map),
> +			   GFP_KERNEL);
> +	WARN_ONCE(!tgid_map, "Allocation of tgid_map failed\n");
> +}
> +
>  #define SAVED_CMDLINES_DEFAULT 128
>  #define NO_CMDLINE_MAP UINT_MAX
>  static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
> @@ -1722,7 +1734,7 @@ struct saved_cmdlines_buffer {
>  static struct saved_cmdlines_buffer *savedcmd;
>  
>  /* temporary disable recording */
> -static atomic_t trace_record_cmdline_disabled __read_mostly;
> +static atomic_t trace_record_taskinfo_disabled __read_mostly;
>  
>  static inline char *get_saved_cmdlines(int idx)
>  {
> @@ -1990,16 +2002,83 @@ void trace_find_cmdline(int pid, char comm[])
>  	preempt_enable();
>  }
>  
> -void tracing_record_cmdline(struct task_struct *tsk)
> +int trace_find_tgid(int pid)
> +{
> +	if (!tgid_map || pid <= 0 || pid > PID_MAX_DEFAULT)
> +		return 0;
> +
> +	return tgid_map[pid];
> +}
> +
> +static int trace_save_tgid(struct task_struct *tsk)
>  {
> -	if (atomic_read(&trace_record_cmdline_disabled) || !tracing_is_on())
> +	if (!tgid_map || tsk->pid <= 0 || tsk->pid > PID_MAX_DEFAULT)
> +		return 0;
> +
> +	tgid_map[tsk->pid] = tsk->tgid;
> +	return 1;
> +}
> +
> +/**
> + * tracing_record_taskinfo - record the task information of multiple tasks
> + *
> + * @tasks - list of tasks (array of task_struct pointers)
> + * @len   - length of the list of tasks
> + * @flags - TRACE_RECORD_CMDLINE for recording comm
> + *          TRACE_RECORD_TGID for recording tgid
> + */
> +void tracing_record_taskinfo(struct task_struct **tasks, int len, int flags)
> +{
> +	int i;
> +	bool cmdline, tgid;
> +
> +	tgid = !!(flags & TRACE_RECORD_TGID);
> +	cmdline = !!(flags & TRACE_RECORD_CMDLINE);
> +
> +	if (unlikely(!cmdline && !tgid))
> +		return;

This would be better to move this above the assignment of tgid and
cmdline and just have:

	if (unlikely(!(flags & (TRACE_RECORD_TGID | TRACE_RECORD_CMDLINE))
		return;

Also, no need for the !! in the assignments. They are already marked as
boolean. A simple:

	tgid = flags & TRACE_RECORDC_TGID;

would suffice.


> +
> +	if (atomic_read(&trace_record_taskinfo_disabled) || !tracing_is_on())
>  		return;
>  
> -	if (!__this_cpu_read(trace_cmdline_save))
> +	if (!__this_cpu_read(trace_taskinfo_save))
>  		return;

You can move the assignments after the above too. gcc *should*
optimize it anyway. But I like to have all exit conditions first before
we add other code, which includes assignments.

>  
> -	if (trace_save_cmdline(tsk))
> -		__this_cpu_write(trace_cmdline_save, false);
> +	for (i = 0; i < len; i++) {
> +		if (cmdline && !trace_save_cmdline(tasks[i]))
> +			break;
> +		if (tgid && !trace_save_tgid(tasks[i]))
> +			break;
> +	}

I really do not like this loop in the hot path. I say keep this as a
single code as default, and add a tracing_record_taskinfo_multi(), as
that's the exception (I only see one user compared to 3 users of
single).

You can add a helper function that is static __always_inline that can
be used by both. But please, no loops for the single case. Actually,
since there's only one multi user case, and it's always 2, I would say
nuke the loop for that one and have a:

 tracing_record_taskinfo_sched_switch()

that takes a prev and a next, and updates both.

I don't envision more than 2, and we don't need this to be robust for a
use case that doesn't exist by sacrificing performance.

> +
> +	if (i == len)
> +		__this_cpu_write(trace_taskinfo_save, false);
> +}
> +
> +/**
> + * tracing_record_taskinfo_single - record the task info of single task
> + *
> + * @task  - task to record
> + * @flags - TRACE_RECORD_CMDLINE for recording comm
> + *        - TRACE_RECORD_TGID for recording tgid
> + */
> +void tracing_record_taskinfo_single(struct task_struct *task, int flags)

Like I said above. Nuke this. Have single be the default, and a special
case for sched_switch.

> +{
> +	struct task_struct *tasks[1];
> +
> +	tasks[0] = task;
> +	tracing_record_taskinfo(tasks, 1, flags);
> +}
> +
> +/* Helpers to record a specific task information */
> +void tracing_record_cmdline(struct task_struct *task)
> +{
> +	tracing_record_taskinfo_single(task, TRACE_RECORD_CMDLINE);
> +}
> +
> +void tracing_record_tgid(struct task_struct *task)
> +{
> +	tracing_record_taskinfo_single(task, TRACE_RECORD_TGID);
>  }
>  
>  /*
> @@ -3144,7 +3223,7 @@ static void *s_start(struct seq_file *m, loff_t *pos)
>  #endif
>  
>  	if (!iter->snapshot)
> -		atomic_inc(&trace_record_cmdline_disabled);
> +		atomic_inc(&trace_record_taskinfo_disabled);
>  
>  	if (*pos != iter->pos) {
>  		iter->ent = NULL;
> @@ -3189,7 +3268,7 @@ static void s_stop(struct seq_file *m, void *p)
>  #endif
>  
>  	if (!iter->snapshot)
> -		atomic_dec(&trace_record_cmdline_disabled);
> +		atomic_dec(&trace_record_taskinfo_disabled);
>  
>  	trace_access_unlock(iter->cpu_file);
>  	trace_event_read_unlock();
> @@ -4236,6 +4315,9 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
>  	if (mask == TRACE_ITER_RECORD_CMD)
>  		trace_event_enable_cmd_record(enabled);
>  
> +	if (mask == TRACE_ITER_RECORD_TGID)
> +		trace_event_enable_tgid_record(enabled);
> +
>  	if (mask == TRACE_ITER_EVENT_FORK)
>  		trace_event_follow_fork(tr, enabled);
>  
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 39fd77330aab..ecc963e25266 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -635,8 +635,12 @@ void trace_graph_return(struct ftrace_graph_ret *trace);
>  int trace_graph_entry(struct ftrace_graph_ent *trace);
>  void set_graph_array(struct trace_array *tr);
>  
> +void tracing_alloc_tgid_map(void);
>  void tracing_start_cmdline_record(void);
>  void tracing_stop_cmdline_record(void);
> +void tracing_start_tgid_record(void);
> +void tracing_stop_tgid_record(void);
> +
>  int register_tracer(struct tracer *type);
>  int is_tracing_stopped(void);
>  
> @@ -697,6 +701,7 @@ static inline void __trace_stack(struct trace_array *tr, unsigned long flags,
>  extern u64 ftrace_now(int cpu);
>  
>  extern void trace_find_cmdline(int pid, char comm[]);
> +extern int trace_find_tgid(int pid);
>  extern void trace_event_follow_fork(struct trace_array *tr, bool enable);
>  
>  #ifdef CONFIG_DYNAMIC_FTRACE
> @@ -1107,6 +1112,7 @@ extern int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
>  		C(CONTEXT_INFO,		"context-info"),   /* Print pid/cpu/time */ \
>  		C(LATENCY_FMT,		"latency-format"),	\
>  		C(RECORD_CMD,		"record-cmd"),		\
> +		C(RECORD_TGID,		"record-tgid"),		\
>  		C(OVERWRITE,		"overwrite"),		\
>  		C(STOP_ON_FREE,		"disable_on_free"),	\
>  		C(IRQ_INFO,		"irq-info"),		\
> @@ -1423,6 +1429,8 @@ struct ftrace_event_field *
>  trace_find_event_field(struct trace_event_call *call, char *name);
>  
>  extern void trace_event_enable_cmd_record(bool enable);
> +extern void trace_event_enable_tgid_record(bool enable);
> +
>  extern int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr);
>  extern int event_trace_del_tracer(struct trace_array *tr);
>  
> diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
> index e7973e10398c..240c6df95ea6 100644
> --- a/kernel/trace/trace_events.c
> +++ b/kernel/trace/trace_events.c
> @@ -343,6 +343,28 @@ void trace_event_enable_cmd_record(bool enable)
>  	mutex_unlock(&event_mutex);
>  }
>  
> +void trace_event_enable_tgid_record(bool enable)
> +{
> +	struct trace_event_file *file;
> +	struct trace_array *tr;
> +
> +	mutex_lock(&event_mutex);
> +	do_for_each_event_file(tr, file) {
> +		if (!(file->flags & EVENT_FILE_FL_ENABLED))
> +			continue;
> +
> +		if (enable) {
> +			tracing_start_tgid_record();
> +			set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
> +		} else {
> +			tracing_stop_tgid_record();
> +			clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
> +				  &file->flags);
> +		}
> +	} while_for_each_event_file();
> +	mutex_unlock(&event_mutex);
> +}
> +
>  static int __ftrace_event_enable_disable(struct trace_event_file *file,
>  					 int enable, int soft_disable)
>  {
> @@ -381,6 +403,12 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
>  				tracing_stop_cmdline_record();
>  				clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
>  			}
> +
> +			if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
> +				tracing_stop_tgid_record();
> +				clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
> +			}
> +
>  			call->class->reg(call, TRACE_REG_UNREGISTER, file);
>  		}
>  		/* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */
> @@ -407,18 +435,30 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
>  		}
>  
>  		if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
> +			bool cmd = false, tgid = false;
>  
>  			/* Keep the event disabled, when going to SOFT_MODE. */
>  			if (soft_disable)
>  				set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
>  
>  			if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
> +				cmd = true;
>  				tracing_start_cmdline_record();
>  				set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
>  			}
> +
> +			if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
> +				tgid = true;
> +				tracing_start_tgid_record();
> +				set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
> +			}
> +
>  			ret = call->class->reg(call, TRACE_REG_REGISTER, file);
>  			if (ret) {
> -				tracing_stop_cmdline_record();
> +				if (cmd)
> +					tracing_stop_cmdline_record();
> +				if (tgid)
> +					tracing_stop_tgid_record();
>  				pr_info("event trace: Could not enable event "
>  					"%s\n", trace_event_name(call));
>  				break;
> diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c
> index 4c896a0101bd..d13f3cd4182a 100644
> --- a/kernel/trace/trace_sched_switch.c
> +++ b/kernel/trace/trace_sched_switch.c
> @@ -12,27 +12,41 @@
>  
>  #include "trace.h"
>  
> -static int			sched_ref;
> +#define RECORD_CMDLINE	1
> +#define RECORD_TGID	2
> +
> +static int		sched_cmdline_ref;
> +static int		sched_tgid_ref;
>  static DEFINE_MUTEX(sched_register_mutex);
>  
>  static void
>  probe_sched_switch(void *ignore, bool preempt,
>  		   struct task_struct *prev, struct task_struct *next)
>  {
> -	if (unlikely(!sched_ref))
> -		return;
> +	int flags = 0;
> +	struct task_struct *tasks[2] = { prev, next };
> +
> +	if (sched_cmdline_ref != 0)
> +		flags |= RECORD_CMDLINE;
> +
> +	if (sched_tgid_ref != 0)
> +		flags |= RECORD_TGID;

	flags = (RECORD_CMDLINE * !!(sched_cmdline_ref)) +
	        (RECORD_TGID * !!(sched_tgid_ref));

	if (!flags)
		return;

>  
> -	tracing_record_cmdline(prev);
> -	tracing_record_cmdline(next);
> +	tracing_record_taskinfo(tasks, 2, flags);

	tracing_record_taskinfo_sched_switch(prev, next, flags);

>  }
>  
>  static void
>  probe_sched_wakeup(void *ignore, struct task_struct *wakee)
>  {
> -	if (unlikely(!sched_ref))
> -		return;
> +	int flags = 0;
> +
> +	if (sched_cmdline_ref != 0)
> +		flags |= RECORD_CMDLINE;
> +
> +	if (sched_tgid_ref != 0)
> +		flags |= RECORD_TGID;

Same thing here.

>  
> -	tracing_record_cmdline(current);
> +	tracing_record_taskinfo_single(current, flags);
>  }
>  
>  static int tracing_sched_register(void)
> @@ -75,28 +89,65 @@ static void tracing_sched_unregister(void)
>  	unregister_trace_sched_wakeup(probe_sched_wakeup, NULL);
>  }
>  
> -static void tracing_start_sched_switch(void)
> +static void tracing_start_sched_switch(int ops)
>  {
> +	bool sched_register;
>  	mutex_lock(&sched_register_mutex);
> -	if (!(sched_ref++))
> +	sched_register = (!sched_cmdline_ref && !sched_tgid_ref);
> +
> +	switch (ops) {
> +	case RECORD_CMDLINE:
> +		sched_cmdline_ref++;
> +		break;
> +
> +	case RECORD_TGID:
> +		sched_tgid_ref++;
> +		break;
> +	}
> +
> +	if (sched_tgid_ref == 1)
> +		tracing_alloc_tgid_map();

Since tgid_map is never freed, just do:

	if (!tgid_map)
		tracing_alloc_tgid_map();

Then if it fails to allocate this time, there's a chance that it will
allocate another time.

-- Steve

> +
> +	if (sched_register && (sched_cmdline_ref || sched_tgid_ref))
>  		tracing_sched_register();
>  	mutex_unlock(&sched_register_mutex);
>  }
>  
> -static void tracing_stop_sched_switch(void)
> +static void tracing_stop_sched_switch(int ops)
>  {
>  	mutex_lock(&sched_register_mutex);
> -	if (!(--sched_ref))
> +
> +	switch (ops) {
> +	case RECORD_CMDLINE:
> +		sched_cmdline_ref--;
> +		break;
> +
> +	case RECORD_TGID:
> +		sched_tgid_ref--;
> +		break;
> +	}
> +
> +	if (!sched_cmdline_ref && !sched_tgid_ref)
>  		tracing_sched_unregister();
>  	mutex_unlock(&sched_register_mutex);
>  }
>  
>  void tracing_start_cmdline_record(void)
>  {
> -	tracing_start_sched_switch();
> +	tracing_start_sched_switch(RECORD_CMDLINE);
>  }
>  
>  void tracing_stop_cmdline_record(void)
>  {
> -	tracing_stop_sched_switch();
> +	tracing_stop_sched_switch(RECORD_CMDLINE);
> +}
> +
> +void tracing_start_tgid_record(void)
> +{
> +	tracing_start_sched_switch(RECORD_TGID);
> +}
> +
> +void tracing_stop_tgid_record(void)
> +{
> +	tracing_stop_sched_switch(RECORD_TGID);
>  }

	

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ