lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 10 Aug 2023 14:07:08 +0800
From:   Ze Gao <zegao2021@...il.com>
To:     Steven Rostedt <rostedt@...dmis.org>
Cc:     Arnaldo Carvalho de Melo <acme@...nel.org>,
        Adrian Hunter <adrian.hunter@...el.com>,
        Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
        Ian Rogers <irogers@...gle.com>, Jiri Olsa <jolsa@...nel.org>,
        linux-kernel@...r.kernel.org, linux-perf-users@...r.kernel.org,
        linux-trace-kernel@...r.kernel.org,
        Mark Rutland <mark.rutland@....com>,
        Masami Hiramatsu <mhiramat@...nel.org>,
        Ingo Molnar <mingo@...hat.com>,
        Namhyung Kim <namhyung@...nel.org>,
        Peter Zijlstra <peterz@...radead.org>,
        Ze Gao <zegao@...cent.com>
Subject: Re: [PATCH] perf sched: parse task state from tracepoint print format

[REPLIED WITH A SUBJECT GIT-SEND-EMAIL FAILED TO PARSE]

Regards,
Ze

On Thu, Aug 10, 2023 at 1:52 PM Ze Gao <zegao2021@...il.com> wrote:
>
> Hi Steven,
>
> I managed to build task state char map dynamically by parsing
> the tracepoint print format from data recorded by perf. And
> likewise for libtraceevent.
>
> FYI, I tried TEP_PRINT_INFO but no shot. It turns out TEP_PRINT_INFO
> stills relies on libtraceevent (i.e., sched_switch_handler() in
> plugin_sched_switch.c) and we need to parse the print format on our own.
>
> Anyway, it works now and I've tested on some perf.data in old formats
> but not cover all the kernel releases.
>
> Thoughts?
>
> Regards,
> Ze
>
>
> From 6b2035494952efb2963e6459ae4dbfce496c3b97 Mon Sep 17 00:00:00 2001
> From: Ze Gao <zegao@...cent.com>
> Date: Wed, 2 Aug 2023 08:19:54 -0400
> Subject: [PATCH] perf sched: parse task state from tracepoint print format
>
> As of this writing, we use prev_state to report task state,
> which relies on both the task state macros and
> TASK_STATE_TO_CHAR_STR in kernel to interpret its actual
> meaning. In this way, perf gets broken literally each time
> TASK_STATE_TO_CHAR_STR changes as kernel evolves. Counting
> on TASK_STATE_TO_CHAR_STR gurantees no backward compatibilty.
>
> To fix this, we build the state char map from the print
> format parsed from perf.data on the fly and removes
> dependencies on these internal kernel definitions.
>
> Note that we provide an intended helper get_task_prev_state()
> for extracting task state from perf record and passing task
> state in char elsewhere, which helps to eliminate the need to
> know task state macros further.
>
> Signed-off-by: Ze Gao <zegao@...cent.com>
> ---
>  tools/perf/builtin-sched.c | 126 +++++++++++++++++++++++--------------
>  1 file changed, 80 insertions(+), 46 deletions(-)
>
> diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
> index 9ab300b6f131..9366bc0a991d 100644
> --- a/tools/perf/builtin-sched.c
> +++ b/tools/perf/builtin-sched.c
> @@ -52,6 +52,10 @@
>  #define SYM_LEN                        129
>  #define MAX_PID                        1024000
>
> +#define TASK_STATE_MAX 16
> +static char state_to_char[TASK_STATE_MAX];
> +static unsigned int num_sleep_states = 0;
> +
>  static const char *cpu_list;
>  static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
>
> @@ -92,24 +96,6 @@ struct sched_atom {
>         struct task_desc        *wakee;
>  };
>
> -#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"
> -
> -/* task state bitmask, copied from include/linux/sched.h */
> -#define TASK_RUNNING           0
> -#define TASK_INTERRUPTIBLE     1
> -#define TASK_UNINTERRUPTIBLE   2
> -#define __TASK_STOPPED         4
> -#define __TASK_TRACED          8
> -/* in tsk->exit_state */
> -#define EXIT_DEAD              16
> -#define EXIT_ZOMBIE            32
> -#define EXIT_TRACE             (EXIT_ZOMBIE | EXIT_DEAD)
> -/* in tsk->state again */
> -#define TASK_DEAD              64
> -#define TASK_WAKEKILL          128
> -#define TASK_WAKING            256
> -#define TASK_PARKED            512
> -
>  enum thread_state {
>         THREAD_SLEEPING = 0,
>         THREAD_WAIT_CPU,
> @@ -266,7 +252,7 @@ struct thread_runtime {
>         u64 total_preempt_time;
>         u64 total_delay_time;
>
> -       int last_state;
> +       char last_state;
>
>         char shortname[3];
>         bool comm_changed;
> @@ -436,7 +422,7 @@ static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *t
>  }
>
>  static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task,
> -                                 u64 timestamp, u64 task_state __maybe_unused)
> +                                 u64 timestamp, char task_state __maybe_unused)
>  {
>         struct sched_atom *event = get_new_event(task, timestamp);
>
> @@ -851,6 +837,72 @@ replay_wakeup_event(struct perf_sched *sched,
>         return 0;
>  }
>
> +static struct tep_print_arg* task_state_print_flag(struct tep_event *event)
> +{
> +       struct tep_print_arg* args = event->print_fmt.args;
> +
> +       while(args)
> +       {
> +               if (args->type == TEP_PRINT_FLAGS)
> +                       return args;
> +               if (args->type == TEP_PRINT_OP) {
> +                       args = args->op.right;
> +                       args = args->op.left;
> +                       continue;
> +               }
> +               args = args->next;
> +       }
> +       return NULL;
> +}
> +
> +static void __parse_print_flag(struct tep_print_flag_sym *field)
> +{
> +
> +       long val = strtol(field->value, NULL, 0);
> +       unsigned int bit = val ? ffs(val) : 0;
> +
> +       state_to_char[bit] = field->str[0];
> +       num_sleep_states++;
> +       if(num_sleep_states > TASK_STATE_MAX - 1) {
> +               pr_warning("too many states parsed, possibly bad format\n");
> +               return;
> +       }
> +       if (field->next) {
> +               __parse_print_flag(field->next);
> +       }
> +}
> +
> +static inline void parse_print_flag(struct tep_print_arg* args)
> +{
> +       __parse_print_flag(args->flags.flags);
> +}
> +
> +static void build_task_state_arr(struct tep_event *event)
> +{
> +       struct tep_print_arg* args;
> +
> +       args = task_state_print_flag(event);
> +       if (!args)
> +               pr_warning("print flag not found, possibly bad format\n");
> +       else
> +               parse_print_flag(args);
> +}
> +
> +static inline char get_prev_task_state(struct evsel *evsel,
> +               struct perf_sample *sample)
> +{
> +       int prev_state = evsel__intval(evsel, sample, "prev_state");
> +       unsigned int bit = prev_state ? ffs(prev_state) : 0;
> +       char state;
> +
> +       if(!num_sleep_states)
> +               build_task_state_arr(evsel->tp_format);
> +
> +       state = (!bit || bit > num_sleep_states) ? 'R' : state_to_char[bit];
> +
> +       return state;
> +}
> +
>  static int replay_switch_event(struct perf_sched *sched,
>                                struct evsel *evsel,
>                                struct perf_sample *sample,
> @@ -860,7 +912,7 @@ static int replay_switch_event(struct perf_sched *sched,
>                    *next_comm  = evsel__strval(evsel, sample, "next_comm");
>         const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
>                   next_pid = evsel__intval(evsel, sample, "next_pid");
> -       const u64 prev_state = evsel__intval(evsel, sample, "prev_state");
> +       const char prev_state = get_prev_task_state(evsel, sample);
>         struct task_desc *prev, __maybe_unused *next;
>         u64 timestamp0, timestamp = sample->time;
>         int cpu = sample->cpu;
> @@ -1050,12 +1102,6 @@ static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread)
>         return 0;
>  }
>
> -static char sched_out_state(u64 prev_state)
> -{
> -       const char *str = TASK_STATE_TO_CHAR_STR;
> -
> -       return str[prev_state];
> -}
>
>  static int
>  add_sched_out_event(struct work_atoms *atoms,
> @@ -1132,7 +1178,7 @@ static int latency_switch_event(struct perf_sched *sched,
>  {
>         const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
>                   next_pid = evsel__intval(evsel, sample, "next_pid");
> -       const u64 prev_state = evsel__intval(evsel, sample, "prev_state");
> +       const char prev_state = get_prev_task_state(evsel, sample);
>         struct work_atoms *out_events, *in_events;
>         struct thread *sched_out, *sched_in;
>         u64 timestamp0, timestamp = sample->time;
> @@ -1168,7 +1214,7 @@ static int latency_switch_event(struct perf_sched *sched,
>                         goto out_put;
>                 }
>         }
> -       if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp))
> +       if (add_sched_out_event(out_events, prev_state, timestamp))
>                 return -1;
>
>         in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
> @@ -2033,24 +2079,12 @@ static void timehist_header(struct perf_sched *sched)
>         printf("\n");
>  }
>
> -static char task_state_char(struct thread *thread, int state)
> -{
> -       static const char state_to_char[] = TASK_STATE_TO_CHAR_STR;
> -       unsigned bit = state ? ffs(state) : 0;
> -
> -       /* 'I' for idle */
> -       if (thread__tid(thread) == 0)
> -               return 'I';
> -
> -       return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?';
> -}
> -
>  static void timehist_print_sample(struct perf_sched *sched,
>                                   struct evsel *evsel,
>                                   struct perf_sample *sample,
>                                   struct addr_location *al,
>                                   struct thread *thread,
> -                                 u64 t, int state)
> +                                 u64 t, char state)
>  {
>         struct thread_runtime *tr = thread__priv(thread);
>         const char *next_comm = evsel__strval(evsel, sample, "next_comm");
> @@ -2091,7 +2125,7 @@ static void timehist_print_sample(struct perf_sched *sched,
>         print_sched_time(tr->dt_run, 6);
>
>         if (sched->show_state)
> -               printf(" %5c ", task_state_char(thread, state));
> +               printf(" %5c ", thread->tid == 0 ? 'I' : state);
>
>         if (sched->show_next) {
>                 snprintf(nstr, sizeof(nstr), "next: %s[%d]", next_comm, next_pid);
> @@ -2163,9 +2197,9 @@ static void timehist_update_runtime_stats(struct thread_runtime *r,
>                 else if (r->last_time) {
>                         u64 dt_wait = tprev - r->last_time;
>
> -                       if (r->last_state == TASK_RUNNING)
> +                       if (r->last_state == 'R')
>                                 r->dt_preempt = dt_wait;
> -                       else if (r->last_state == TASK_UNINTERRUPTIBLE)
> +                       else if (r->last_state == 'D')
>                                 r->dt_iowait = dt_wait;
>                         else
>                                 r->dt_sleep = dt_wait;
> @@ -2590,7 +2624,7 @@ static int timehist_sched_change_event(struct perf_tool *tool,
>         struct thread_runtime *tr = NULL;
>         u64 tprev, t = sample->time;
>         int rc = 0;
> -       int state = evsel__intval(evsel, sample, "prev_state");
> +       const char state = get_prev_task_state(evsel, sample);
>
>         addr_location__init(&al);
>         if (machine__resolve(machine, &al, sample) < 0) {
> --
> 2.41.0
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ