lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Mon, 14 Aug 2023 10:28:51 +0800
From:   Ze Gao <zegao2021@...il.com>
To:     Steven Rostedt <rostedt@...dmis.org>
Cc:     acme@...nel.org, adrian.hunter@...el.com,
        alexander.shishkin@...ux.intel.com, irogers@...gle.com,
        jolsa@...nel.org, linux-kernel@...r.kernel.org,
        linux-perf-users@...r.kernel.org,
        linux-trace-kernel@...r.kernel.org, mark.rutland@....com,
        mhiramat@...nel.org, mingo@...hat.com, namhyung@...nel.org,
        peterz@...radead.org, zegao@...cent.com
Subject: Re: [PATCH] perf sched: parse task state from tracepoint print format

On Sat, Aug 12, 2023 at 1:28 AM Steven Rostedt <rostedt@...dmis.org> wrote:
>
> On Thu, 10 Aug 2023 01:50:24 -0400
> Ze Gao <zegao2021@...il.com> wrote:
>
> > Hi Steven,
> >
> > I managed to build task state char map dynamically by parsing
> > the tracepoint print format from data recorded by perf. And
> > likewise for libtraceevent.
> >
> > FYI, I tried TEP_PRINT_INFO but no shot. It turns out TEP_PRINT_INFO
> > stills relies on libtraceevent (i.e., sched_switch_handler() in
> > plugin_sched_switch.c) and we need to parse the print format on our own.
>
> There is a way to unload plugins:
>
>         tep_unload_plugins(t->plugin_list, tep);
>
> Hmm, I should add:
>
>         tep_unload_plugin(tep, t->plugin_list, "plugin-name");
>
> To unload a single plugin.
>
> I it can also just override what the plugin does by calling:
>
> static int sched_switch_handler(struct trace_seq *s,
>                                 struct tep_record *record,
>                                 struct tep_event *event, void *context)
> {
>         // do whatever you want.
> }
>
>         tep_register_event_handler(tep, -1, "sched", "sched_switch",
>                                    sched_switch_handler, NULL);
>
Yes,  I chose to fix libtraceevent in a similar way, to not break
users of this plugin,
like perf script, I decided to build state char mapping dynamically
for both instead
of overriding sched_switch_handler in libtraceevent. Here is the patch:

>From e4fae23d9538e60e75a9776fa7938102e7c26bbb Mon Sep 17 00:00:00 2001
From: Ze Gao <zegao@...cent.com>
Date: Wed, 2 Aug 2023 07:20:46 -0400
Subject: [PATCH] libtraceevent: parse task state from tracepoint print format

As of this writing, we use prev_state to report task state, which
relies on both the task state macros and TASK_STATE_TO_CHAR_STR
copied from the kernel to interpret its actual meaning. In this way,
libtraceevent gets broken literally each time TASK_STATE_TO_CHAR_STR
changes as the kernel evolves. Counting on hardcoded
TASK_STATE_TO_CHAR_STR gurantees no backward compatibilty.

To fix this, we build the state char map from the print format
parsed from date recorded on the fly and removes dependencies on
these internal kernel definitions.

Signed-off-by: Ze Gao <zegao@...cent.com>
---
 plugins/plugin_sched_switch.c | 103 ++++++++++++++++++++++++++++------
 1 file changed, 87 insertions(+), 16 deletions(-)

diff --git a/plugins/plugin_sched_switch.c b/plugins/plugin_sched_switch.c
index 8752cae..a0df138 100644
--- a/plugins/plugin_sched_switch.c
+++ b/plugins/plugin_sched_switch.c
@@ -6,28 +6,25 @@
 #include <stdlib.h>
 #include <string.h>

+#include "event-utils.h"
 #include "event-parse.h"
 #include "trace-seq.h"

-static void write_state(struct trace_seq *s, int val)
-{
-       const char states[] = "SDTtZXxW";
-       int found = 0;
-       int i;
+#define TASK_STATE_MAX 16

-       for (i = 0; i < (sizeof(states) - 1); i++) {
-               if (!(val & (1 << i)))
-                       continue;
+static char state_to_char[TASK_STATE_MAX];
+static unsigned int num_sleeping_states = 0;

-               if (found)
-                       trace_seq_putc(s, '|');

-               found = 1;
-               trace_seq_putc(s, states[i]);
-       }
+static void write_state(struct trace_seq *s, int val)
+{
+       unsigned int bit = val ? ffs(val) : 0;
+       char state;

-       if (!found)
-               trace_seq_putc(s, 'R');
+       state = (!bit || bit > num_sleeping_states) ? 'R' : state_to_char[bit];
+       trace_seq_putc(s, state);
+       if(bit > num_sleeping_states)
+               trace_seq_putc(s, '+');
 }

 static void write_and_save_comm(struct tep_format_field *field,
@@ -79,6 +76,76 @@ static int sched_wakeup_handler(struct trace_seq *s,
        return 0;
 }

+static const struct tep_print_arg* task_state_print_flag(const struct
tep_event *event) {
+
+       struct tep_print_arg* args;
+
+       if (!event)
+               goto out;
+
+       args = event->print_fmt.args;
+       while(args)
+       {
+               if (args->type == TEP_PRINT_FLAGS)
+                       return args;
+               if (args->type == TEP_PRINT_OP) {
+                       args = args->op.right;
+                       args = args->op.left;
+                       continue;
+               }
+               args = args->next;
+       }
+out:
+       return NULL;
+}
+
+static int parse_task_state_arr(const char *value, const char *str)
+{
+       long val;
+       unsigned int bit;
+
+       if (!value || !str)
+               return -1;
+
+       val = strtol(value, NULL, 0);
+       bit = val ? ffs(val) : 0;
+       state_to_char[bit] = str[0];
+       num_sleeping_states++;
+       if (num_sleeping_states > TASK_STATE_MAX - 1) {
+               tep_warning("too many states parsed, possibly bad format\n");
+               return -1;
+       }
+       return 0;
+}
+
+static void parse_print_flag(const struct tep_print_flag_sym* field,
+                            int (*parser)(const char *value, const char *str))
+{
+       int err;
+
+       if (!field || !parser)
+               return;
+       err = parser(field->value, field->str);
+       if (err){
+               tep_warning("parsing print flag failed, possibly bad format\n");
+               return;
+       }
+       if (field->next)
+               parse_print_flag(field->next, parser);
+
+}
+
+static void build_task_state_arr(const struct tep_event *event)
+{
+       const struct tep_print_arg* args;
+
+       args = task_state_print_flag(event);
+       if (!args)
+               tep_warning("print flag not found, possibly bad format\n");
+       else
+               parse_print_flag(args->flags.flags, parse_task_state_arr);
+}
+
 static int sched_switch_handler(struct trace_seq *s,
                                struct tep_record *record,
                                struct tep_event *event, void *context)
@@ -99,8 +166,12 @@ static int sched_switch_handler(struct trace_seq *s,
        if (tep_get_field_val(s, event, "prev_prio", record, &val, 1) == 0)
                trace_seq_printf(s, "[%d] ", (int) val);

-       if (tep_get_field_val(s,  event, "prev_state", record, &val, 1) == 0)
+
+       if (tep_get_field_val(s,  event, "prev_state", record, &val, 1) == 0) {
+               if (!num_sleeping_states)
+                       build_task_state_arr(event);
                write_state(s, val);
+       }

        trace_seq_puts(s, " ==> ");

--
2.41.0



> > Anyway, it works now and I've tested on some perf.data in old formats
> > but not cover all the kernel releases.
> >
> > Thoughts?
>
> I don't maintain the perf code. You'll have to talk with the perf
> maintainers.

Roger that! Thank you very much!

Regards,
Ze

> -- Steve

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ