lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAAfh-jMvgYfDrgcbj7o4bv83syAhL2d+1r2vn1Yz+FY_Ws81Pw@mail.gmail.com>
Date:   Fri, 22 Sep 2023 15:20:37 +0800
From:   Enlin Mu <enlinmu@...il.com>
To:     Petr Mladek <pmladek@...e.com>
Cc:     Greg KH <gregkh@...uxfoundation.org>,
        John Ogness <john.ogness@...utronix.de>,
        Enlin Mu <enlin.mu@...look.com>, rostedt@...dmis.org,
        senozhatsky@...omium.org, keescook@...omium.org,
        tony.luck@...el.com, gpiccoli@...lia.com, enlin.mu@...soc.com,
        linux-kernel@...r.kernel.org, linux-hardening@...r.kernel.org
Subject: Re: [PATCH] printk: add cpu id information to printk() output

Petr Mladek <pmladek@...e.com> 于2023年9月16日周六 00:34写道:
>
> On Fri 2023-09-15 11:53:13, Greg KH wrote:
> > On Fri, Sep 15, 2023 at 04:46:02PM +0800, Enlin Mu wrote:
> > > John Ogness <john.ogness@...utronix.de> 于2023年9月15日周五 16:34写道:
> > > >
> > > > On 2023-09-15, Enlin Mu <enlin.mu@...look.com> wrote:
> > > > > Sometimes we want to print cpu id of printk() messages to consoles
> > > > >
> > > > > diff --git a/include/linux/threads.h b/include/linux/threads.h
> > > > > index c34173e6c5f1..6700bd9a174f 100644
> > > > > --- a/include/linux/threads.h
> > > > > +++ b/include/linux/threads.h
> > > > > @@ -34,6 +34,9 @@
> > > > >  #define PID_MAX_LIMIT (CONFIG_BASE_SMALL ? PAGE_SIZE * 8 : \
> > > > >       (sizeof(long) > 4 ? 4 * 1024 * 1024 : PID_MAX_DEFAULT))
> > > > >
> > > > > +#define CPU_ID_SHIFT 23
> > > > > +#define CPU_ID_MASK  0xff800000
> > > >
> > > > This only supports 256 CPUs. I think it doesn't make sense to try to
> > > > squish CPU and Task IDs into 32 bits.
> > > Yes, it is not good way,
> > > >
> > > > What about introducing a caller_id option to always only print the CPU
> > > > ID? Or do you really need Task _and_ CPU?
> > >    Yes, I need it.Because I need to know which CPU is printing the
> > > log, so that I can identify the current system operation, such as load
> > > situation and CPU busy/idle status
> >
> > The cpu that is printing the log isn't the one that added the log
> > message, so I think you will have incorrect data here, right?
>
> We already store some metadata about the caller:
>
>  * All fields are set by the printk code except for @seq, which is
>  * set by the ringbuffer code.
>  */
> struct printk_info {
>         u64     seq;            /* sequence number */
>         u64     ts_nsec;        /* timestamp in nanoseconds */
>         u16     text_len;       /* length of text message */
>         u8      facility;       /* syslog facility */
>         u8      flags:5;        /* internal record flags */
>         u8      level:3;        /* syslog level */
>         u32     caller_id;      /* thread id or processor id */
>
>         struct dev_printk_info  dev_info;
> };
>
> The 32-bit caller ID is generated using:
>
> static inline u32 printk_caller_id(void)
> {
>         return in_task() ? task_pid_nr(current) :
>                 0x80000000 + smp_processor_id();
> }
>
> We could add more metadata and always store the CPU ID and something
> like:
>
>    [CTXT][ Tpid][  Ccpu]
>
> for example
>
>    [TASK][  T234][    C4]
>    [ IRQ][ T4567][   C17]
>    [SIRQ][    T5][    C0]
>    [ NMI][  T356][  C128]
>
Greate!
Do you have a plan to push it to linus?
>
> The biggest problem is that it would change the format of the
> ringbuffer so that it would require updating external tools,
> working with crashdump, especially crash but there are also
> alternative python extensions for gdb.
>
>
> See below POC of the kernel part. It is not even compile tested. The size
> of the buffers is updated by a guess. Comments are not updated, ...
>
> diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h
> index 7d4979d5c3ce..e3e0a9fdc0cf 100644
> --- a/kernel/printk/internal.h
> +++ b/kernel/printk/internal.h
> @@ -15,7 +15,7 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
>  #ifdef CONFIG_PRINTK
>
>  #ifdef CONFIG_PRINTK_CALLER
> -#define PRINTK_PREFIX_MAX      48
> +#define PRINTK_PREFIX_MAX      64
>  #else
>  #define PRINTK_PREFIX_MAX      32
>  #endif
> diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
> index 7e0b4dd02398..bb393ccbd5e9 100644
> --- a/kernel/printk/printk.c
> +++ b/kernel/printk/printk.c
> @@ -621,16 +621,20 @@ static void append_char(char **pp, char *e, char c)
>                 *(*pp)++ = c;
>  }
>
> +char printk_caller_ctxt_txt[][] = { "TSK", "IRQ", "SIRQ", "NMI"' };
> +
>  static ssize_t info_print_ext_header(char *buf, size_t size,
>                                      struct printk_info *info)
>  {
>         u64 ts_usec = info->ts_nsec;
> -       char caller[20];
> +       char caller[40];
>  #ifdef CONFIG_PRINTK_CALLER
> -       u32 id = info->caller_id;
> +       struct printk_caller_info *ci = info->caller_info;
>
> -       snprintf(caller, sizeof(caller), ",caller=%c%u",
> -                id & 0x80000000 ? 'C' : 'T', id & ~0x80000000);
> +       snprintf(caller, sizeof(caller),
> +                ",caller.ctxt=%s,caller.pid=%d,caller.cpu=%d",
> +                printk_caller_ctxt_txt[ci->ctxt],
> +                ci->pid,ci->cpu);
>  #else
>         caller[0] = '\0';
>  #endif
> @@ -996,11 +1000,16 @@ void log_buf_vmcoreinfo_setup(void)
>         VMCOREINFO_OFFSET(prb_data_blk_lpos, begin);
>         VMCOREINFO_OFFSET(prb_data_blk_lpos, next);
>
> +       VMCOREINFO_STRUCT_SIZE(printk_caller_info);
> +       VMCOREINFO_OFFSET(printk_caller_info, ctxt);
> +       VMCOREINFO_OFFSET(printk_caller_info, cpu);
> +       VMCOREINFO_OFFSET(printk_caller_info, pid);
> +
>         VMCOREINFO_STRUCT_SIZE(printk_info);
>         VMCOREINFO_OFFSET(printk_info, seq);
>         VMCOREINFO_OFFSET(printk_info, ts_nsec);
>         VMCOREINFO_OFFSET(printk_info, text_len);
> -       VMCOREINFO_OFFSET(printk_info, caller_id);
> +       VMCOREINFO_OFFSET(printk_info, caller_info);
>         VMCOREINFO_OFFSET(printk_info, dev_info);
>
>         VMCOREINFO_STRUCT_SIZE(dev_printk_info);
> @@ -1111,7 +1120,7 @@ static unsigned int __init add_to_rb(struct printk_ringbuffer *rb,
>         dest_r.info->level = r->info->level;
>         dest_r.info->flags = r->info->flags;
>         dest_r.info->ts_nsec = r->info->ts_nsec;
> -       dest_r.info->caller_id = r->info->caller_id;
> +       dest_r.info->caller_info = r->info->caller_info;
>         memcpy(&dest_r.info->dev_info, &r->info->dev_info, sizeof(dest_r.info->dev_info));
>
>         prb_final_commit(&e);
> @@ -1329,13 +1338,16 @@ static size_t print_time(u64 ts, char *buf)
>  }
>
>  #ifdef CONFIG_PRINTK_CALLER
> -static size_t print_caller(u32 id, char *buf)
> +static size_t print_caller_info(struct printk_caller_info *ci, char *buf)
>  {
> -       char caller[12];
> +       char pid[12];
> +       char cpu[12];
>
> -       snprintf(caller, sizeof(caller), "%c%u",
> -                id & 0x80000000 ? 'C' : 'T', id & ~0x80000000);
> -       return sprintf(buf, "[%6s]", caller);
> +       snprintf(pid, sizeof(pid), "T%u", ci->pid);
> +       snprintf(cpu, sizeof(cpu), "C%u", ci->cpu);
> +
> +       return sprintf(buf, "[%s][%6s][%6s]",
> +                      printk_caller_ctxt_txt[ci->ctxt], pid, cpu);
>  }
>  #else
>  #define print_caller(id, buf) 0
> diff --git a/kernel/printk/printk_ringbuffer.c b/kernel/printk/printk_ringbuffer.c
> index fde338606ce8..66be9b47be46 100644
> --- a/kernel/printk/printk_ringbuffer.c
> +++ b/kernel/printk/printk_ringbuffer.c
> @@ -431,7 +431,8 @@ static enum desc_state get_desc_state(unsigned long id,
>   */
>  static enum desc_state desc_read(struct prb_desc_ring *desc_ring,
>                                  unsigned long id, struct prb_desc *desc_out,
> -                                u64 *seq_out, u32 *caller_id_out)
> +                                u64 *seq_out,
> +                                struct printk_caller_info *ci_out)
>  {
>         struct printk_info *info = to_info(desc_ring, id);
>         struct prb_desc *desc = to_desc(desc_ring, id);
> @@ -480,8 +481,8 @@ static enum desc_state desc_read(struct prb_desc_ring *desc_ring,
>         }
>         if (seq_out)
>                 *seq_out = info->seq; /* also part of desc_read:C */
> -       if (caller_id_out)
> -               *caller_id_out = info->caller_id; /* also part of desc_read:C */
> +       if (caller_info_out)
> +               *ci_out = info->caller_info; /* also part of desc_read:C */
>
>         /*
>          * 1. Guarantee the descriptor content is loaded before re-checking
> @@ -1251,14 +1252,22 @@ static const char *get_data(struct prb_data_ring *data_ring,
>         return &db->data[0];
>  }
>
> +static bool same_caller(struct printk_caller_info *cur_ci,
> +                       struct printk_caller_info *last_ci)
> +{
> +       return (cur_ci->cpu == last_ci->cpu &&
> +               cur_ci->pid == last_ci->pid &&
> +               cur_ci->ctxt == last_ci->ctxt)
>  /*
>   * Attempt to transition the newest descriptor from committed back to reserved
>   * so that the record can be modified by a writer again. This is only possible
>   * if the descriptor is not yet finalized and the provided @caller_id matches.
>   */
>  static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring,
> -                                        u32 caller_id, unsigned long *id_out)
> +                                        struct printk_caller_info *ci,
> +                                        unsigned long *id_out)
>  {
> +       struct printk_caller_info last_ci;
>         unsigned long prev_state_val;
>         enum desc_state d_state;
>         struct prb_desc desc;
> @@ -1272,8 +1281,8 @@ static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring,
>          * To reduce unnecessarily reopening, first check if the descriptor
>          * state and caller ID are correct.
>          */
> -       d_state = desc_read(desc_ring, id, &desc, NULL, &cid);
> -       if (d_state != desc_committed || cid != caller_id)
> +       d_state = desc_read(desc_ring, id, &desc, NULL, &last_ci);
> +       if (d_state != desc_committed || !same_caller(&last_ci, ci))
>                 return NULL;
>
>         d = to_desc(desc_ring, id);
> @@ -1348,7 +1357,7 @@ static struct prb_desc *desc_reopen_last(struct prb_desc_ring *desc_ring,
>   *            @r->info->text_len after concatenating.
>   */
>  bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer *rb,
> -                        struct printk_record *r, u32 caller_id, unsigned int max_size)
> +                        struct printk_record *r, struct printk_caller_info *ci, unsigned int max_size)
>  {
>         struct prb_desc_ring *desc_ring = &rb->desc_ring;
>         struct printk_info *info;
> @@ -1359,7 +1368,7 @@ bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer
>         local_irq_save(e->irqflags);
>
>         /* Transition the newest descriptor back to the reserved state. */
> -       d = desc_reopen_last(desc_ring, caller_id, &id);
> +       d = desc_reopen_last(desc_ring, ci, &id);
>         if (!d) {
>                 local_irq_restore(e->irqflags);
>                 goto fail_reopen;
> @@ -1377,11 +1386,11 @@ bool prb_reserve_in_last(struct prb_reserved_entry *e, struct printk_ringbuffer
>         e->id = id;
>
>         /*
> -        * desc_reopen_last() checked the caller_id, but there was no
> +        * desc_reopen_last() checked the caller_info, but there was no
>          * exclusive access at that point. The descriptor may have
>          * changed since then.
>          */
> -       if (caller_id != info->caller_id)
> +       if (!match(ci, &info->caller_info)
>                 goto fail;
>
>         if (BLK_DATALESS(&d->text_blk_lpos)) {
> diff --git a/kernel/printk/printk_ringbuffer.h b/kernel/printk/printk_ringbuffer.h
> index 18cd25e489b8..9934353965d4 100644
> --- a/kernel/printk/printk_ringbuffer.h
> +++ b/kernel/printk/printk_ringbuffer.h
> @@ -6,6 +6,18 @@
>  #include <linux/atomic.h>
>  #include <linux/dev_printk.h>
>
> +#define PRINTK_CTXT_TASK       0
> +#define PRINTK_CTXT_HARDIRQ    1
> +#define PRINTK_CTXT_SOFTIRQ    2
> +#define PRINTK_CTXT_NMI                3
> +
> +
> +struct printk_caller_info {
> +       u32     ctxt:2;         /* context */
> +       u32     cpu:24;         /* CPU number */
> +       u32     pid;            /* PID of the process */
> +};
> +
>  /*
>   * Meta information about each stored message.
>   *
> @@ -19,8 +31,7 @@ struct printk_info {
>         u8      facility;       /* syslog facility */
>         u8      flags:5;        /* internal record flags */
>         u8      level:3;        /* syslog level */
> -       u32     caller_id;      /* thread id or processor id */
> -
> +       struct printk_caller_info caller_info;
>         struct dev_printk_info  dev_info;
>  };
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ