lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <22482.1341471787@neuling.org>
Date:	Thu, 05 Jul 2012 17:03:07 +1000
From:	Michael Neuling <mikey@...ling.org>
To:	Kay Sievers <kay@...y.org>
cc:	linuxppc-dev@...abs.org,
	Linus Torvalds <torvalds@...ux-foundation.org>,
	Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
	Andrew Morton <akpm@...ux-foundation.org>,
	Steven Rostedt <rostedt@...dmis.org>,
	LKML <linux-kernel@...r.kernel.org>, Ingo Molnar <mingo@...e.hu>,
	Wu Fengguang <fengguang.wu@...el.com>,
	Joe Perches <joe@...ches.com>,
	"Paul E. McKenney" <paulmck@...ibm.com>
Subject: Re: [PATCH v3] printk: Have printk() never buffer its data

> On Mon, 2012-06-25 at 18:40 -0700, Linus Torvalds wrote:
> > On Mon, Jun 25, 2012 at 5:56 PM, Kay Sievers <kay@...y.org> wrote:
> > >
> > > Buffering has nice effects though:
> > > It makes continuation lines appear as one record in the buffer, not as
> > > n individual prints with n headers.
> > 
> > As I already mentioned, buffering for *logging* is different from
> > buffering for *printing*.
> > 
> > I think it might be a great idea to buffer for logging in order to
> > generate one individual buffer record there.
> > 
> > But it needs to be printed as it is generated.
> 
> That's a good idea.
> 
> Something like this could work - only minimally tested at this moment.

This breaks some powerpc configs and is in Linus' tree now as
084681d14e.

When we have printks without a newline (like show_regs()), it
sometimes:
1) drops the console output for that line (dmesg is fine).  Patch to fix
   this below.
2) adds a newline unnecessary to both console and dmesg.  I have no fix
   for this currently.
Reverting this patch fixes both problems.

Post rc4 is way to late to be adding largely untested features like
this.  

I suggest we revert this until the next merge window.

Mikey

diff --git a/kernel/printk.c b/kernel/printk.c
index dba1821..91659da 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1335,7 +1335,7 @@ static void cont_flush(void)
        if (cont.len == 0)
                return;
 
-       log_store(cont.facility, cont.level, LOG_NOCONS, cont.ts_nsec,
+       log_store(cont.facility, cont.level, LOG_DEFAULT, cont.ts_nsec,
                  NULL, 0, cont.buf, cont.len);
 
        cont.flushed = true;



> 
> 
> From: Kay Sievers <kay@...y.org>
> Subject: printk: flush continuation lines immediately to console
> 
> Continuation lines are buffered internally to merge the chunked
> printk()s into a single record, and to isolate potentially racy
> continuation users from usual terminated line users.
> 
> This though, has the effect that partial lines are not printed to
> the console in the moment they are emitted. In case the kernel
> crashes in the meantime, the potentially interesting printed
> information will never reach the consoles.
> 
> Here we share the continuation buffer with the console copy logic,
> and partial lines are immediately flushed to the consoles, but still
> buffered internally to improve the integrity of the messages.
> ---
> 
>  kernel/printk.c |  129 +++++++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 101 insertions(+), 28 deletions(-)
> 
> diff --git a/kernel/printk.c b/kernel/printk.c
> index a2276b9..92c34bd1 100644
> --- a/kernel/printk.c
> +++ b/kernel/printk.c
> @@ -193,12 +193,19 @@ static int console_may_schedule;
>   * separated by ',', and find the message after the ';' character.
>   */
>  
> +enum log_flags {
> +	LOG_DEFAULT = 0,
> +	LOG_NOCONS = 1,		/* already flushed, do not print to console */
> +};
> +
>  struct log {
>  	u64 ts_nsec;		/* timestamp in nanoseconds */
>  	u16 len;		/* length of entire record */
>  	u16 text_len;		/* length of text buffer */
>  	u16 dict_len;		/* length of dictionary buffer */
> -	u16 level;		/* syslog level + facility */
> +	u8 facility;		/* syslog facility */
> +	u8 flags:5;		/* internal record flags */
> +	u8 level:3;		/* syslog level */
>  };
>  
>  /*
> @@ -286,6 +293,7 @@ static u32 log_next(u32 idx)
>  
>  /* insert record into the buffer, discard old ones, update heads */
>  static void log_store(int facility, int level,
> +		      enum log_flags flags, u64 ts_nsec,
>  		      const char *dict, u16 dict_len,
>  		      const char *text, u16 text_len)
>  {
> @@ -329,8 +337,13 @@ static void log_store(int facility, int level,
>  	msg->text_len = text_len;
>  	memcpy(log_dict(msg), dict, dict_len);
>  	msg->dict_len = dict_len;
> -	msg->level = (facility << 3) | (level & 7);
> -	msg->ts_nsec = local_clock();
> +	msg->facility = facility;
> +	msg->level = level & 7;
> +	msg->flags = flags & 0x1f;
> +	if (ts_nsec > 0)
> +		msg->ts_nsec = ts_nsec;
> +	else
> +		msg->ts_nsec = local_clock();
>  	memset(log_dict(msg) + dict_len, 0, pad_len);
>  	msg->len = sizeof(struct log) + text_len + dict_len + pad_len;
>  
> @@ -446,7 +459,7 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf,
>  	ts_usec = msg->ts_nsec;
>  	do_div(ts_usec, 1000);
>  	len = sprintf(user->buf, "%u,%llu,%llu;",
> -		      msg->level, user->seq, ts_usec);
> +		      (msg->facility << 3) | msg->level, user->seq, ts_usec);
>  
>  	/* escape non-printable characters */
>  	for (i = 0; i < msg->text_len; i++) {
> @@ -787,6 +800,21 @@ static bool printk_time;
>  #endif
>  module_param_named(time, printk_time, bool, S_IRUGO | S_IWUSR);
>  
> +static size_t print_time(u64 ts, char *buf)
> +{
> +	unsigned long rem_nsec;
> +
> +	if (!printk_time)
> +		return 0;
> +
> +	if (!buf)
> +		return 15;
> +
> +	rem_nsec = do_div(ts, 1000000000);
> +	return sprintf(buf, "[%5lu.%06lu] ",
> +		       (unsigned long)ts, rem_nsec / 1000);
> +}
> +
>  static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
>  {
>  	size_t len = 0;
> @@ -803,18 +831,7 @@ static size_t print_prefix(const struct log *msg, bool syslog, char *buf)
>  		}
>  	}
>  
> -	if (printk_time) {
> -		if (buf) {
> -			unsigned long long ts = msg->ts_nsec;
> -			unsigned long rem_nsec = do_div(ts, 1000000000);
> -
> -			len += sprintf(buf + len, "[%5lu.%06lu] ",
> -					 (unsigned long) ts, rem_nsec / 1000);
> -		} else {
> -			len += 15;
> -		}
> -	}
> -
> +	len += print_time(msg->ts_nsec, buf ? buf + len : NULL);
>  	return len;
>  }
>  
> @@ -1272,15 +1289,25 @@ static inline void printk_delay(void)
>  	}
>  }
>  
> +/*
> + * Continuation lines are buffered, and not committed to the record buffer
> + * until the line is complete, or a race forces a flush. The line fragments
> + * though, are printed immediately to the consoles to ensure everything has
> + * reached the console in case of a kernel crash.
> + */
> +static char cont_buf[LOG_LINE_MAX];
> +static size_t cont_len;
> +static size_t cont_cons;
> +static size_t cont_cons_len;
> +static int cont_level;
> +static u64 cont_ts_nsec;
> +static struct task_struct *cont_task;
> +
>  asmlinkage int vprintk_emit(int facility, int level,
>  			    const char *dict, size_t dictlen,
>  			    const char *fmt, va_list args)
>  {
>  	static int recursion_bug;
> -	static char cont_buf[LOG_LINE_MAX];
> -	static size_t cont_len;
> -	static int cont_level;
> -	static struct task_struct *cont_task;
>  	static char textbuf[LOG_LINE_MAX];
>  	char *text = textbuf;
>  	size_t text_len;
> @@ -1326,7 +1353,8 @@ asmlinkage int vprintk_emit(int facility, int level,
>  		recursion_bug = 0;
>  		printed_len += strlen(recursion_msg);
>  		/* emit KERN_CRIT message */
> -		log_store(0, 2, NULL, 0, recursion_msg, printed_len);
> +		log_store(0, 2, LOG_DEFAULT, 0,
> +			  NULL, 0, recursion_msg, printed_len);
>  	}
>  
>  	/*
> @@ -1369,13 +1397,17 @@ asmlinkage int vprintk_emit(int facility, int level,
>  			 * Flush earlier buffer, which is either from a
>  			 * different thread, or when we got a new prefix.
>  			 */
> -			log_store(facility, cont_level, NULL, 0, cont_buf, cont_len);
> +			log_store(facility, cont_level,
> +				  LOG_NOCONS, cont_ts_nsec,
> +				  NULL, 0, cont_buf, cont_len);
>  			cont_len = 0;
>  		}
>  
>  		if (!cont_len) {
> +			cont_cons = 0;
>  			cont_level = level;
>  			cont_task = current;
> +			cont_ts_nsec = local_clock();
>  		}
>  
>  		/* buffer or append to earlier buffer from the same thread */
> @@ -1383,6 +1415,8 @@ asmlinkage int vprintk_emit(int facility, int level,
>  			text_len = sizeof(cont_buf) - cont_len;
>  		memcpy(cont_buf + cont_len, text, text_len);
>  		cont_len += text_len;
> +		cont_cons_len = cont_len;
> +		printed_len = text_len;
>  	} else {
>  		if (cont_len && cont_task == current) {
>  			if (prefix) {
> @@ -1392,6 +1426,7 @@ asmlinkage int vprintk_emit(int facility, int level,
>  				 * with an interrupt.
>  				 */
>  				log_store(facility, cont_level,
> +					  LOG_NOCONS, cont_ts_nsec,
>  					  NULL, 0, cont_buf, cont_len);
>  				cont_len = 0;
>  			}
> @@ -1401,14 +1436,15 @@ asmlinkage int vprintk_emit(int facility, int level,
>  				text_len = sizeof(cont_buf) - cont_len;
>  			memcpy(cont_buf + cont_len, text, text_len);
>  			cont_len += text_len;
> +			cont_cons_len = cont_len;
>  			log_store(facility, cont_level,
> +				  LOG_NOCONS, cont_ts_nsec,
>  				  NULL, 0, cont_buf, cont_len);
>  			cont_len = 0;
> -			cont_task = NULL;
> -			printed_len = cont_len;
> +			printed_len = text_len;
>  		} else {
>  			/* ordinary single and terminated line */
> -			log_store(facility, level,
> +			log_store(facility, level, LOG_DEFAULT, 0,
>  				  dict, dictlen, text, text_len);
>  			printed_len = text_len;
>  		}
> @@ -1795,6 +1831,7 @@ static u32 console_idx;
>   */
>  void console_unlock(void)
>  {
> +	static char text[LOG_LINE_MAX];
>  	static u64 seen_seq;
>  	unsigned long flags;
>  	bool wake_klogd = false;
> @@ -1807,10 +1844,37 @@ void console_unlock(void)
>  
>  	console_may_schedule = 0;
>  
> +	/* flush buffered message fragment immediately to console */
> +	raw_spin_lock_irqsave(&logbuf_lock, flags);
> +	if (cont_cons_len && (cont_cons < cont_cons_len || !cont_len)) {
> +		size_t textlen = 0;
> +		size_t len;
> +
> +		len = cont_cons_len - cont_cons;
> +		if (!cont_cons)
> +			textlen = print_time(cont_ts_nsec, text);
> +		memcpy(text + textlen, cont_buf + cont_cons, len);
> +		textlen += len;
> +		cont_cons = cont_cons_len;
> +
> +		if (!cont_len) {
> +			/* last chunk of line; terminate */
> +			text[textlen++] = '\n';
> +			cont_cons_len = 0;
> +		}
> +		raw_spin_unlock(&logbuf_lock);
> +
> +		stop_critical_timings();
> +		call_console_drivers(cont_level, text, textlen);
> +		start_critical_timings();
> +
> +		local_irq_restore(flags);
> +	} else
> +		raw_spin_unlock_irqrestore(&logbuf_lock, flags);
> +
>  again:
>  	for (;;) {
>  		struct log *msg;
> -		static char text[LOG_LINE_MAX];
>  		size_t len;
>  		int level;
>  
> @@ -1825,13 +1889,22 @@ again:
>  			console_seq = log_first_seq;
>  			console_idx = log_first_idx;
>  		}
> -
> +skip:
>  		if (console_seq == log_next_seq)
>  			break;
>  
>  		msg = log_from_idx(console_idx);
> -		level = msg->level & 7;
> +		if (msg->flags & LOG_NOCONS) {
> +			/*
> +			 * Skip record we have buffered and already printed
> +			 * directly to the console when we received it.
> +			 */
> +			console_idx = log_next(console_idx);
> +			console_seq++;
> +			goto skip;
> +		}
>  
> +		level = msg->level;
>  		len = msg_print_text(msg, false, text, sizeof(text));
>  
>  		console_idx = log_next(console_idx);
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ