lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87sgeh3m5j.fsf@jogness.linutronix.de>
Date:   Fri, 26 Jun 2020 17:02:48 +0200
From:   John Ogness <john.ogness@...utronix.de>
To:     Petr Mladek <pmladek@...e.com>
Cc:     Peter Zijlstra <peterz@...radead.org>,
        Sergey Senozhatsky <sergey.senozhatsky.work@...il.com>,
        Sergey Senozhatsky <sergey.senozhatsky@...il.com>,
        Steven Rostedt <rostedt@...dmis.org>,
        Linus Torvalds <torvalds@...ux-foundation.org>,
        Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
        Andrea Parri <parri.andrea@...il.com>,
        Thomas Gleixner <tglx@...utronix.de>,
        Paul McKenney <paulmck@...nel.org>, kexec@...ts.infradead.org,
        linux-kernel@...r.kernel.org
Subject: Re: buffer allocation: was: [PATCH v3 3/3] printk: use the lockless ringbuffer

On 2020-06-25, Petr Mladek <pmladek@...e.com> wrote:
>> --- a/kernel/printk/printk.c
>> +++ b/kernel/printk/printk.c
>> @@ -1176,11 +1068,46 @@ static void __init set_percpu_data_ready(void)
>>  	__printk_percpu_data_ready = true;
>>  }
>>  
>> +static unsigned int __init add_to_rb(struct printk_ringbuffer *rb,
>> +				     struct printk_record *r)
>> +{
>> +	struct prb_reserved_entry e;
>> +	struct printk_record dest_r;
>> +
>> +	prb_rec_init_wr(&dest_r, r->info->text_len, r->info->dict_len);
>> +
>> +	if (!prb_reserve(&e, rb, &dest_r))
>> +		return 0;
>> +
>> +	memcpy(&dest_r.text_buf[0], &r->text_buf[0], dest_r.text_buf_size);
>> +	if (dest_r.dict_buf) {
>> +		memcpy(&dest_r.dict_buf[0], &r->dict_buf[0],
>> +		       dest_r.dict_buf_size);
>> +	}
>> +	dest_r.info->facility = r->info->facility;
>> +	dest_r.info->level = r->info->level;
>> +	dest_r.info->flags = r->info->flags;
>> +	dest_r.info->ts_nsec = r->info->ts_nsec;
>> +	dest_r.info->caller_id = r->info->caller_id;
>> +
>> +	prb_commit(&e);
>> +
>> +	return prb_record_text_space(&e);
>> +}
>> +
>> +static char setup_text_buf[CONSOLE_EXT_LOG_MAX] __initdata;
>> +static char setup_dict_buf[CONSOLE_EXT_LOG_MAX] __initdata;
>> +
>>  void __init setup_log_buf(int early)
>>  {
>> +	struct prb_desc *new_descs;
>> +	struct printk_info info;
>> +	struct printk_record r;
>>  	unsigned long flags;
>> +	char *new_dict_buf;
>>  	char *new_log_buf;
>>  	unsigned int free;
>> +	u64 seq;
>>  
>>  	/*
>>  	 * Some archs call setup_log_buf() multiple times - first is very
>> @@ -1201,17 +1128,50 @@ void __init setup_log_buf(int early)
>>  
>>  	new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN);
>>  	if (unlikely(!new_log_buf)) {
>> -		pr_err("log_buf_len: %lu bytes not available\n",
>> +		pr_err("log_buf_len: %lu text bytes not available\n",
>>  			new_log_buf_len);
>>  		return;
>>  	}
>>  
>> +	new_dict_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN);
>> +	if (unlikely(!new_dict_buf)) {
>> +		/* dictionary failure is allowed */
>> +		pr_err("log_buf_len: %lu dict bytes not available\n",
>> +			new_log_buf_len);
>> +	}
>> +
>> +	new_descs = memblock_alloc((new_log_buf_len >> PRB_AVGBITS) *
>> +				   sizeof(struct prb_desc), LOG_ALIGN);
>> +	if (unlikely(!new_descs)) {
>> +		pr_err("log_buf_len: %lu desc bytes not available\n",
>> +			new_log_buf_len >> PRB_AVGBITS);
>> +		if (new_dict_buf)
>> +			memblock_free(__pa(new_dict_buf), new_log_buf_len);
>> +		memblock_free(__pa(new_log_buf), new_log_buf_len);
>> +		return;
>> +	}
>> +
>> +	prb_rec_init_rd(&r, &info,
>> +			&setup_text_buf[0], sizeof(setup_text_buf),
>> +			&setup_dict_buf[0], sizeof(setup_dict_buf));
>> +
>>  	logbuf_lock_irqsave(flags);
>> +
>> +	prb_init(&printk_rb_dynamic,
>> +		 new_log_buf, bits_per(new_log_buf_len) - 1,
>> +		 new_dict_buf, bits_per(new_log_buf_len) - 1,
>
> This does not check whether new_dict_buf was really allocated.

Thank you.

> I suggest to make it easy and switch to the new buffers only when
> all three could get allocated.

Agreed.

> Well, I still feel a bit uneasy about these PRB_AVGBITS operations,
> including new_log_buf_len >> PRB_AVGBITS used above.
>
> A more safe design would be to add some sanity checks at the beginning
> of the function. And maybe convert new_log_buf_let to number of bits.
> Then operate with the number of bits in the rest of the function.  It
> might be easier to make sure that we are on the safe side.

I will clean it up with additional checks and temporary variables.

>>  	log_buf_len = new_log_buf_len;
>>  	log_buf = new_log_buf;
>>  	new_log_buf_len = 0;
>> -	free = __LOG_BUF_LEN - log_next_idx;
>> -	memcpy(log_buf, __log_buf, __LOG_BUF_LEN);
>> +
>> +	free = __LOG_BUF_LEN;
>> +	prb_for_each_record(0, &printk_rb_static, seq, &r)
>> +		free -= add_to_rb(&printk_rb_dynamic, &r);
>> +
>> +	prb = &printk_rb_dynamic;
>
> This might deserve a comment that this is safe (no lost message)
> because it is called early enough when everything is still running
> on the boot CPU.

I will add a comment and an extra check to make sure.

Once everything is lockless, new messages could appear (for example, due
to NMI messages). The simple check should probably change to a loop. But
let us not worry about that at this point.

Below is a new version of the relevant patch snippets against mainline
just to show you how I plan to make it look for the next version.

John Ogness

--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ @@
+#define PRB_AVGBITS 5	/* 32 character average length */
+
+#if CONFIG_LOG_BUF_SHIFT <= PRB_AVGBITS
+#error CONFIG_LOG_BUF_SHIFT value too small.
+#endif
+_DEFINE_PRINTKRB(printk_rb_static, CONFIG_LOG_BUF_SHIFT - PRB_AVGBITS,
+		 PRB_AVGBITS, PRB_AVGBITS, &__log_buf[0]);
+
@@ @@
 void __init setup_log_buf(int early)
 {
+	unsigned int new_descs_count;
+	struct prb_desc *new_descs;
+	struct printk_info info;
+	struct printk_record r;
+	size_t new_descs_size;
 	unsigned long flags;
+	char *new_dict_buf;
 	char *new_log_buf;
 	unsigned int free;
+	u64 seq;
 
 	/*
 	 * Some archs call setup_log_buf() multiple times - first is very
@@ @@ void __init setup_log_buf(int early)
 	if (!new_log_buf_len)
 		return;
 
+	new_descs_count = new_log_buf_len >> PRB_AVGBITS;
+	if (new_descs_count == 0) {
+		pr_err("new_log_buf_len: %lu too small\n", new_log_buf_len);
+		return;
+	}
+
 	new_log_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN);
 	if (unlikely(!new_log_buf)) {
-		pr_err("log_buf_len: %lu bytes not available\n",
-			new_log_buf_len);
+		pr_err("log_buf_len: %lu text bytes not available\n",
+		       new_log_buf_len);
+		return;
+	}
+
+	new_dict_buf = memblock_alloc(new_log_buf_len, LOG_ALIGN);
+	if (unlikely(!new_dict_buf)) {
+		pr_err("log_buf_len: %lu dict bytes not available\n",
+		       new_log_buf_len);
+		memblock_free(__pa(new_log_buf), new_log_buf_len);
 		return;
 	}
 
+	new_descs_size = new_descs_count * sizeof(struct prb_desc);
+	new_descs = memblock_alloc(new_descs_size, LOG_ALIGN);
+	if (unlikely(!new_descs)) {
+		pr_err("log_buf_len: %lu desc bytes not available\n",
+		       new_descs_size);
+		memblock_free(__pa(new_dict_buf), new_log_buf_len);
+		memblock_free(__pa(new_log_buf), new_log_buf_len);
+		return;
+	}
+
+	prb_rec_init_rd(&r, &info,
+			&setup_text_buf[0], sizeof(setup_text_buf),
+			&setup_dict_buf[0], sizeof(setup_dict_buf));
+
+	prb_init(&printk_rb_dynamic,
+		 new_log_buf, order_base_2(new_log_buf_len),
+		 new_dict_buf, order_base_2(new_log_buf_len),
+		 new_descs, order_base_2(new_descs_count));
+
 	logbuf_lock_irqsave(flags);
+
 	log_buf_len = new_log_buf_len;
 	log_buf = new_log_buf;
 	new_log_buf_len = 0;
-	free = __LOG_BUF_LEN - log_next_idx;
-	memcpy(log_buf, __log_buf, __LOG_BUF_LEN);
+
+	free = __LOG_BUF_LEN;
+	prb_for_each_record(0, &printk_rb_static, seq, &r)
+		free -= add_to_rb(&printk_rb_dynamic, &r);
+
+	/*
+	 * This is early enough that everything is still running on the
+	 * boot CPU and interrupts are disabled. So no new messages will
+	 * appear during this switch.
+	 */
+	prb = &printk_rb_dynamic;
+
 	logbuf_unlock_irqrestore(flags);
 
+	/* Verify no messages were dropped. */
+	if (seq != prb_next_seq(&printk_rb_static)) {
+		pr_err("dropped %llu messages\n",
+		       prb_next_seq(&printk_rb_static) - seq);
+	}
+
 	pr_info("log_buf_len: %u bytes\n", log_buf_len);
 	pr_info("early log buf free: %u(%u%%)\n",
 		free, (free * 100) / __LOG_BUF_LEN);

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ