lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <02ac44b5-e48c-2abc-9e1c-d49d8d3465c9@acm.org>
Date:   Fri, 17 Nov 2017 08:47:35 -0600
From:   Corey Minyard <minyard@....org>
To:     Masamitsu Yamazaki <m-yamazaki@...jp.nec.com>,
        "openipmi-developer@...ts.sourceforge.net" 
        <openipmi-developer@...ts.sourceforge.net>
Cc:     Junichi Nomura <j-nomura@...jp.nec.com>,
        Kiyoshi Ueda <k-ueda@...jp.nec.com>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
Subject: Re: [Openipmi-developer] [PATCH] ipmi: Stop timers before cleaning up
 the module

On 11/15/2017 01:33 AM, Masamitsu Yamazaki wrote:
> System may crash after unloading ipmi_si.ko module
> because a timer may remain and fire after the module cleaned up resources.

This is a big improvement, thanks.  I'll work on getting this into 4.15.

Thanks,

-corey

> cleanup_one_si() contains the following processing.
>
>          /*
>           * Make sure that interrupts, the timer and the thread are
>           * stopped and will not run again.
>           */
>          if (to_clean->irq_cleanup)
>                  to_clean->irq_cleanup(to_clean);
>          wait_for_timer_and_thread(to_clean);
>
>          /*
>           * Timeouts are stopped, now make sure the interrupts are off
>           * in the BMC.  Note that timers and CPU interrupts are off,
>           * so no need for locks.
>           */
>          while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
>                  poll(to_clean);
>                  schedule_timeout_uninterruptible(1);
>          }
>          
> si_state changes as following in the while loop calling poll(to_clean).
>
>    SI_GETTING_MESSAGES
>      => SI_CHECKING_ENABLES
>       => SI_SETTING_ENABLES
>        => SI_GETTING_EVENTS
>         => SI_NORMAL
>
> As written in the code comments above,
> timers are expected to stop before the polling loop and not to run again.
> But the timer is set again in the following process
> when si_state becomes SI_SETTING_ENABLES.
>
>    => poll
>       => smi_event_handler
>         => handle_transaction_done
>            // smi_info->si_state == SI_SETTING_ENABLES
>           => start_getting_events
>             => start_new_msg
>              => smi_mod_timer
>                => mod_timer
>
> As a result, before the timer set in start_new_msg() expires,
> the polling loop may see si_state becoming SI_NORMAL
> and the module clean-up finishes.
>
> For example, hard LOCKUP and panic occurred as following.
> smi_timeout was called after smi_event_handler,
> kcs_event and hangs at port_inb()
> trying to access I/O port after release.
>
> #11 [ffff88069fdc5ef0] end_repeat_nmi at ffffffff816ac8d3
>      [exception RIP: port_inb+19]
>      RIP: ffffffffc0473053  RSP: ffff88069fdc3d80  RFLAGS: 00000006
>      RAX: ffff8806800f8e00  RBX: ffff880682bd9400  RCX: 0000000000000000
>      RDX: 0000000000000ca3  RSI: 0000000000000ca3  RDI: ffff8806800f8e40
>      RBP: ffff88069fdc3d80   R8: ffffffff81d86dfc   R9: ffffffff81e36426
>      R10: 00000000000509f0  R11: 0000000000100000  R12: 0000000000]:000000
>      R13: 0000000000000000  R14: 0000000000000246  R15: ffff8806800f8e00
>      ORIG_RAX: ffffffffffffffff  CS: 0010  SS: 0000
> --- <NMI exception stack> ---
> #12 [ffff88069fdc3d80] port_inb at ffffffffc0473053 [ipmi_si]
> #13 [ffff88069fdc3d88] kcs_event at ffffffffc0477952 [ipmi_si]
> #14 [ffff88069fdc3db0] smi_event_handler at ffffffffc047465d [ipmi_si]
> #15 [ffff88069fdc3df0] smi_timeout at ffffffffc0474f9e [ipmi_si]
>
> To fix the problem I defined a flag, timer_can_start,
> as member of struct smi_info.
> The flag is enabled immediately after initializing the timer
> and disabled immediately before waiting for timer deletion.
>
> Fixes: 0cfec916e86d ("ipmi: Start the timer and thread on internal msgs")
> Signed-off-by: Yamazaki Masamitsu <m-yamazaki@...jp.nec.com>
>
> ----
>
> diff -Nurp a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c
> --- a/drivers/char/ipmi/ipmi_si_intf.c	2017-11-09 15:00:31.436926440 +0900
> +++ b/drivers/char/ipmi/ipmi_si_intf.c	2017-11-13 14:14:02.399051610 +0900
> @@ -242,6 +242,9 @@ struct smi_info {
>   	/* The timer for this si. */
>   	struct timer_list   si_timer;
>   
> +	/* This flag is set, if the timer can be set */
> +	bool		    timer_can_start;
> +
>   	/* This flag is set, if the timer is running (timer_pending() isn't enough) */
>   	bool		    timer_running;
>   
> @@ -417,6 +420,8 @@ out:
>   
>   static void smi_mod_timer(struct smi_info *smi_info, unsigned long new_val)
>   {
> +	if (!smi_info->timer_can_start)
> +		return;
>   	smi_info->last_timeout_jiffies = jiffies;
>   	mod_timer(&smi_info->si_timer, new_val);
>   	smi_info->timer_running = true;
> @@ -436,21 +441,18 @@ static void start_new_msg(struct smi_inf
>   	smi_info->handlers->start_transaction(smi_info->si_sm, msg, size);
>   }
>   
> -static void start_check_enables(struct smi_info *smi_info, bool start_timer)
> +static void start_check_enables(struct smi_info *smi_info)
>   {
>   	unsigned char msg[2];
>   
>   	msg[0] = (IPMI_NETFN_APP_REQUEST << 2);
>   	msg[1] = IPMI_GET_BMC_GLOBAL_ENABLES_CMD;
>   
> -	if (start_timer)
> -		start_new_msg(smi_info, msg, 2);
> -	else
> -		smi_info->handlers->start_transaction(smi_info->si_sm, msg, 2);
> +	start_new_msg(smi_info, msg, 2);
>   	smi_info->si_state = SI_CHECKING_ENABLES;
>   }
>   
> -static void start_clear_flags(struct smi_info *smi_info, bool start_timer)
> +static void start_clear_flags(struct smi_info *smi_info)
>   {
>   	unsigned char msg[3];
>   
> @@ -459,10 +461,7 @@ static void start_clear_flags(struct smi
>   	msg[1] = IPMI_CLEAR_MSG_FLAGS_CMD;
>   	msg[2] = WDT_PRE_TIMEOUT_INT;
>   
> -	if (start_timer)
> -		start_new_msg(smi_info, msg, 3);
> -	else
> -		smi_info->handlers->start_transaction(smi_info->si_sm, msg, 3);
> +	start_new_msg(smi_info, msg, 3);
>   	smi_info->si_state = SI_CLEARING_FLAGS;
>   }
>   
> @@ -497,11 +496,11 @@ static void start_getting_events(struct
>    * Note that we cannot just use disable_irq(), since the interrupt may
>    * be shared.
>    */
> -static inline bool disable_si_irq(struct smi_info *smi_info, bool start_timer)
> +static inline bool disable_si_irq(struct smi_info *smi_info)
>   {
>   	if ((smi_info->irq) && (!smi_info->interrupt_disabled)) {
>   		smi_info->interrupt_disabled = true;
> -		start_check_enables(smi_info, start_timer);
> +		start_check_enables(smi_info);
>   		return true;
>   	}
>   	return false;
> @@ -511,7 +510,7 @@ static inline bool enable_si_irq(struct
>   {
>   	if ((smi_info->irq) && (smi_info->interrupt_disabled)) {
>   		smi_info->interrupt_disabled = false;
> -		start_check_enables(smi_info, true);
> +		start_check_enables(smi_info);
>   		return true;
>   	}
>   	return false;
> @@ -529,7 +528,7 @@ static struct ipmi_smi_msg *alloc_msg_ha
>   
>   	msg = ipmi_alloc_smi_msg();
>   	if (!msg) {
> -		if (!disable_si_irq(smi_info, true))
> +		if (!disable_si_irq(smi_info))
>   			smi_info->si_state = SI_NORMAL;
>   	} else if (enable_si_irq(smi_info)) {
>   		ipmi_free_smi_msg(msg);
> @@ -545,7 +544,7 @@ retry:
>   		/* Watchdog pre-timeout */
>   		smi_inc_stat(smi_info, watchdog_pretimeouts);
>   
> -		start_clear_flags(smi_info, true);
> +		start_clear_flags(smi_info);
>   		smi_info->msg_flags &= ~WDT_PRE_TIMEOUT_INT;
>   		if (smi_info->intf)
>   			ipmi_smi_watchdog_pretimeout(smi_info->intf);
> @@ -928,7 +927,7 @@ restart:
>   		 * disable and messages disabled.
>   		 */
>   		if (smi_info->supports_event_msg_buff || smi_info->irq) {
> -			start_check_enables(smi_info, true);
> +			start_check_enables(smi_info);
>   		} else {
>   			smi_info->curr_msg = alloc_msg_handle_irq(smi_info);
>   			if (!smi_info->curr_msg)
> @@ -1235,6 +1234,7 @@ static int smi_start_processing(void
>   
>   	/* Set up the timer that drives the interface. */
>   	setup_timer(&new_smi->si_timer, smi_timeout, (long)new_smi);
> +	new_smi->timer_can_start = true;
>   	smi_mod_timer(new_smi, jiffies + SI_TIMEOUT_JIFFIES);
>   
>   	/* Try to claim any interrupts. */
> @@ -3416,10 +3416,12 @@ static void check_for_broken_irqs(struct
>   	check_set_rcv_irq(smi_info);
>   }
>   
> -static inline void wait_for_timer_and_thread(struct smi_info *smi_info)
> +static inline void stop_timer_and_thread(struct smi_info *smi_info)
>   {
>   	if (smi_info->thread != NULL)
>   		kthread_stop(smi_info->thread);
> +
> +	smi_info->timer_can_start = false;
>   	if (smi_info->timer_running)
>   		del_timer_sync(&smi_info->si_timer);
>   }
> @@ -3593,7 +3595,7 @@ static int try_smi_init(struct smi_info
>   	 * Start clearing the flags before we enable interrupts or the
>   	 * timer to avoid racing with the timer.
>   	 */
> -	start_clear_flags(new_smi, false);
> +	start_clear_flags(new_smi);
>   
>   	/*
>   	 * IRQ is defined to be set when non-zero.  req_events will
> @@ -3662,7 +3664,7 @@ static int try_smi_init(struct smi_info
>   	return 0;
>   
>   out_err_stop_timer:
> -	wait_for_timer_and_thread(new_smi);
> +	stop_timer_and_thread(new_smi);
>   
>   out_err:
>   	new_smi->interrupt_disabled = true;
> @@ -3854,7 +3856,7 @@ static void cleanup_one_si(struct smi_in
>   	 */
>   	if (to_clean->irq_cleanup)
>   		to_clean->irq_cleanup(to_clean);
> -	wait_for_timer_and_thread(to_clean);
> +	stop_timer_and_thread(to_clean);
>   
>   	/*
>   	 * Timeouts are stopped, now make sure the interrupts are off
> @@ -3865,7 +3867,7 @@ static void cleanup_one_si(struct smi_in
>   		poll(to_clean);
>   		schedule_timeout_uninterruptible(1);
>   	}
> -	disable_si_irq(to_clean, false);
> +	disable_si_irq(to_clean);
>   	while (to_clean->curr_msg || (to_clean->si_state != SI_NORMAL)) {
>   		poll(to_clean);
>   		schedule_timeout_uninterruptible(1);
>
> ------------------------------------------------------------------------------
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, Slashdot.org! http://sdm.link/slashdot
> _______________________________________________
> Openipmi-developer mailing list
> Openipmi-developer@...ts.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/openipmi-developer


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ