lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 28 Jul 2017 15:29:32 +0300
From:   Tomi Sarvela <tomi.p.sarvela@...el.com>
To:     Thomas Gleixner <tglx@...utronix.de>
Cc:     Martin Peres <martin.peres@...ux.intel.com>,
        jeffy.chen@...k-chips.com, linux-kernel@...r.kernel.org
Subject: Re: Suspend-resume failure on Intel Eagle Lake Core2Duo

On 28/07/17 00:08, Thomas Gleixner wrote:
> On Thu, 27 Jul 2017, Thomas Gleixner wrote:
>> On Thu, 27 Jul 2017, Thomas Gleixner wrote:
>>> On Thu, 27 Jul 2017, Tomi Sarvela wrote:
>>>> On 27/07/17 10:42, Thomas Gleixner wrote:
>>>>> On Thu, 27 Jul 2017, Tomi Sarvela wrote:
>>>>>> On 26/07/17 17:26, Thomas Gleixner wrote:
>>>>>>> So reverting that commit does not help. Does it help on your machine?
>>>>>>
>>>>>> Yes. Reverting it does not cause the machine to lock up on resume.
>>>>>>
>>>>>> I haven't tested if the machine locks up later on, but at least it
>>>>>> survives
>>>>>> couple of s/r cycles.
>>>>>
>>>>> Can you please try to add 'nohpet' to the kernel command line?
>>>>
>>>> Option nohpet didn't change anything, still hangs on s/r.
>>>
>>> Ok. Was a shot in the dark. I tried on a similar machine, but that one
>>> resumes fine (except that the AHCI controller plays silly buggers, but
>>> nothing interrupt related). I might have access to another core2duo machine
>>> tomorrow.
>>>
>>> I'll send you a debug patch shortly, but can you please first check when
>>> the wreckage happens by testing the states in
>>>
>>>      /sys/power/pm_test
>>>
>>> freezer
>>> devices
>>> platform
>>> processors
>>> core
>>
>> Actually for suspend to ram we only have
>>
>> 	 freezer, devices, platform
>>
>> I assume it's platform because that is where the actual interrupt
>> suspend/resume happens.
>>
>> If that survives, then it's the low level architecture s/r code which
>> fiddles with the interrupt controllers and leaves them in a state which is
>> not known to the core code.
> 
> Debug patch below. It should make the machine resume again. Emphasis on
> "should". Please provide the output of /sys/kernel/debug/tracing/trace
> after resume.

The patch didn't apply cleanly: can you tell exact commit or tag it has 
been created against? I tried to hand-wrangle the changes in, but then I 
got compilation errors:

CC      ipc/compat.o
+0x0): multiple definition of `irq_suspend_resume'
kernel/irq/irqdesc.o:(.bss+0x0): first defined here
kernel/irq/manage.o:(.bss+0x8): multiple definition of `irq_suspend_resume'
kernel/irq/irqdesc.o:/home/testrunner/drm-tip/kernel/irq/irqdesc.c:270: 
first defined here
kernel/irq/spurious.o:(.bss+0x0): multiple definition of 
`irq_suspend_resume'

Also, the usage of /sys/power/pm_test was not intuitive to me. Can you 
explain which kind of combinations do you want to test?

Best regards,

Tomi


> Thanks,
> 
> 	tglx
> 
> 8<-----------
> 
> --- a/kernel/irq/chip.c
> +++ b/kernel/irq/chip.c
> @@ -304,7 +304,10 @@ void irq_shutdown(struct irq_desc *desc)
>   
>   void irq_enable(struct irq_desc *desc)
>   {
> -	if (!irqd_irq_disabled(&desc->irq_data)) {
> +	if (irq_suspend_resume)
> +		irq_trace_state("preenable", desc);
> +
> +	if (!irqd_irq_disabled(&desc->irq_data) && !irq_suspend_resume) {
>   		unmask_irq(desc);
>   	} else {
>   		irq_state_clr_disabled(desc);
> @@ -315,10 +318,16 @@ void irq_enable(struct irq_desc *desc)
>   			unmask_irq(desc);
>   		}
>   	}
> +
> +	if (irq_suspend_resume)
> +		irq_trace_state("postenable", desc);
>   }
>   
>   static void __irq_disable(struct irq_desc *desc, bool mask)
>   {
> +	if (irq_suspend_resume)
> +		irq_trace_state("predisable", desc);
> +
>   	if (irqd_irq_disabled(&desc->irq_data)) {
>   		if (mask)
>   			mask_irq(desc);
> @@ -331,6 +340,9 @@ static void __irq_disable(struct irq_des
>   			mask_irq(desc);
>   		}
>   	}
> +
> +	if (irq_suspend_resume)
> +		irq_trace_state("postdisable", desc);
>   }
>   
>   /**
> @@ -390,6 +402,9 @@ static inline void mask_ack_irq(struct i
>   
>   void mask_irq(struct irq_desc *desc)
>   {
> +	if (irq_suspend_resume)
> +		irq_trace_state("premask", desc);
> +
>   	if (irqd_irq_masked(&desc->irq_data))
>   		return;
>   
> @@ -397,17 +412,26 @@ void mask_irq(struct irq_desc *desc)
>   		desc->irq_data.chip->irq_mask(&desc->irq_data);
>   		irq_state_set_masked(desc);
>   	}
> +
> +	if (irq_suspend_resume)
> +		irq_trace_state("postmask", desc);
>   }
>   
>   void unmask_irq(struct irq_desc *desc)
>   {
> -	if (!irqd_irq_masked(&desc->irq_data))
> +	if (irq_suspend_resume)
> +		irq_trace_state("preunmask", desc);
> +
> +	if (!irqd_irq_masked(&desc->irq_data) && !irq_suspend_resume)
>   		return;
>   
>   	if (desc->irq_data.chip->irq_unmask) {
>   		desc->irq_data.chip->irq_unmask(&desc->irq_data);
>   		irq_state_clr_masked(desc);
>   	}
> +
> +	if (irq_suspend_resume)
> +		irq_trace_state("postunmask", desc);
>   }
>   
>   void unmask_threaded_irq(struct irq_desc *desc)
> --- a/kernel/irq/internals.h
> +++ b/kernel/irq/internals.h
> @@ -459,3 +459,11 @@ static inline void irq_remove_debugfs_en
>   {
>   }
>   #endif /* CONFIG_GENERIC_IRQ_DEBUGFS */
> +
> +bool irq_suspend_resume;
> +
> +static inline void irq_trace_state(const char *what, struct irq_desc *desc)
> +{
> +	trace_printk("%s %d state %08x\n", what, irq_desc_get_irq(desc),
> +		     irqd_get(&desc->irq_data));
> +}
> --- a/kernel/irq/pm.c
> +++ b/kernel/irq/pm.c
> @@ -14,6 +14,8 @@
>   
>   #include "internals.h"
>   
> +bool irq_suspend_resume;
> +
>   bool irq_pm_check_wakeup(struct irq_desc *desc)
>   {
>   	if (irqd_is_wakeup_armed(&desc->irq_data)) {
> @@ -120,6 +122,7 @@ void suspend_device_irqs(void)
>   	struct irq_desc *desc;
>   	int irq;
>   
> +	irq_suspend_resume = true;
>   	for_each_irq_desc(irq, desc) {
>   		unsigned long flags;
>   		bool sync;
> @@ -127,7 +130,9 @@ void suspend_device_irqs(void)
>   		if (irq_settings_is_nested_thread(desc))
>   			continue;
>   		raw_spin_lock_irqsave(&desc->lock, flags);
> +		irq_trace_state("presuspend", desc);
>   		sync = suspend_device_irq(desc);
> +		irq_trace_state("postsuspend", desc);
>   		raw_spin_unlock_irqrestore(&desc->lock, flags);
>   
>   		if (sync)
> @@ -172,9 +177,14 @@ static void resume_irqs(bool want_early)
>   			continue;
>   
>   		raw_spin_lock_irqsave(&desc->lock, flags);
> +		irq_trace_state("preresume", desc);
>   		resume_irq(desc);
> +		irq_trace_state("postresume", desc);
>   		raw_spin_unlock_irqrestore(&desc->lock, flags);
>   	}
> +
> +	if (!want_early)
> +		irq_suspend_resume = false;
>   }
>   
>   /**
> 
-- 
Intel Finland Oy - BIC 0357606-4 - Westendinkatu 7, 02160 Espoo

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ