lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200603144725.GA221279@sathnaga86>
Date:   Wed, 3 Jun 2020 20:17:25 +0530
From:   Satheesh Rajendran <sathnaga@...ux.vnet.ibm.com>
To:     Joerg Roedel <jroedel@...e.de>
Cc:     Abdul Haleem <abdhalee@...ux.vnet.ibm.com>,
        sachinp <sachinp@...ux.vnet.ibm.com>,
        Stephen Rothwell <sfr@...b.auug.org.au>,
        linux-kernel <linux-kernel@...r.kernel.org>,
        manvanth <manvanth@...ux.vnet.ibm.com>,
        linux-next <linux-next@...r.kernel.org>,
        Steven Rostedt <rostedt@...dmis.org>,
        "aneesh.kumar" <aneesh.kumar@...ux.vnet.ibm.com>,
        akpm@...ux-foundation.org,
        linuxppc-dev <linuxppc-dev@...ts.ozlabs.org>, hch <hch@....de>
Subject: Re: [mainline][Oops][bisected 2ba3e6 ] 5.7.0 boot fails with kernel
 panic on powerpc

On Wed, Jun 03, 2020 at 03:32:57PM +0200, Joerg Roedel wrote:
> On Wed, Jun 03, 2020 at 04:20:57PM +0530, Abdul Haleem wrote:
> > @Joerg, Could you please have a look?
> 
> Can you please try the attached patch?

Hi Joerg,

I hit a similar boot failure on a Power9 bare-metal box (see the Note below), and
your patch below resolved it in my environment;
I am now able to boot the system fine.

...
Fedora 31 (Thirty One)
Kernel 5.7.0-gd6f9469a0-dirty on an ppc64le (hvc0)

 login:


Tested-by: Satheesh Rajendran <sathnaga@...ux.vnet.ibm.com>

Note: for the record, here is the boot failure call trace.

[    0.023555] mempolicy: Enabling automatic NUMA balancing. Configure with numa_balancing= or the kernel.numa_balancing sysctl
[    0.023582] pid_max: default: 163840 minimum: 1280
[    0.035014] BUG: Unable to handle kernel data access on read at 0xc000006000000000
[    0.035058] Faulting instruction address: 0xc000000000382304
[    0.035074] Oops: Kernel access of bad area, sig: 11 [#1]
[    0.035097] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV
[    0.035113] Modules linked in:
[    0.035136] CPU: 24 PID: 0 Comm: swapper/24 Not tainted 5.7.0-gd6f9469a0 #1
[    0.035161] NIP:  c000000000382304 LR: c00000000038407c CTR: 0000000000000000
[    0.035197] REGS: c00000000167f930 TRAP: 0300   Not tainted  (5.7.0-gd6f9469a0)
[    0.035241] MSR:  9000000002009033 <SF,HV,VEC,EE,ME,IR,DR,RI,LE>  CR: 42022422  XER: 00000000
[    0.035294] CFAR: c0000000003822fc DAR: c000006000000000 DSISR: 40000000 IRQMASK: 0 
[    0.035294] GPR00: c00000000038407c c00000000167fbc0 c00000000168090[  150.252645597,5] OPAL: Reboot request...
[  150.252928266,5] RESET: Initiating fast reboot 1...
0 c008000000000000 
[    0.035294] GPR04: ffffffffffffffff 00000000000001ff c0080000001fffff 0000000000000060 
[    0.035294] GPR08: 0000000060000000 0000000000000005 c000006000000000 c008000000200000 
[    0.035294] GPR12: 0000000022022422 c000000001870000 c000000000000000 c008000000000000 
[    0.035294] GPR16: c008000007ffffff c008000000200000 0000000000000000 c000006000000000 
[    0.035294] GPR20: c008000008000000 c008000008000000 c008000007ffffff c008000007ffffff 
[    0.035294] GPR24: c00000000163f7c8 c00000000172d0c0 0000000000000001 0000000000000001 
[    0.035294] GPR28: c000000001708000 c00000000172d0c8 0000000000000000 c008000008000000 
[    0.035622] NIP [c000000000382304] map_kernel_range_noflush+0x274/0x510
[    0.035657] LR [c00000000038407c] __vmalloc_node_range+0x2ec/0x3a0
[    0.035690] Call Trace:
[    0.035709] [c00000000167fbc0] [c00000000038d848] __alloc_pages_nodemask+0x158/0x3f0 (unreliable)
[    0.035750] [c00000000167fc90] [c00000000038407c] __vmalloc_node_range+0x2ec/0x3a0
[    0.035787] [c00000000167fd40] [c000000000384268] __vmalloc+0x58/0x70
[    0.035823] [c00000000167fdb0] [c000000001056db8] alloc_large_system_hash+0x204/0x304
[    0.035870] [c00000000167fe60] [c00000000105c1f0] vfs_caches_init+0xd8/0x138
[    0.035916] [c00000000167fee0] [c0000000010242a0] start_kernel+0x644/0x6ec
[    0.035960] [c00000000167ff90] [c00000000000ca9c] start_here_common+0x1c/0x400
[    0.036004] Instruction dump:
[    0.036016] 3af4ffff 60000000 60000000 38c90010 7f663036 7d667a14 7cc600d0 7d713038 
[    0.036038] 38d1ffff 7c373040 41810008 7e91a378 <e8b30000> 2c250000 418201b4 7f464830 
[    0.036083] ---[ end trace c7e72029dfacc217 ]---
[    0.036114] 
[    1.036223] Kernel panic - not syncing: Attempted to kill the idle task!
[    1.036858] Rebooting in 10 seconds..


Regards,
-Satheesh.

> 
> diff --git a/include/asm-generic/5level-fixup.h b/include/asm-generic/5level-fixup.h
> index 58046ddc08d0..afbab31fbd7e 100644
> --- a/include/asm-generic/5level-fixup.h
> +++ b/include/asm-generic/5level-fixup.h
> @@ -17,6 +17,11 @@
>  	((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
>  		NULL : pud_offset(p4d, address))
> 
> +#define pud_alloc_track(mm, p4d, address, mask)					\
> +	((unlikely(pgd_none(*(p4d))) &&						\
> +	  (__pud_alloc(mm, p4d, address) || ({*(mask)|=PGTBL_P4D_MODIFIED;0;})))?	\
> +	  NULL : pud_offset(p4d, address))
> +
>  #define p4d_alloc(mm, pgd, address)		(pgd)
>  #define p4d_alloc_track(mm, pgd, address, mask)	(pgd)
>  #define p4d_offset(pgd, start)			(pgd)
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 7e07f4f490cb..d46bf03b804f 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2088,35 +2088,35 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
>  		NULL : pud_offset(p4d, address);
>  }
> 
> -static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
> +static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
>  				     unsigned long address,
>  				     pgtbl_mod_mask *mod_mask)
> -
>  {
> -	if (unlikely(pgd_none(*pgd))) {
> -		if (__p4d_alloc(mm, pgd, address))
> +	if (unlikely(p4d_none(*p4d))) {
> +		if (__pud_alloc(mm, p4d, address))
>  			return NULL;
> -		*mod_mask |= PGTBL_PGD_MODIFIED;
> +		*mod_mask |= PGTBL_P4D_MODIFIED;
>  	}
> 
> -	return p4d_offset(pgd, address);
> +	return pud_offset(p4d, address);
>  }
> 
> -#endif /* !__ARCH_HAS_5LEVEL_HACK */
> -
> -static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
> +static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
>  				     unsigned long address,
>  				     pgtbl_mod_mask *mod_mask)
> +
>  {
> -	if (unlikely(p4d_none(*p4d))) {
> -		if (__pud_alloc(mm, p4d, address))
> +	if (unlikely(pgd_none(*pgd))) {
> +		if (__p4d_alloc(mm, pgd, address))
>  			return NULL;
> -		*mod_mask |= PGTBL_P4D_MODIFIED;
> +		*mod_mask |= PGTBL_PGD_MODIFIED;
>  	}
> 
> -	return pud_offset(p4d, address);
> +	return p4d_offset(pgd, address);
>  }
> 
> +#endif /* !__ARCH_HAS_5LEVEL_HACK */
> +
>  static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
>  {
>  	return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ