Message-ID: <20180613072946.2riinain6g2r7pmg@shbuild888>
Date:   Wed, 13 Jun 2018 15:29:46 +0800
From:   Feng Tang <feng.tang@...el.com>
To:     Peter Zijlstra <peterz@...radead.org>,
        Petr Mladek <pmladek@...e.com>
Cc:     Ingo Molnar <mingo@...nel.org>,
        Thomas Gleixner <tglx@...utronix.de>,
        "H . Peter Anvin" <hpa@...or.com>,
        Alan Cox <gnomes@...rguk.ukuu.org.uk>,
        linux-kernel@...r.kernel.org, alek.du@...el.com,
        pasha.tatashin@...cle.com, arjan@...ux.intel.com,
        len.brown@...el.com, feng.tang@...el.com
Subject: Re: [RFC 2/2] x86, tsc: Enable clock for early printk timestamp

On Wed, Jun 06, 2018 at 05:38:33PM +0800, Feng Tang wrote:
> On Sat, Jun 02, 2018 at 12:12:13AM +0800, Feng Tang wrote:
> 
> Hi Peter and all,
> 
> 
> > Hi Peter and Petr,
> > 
> > Thanks for your suggestions; I will try to find a cleaner and less hacky
> > way. It may take some time, as dealing with all kinds of TSC is tricky :)
> > 
> > - Feng
> > 
> > On Thu, May 31, 2018 at 05:52:10PM +0200, Peter Zijlstra wrote:
> > > On Thu, May 31, 2018 at 03:55:42PM +0200, Petr Mladek wrote:
> > > > I wonder if we could get some cleaner integration into the timer and
> > > > printk code.
> > > 
> > > Yes, these patches are particularly horrific...
> > > 
> > > There were some earlier patches by Pavel Tatashin, which attempted to
> > > get things running earlier.
> > > 
> > >   http://lkml.kernel.org/r/20180209211143.16215-1-pasha.tatashin@oracle.com
> > > 
> > > I'm not entirely happy with that, but I never did get around to
> > > reviewing that last version :-( In particular, now that you made me
> > > look, I dislike his patch 6 almost as much as these patches.
> > > 
> > > The idea was to get regular sched_clock() running earlier, not to botch
> > > some early_sched_clock() into it.
> > > 
> > > Basically run calibrate_tsc() earlier (like _waaay_ earlier, it doesn't
> > > rely on anything other than CPUID) and if you have a recent part (with
> > > exception of SKX) you'll get a usable tsc rate (and TSC_RELIABLE) and
> > > things will work.
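
(For reference, the CPUID-only calibration mentioned above boils down to
reading CPUID leaf 0x15, which reports the TSC/crystal-clock ratio and, on
many parts, the crystal frequency itself. A minimal user-space sketch of
that computation, with error handling elided and names that are mine
rather than the kernel's:

#include <cpuid.h>
#include <stdio.h>

int main(void)
{
	unsigned int denominator, numerator, crystal_hz, edx;

	/* CPUID.15H: EAX = ratio denominator, EBX = ratio numerator,
	 * ECX = crystal clock in Hz (0 when not enumerated; the kernel
	 * then has to fall back to per-model knowledge). */
	if (!__get_cpuid(0x15, &denominator, &numerator, &crystal_hz, &edx)
	    || !denominator || !numerator || !crystal_hz) {
		fprintf(stderr, "CPUID leaf 0x15 not usable on this part\n");
		return 1;
	}

	/* tsc_khz = crystal_hz * numerator / denominator / 1000 */
	unsigned long long tsc_khz = (unsigned long long)crystal_hz
				     * numerator / denominator / 1000;
	printf("TSC frequency: %llu kHz\n", tsc_khz);
	return 0;
}

Nothing beyond the CPUID instruction is touched, which is why it can run
that early.)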
> 
> 
> I just did a hacky experiment: I moved tsc_init() earlier into
> setup_arch() and removed tsc_early_delay_calibrate(). The printk
> timestamp does start working much earlier!
> 
> 
> But __use_tsc and __sched_clock_stable rely on jump_label, which can't
> be used this early. (I tried calling jump_label_init() before
> tsc_init(), but the kernel crashes; I have worked around it for now.)

Just figured out why the kernel crashes when jump_label_init() is moved
earlier into setup_arch(): tsc_init() enables the static key __use_tsc:

    static_key_enable
        __jump_label_update
            arch_jump_label_transform
                __jump_label_transform
                    text_poke_bp
                        text_poke

text_poke() needs to map the page containing the code being patched, but
paging is not initialized that early yet, so it triggers a panic.

Besides __use_tsc, sched_clock has another static key of its own,
__sched_clock_stable.
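
(The debug patch below papers over this with a plain int, tsc_inited,
that the hot paths check alongside the static keys; a plain variable is
readable at any point during boot, since no text patching is involved.
A compilable user-space sketch of just that fallback pattern, assuming
x86 and with names that are mine rather than the kernel's:

#include <stdbool.h>
#include <stdio.h>
#include <x86intrin.h>

static int tsc_inited;		/* plain flag: usable at any time */
static bool use_tsc_patched;	/* stands in for static_branch_likely(&__use_tsc) */

static unsigned long long sketch_clock(void)
{
	/* The flag covers the window before jump labels can be patched;
	 * once the static key is flipped, it takes over on its own. */
	if (use_tsc_patched || tsc_inited)
		return __rdtsc();	/* raw cycles; the kernel scales this to ns */
	return 0;			/* pre-TSC fallback (jiffies-based in the kernel) */
}

int main(void)
{
	printf("before init: %llu\n", sketch_clock());	/* fallback path */
	tsc_inited = 1;					/* as done in tsc_init() */
	printf("after init:  %llu\n", sketch_clock());	/* TSC path */
	return 0;
}

The price is an extra load and branch on every call, which is exactly
the overhead static keys exist to remove; hence the "hacky" label.)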

Thanks,
Feng

> 
> Please review the debug patch, thanks!
> 
> ---
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index 5c623df..b636888 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -1201,7 +1201,8 @@ void __init setup_arch(char **cmdline_p)
>  	kvmclock_init();
>  #endif
>  
> -	tsc_early_delay_calibrate();
> +	tsc_init();
> +
>  	if (!early_xdbc_setup_hardware())
>  		early_xdbc_register_console();
>  
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index 4008dd6..8288f39 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -33,6 +33,7 @@ EXPORT_SYMBOL(cpu_khz);
>  unsigned int __read_mostly tsc_khz;
>  EXPORT_SYMBOL(tsc_khz);
>  
> +int tsc_inited;
>  /*
>   * TSC can be unstable due to cpufreq or due to unsynced TSCs
>   */
> @@ -192,7 +193,7 @@ static void set_cyc2ns_scale(unsigned long khz, int cpu, unsigned long long tsc_
>   */
>  u64 native_sched_clock(void)
>  {
> -	if (static_branch_likely(&__use_tsc)) {
> +	if (static_branch_likely(&__use_tsc) || tsc_inited) {
>  		u64 tsc_now = rdtsc();
>  
>  		/* return the value in ns */
> @@ -1387,30 +1391,16 @@ static int __init init_tsc_clocksource(void)
>   */
>  device_initcall(init_tsc_clocksource);
>  
> -void __init tsc_early_delay_calibrate(void)
> -{
> -	unsigned long lpj;
> -
> -	if (!boot_cpu_has(X86_FEATURE_TSC))
> -		return;
> -
> -	cpu_khz = x86_platform.calibrate_cpu();
> -	tsc_khz = x86_platform.calibrate_tsc();
> -
> -	tsc_khz = tsc_khz ? : cpu_khz;
> -	if (!tsc_khz)
> -		return;
> -
> -	lpj = tsc_khz * 1000;
> -	do_div(lpj, HZ);
> -	loops_per_jiffy = lpj;
> -}
> -
>  void __init tsc_init(void)
>  {
>  	u64 lpj, cyc;
>  	int cpu;
>  
> +	if (tsc_inited)
> +		return;
> +
> +	tsc_inited = 1;
> +
>  	if (!boot_cpu_has(X86_FEATURE_TSC)) {
>  		setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
>  		return;
> @@ -1474,11 +1464,15 @@ void __init tsc_init(void)
>  	lpj = ((u64)tsc_khz * 1000);
>  	do_div(lpj, HZ);
>  	lpj_fine = lpj;
> +	loops_per_jiffy = lpj;
>  
>  	use_tsc_delay();
>  
>  	check_system_tsc_reliable();
>  
> +	extern void early_set_sched_clock_stable(u64 sched_clock_offset);
> +	early_set_sched_clock_stable(div64_u64(rdtsc() * 1000, tsc_khz));
> +
>  	if (unsynchronized_tsc()) {
>  		mark_tsc_unstable("TSCs unsynchronized");
>  		return;
> diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
> index 10c83e7..6c5c22d 100644
> --- a/kernel/sched/clock.c
> +++ b/kernel/sched/clock.c
> @@ -119,6 +119,13 @@ static void __scd_stamp(struct sched_clock_data *scd)
>  	scd->tick_raw = sched_clock();
>  }
>  
> +
> +void early_set_sched_clock_stable(u64 sched_clock_offset)
> +{
> +	__sched_clock_offset = sched_clock_offset;
> +	static_branch_enable(&__sched_clock_stable);
> +}
> +
>  static void __set_sched_clock_stable(void)
>  {
>  	struct sched_clock_data *scd;
> @@ -342,12 +349,14 @@ static u64 sched_clock_remote(struct sched_clock_data *scd)
>   *
>   * See cpu_clock().
>   */
> +
> +extern int tsc_inited;
>  u64 sched_clock_cpu(int cpu)
>  {
>  	struct sched_clock_data *scd;
>  	u64 clock;
>  
> -	if (sched_clock_stable())
> +	if (sched_clock_stable() || tsc_inited)
>  		return sched_clock() + __sched_clock_offset;
>  
>  	if (unlikely(!sched_clock_running))
> 
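
(A note on the offset computed above: div64_u64(rdtsc() * 1000, tsc_khz)
just converts the TSC reading at init time into nanoseconds, since
cycles * 1000 / kHz = ns. For example, 4,000,000,000 cycles at
tsc_khz = 2,000,000 (a 2 GHz TSC) gives 2,000,000,000 ns, i.e. 2 s.)
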
> > > 
> > > If you have a dodgy part (sorry SKX), you'll just have to live with
> > > sched_clock starting late(r).
> > > 
> > > Do not cobble things on the side, try and get the normal things running
> > > earlier.
