Message-ID: <87y2dhrte0.ffs@nanos.tec.linutronix.de>
Date:   Sat, 17 Apr 2021 14:24:23 +0200
From:   Thomas Gleixner <tglx@...utronix.de>
To:     "Paul E. McKenney" <paulmck@...nel.org>
Cc:     linux-kernel@...r.kernel.org, john.stultz@...aro.org,
        sboyd@...nel.org, corbet@....net, Mark.Rutland@....com,
        maz@...nel.org, kernel-team@...com, neeraju@...eaurora.org,
        ak@...ux.intel.com, "Paul E. McKenney" <paulmck@...nel.org>,
        Chris Mason <clm@...com>
Subject: Re: [PATCH v8 clocksource 2/5] clocksource: Retry clock read if long delays detected

On Tue, Apr 13 2021 at 21:35, Paul E. McKenney wrote:
>  #define WATCHDOG_INTERVAL (HZ >> 1)
>  #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
> +#define WATCHDOG_MAX_SKEW (NSEC_PER_SEC >> 6)

That's ~15ms, which is a tad large I'd say...
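
For reference, NSEC_PER_SEC >> 6 works out to 1e9 / 64 ns ~= 15.6 ms, while
the WATCHDOG_MAX_DELAY in the sketch below is 100 * NSEC_PER_USEC = 100 us.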
  
>  static void clocksource_watchdog_work(struct work_struct *work)
>  {
> @@ -213,9 +214,10 @@ static void clocksource_watchdog_inject_delay(void)
>  static void clocksource_watchdog(struct timer_list *unused)
>  {
>  	struct clocksource *cs;
> -	u64 csnow, wdnow, cslast, wdlast, delta;
> -	int64_t wd_nsec, cs_nsec;
> +	u64 csnow, wdnow, wdagain, cslast, wdlast, delta;
> +	int64_t wd_nsec, wdagain_delta, wderr_nsec = 0, cs_nsec;
>  	int next_cpu, reset_pending;
> +	int nretries;
>  
>  	spin_lock(&watchdog_lock);
>  	if (!watchdog_running)
> @@ -224,6 +226,7 @@ static void clocksource_watchdog(struct timer_list *unused)
>  	reset_pending = atomic_read(&watchdog_reset_pending);
>  
>  	list_for_each_entry(cs, &watchdog_list, wd_list) {
> +		nretries = 0;
>  
>  		/* Clocksource already marked unstable? */
>  		if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
> @@ -232,11 +235,24 @@ static void clocksource_watchdog(struct timer_list *unused)
>  			continue;
>  		}
>  
> +retry:
>  		local_irq_disable();
> -		csnow = cs->read(cs);
> -		clocksource_watchdog_inject_delay();
>  		wdnow = watchdog->read(watchdog);
> +		clocksource_watchdog_inject_delay();
> +		csnow = cs->read(cs);
> +		wdagain = watchdog->read(watchdog);
>  		local_irq_enable();
> +		delta = clocksource_delta(wdagain, wdnow, watchdog->mask);
> +		wdagain_delta = clocksource_cyc2ns(delta, watchdog->mult, watchdog->shift);
> +		if (wdagain_delta > WATCHDOG_MAX_SKEW) {
> +			wderr_nsec = wdagain_delta;
> +			if (nretries++ < max_read_retries)
> +				goto retry;
> +		}
> +		if (nretries) {
> +			pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d\n",
> +				smp_processor_id(), watchdog->name, wderr_nsec, nretries);
> +		}
>  
>  		/* Clocksource initialized ? */
>  		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||

This can nicely be split out into a read function, which avoids brain
overload when reading. Something like the uncompiled sketch below.

I so wish we could just delete all of this horror instead of making it
more horrible.

Thanks,

        tglx
---
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -124,6 +124,12 @@ static void __clocksource_change_rating(
 #define WATCHDOG_INTERVAL (HZ >> 1)
 #define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
 
+/*
+ * The maximum delay between two consecutive readouts of the watchdog
+ * clocksource to detect SMI, NMI, or vCPU preemption.
+ */
+#define WATCHDOG_MAX_DELAY (100 * NSEC_PER_USEC)
+
 static void clocksource_watchdog_work(struct work_struct *work)
 {
 	/*
@@ -184,12 +190,37 @@ void clocksource_mark_unstable(struct cl
 	spin_unlock_irqrestore(&watchdog_lock, flags);
 }
 
+static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
+{
+	unsigned int nretries;
+	u64 wd_end, wd_delta;
+	int64_t wd_delay;
+
+	for (nretries = 0; nretries < max_read_retries; nretries++) {
+		local_irq_disable();
+		*wdnow = watchdog->read(watchdog);
+		clocksource_watchdog_inject_delay();
+		*csnow = cs->read(cs);
+		wd_end = watchdog->read(watchdog);
+		local_irq_enable();
+
+		wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask);
+		wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, watchdog->shift);
+		if (wd_delay < WATCHDOG_MAX_DELAY)
+			return true;
+	}
+
+	pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, %d attempts\n",
+		smp_processor_id(), watchdog->name, wd_delay, nretries);
+	return false;
+}
+
 static void clocksource_watchdog(struct timer_list *unused)
 {
-	struct clocksource *cs;
 	u64 csnow, wdnow, cslast, wdlast, delta;
-	int64_t wd_nsec, cs_nsec;
 	int next_cpu, reset_pending;
+	int64_t wd_nsec, cs_nsec;
+	struct clocksource *cs;
 
 	spin_lock(&watchdog_lock);
 	if (!watchdog_running)
@@ -206,10 +237,14 @@ static void clocksource_watchdog(struct
 			continue;
 		}
 
-		local_irq_disable();
-		csnow = cs->read(cs);
-		wdnow = watchdog->read(watchdog);
-		local_irq_enable();
+		if (!cs_watchdog_read(cs, &csnow, &wdnow)) {
+			/*
+			 * No point in continuing if the watchdog readout is
+			 * unreliable.
+			 */
+			__clocksource_unstable(cs);
+			continue;
+		}
 
 		/* Clocksource initialized ? */
 		if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
