Message-ID: <20150826193312.23551.qmail@ns.horizon.com>
Date:	26 Aug 2015 15:33:12 -0400
From:	"George Spelvin" <linux@...izon.com>
To:	jason.low2@...com
Cc:	akpm@...ux-foundation.org, linux-kernel@...r.kernel.org,
	linux@...izon.com
Subject: [PATCH 3/3] timer: Reduce unnecessary sighand lock contention

And some more comments on the series...

> @@ -626,6 +628,7 @@ struct task_cputime_atomic {
>  struct thread_group_cputimer {
> 	struct task_cputime_atomic cputime_atomic;
> 	int running;
>+	int checking_timer;
> };

Why not make both running and checking_timer actual booleans, so the
pair is only 16 bits rather than 64?

I suppose since sum_exec_runtime in struct task_cputime_atomic is 64 bits,
alignment means there's no actual improvement.  It's just my personal
preference (Linus disagrees with me!) for the bool type for documentation.
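
For concreteness, the layout I have in mind (my assumption for a
64-bit target; I haven't checked every arch):

	struct thread_group_cputimer {
		struct task_cputime_atomic cputime_atomic; /* 8-byte-aligned atomics */
		bool running;		/* 1 byte */
		bool checking_timer;	/* 1 byte */
		/* 6 bytes of tail padding, so sizeof() is unchanged */
	};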

(Compile-tested patch appended.)


But when it comes to really understanding this code, I'm struggling.
It seems that this changes the return value of fastpath_timer_check.
I'm trying to wrap my head around exactly why that is safe.

Any time I see things like READ_ONCE and WRITE_ONCE, I wonder if there
need to be memory barriers as well.  (Because x86 has strong default
memory ordering, testing there doesn't prove the code right on other
platforms.)
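
To make that concrete, here's the kind of pairing I'd look for (just
a sketch of the idiom, not a claim that this code needs it):

	/* lockless read side, e.g. in fastpath_timer_check() */
	if (smp_load_acquire(&sig->cputimer.checking_timer))
		return false;	/* pairs with the release implied by the
				 * writer's raw_spin_unlock(), or with an
				 * explicit smp_store_release() clearing
				 * the flag */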

For the writes, the surrounding spinlock does the job.

But on the read side (fastpath_timer_check), I'm not sure
what the rules should be.

Or is it basically okay if this is massively racy, since process-wide
CPU timers are inherently sloppy?  A race will just cause an expiration
check to be missed, but it will be retried soon anyway.


Other half-baked thoughts that went through my head:

If the problem is that you have contention for read access to
sig->cputimer.cputime, can that be changed to a seqlock?
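
Something like this (the seqlock field is hypothetical, and I haven't
thought through how it interacts with cputimer->running):

	/* update side, wherever sig->cputimer.cputime is written */
	write_seqlock(&sig->cputimer.seq);
	/* ... update sig->cputimer.cputime ... */
	write_sequnlock(&sig->cputimer.seq);

	/* lockless read side in fastpath_timer_check() */
	unsigned int seq;
	do {
		seq = read_seqbegin(&sig->cputimer.seq);
		group_sample = sig->cputimer.cputime;
	} while (read_seqretry(&sig->cputimer.seq, seq));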

Another possibility is to use a trylock before testing
checking_timer:

	if (sig->cputimer.running) {
		struct task_cputime group_sample;

-		raw_spin_lock(&sig->cputimer.lock);
+		if (!raw_spin_trylock(&sig->cputimer.lock)) {
+			if (READ_ONCE(sig->cputimer.checking_timer))
+				return false;	/* Lock holder's doing it for us */
+			raw_spin_lock(&sig->cputimer.lock);
+		}
		group_sample = sig->cputimer.cputime;
		raw_spin_unlock(&sig->cputimer.lock);

		if (task_cputime_expired(&group_sample, &sig->cputime_expires))
			return true;
	}
	return false;
}




----8<---- This is the patch mentioned above ----8<----

From 0058bf5ed6a9e483ae33746685332e3ef9562ed5 Mon Sep 17 00:00:00 2001
From: George Spelvin <linux@...izon.com>
Date: Wed, 26 Aug 2015 19:15:54 +0000
Subject: [PATCH] timer: Use bool more in kernel/time/posix-cpu-timers.c

This is mostly function return values, for documentation.

One structure field is changed (from int), but alignment
padding precludes any actual space saving.

Signed-off-by: George Spelvin <linux@...izon.com>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e612..fac3a7e2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -585,7 +585,7 @@ struct task_cputime {
 /**
  * struct thread_group_cputimer - thread group interval timer counts
  * @cputime:		thread group interval timers.
- * @running:		non-zero when there are timers running and
+ * @running:		%true when there are timers running and
  * 			@cputime receives updates.
  * @lock:		lock for fields in this struct.
  *
@@ -594,7 +594,7 @@ struct task_cputime {
  */
 struct thread_group_cputimer {
 	struct task_cputime cputime;
-	int running;
+	bool running;
 	raw_spinlock_t lock;
 };
 
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 0075da74..d8483ec5 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -113,14 +113,12 @@ static void bump_cpu_timer(struct k_itimer *timer,
  *
  * @cputime:	The struct to compare.
  *
- * Checks @cputime to see if all fields are zero.  Returns true if all fields
- * are zero, false if any field is nonzero.
+ * Checks @cputime to see if all fields are zero.  Returns %true if all fields
+ * are zero, %false if any field is nonzero.
  */
-static inline int task_cputime_zero(const struct task_cputime *cputime)
+static inline bool task_cputime_zero(const struct task_cputime *cputime)
 {
-	if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
-		return 1;
-	return 0;
+	return !cputime->utime && !cputime->stime && !cputime->sum_exec_runtime;
 }
 
 static inline unsigned long long prof_ticks(struct task_struct *p)
@@ -223,7 +221,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
 		 */
 		thread_group_cputime(tsk, &sum);
 		raw_spin_lock_irqsave(&cputimer->lock, flags);
-		cputimer->running = 1;
+		cputimer->running = true;
 		update_gt_cputime(&cputimer->cputime, &sum);
 	} else
 		raw_spin_lock_irqsave(&cputimer->lock, flags);
@@ -435,8 +433,9 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
 	cleanup_timers(tsk->signal->cpu_timers);
 }
 
-static inline int expires_gt(cputime_t expires, cputime_t new_exp)
+static inline bool expires_gt(cputime_t expires, cputime_t new_exp)
 {
+	/* Could also be written "expires - 1 >= new_exp" */
 	return expires == 0 || expires > new_exp;
 }
 
@@ -888,7 +887,7 @@ static void stop_process_timers(struct signal_struct *sig)
 	unsigned long flags;
 
 	raw_spin_lock_irqsave(&cputimer->lock, flags);
-	cputimer->running = 0;
+	cputimer->running = false;
 	raw_spin_unlock_irqrestore(&cputimer->lock, flags);
 }
 
@@ -1066,20 +1065,20 @@ out:
  * @expires:	Expiration times, against which @sample will be checked.
  *
  * Checks @sample against @expires to see if any field of @sample has expired.
- * Returns true if any field of the former is greater than the corresponding
- * field of the latter if the latter field is set.  Otherwise returns false.
+ * Returns %true if any field of the former is greater than the corresponding
+ * field of the latter if the latter field is set.  Otherwise returns %false.
  */
-static inline int task_cputime_expired(const struct task_cputime *sample,
+static inline bool task_cputime_expired(const struct task_cputime *sample,
 					const struct task_cputime *expires)
 {
 	if (expires->utime && sample->utime >= expires->utime)
-		return 1;
+		return true;
 	if (expires->stime && sample->utime + sample->stime >= expires->stime)
-		return 1;
+		return true;
 	if (expires->sum_exec_runtime != 0 &&
 	    sample->sum_exec_runtime >= expires->sum_exec_runtime)
-		return 1;
-	return 0;
+		return true;
+	return false;
 }
 
 /**
@@ -1088,11 +1087,11 @@ static inline int task_cputime_expired(const struct task_cputime *sample,
  * @tsk:	The task (thread) being checked.
  *
  * Check the task and thread group timers.  If both are zero (there are no
- * timers set) return false.  Otherwise snapshot the task and thread group
+ * timers set) return %false.  Otherwise snapshot the task and thread group
  * timers and compare them with the corresponding expiration times.  Return
- * true if a timer has expired, else return false.
+ * %true if a timer has expired, else return %false.
  */
-static inline int fastpath_timer_check(struct task_struct *tsk)
+static inline bool fastpath_timer_check(struct task_struct *tsk)
 {
 	struct signal_struct *sig;
 	cputime_t utime, stime;
@@ -1107,7 +1106,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 		};
 
 		if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
-			return 1;
+			return true;
 	}
 
 	sig = tsk->signal;
@@ -1119,10 +1118,10 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 		raw_spin_unlock(&sig->cputimer.lock);
 
 		if (task_cputime_expired(&group_sample, &sig->cputime_expires))
-			return 1;
+			return true;
 	}
 
-	return 0;
+	return false;
 }
 
 /*
--
