lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 09 Jun 2010 14:00:15 -0700
From:	Salman <sqazi@...gle.com>
To:	peterz@...radead.org, akpm@...ux-foundation.org,
	torvalds@...ux-foundation.org, mingo@...e.hu,
	linux-kernel@...r.kernel.org
Cc:	tytso@...gle.com
Subject: [PATCH] Fix a race in pid generation that causes pids to be reused
	immediately.

A program that repeatedly forks and waits is susceptible to having the
same pid repeated, especially when it competes with another instance of the
same program.  This is really bad for bash's implementation.  Furthermore, many
shell scripts assume that pid numbers will not be reused for some length of time.

Race Description:

A                                    B

// pid == offset == n                // pid == offset == n + 1
test_and_set_bit(offset, map->page)
                                     test_and_set_bit(offset, map->page);
                                     pid_ns->last_pid = pid;
pid_ns->last_pid = pid;
                                     // pid == n + 1 is freed (wait())

                                     // Next fork()...
                                     last = pid_ns->last_pid; // == n
                                     pid = last + 1;

Code to reproduce it (Running multiple instances is more effective):

#include <errno.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>

// Forward distance from `first` to `second` in a pid space that wraps at
// 32768. `first` is expected to be the earlier (smaller) pid; when `second`
// is numerically below `first`, the result accounts for the wrap-around.
int PidDistance(pid_t first, pid_t second) {
  const int pid_space = 32768;
  int forward_gap = second + pid_space - first;
  return forward_gap % pid_space;
}

// Reproducer: fork a child and reap it, ten million times in a row, and
// report whenever the pid handed out by one fork() is the same as, or
// appears to precede, the pid handed out by the previous fork().
//
// Each child exits immediately with status (iteration % 256); the parent
// also verifies that wait() returns the expected pid and exit status.
//
// Exit status: 0 if no anomaly was observed, 1 otherwise (or if fork fails).
int main(int argc, char* argv[]) {
  int failed = 0;
  pid_t last_pid = 0;
  int i;
  // sizeof yields a size_t, which must be printed with %zu.
  printf("%zu\n", sizeof(pid_t));
  for (i = 0; i < 10000000; ++i) {
    // Progress marker once per 32768 iterations (one nominal pid cycle).
    if (i % 32768 == 0)
      printf("Iter: %d\n", i / 32768);
    int child_exit_code = i % 256;
    pid_t pid = fork();
    if (pid == -1) {
      fprintf(stderr, "fork failed, iteration %d, errno=%d\n", i, errno);
      exit(1);
    }
    if (pid == 0) {
      // Child
      exit(child_exit_code);
    } else {
      // Parent
      if (i > 0) {
        // A distance of 0 means the very same pid was handed out twice in a
        // row; a very large distance means the new pid is effectively
        // "behind" the previous one.
        int distance = PidDistance(last_pid, pid);
        if (distance == 0 || distance > 30000) {
          fprintf(stderr,
                  "Unexpected pid sequence: previous fork: pid=%d, "
                  "current fork: pid=%d for iteration=%d.\n",
                  last_pid, pid, i);
          failed = 1;
        }
      }
      last_pid = pid;
      int status;
      int reaped = wait(&status);
      if (reaped != pid) {
        fprintf(stderr,
                "Wait return value: expected pid=%d, "
                "got %d, iteration %d\n",
                pid, reaped, i);
        failed = 1;
      } else if (WEXITSTATUS(status) != child_exit_code) {
        fprintf(stderr,
                "Unexpected exit status %x, iteration %d\n",
                WEXITSTATUS(status), i);
        failed = 1;
      }
    }
  }
  exit(failed);
}


Thanks to Ted Tso for the key ideas of this implementation.

Signed-off-by: Salman Qazi <sqazi@...gle.com>
---
 kernel/pid.c |   39 ++++++++++++++++++++++++++++++++++++++-
 1 files changed, 38 insertions(+), 1 deletions(-)

diff --git a/kernel/pid.c b/kernel/pid.c
index e9fd8c1..865a482 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -122,6 +122,22 @@ static void free_pidmap(struct upid *upid)
 	atomic_inc(&map->nr_free);
 }
 
+/*
+ * If we started walking pids at 'base', is 'a' seen before 'b'?
+ *
+ */
+static int pid_before(int base, int a, int b)
+{
+	int a_lt_b = (a < b);
+	int min_a_b = min(a, b);
+	int max_a_b = max(a, b);
+
+	if ((base <= min_a_b) || (base >= max_a_b))
+		return a_lt_b;
+
+	return !a_lt_b;
+}
+
 static int alloc_pidmap(struct pid_namespace *pid_ns)
 {
 	int i, offset, max_scan, pid, last = pid_ns->last_pid;
@@ -153,8 +169,29 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
 		if (likely(atomic_read(&map->nr_free))) {
 			do {
 				if (!test_and_set_bit(offset, map->page)) {
+					int prev;
+					int last_write = last;
 					atomic_dec(&map->nr_free);
-					pid_ns->last_pid = pid;
+
+					/*
+					 * We might be racing with someone else trying
+					 * to set pid_ns->last_pid.  We want the
+					 * winner to have the "later" value,
+					 * because if the "earlier" value prevails, then
+					 * a pid may get reused immediately.
+					 *
+					 * Since pids rollover, it is not sufficient
+					 * to just pick the bigger value.  We
+					 * have to consider where we started counting
+					 * from.
+					 */
+					do {
+						prev = last_write;
+						last_write = cmpxchg(&pid_ns->last_pid,
+							       prev, pid);
+					} while ((prev != last_write) &&
+						 (pid_before(last, last_write, pid)));
+
 					return pid;
 				}
 				offset = find_next_offset(map, offset);

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ