lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 23 Sep 2008 14:38:11 -0700
From:	sqazi@...gle.com (Salman Qazi)
To:	akpm@...ux-foundation.org, linux-mm@...ck.org,
	linux-kernel@...r.kernel.org
Subject: mlock: Make the mlock system call interruptible by fatal signals.


Make the mlock system call interruptible by fatal signals, so that programs
that are mlocking a large number of pages terminate quickly when killed.

Signed-off-by: Salman Qazi <sqazi@...gle.com>
---

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 72a15dc..a2531e6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -807,7 +807,8 @@ static inline int handle_mm_fault(struct mm_struct *mm,
 }
 #endif
 
-extern int make_pages_present(unsigned long addr, unsigned long end);
+extern int make_pages_present(unsigned long addr, unsigned long end,
+			int interruptible);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
 
 int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
diff --git a/mm/fremap.c b/mm/fremap.c
index 7881638..f5eff74 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -223,7 +223,7 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
 			downgrade_write(&mm->mmap_sem);
 			has_write_lock = 0;
 		}
-		make_pages_present(start, start+size);
+		make_pages_present(start, start+size, 0);
 	}
 
 	/*
diff --git a/mm/memory.c b/mm/memory.c
index 1002f47..4088fd0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1129,9 +1129,10 @@ static inline int use_zero_page(struct vm_area_struct *vma)
 	return !vma->vm_ops || !vma->vm_ops->fault;
 }
 
-int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
-		unsigned long start, int len, int write, int force,
-		struct page **pages, struct vm_area_struct **vmas)
+static int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+			unsigned long start, int len, int write, int force,
+			struct page **pages, struct vm_area_struct **vmas,
+			int interruptible)
 {
 	int i;
 	unsigned int vm_flags;
@@ -1223,6 +1224,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 			cond_resched();
 			while (!(page = follow_page(vma, start, foll_flags))) {
 				int ret;
+				if (interruptible && fatal_signal_pending(tsk))
+					return -EINTR;
 				ret = handle_mm_fault(mm, vma, start,
 						foll_flags & FOLL_WRITE);
 				if (ret & VM_FAULT_ERROR) {
@@ -1266,6 +1269,14 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 	} while (len);
 	return i;
 }
+
+int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+		unsigned long start, int len, int write, int force,
+		struct page **pages, struct vm_area_struct **vmas)
+{
+	return __get_user_pages(tsk, mm, start, len, write, force,
+				pages, vmas, 0);
+}
 EXPORT_SYMBOL(get_user_pages);
 
 pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
@@ -2758,7 +2769,7 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
 }
 #endif /* __PAGETABLE_PMD_FOLDED */
 
-int make_pages_present(unsigned long addr, unsigned long end)
+int make_pages_present(unsigned long addr, unsigned long end, int interruptible)
 {
 	int ret, len, write;
 	struct vm_area_struct * vma;
@@ -2770,8 +2781,8 @@ int make_pages_present(unsigned long addr, unsigned long end)
 	BUG_ON(addr >= end);
 	BUG_ON(end > vma->vm_end);
 	len = DIV_ROUND_UP(end, PAGE_SIZE) - addr/PAGE_SIZE;
-	ret = get_user_pages(current, current->mm, addr,
-			len, write, 0, NULL, NULL);
+	ret = __get_user_pages(current, current->mm, addr,
+			len, write, 0, NULL, NULL, interruptible);
 	if (ret < 0) {
 		/*
 		   SUS require strange return value to mlock
diff --git a/mm/mlock.c b/mm/mlock.c
index 01fbe93..5586ee4 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -73,7 +73,7 @@ success:
 	if (newflags & VM_LOCKED) {
 		pages = -pages;
 		if (!(newflags & VM_IO))
-			ret = make_pages_present(start, end);
+			ret = make_pages_present(start, end, 1);
 	}
 
 	mm->locked_vm -= pages;
diff --git a/mm/mmap.c b/mm/mmap.c
index e7a5a68..afb8e39 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1225,10 +1225,10 @@ out:
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
 		mm->locked_vm += len >> PAGE_SHIFT;
-		make_pages_present(addr, addr + len);
+		make_pages_present(addr, addr + len, 0);
 	}
 	if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
-		make_pages_present(addr, addr + len);
+		make_pages_present(addr, addr + len, 0);
 	return addr;
 
 unmap_and_free_vma:
@@ -1701,7 +1701,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 	if (!prev || expand_stack(prev, addr))
 		return NULL;
 	if (prev->vm_flags & VM_LOCKED)
-		make_pages_present(addr, prev->vm_end);
+		make_pages_present(addr, prev->vm_end, 0);
 	return prev;
 }
 #else
@@ -1728,7 +1728,7 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
 	if (expand_stack(vma, addr))
 		return NULL;
 	if (vma->vm_flags & VM_LOCKED)
-		make_pages_present(addr, start);
+		make_pages_present(addr, start, 0);
 	return vma;
 }
 #endif
@@ -2049,7 +2049,7 @@ out:
 	mm->total_vm += len >> PAGE_SHIFT;
 	if (flags & VM_LOCKED) {
 		mm->locked_vm += len >> PAGE_SHIFT;
-		make_pages_present(addr, addr + len);
+		make_pages_present(addr, addr + len, 0);
 	}
 	return addr;
 }
diff --git a/mm/mremap.c b/mm/mremap.c
index 1a77439..c83ffcc 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -239,7 +239,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		mm->locked_vm += new_len >> PAGE_SHIFT;
 		if (new_len > old_len)
 			make_pages_present(new_addr + old_len,
-					   new_addr + new_len);
+					   new_addr + new_len, 0);
 	}
 
 	return new_addr;
@@ -380,7 +380,7 @@ unsigned long do_mremap(unsigned long addr,
 			if (vma->vm_flags & VM_LOCKED) {
 				mm->locked_vm += pages;
 				make_pages_present(addr + old_len,
-						   addr + new_len);
+						   addr + new_len, 0);
 			}
 			ret = addr;
 			goto out;

---

The following program can be used to verify that the above actually reduces the time required for an mlocking program to die, when a fatal signal is delivered:

---

/*
 * mlock_kill:  Attempts to SIGKILL a process that is mlocking a huge
 * chunk (4GB) of memory.  If the kill succeeds in less than a second
 * then mlock is interruptable.  If not, then there's a regression.
 *
 */
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/sem.h>
#include <sys/shm.h>
#include <signal.h>
#include <assert.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <errno.h>
#include <time.h>
#include <sys/time.h>
#include <string.h>

#define _LARGEFILE64_SOURCE

#define SHM_NAME	"dripd"
#define NUM_PAGES	1024*1024UL


union semun {
	int              val;    /* Value for SETVAL */
	struct semid_ds *buf;    /* Buffer for IPC_STAT, IPC_SET */
	unsigned short  *array;  /* Array for GETALL, SETALL */
	struct seminfo  *__buf;  /* Buffer for IPC_INFO
					(Linux specific) */
};

/*
 * The plan: Make a massive shared mem file, mmap it in a process,
 * lock its pages, and SIGKILL the process while doing so.
 * The mlock system call must abort early and the process must die
 * shortly afterwards.
 */
int main()
{
	int shm_id;
	int sem_id;
	int status;
	int pid;
	int ret;
	char *large_buffer = NULL;
	union semun data;
	struct timeval start;
	struct timeval end;
	struct timeval res;
	struct sembuf op;
	char cur_limit[256];
	char new_limit[256];
	int num_read;
	FILE *proc_file;

	/* Save the amount of shared memory allowed in the system */
	proc_file = fopen("/proc/sys/kernel/shmmax", "r+");
	num_read = fread(cur_limit, 1, 255, proc_file);
	cur_limit[num_read] = 0;
	printf("Limit was: %s", cur_limit);
	fclose(proc_file);

	/* increase it */
	proc_file = fopen("/proc/sys/kernel/shmmax", "w+");
	sprintf(new_limit, "%ld\n", (NUM_PAGES + 1)*4096);
	printf("New Limit: %s", new_limit);
	fwrite(new_limit, 1, strlen(new_limit),  proc_file);
	fclose(proc_file);

	op.sem_num = 0;
	shm_id = shmget(IPC_PRIVATE, NUM_PAGES*4096, 0600);
	if (errno) {
		perror("mlock_kill shmget");
		return EXIT_FAILURE;
	}
	sem_id = semget(IPC_PRIVATE, 1, IPC_CREAT);
	assert(sem_id >= 0);
	data.val = 0;
	semctl(sem_id, 0, SETVAL, data);
	pid = fork();
	if (pid < 0)
		return EXIT_FAILURE;
	if (pid == 0)
	{
		/* child process */
		large_buffer = shmat(shm_id, NULL, 0);

		if (large_buffer == MAP_FAILED) {
			perror("mlock_kill map failed");
			return 127;
		}
		/* Increment */
		op.sem_op = 1;
		semop(sem_id, &op, 1);
		/* Wait for ack. */
		op.sem_op = 0;
		semop(sem_id, &op, 1);
		printf("%lx\n", (unsigned long)large_buffer);
		ret = mlock(large_buffer, NUM_PAGES * 4096);
		printf("Parent failed to kill me in time!\n");
		return EXIT_FAILURE;
	} else {
		/* parent process */
		op.sem_op = -1;

		/* Wait for increment (and send ack) */
		semop(sem_id, &op, 1);
		printf("synced\n");

		/* when above ack happened, the other process was about to
		 * mlock.
		 * we should sleep a little and then kill it
		 */
		sleep(1);

		/* Kill dash nine - no more CPU time!  */
		kill(pid, SIGKILL);
		printf("waiting...\n");
		gettimeofday(&start, NULL);
		wait(&status);
		gettimeofday(&end, NULL);
		timersub(&end, &start, &res);
		printf("Took %d:%d to die.\n", res.tv_sec, res.tv_usec);
		/* free shared memory */
		shmdt(large_buffer);
		shmctl(shm_id, IPC_RMID, NULL);

		/* reset shmmax */
		proc_file = fopen("/proc/sys/kernel/shmmax", "w+");
		fwrite(cur_limit, 1, strlen(cur_limit),  proc_file);
		fclose(proc_file);
		printf("Limit reset\n");

		if (res.tv_sec > 0) {
			printf("FAIL");
			return EXIT_FAILURE;
		}
		printf("PASS");

	}
	return EXIT_SUCCESS;

}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ