[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <28c262360911060741x3f7ab0a2k15be645e287e05ac@mail.gmail.com>
Date: Sat, 7 Nov 2009 00:41:40 +0900
From: Minchan Kim <minchan.kim@...il.com>
To: Christoph Lameter <cl@...ux-foundation.org>
Cc: npiggin@...e.de, linux-kernel@...r.kernel.org, linux-mm@...ck.org,
Tejun Heo <tj@...nel.org>, Ingo Molnar <mingo@...e.hu>,
KAMEZAWA Hiroyuki <kamezawa.hiroyu@...fujitsu.com>,
"hugh.dickins@...cali.co.uk" <hugh.dickins@...cali.co.uk>
Subject: Re: Subject: [RFC MM] mmap_sem scaling: Use mutex and percpu counter
instead
Hi, Christoph.
How about changing 'mm_readers' to 'is_readers' to improve your
goal of scalability?
===
static inline int is_readers(struct mm_struct *mm)
{
int cpu;
int ret = 0;
for_each_possible_cpu(cpu) {
if (per_cpu(mm->rss->readers, cpu)) {
ret = 1;
break;
}
}
return ret;
}
===
On Fri, Nov 6, 2009 at 4:20 AM, Christoph Lameter
<cl@...ux-foundation.org> wrote:
> From: Christoph Lamter <cl@...ux-foundation.org>
> Subject: [RFC MM] mmap_sem scaling: Use mutex and percpu counter instead
>
> Instead of a rw semaphore use a mutex and a per cpu counter for the number
> of the current readers. read locking then becomes very cheap requiring only
> the increment of a per cpu counter.
>
> Write locking is more expensive since the writer must scan the percpu array
> and wait until all readers are complete. Since the readers are not holding
> semaphores we have no wait queue from which the writer could wakeup. In this
> draft we simply wait for one millisecond between scans of the percpu
> array. A different solution must be found there.
>
> Patch is on top of -next and the percpu counter patches that I posted
> yesterday. The patch adds another per cpu counter to the file and anon rss
> counters.
>
> Signed-off-by: Christoph Lamter <cl@...ux-foundation.org>
>
> ---
> include/linux/mm_types.h | 68 ++++++++++++++++++++++++++++++++++++++---------
> mm/init-mm.c | 2 -
> 2 files changed, 56 insertions(+), 14 deletions(-)
>
> Index: linux-2.6/include/linux/mm_types.h
> ===================================================================
> --- linux-2.6.orig/include/linux/mm_types.h 2009-11-05 13:03:11.000000000 -0600
> +++ linux-2.6/include/linux/mm_types.h 2009-11-05 13:06:31.000000000 -0600
> @@ -14,6 +14,7 @@
> #include <linux/page-debug-flags.h>
> #include <asm/page.h>
> #include <asm/mmu.h>
> +#include <linux/percpu.h>
>
> #ifndef AT_VECTOR_SIZE_ARCH
> #define AT_VECTOR_SIZE_ARCH 0
> @@ -27,6 +28,7 @@ struct address_space;
> struct mm_counter {
> long file;
> long anon;
> + long readers;
> };
>
> /*
> @@ -214,7 +216,7 @@ struct mm_struct {
> atomic_t mm_users; /* How many users with user space? */
> atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */
> int map_count; /* number of VMAs */
> - struct rw_semaphore sem;
> + struct mutex lock;
> spinlock_t page_table_lock; /* Protects page tables and some counters */
>
> struct list_head mmlist; /* List of maybe swapped mm's. These are globally strung
> @@ -285,64 +287,104 @@ struct mm_struct {
> #endif
> };
>
> +static inline int mm_readers(struct mm_struct *mm)
> +{
> + int cpu;
> + int readers = 0;
> +
> + for_each_possible_cpu(cpu)
> + readers += per_cpu(mm->rss->readers, cpu);
> +
> + return readers;
> +}
> +
> static inline void mm_reader_lock(struct mm_struct *mm)
> {
> - down_read(&mm->sem);
> +redo:
> + this_cpu_inc(mm->rss->readers);
> + if (mutex_is_locked(&mm->lock)) {
> + this_cpu_dec(mm->rss->readers);
> + /* Need to wait till mutex is released */
> + mutex_lock(&mm->lock);
> + mutex_unlock(&mm->lock);
> + goto redo;
> + }
> }
>
> static inline void mm_reader_unlock(struct mm_struct *mm)
> {
> - up_read(&mm->sem);
> + this_cpu_dec(mm->rss->readers);
> }
>
> static inline int mm_reader_trylock(struct mm_struct *mm)
> {
> - return down_read_trylock(&mm->sem);
> + this_cpu_inc(mm->rss->readers);
> + if (mutex_is_locked(&mm->lock)) {
> + this_cpu_dec(mm->rss->readers);
> + return 0;
> + }
> + return 1;
> }
>
> static inline void mm_writer_lock(struct mm_struct *mm)
> {
> - down_write(&mm->sem);
> +redo:
> + mutex_lock(&mm->lock);
> + if (mm_readers(mm) == 0)
We can change this to use the new helper, which stops scanning at the
first CPU with readers:
if (!is_readers(mm))
        return;
> + return;
> +
> + mutex_unlock(&mm->lock);
> + msleep(1);
> + goto redo;
> }
>
> static inline void mm_writer_unlock(struct mm_struct *mm)
> {
> - up_write(&mm->sem);
> + mutex_unlock(&mm->lock);
> }
>
> static inline int mm_writer_trylock(struct mm_struct *mm)
> {
> - return down_write_trylock(&mm->sem);
> + if (!mutex_trylock(&mm->lock))
> + goto fail;
> +
> + if (mm_readers(mm) == 0)
> + return 1;
if (!is_readers(mm))
return 1;
> +
> + mutex_unlock(&mm->lock);
> +fail:
> + return 0;
> }
>
--
Kind regards,
Minchan Kim
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists