Message-ID: <m1vd5r628z.fsf@fess.ebiederm.org>
Date:	Mon, 27 Sep 2010 01:04:44 -0700
From:	ebiederm@...ssion.com (Eric W. Biederman)
To:	Pekka Enberg <penberg@...nel.org>
Cc:	Andrew Morton <akpm@...ux-foundation.org>, linux-mm@...ck.org,
	linux-kernel@...r.kernel.org, Rik van Riel <riel@...hat.com>,
	Hugh Dickins <hughd@...gle.com>
Subject: Re: [PATCH 3/3] mm: Cause revoke_mappings to wait until all close methods have completed.

Pekka Enberg <penberg@...nel.org> writes:

> On Sun, Sep 26, 2010 at 2:34 AM, Eric W. Biederman
> <ebiederm@...ssion.com> wrote:
>>
>> Signed-off-by: Eric W. Biederman <ebiederm@...stanetworks.com>
>
> The changelog is slightly too terse for me to review this. What's the
> race you're trying to avoid? If the point is to make the revoke task
> wait until everything has been closed, why can't we use the completion
> API here?

Because as far as I can tell completions don't map onto this at all.
At best we have moments when the set of vmas goes empty.

The close_count is needed because we take vmas off the lists early
to avoid issues with truncate, so we need something that tells us
when a close has actually finished, or whether one is still in
progress.

I used a simple task pointer to wake up instead of a wait queue because
in all of the revoke scenarios I know of it makes sense to serialize at
a higher level, a task pointer is smaller than a wait queue head, and I
am reluctant to increase the size of struct inode beyond what is
necessary.

The count at least always has to be present because objects could start
closing before we start the revoke.  We can't be ham-handed and grab
every mm->mmap_sem, because from mmput() revoke_vma is called without
the mmap_sem held.

So it looked to me like the cleanest and smallest way to go was to
write an old-fashioned schedule/wait loop of the kind that is usually
hidden behind completions or wait queue logic.

It is my hope that we can be clever at some point and create a union,
either in struct inode proper or in struct address_space, with a few
other fields that cannot be in use while a vma is being revoked (like
potentially the truncate count) and reuse them.  But I am not clever
enough today to do something like that.
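
Purely as a hypothetical sketch of that idea (nothing below is in the
patch, the struct name is invented, and truncate_count is only the
example candidate mentioned above):

/* Forward declaration so the sketch stands alone. */
struct task_struct;

struct address_space_sketch {
	/* ... the usual struct address_space fields ... */
	union {
		unsigned int truncate_count;	/* normal operation */
		struct {
			struct task_struct *revoke_task; /* who to wake */
			unsigned int close_count;	 /* closes in flight */
		} revoke;	/* meaningful only while a revoke runs */
	};
	/* ... remaining fields ... */
};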

Eric

>> ---
>>  include/linux/fs.h |    2 ++
>>  mm/mmap.c          |   13 ++++++++++++-
>>  mm/revoke.c        |   18 +++++++++++++++---
>>  3 files changed, 29 insertions(+), 4 deletions(-)
>>
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index 76041b6..5d3d6b8 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -633,6 +633,8 @@ struct address_space {
>>        const struct address_space_operations *a_ops;   /* methods */
>>        unsigned long           flags;          /* error bits/gfp mask */
>>        struct backing_dev_info *backing_dev_info; /* device readahead, etc */
>> +       struct task_struct      *revoke_task;   /* Who to wake up when all vmas are closed */
>> +       unsigned int            close_count;    /* Cover race conditions with revoke_mappings */
>>        spinlock_t              private_lock;   /* for use by the address_space */
>>        struct list_head        private_list;   /* ditto */
>>        struct address_space    *assoc_mapping; /* ditto */
>> diff --git a/mm/mmap.c b/mm/mmap.c
>> index 17dd003..3df3193 100644
>> --- a/mm/mmap.c
>> +++ b/mm/mmap.c
>> @@ -218,6 +218,7 @@ void unlink_file_vma(struct vm_area_struct *vma)
>>                struct address_space *mapping = file->f_mapping;
>>                spin_lock(&mapping->i_mmap_lock);
>>                __remove_shared_vm_struct(vma, file, mapping);
>> +               mapping->close_count++;
>>                spin_unlock(&mapping->i_mmap_lock);
>>        }
>>  }
>> @@ -233,9 +234,19 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
>>        if (vma->vm_ops && vma->vm_ops->close)
>>                vma->vm_ops->close(vma);
>>        if (vma->vm_file) {
>> -               fput(vma->vm_file);
>> +               struct address_space *mapping = vma->vm_file->f_mapping;
>>                if (vma->vm_flags & VM_EXECUTABLE)
>>                        removed_exe_file_vma(vma->vm_mm);
>> +
>> +               /* Decrement the close count and wake up a revoker if present */
>> +               spin_lock(&mapping->i_mmap_lock);
>> +               mapping->close_count--;
>> +               if ((mapping->close_count == 0) && mapping->revoke_task)
>> +                       /* Is wake_up_process the right variant of try_to_wake_up? */
>> +                       wake_up_process(mapping->revoke_task);
>> +               spin_unlock(&mapping->i_mmap_lock);
>> +
>> +               fput(vma->vm_file);
>>        }
>>        mpol_put(vma_policy(vma));
>>        kmem_cache_free(vm_area_cachep, vma);
>> diff --git a/mm/revoke.c b/mm/revoke.c
>> index a76cd1a..e19f7df 100644
>> --- a/mm/revoke.c
>> +++ b/mm/revoke.c
>> @@ -143,15 +143,17 @@ void revoke_mappings(struct address_space *mapping)
>>        /* Make any access to previously mapped pages trigger a SIGBUS,
>>         * and stop calling vm_ops methods.
>>         *
>> -        * When revoke_mappings returns invocations of vm_ops->close
>> -        * may still be in progress, but no invocations of any other
>> -        * vm_ops methods will be.
>> +        * When revoke_mappings returns, no invocations of any
>> +        * vm_ops method will be in progress.
>>         */
>>        struct vm_area_struct *vma;
>>        struct prio_tree_iter iter;
>>
>>        spin_lock(&mapping->i_mmap_lock);
>>
>> +       WARN_ON(mapping->revoke_task);
>> +       mapping->revoke_task = current;
>> +
>>  restart_tree:
>>        vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) {
>>                if (revoke_mapping(mapping, vma->vm_mm, vma->vm_start))
>> @@ -164,6 +166,16 @@ restart_list:
>>                        goto restart_list;
>>        }
>>
>> +       while (!list_empty(&mapping->i_mmap_nonlinear) ||
>> +              !prio_tree_empty(&mapping->i_mmap) ||
>> +              mapping->close_count)
>> +       {
>> +               __set_current_state(TASK_UNINTERRUPTIBLE);
>> +               spin_unlock(&mapping->i_mmap_lock);
>> +               schedule();
>> +               spin_lock(&mapping->i_mmap_lock);
>> +       }
>> +       mapping->revoke_task = NULL;
>>        spin_unlock(&mapping->i_mmap_lock);
>>  }
>>  EXPORT_SYMBOL_GPL(revoke_mappings);
>> --
>> 1.7.2.3
>>
