lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:	Thu, 26 May 2011 15:14:04 -0700 (PDT)
From:	David Rientjes <rientjes@...gle.com>
To:	Christoph Lameter <cl@...ux.com>
cc:	Pekka Enberg <penberg@...helsinki.fi>,
	Eric Dumazet <eric.dumazet@...il.com>,
	"H. Peter Anvin" <hpa@...or.com>, linux-kernel@...r.kernel.org,
	Thomas Gleixner <tglx@...utronix.de>
Subject: Re: [slubllv6 05/17] mm: Rearrange struct page

On Thu, 26 May 2011, Christoph Lameter wrote:

> We need to be able to use cmpxchg_double on the freelist and object count
> field in struct page. Rearrange the fields in struct page according to
> doubleword entities so that the freelist pointer comes before the counters.
> Do the rearranging with a future in mind where we use more doubleword
> atomics to avoid locking of updates to flags/mapping or lru pointers.
> 
> Create another union to allow access to counters in struct page as a
> single unsigned long value.
> 
> The doublewords must be properly aligned for cmpxchg_double to work.
> Sadly this increases the size of page struct by one word on some architectures.
> But as a result page structs are now cacheline aligned on x86_64.
> 
> Signed-off-by: Christoph Lameter <cl@...ux.com>
> 
> ---
>  include/linux/mm_types.h |   89 +++++++++++++++++++++++++++++++----------------
>  1 file changed, 60 insertions(+), 29 deletions(-)
> 
> Index: linux-2.6/include/linux/mm_types.h
> ===================================================================
> --- linux-2.6.orig/include/linux/mm_types.h	2011-05-06 12:03:46.000000000 -0500
> +++ linux-2.6/include/linux/mm_types.h	2011-05-06 12:50:40.000000000 -0500
> @@ -30,52 +30,74 @@ struct address_space;
>   * moment. Note that we have no way to track which tasks are using
>   * a page, though if it is a pagecache page, rmap structures can tell us
>   * who is mapping it.
> + *
> + * The objects in struct page are organized in double word blocks in
> + * order to allow us to use atomic double word operations on portions
> + * of struct page. That is currently only used by slub but the arrangement
> + * allows the use of atomic double word operations on the flags/mapping
> + * and lru list pointers also.
>   */
>  struct page {
> +	/* First double word block */
>  	unsigned long flags;		/* Atomic flags, some possibly
>  					 * updated asynchronously */
> -	atomic_t _count;		/* Usage count, see below. */
> +	struct address_space *mapping;	/* If low bit clear, points to
> +					 * inode address_space, or NULL.
> +					 * If page mapped as anonymous
> +					 * memory, low bit is set, and
> +					 * it points to anon_vma object:
> +					 * see PAGE_MAPPING_ANON below.
> +					 */
> +	/* Second double word */
>  	union {
> -		atomic_t _mapcount;	/* Count of ptes mapped in mms,
> -					 * to show when page is mapped
> -					 * & limit reverse map searches.
> +		struct {
> +			pgoff_t index;		/* Our offset within mapping. */
> +			atomic_t _mapcount;	/* Count of ptes mapped in mms,
> +							 * to show when page is mapped
> +							 * & limit reverse map searches.
> +							 */
> +			atomic_t _count;		/* Usage count, see below. */
> +		};
> +
> +		struct {			/* SLUB cmpxchg_double area */
> +			void *freelist;
> +			union {
> +				unsigned long counters;
> +				struct {
> +					unsigned inuse:16;
> +					unsigned objects:15;
> +					unsigned frozen:1;
> +					/*
> +					 * Kernel may make use of this field even when slub
> +					 * uses the rest of the double word!
>  					 */
> -		struct {		/* SLUB */
> -			unsigned inuse:16;
> -			unsigned objects:15;
> -			unsigned frozen:1;
> +					atomic_t _count;
> +				};
> +			};
>  		};
>  	};
> +
> +	/* Third double word block */
> +	struct list_head lru;		/* Pageout list, eg. active_list
> +					 * protected by zone->lru_lock !
> +					 */
> +
> +	/* Remainder is not double word aligned */
>  	union {
> -	    struct {
> -		unsigned long private;		/* Mapping-private opaque data:
> +	 	unsigned long private;		/* Mapping-private opaque data:

Space before tab in the indentation.

>  					 	 * usually used for buffer_heads
>  						 * if PagePrivate set; used for
>  						 * swp_entry_t if PageSwapCache;
>  						 * indicates order in the buddy
>  						 * system if PG_buddy is set.
>  						 */
> -		struct address_space *mapping;	/* If low bit clear, points to
> -						 * inode address_space, or NULL.
> -						 * If page mapped as anonymous
> -						 * memory, low bit is set, and
> -						 * it points to anon_vma object:
> -						 * see PAGE_MAPPING_ANON below.
> -						 */
> -	    };
>  #if USE_SPLIT_PTLOCKS
> -	    spinlock_t ptl;
> +		spinlock_t ptl;
>  #endif
> -	    struct kmem_cache *slab;	/* SLUB: Pointer to slab */
> -	    struct page *first_page;	/* Compound tail pages */
> +		struct kmem_cache *slab;	/* SLUB: Pointer to slab */
> +		struct page *first_page;	/* Compound tail pages */
>  	};
> -	union {
> -		pgoff_t index;		/* Our offset within mapping. */
> -		void *freelist;		/* SLUB: freelist req. slab lock */
> -	};
> -	struct list_head lru;		/* Pageout list, eg. active_list
> -					 * protected by zone->lru_lock !
> -					 */
> +
>  	/*
>  	 * On machines where all RAM is mapped into kernel address space,
>  	 * we can simply calculate the virtual address. On machines with
> @@ -101,7 +123,16 @@ struct page {
>  	 */
>  	void *shadow;
>  #endif
> -};
> +}
> +/*
> + * If another subsystem starts using the double word pairing for atomic
> + * operations on struct page then it must change the #if to ensure
> + * proper alignment of the page struct.
> + */
> +#if defined(CONFIG_SLUB) && defined(CONFIG_CMPXCHG_LOCAL)
> +	__attribute__((__aligned__(2*sizeof(unsigned long))))
> +#endif
> +;
>  
>  /*
>   * A region containing a mapping of a non-memory backed file under NOMMU

Ah, the joys of rebasing during the merge window.  This doesn't apply 
because of ca16d140af91 ("mm: don't access vm_flags as 'int'") merged 
earlier today.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ