lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAOH1cHkrtzr4o=ygqrMnUfDhwC=U+QJOtuqS+vfQyeXP_TvjaQ@mail.gmail.com>
Date:	Fri, 14 Oct 2011 16:25:03 -0700
From:	Mark Moseley <moseleymark@...il.com>
To:	David Howells <dhowells@...hat.com>
Cc:	Linux filesystem caching discussion list 
	<linux-cachefs@...hat.com>, linux-kernel@...r.kernel.org
Subject: Re: [Linux-cachefs] 3.0.3 64-bit Crash running fscache/cachefilesd

On Fri, Oct 14, 2011 at 2:22 AM, David Howells <dhowells@...hat.com> wrote:
> Mark Moseley <moseleymark@...il.com> wrote:
>
>> > Did you look at /proc/fs/fscache/stats at all?
>>
>> I didn't but I can repeat it. Which of the stats in
>> /proc/fs/fscache/stats would be best to track?
>
> If you could get two snapshots a couple of minutes apart, that'd be useful.
> What I'm interested in is what stops changing and anything in the CacheOp list
> at the bottom that becomes wedged on a non-zero value.

Patch is applied. I'm attaching a file with stats and a df of the
fscache partition. I cleared the cache and waited till it got
reasonably full before starting this capture. No crash yet, btw.

You can see that for the past couple of hours the byte counts on the
partition have only fluctuated by a handful of KB.

Incidentally, my cachefilesd.conf (I don't think I've sent it before
in this thread):

# cat /etc/cachefilesd.conf
dir /var/cache/fscache
tag mycache
brun  40%
bcull 30%
bstop 20%
frun  10%
fcull 7%
fstop 3%
culltable 20


# cachefilesd -v
cachefilesd version 0.10.1


Presumably it gets to bcull and stops storing but nothing's getting
pruned. I can see cachefilesd is in constant activity right now.
Looking at strace, maybe it can't find anything to prune because it
thinks it's all active. I'm seeing a constant loop that looks similar
to this:

read(3, "cull=1 frun=2d82a fcull=1fdb7 fst"..., 4096) = 78
fchdir(11)                              = 0
newfstatat(11, "@9c", {st_mode=02, st_size=17592186044416, ...}, 0) = 0
read(3, "cull=1 frun=2d82a fcull=1fdb7 fst"..., 4096) = 78
openat(11, "@9c", O_RDONLY|O_DIRECTORY) = 12
fstat(12, {st_mode=S_IFDIR, st_size=4096, ...}) = 0
fcntl(12, F_GETFL)                      = 0x18000 (flags O_RDONLY|O_LARGEFILE|O_DIRECTORY)
fcntl(12, F_SETFD, FD_CLOEXEC)          = 0
fchdir(12)                              = 0
getdents(12, /* 23 entries */, 4096)    = 1896
newfstatat(12, "EI0001000Pgb0020000gvF0l0uU2L1QQG0080000001000IVThl4g000000000000", {st_mode=01, st_size=5566277615616, ...}, 0) = 0
write(3, "inuse EI0001000Pgb0020000gvF0l0uU"..., 71) = -1 EBUSY (Device or resource busy)
newfstatat(12, "EI0001000Pgb0020000gUSTv0lC2p9QQG0080000001000IVThl4g000000000000", {st_mode=01, st_size=87170656239616, ...}, 0) = 0
write(3, "inuse EI0001000Pgb0020000gUSTv0lC"..., 71) = -1 EBUSY (Device or resource busy)
newfstatat(12, "EI0001000Pgb00200000PHNi0Q1O11QQG0080000001000IVThl4g000000000000", {st_mode=01, st_size=24099061497856, ...}, 0) = 0
write(3, "inuse EI0001000Pgb00200000PHNi0Q1"..., 71) = -1 EBUSY (Device or resource busy)
newfstatat(12, "EI0001000Pgb0020000Mfm2f05I6O3QQG0080000001000IVThl4g000000000000", {st_mode=01, st_size=36807869726720, ...}, 0) = 0
write(3, "inuse EI0001000Pgb0020000Mfm2f05I"..., 71) = -1 EBUSY (Device or resource busy)
newfstatat(12, "EI0001000Pgb0020000MoS5j0667-2QQG0080000001000IVThl4g000000000000", {st_mode=01, st_size=10642928959488, ...}, 0) = 0
write(3, "inuse EI0001000Pgb0020000MoS5j066"..., 71) = -1 EBUSY (Device or resource busy)
newfstatat(12, "EI0001000Pgb0020000gaRds0iBnFKQQG0080000001000IVThl4g000000000000", {st_mode=01, st_size=7314329305088, ...}, 0) = 0
write(3, "inuse EI0001000Pgb0020000gaRds0iB"..., 71) = -1 EBUSY (Device or resource busy)
newfstatat(12, "EI0001000Pgb00200000794b0y2CYSQQG0080000001000IVThl4g000000000000", {st_mode=01, st_size=22020297326592, ...}, 0) = 0
write(3, "inuse EI0001000Pgb00200000794b0y2"..., 71) = -1 EBUSY (Device or resource busy)
newfstatat(12, "EI0001000Pgb0020000MGKZd0sai09RQG0080000001000IVThl4g000000000000", {st_mode=01, st_size=210453397504, ...}, 0) = 0
write(3, "inuse EI0001000Pgb0020000MGKZd0sa"..., 71) = -1 EBUSY (Device or resource busy)
newfstatat(12, "EI0001000Pgb00200000G07d0bH5AiQQG0080000001000IVThl4g000000000000", {st_mode=01, st_size=16892106375168, ...}, 0) = 0
write(3, "inuse EI0001000Pgb00200000G07d0bH"..., 71) = -1 EBUSY (Device or resource busy)
newfstatat(12, "EI0001000Pgb00200000ytn40zr8X1QQG0080000001000IVThl4g000000000000", {st_mode=01, st_size=129596343189504, ...}, 0) = 0




>> >> [20839.802118] kernel BUG at fs/fscache/object-list.c:83!
>> >> [20839.802733] invalid opcode: 0000 [#1] SMP
>> >
>> > That fits with the previous BUG elsewhere in object-list.c.  It sounds like
>> > there's a refcounting problem somewhere.
>>
>> Any sys or proc settings I should turn on to track that?
>
> Not really.  However, if you could apply the attached patch, it will move the
> object list handling to next to where the object allocation and freeing is
> done.  I'm curious to see if this makes a difference.
>
> The 'object list' is an RB tree keyed on the address of an object in RAM - so
> if an object is already there it must have been double-added somehow or must
> not have been removed.
>
> David
> ---
>
>  fs/cachefiles/interface.c     |    1 +
>  fs/fscache/cache.c            |    1 -
>  fs/fscache/cookie.c           |    1 -
>  fs/fscache/object-list.c      |    1 +
>  include/linux/fscache-cache.h |   19 +++++++++++--------
>  5 files changed, 13 insertions(+), 10 deletions(-)
>
>
> diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
> index ef5c02d..3dcecdf 100644
> --- a/fs/cachefiles/interface.c
> +++ b/fs/cachefiles/interface.c
> @@ -104,6 +104,7 @@ nomem_key:
>        kfree(buffer);
>  nomem_buffer:
>        BUG_ON(test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags));
> +       fscache_object_destroy(&object->fscache);
>        kmem_cache_free(cachefiles_object_jar, object);
>        fscache_object_destroyed(&cache->cache);
>  nomem_object:
> diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c
> index b52aed1..98bca68 100644
> --- a/fs/fscache/cache.c
> +++ b/fs/fscache/cache.c
> @@ -263,7 +263,6 @@ int fscache_add_cache(struct fscache_cache *cache,
>        spin_lock(&cache->object_list_lock);
>        list_add_tail(&ifsdef->cache_link, &cache->object_list);
>        spin_unlock(&cache->object_list_lock);
> -       fscache_objlist_add(ifsdef);
>
>        /* add the cache's netfs definition index object to the top level index
>         * cookie as a known backing object */
> diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
> index 8dcb114..47d8cde 100644
> --- a/fs/fscache/cookie.c
> +++ b/fs/fscache/cookie.c
> @@ -360,7 +360,6 @@ static int fscache_attach_object(struct fscache_cookie *cookie,
>        atomic_inc(&cookie->usage);
>        hlist_add_head(&object->cookie_link, &cookie->backing_objects);
>
> -       fscache_objlist_add(object);
>        ret = 0;
>
>  cant_attach_object:
> diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c
> index f27c89d..f8fbb32 100644
> --- a/fs/fscache/object-list.c
> +++ b/fs/fscache/object-list.c
> @@ -69,6 +69,7 @@ void fscache_objlist_add(struct fscache_object *obj)
>
>        write_unlock(&fscache_object_list_lock);
>  }
> +EXPORT_SYMBOL(fscache_objlist_add);
>
>  /**
>  * fscache_object_destroy - Note that a cache object is about to be destroyed
> diff --git a/include/linux/fscache-cache.h b/include/linux/fscache-cache.h
> index 633b65d..f657c0a 100644
> --- a/include/linux/fscache-cache.h
> +++ b/include/linux/fscache-cache.h
> @@ -440,6 +440,14 @@ extern const char *fscache_object_states[];
>
>  extern void fscache_object_work_func(struct work_struct *work);
>
> +#ifdef CONFIG_FSCACHE_OBJECT_LIST
> +extern void fscache_objlist_add(struct fscache_object *obj);
> +extern void fscache_object_destroy(struct fscache_object *object);
> +#else
> +#define fscache_object_destroy(object) do {} while(0)
> +#define fscache_objlist_add(object) do {} while(0)
> +#endif
> +
>  /**
>  * fscache_object_init - Initialise a cache object description
>  * @object: Object description
> @@ -454,8 +462,6 @@ void fscache_object_init(struct fscache_object *object,
>                         struct fscache_cookie *cookie,
>                         struct fscache_cache *cache)
>  {
> -       atomic_inc(&cache->object_count);
> -
>        object->state = FSCACHE_OBJECT_INIT;
>        spin_lock_init(&object->lock);
>        INIT_LIST_HEAD(&object->cache_link);
> @@ -473,17 +479,14 @@ void fscache_object_init(struct fscache_object *object,
>        object->cache = cache;
>        object->cookie = cookie;
>        object->parent = NULL;
> +
> +       atomic_inc(&cache->object_count);
> +       fscache_objlist_add(object);
>  }
>
>  extern void fscache_object_lookup_negative(struct fscache_object *object);
>  extern void fscache_obtained_object(struct fscache_object *object);
>
> -#ifdef CONFIG_FSCACHE_OBJECT_LIST
> -extern void fscache_object_destroy(struct fscache_object *object);
> -#else
> -#define fscache_object_destroy(object) do {} while(0)
> -#endif
> -
>  /**
>  * fscache_object_destroyed - Note destruction of an object in a cache
>  * @cache: The cache from which the object came
>
>

Download attachment "fscache.stats.gz" of type "application/x-gzip" (21977 bytes)

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ