[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAHRSSExdn-aYUK-N9-ANg2Zwtm+B49qMQrLqttG+Ay6yYgiMHQ@mail.gmail.com>
Date: Thu, 20 Aug 2020 15:31:21 -0700
From: Todd Kjos <tkjos@...gle.com>
To: Martijn Coenen <maco@...roid.com>
Cc: Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
Arve Hjønnevåg <arve@...roid.com>,
"Joel Fernandes (Google)" <joel@...lfernandes.org>,
Christian Brauner <christian@...uner.io>,
Hridya Valsaraju <hridya@...gle.com>,
Suren Baghdasaryan <surenb@...gle.com>,
LKML <linux-kernel@...r.kernel.org>,
"open list:ANDROID DRIVERS" <devel@...verdev.osuosl.org>,
Martijn Coenen <maco@...gle.com>
Subject: Re: [PATCH v2] ANDROID: binder: print warnings when detecting oneway spamming.
On Thu, Aug 20, 2020 at 4:50 AM Martijn Coenen <maco@...roid.com> wrote:
>
> The most common cause of the binder transaction buffer filling up is a
> client rapidly firing oneway transactions into a process, before it has
> a chance to handle them. Yet the root cause of this is often hard to
> debug, because either the system or the app will stop, and by that time
> binder debug information we dump in bugreports is no longer relevant.
>
> This change warns as soon as a process dips below 80% of its oneway
> space (less than 100kB available in the configuration), when any one
> process is responsible for either more than 50 transactions, or more
> than 50% of the oneway space.
>
> Signed-off-by: Martijn Coenen <maco@...roid.com>
A few minor comment issues below. When resolved:
Acked-by: Todd Kjos <tkjos@...gle.com>
> ---
> v2: fixed call-site in binder_alloc_selftest
>
> drivers/android/binder.c | 2 +-
> drivers/android/binder_alloc.c | 49 +++++++++++++++++++++++--
> drivers/android/binder_alloc.h | 5 ++-
> drivers/android/binder_alloc_selftest.c | 2 +-
> 4 files changed, 52 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index f936530a19b0..946332bc871a 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -3136,7 +3136,7 @@ static void binder_transaction(struct binder_proc *proc,
>
> t->buffer = binder_alloc_new_buf(&target_proc->alloc, tr->data_size,
> tr->offsets_size, extra_buffers_size,
> - !reply && (t->flags & TF_ONE_WAY));
> + !reply && (t->flags & TF_ONE_WAY), current->tgid);
> if (IS_ERR(t->buffer)) {
> /*
> * -ESRCH indicates VMA cleared. The target is dying.
> diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
> index 69609696a843..76e8e633dbd4 100644
> --- a/drivers/android/binder_alloc.c
> +++ b/drivers/android/binder_alloc.c
> @@ -338,12 +338,48 @@ static inline struct vm_area_struct *binder_alloc_get_vma(
> return vma;
> }
>
> +static void debug_low_async_space_locked(struct binder_alloc *alloc, int pid)
> +{
> + /*
> + * Find the amount and size of buffers allocated by the current caller;
> + * The idea is that once we cross the threshold, whoever is responsible
> + * for the low async space is likely to try to send another async txn,
> + * and at some point we'll catch them in the act. This is more efficient
> + * than keeping a map per pid.
> + */
> + struct rb_node *n = alloc->free_buffers.rb_node;
> + struct binder_buffer *buffer;
> + size_t buffer_size;
> + size_t total_alloc_size = 0;
> + size_t num_buffers = 0;
> +
> + for (n = rb_first(&alloc->allocated_buffers); n != NULL;
> + n = rb_next(n)) {
> + buffer = rb_entry(n, struct binder_buffer, rb_node);
> + if (buffer->pid != pid)
> + continue;
> + if (!buffer->async_transaction)
> + continue;
> + buffer_size = binder_alloc_buffer_size(alloc, buffer);
> + total_alloc_size += buffer_size;
> + num_buffers++;
> + }
> +
> + // Warn if this pid has more than 50% of async space, or more than 50 txns
/* .. */
Folks might be confused by 50% being calculated as (alloc->buffer_size
/4) which on the surface looks like 25%. Maybe either change the
comment to "25% of buffer space" or point out that async space is 50%
of the buffer.
> + if (num_buffers > 50 || total_alloc_size > alloc->buffer_size / 4) {
> + binder_alloc_debug(BINDER_DEBUG_USER_ERROR,
> + "%d: pid %d spamming oneway? %zd buffers allocated for a total size of %zd\n",
> + alloc->pid, pid, num_buffers, total_alloc_size);
> + }
> +}
> +
> static struct binder_buffer *binder_alloc_new_buf_locked(
> struct binder_alloc *alloc,
> size_t data_size,
> size_t offsets_size,
> size_t extra_buffers_size,
> - int is_async)
> + int is_async,
> + int pid)
> {
> struct rb_node *n = alloc->free_buffers.rb_node;
> struct binder_buffer *buffer;
> @@ -486,11 +522,16 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
> buffer->offsets_size = offsets_size;
> buffer->async_transaction = is_async;
> buffer->extra_buffers_size = extra_buffers_size;
> + buffer->pid = pid;
> if (is_async) {
> alloc->free_async_space -= size + sizeof(struct binder_buffer);
> binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC_ASYNC,
> "%d: binder_alloc_buf size %zd async free %zd\n",
> alloc->pid, size, alloc->free_async_space);
> + if (alloc->free_async_space < alloc->buffer_size / 10) {
> + // Start detecting spammers once we reach 80% of async space used
Use /* ... */
> + debug_low_async_space_locked(alloc, pid);
> + }
> }
> return buffer;
>
> @@ -508,6 +549,7 @@ static struct binder_buffer *binder_alloc_new_buf_locked(
> * @offsets_size: user specified buffer offset
> * @extra_buffers_size: size of extra space for meta-data (eg, security context)
> * @is_async: buffer for async transaction
> + * @pid: pid to attribute allocation to (used for debugging)
> *
> * Allocate a new buffer given the requested sizes. Returns
> * the kernel version of the buffer pointer. The size allocated
> @@ -520,13 +562,14 @@ struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
> size_t data_size,
> size_t offsets_size,
> size_t extra_buffers_size,
> - int is_async)
> + int is_async,
> + int pid)
> {
> struct binder_buffer *buffer;
>
> mutex_lock(&alloc->mutex);
> buffer = binder_alloc_new_buf_locked(alloc, data_size, offsets_size,
> - extra_buffers_size, is_async);
> + extra_buffers_size, is_async, pid);
> mutex_unlock(&alloc->mutex);
> return buffer;
> }
> diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
> index db9c1b984695..55d8b4106766 100644
> --- a/drivers/android/binder_alloc.h
> +++ b/drivers/android/binder_alloc.h
> @@ -32,6 +32,7 @@ struct binder_transaction;
> * @offsets_size: size of array of offsets
> * @extra_buffers_size: size of space for other objects (like sg lists)
> * @user_data: user pointer to base of buffer space
> + * @pid: pid to attribute the buffer to (caller)
> *
> * Bookkeeping structure for binder transaction buffers
> */
> @@ -51,6 +52,7 @@ struct binder_buffer {
> size_t offsets_size;
> size_t extra_buffers_size;
> void __user *user_data;
> + int pid;
> };
>
> /**
> @@ -117,7 +119,8 @@ extern struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
> size_t data_size,
> size_t offsets_size,
> size_t extra_buffers_size,
> - int is_async);
> + int is_async,
> + int pid);
> extern void binder_alloc_init(struct binder_alloc *alloc);
> extern int binder_alloc_shrinker_init(void);
> extern void binder_alloc_vma_close(struct binder_alloc *alloc);
> diff --git a/drivers/android/binder_alloc_selftest.c b/drivers/android/binder_alloc_selftest.c
> index 4151d9938255..c2b323bc3b3a 100644
> --- a/drivers/android/binder_alloc_selftest.c
> +++ b/drivers/android/binder_alloc_selftest.c
> @@ -119,7 +119,7 @@ static void binder_selftest_alloc_buf(struct binder_alloc *alloc,
> int i;
>
> for (i = 0; i < BUFFER_NUM; i++) {
> - buffers[i] = binder_alloc_new_buf(alloc, sizes[i], 0, 0, 0);
> + buffers[i] = binder_alloc_new_buf(alloc, sizes[i], 0, 0, 0, 0);
> if (IS_ERR(buffers[i]) ||
> !check_buffer_pages_allocated(alloc, buffers[i],
> sizes[i])) {
> --
> 2.28.0.220.ged08abb693-goog
>
Powered by blists - more mailing lists