Message-ID: <20200310085747.000018ad@Huawei.com>
Date: Tue, 10 Mar 2020 08:57:47 +0000
From: Jonathan Cameron <Jonathan.Cameron@...wei.com>
To: SeongJae Park <sjpark@...zon.com>
CC: <akpm@...ux-foundation.org>, SeongJae Park <sjpark@...zon.de>,
<aarcange@...hat.com>, <yang.shi@...ux.alibaba.com>,
<acme@...nel.org>, <alexander.shishkin@...ux.intel.com>,
<amit@...nel.org>, <brendan.d.gregg@...il.com>,
<brendanhiggins@...gle.com>, <cai@....pw>,
<colin.king@...onical.com>, <corbet@....net>, <dwmw@...zon.com>,
<jolsa@...hat.com>, <kirill@...temov.name>, <mark.rutland@....com>,
<mgorman@...e.de>, <minchan@...nel.org>, <mingo@...hat.com>,
<namhyung@...nel.org>, <peterz@...radead.org>,
<rdunlap@...radead.org>, <rientjes@...gle.com>,
<rostedt@...dmis.org>, <shuah@...nel.org>, <sj38.park@...il.com>,
<vbabka@...e.cz>, <vdavydov.dev@...il.com>, <linux-mm@...ck.org>,
<linux-doc@...r.kernel.org>, <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH v6 03/14] mm/damon: Adaptively adjust regions
On Mon, 24 Feb 2020 13:30:36 +0100
SeongJae Park <sjpark@...zon.com> wrote:
> From: SeongJae Park <sjpark@...zon.de>
>
> At the beginning of the monitoring, DAMON constructs the initial regions
> by evenly splitting the memory mapped address space of the process into
> the user-specified minimal number of regions. In this initial state,
> the assumption behind the regions (pages in the same region have similar
> access frequencies) normally does not hold, so the monitoring quality
> could be low. To keep the assumption holding as much as possible, DAMON
> adaptively merges and splits each region.
>
> For each ``aggregation interval``, it compares the access frequencies of
> adjacent regions and merges them if the frequency difference is small.
> Then, after it reports and clears the aggregated access frequency of
> each region, it splits each region into two if the total number of
> regions does not exceed half of the user-specified maximum number of
> regions.
>
> In this way, DAMON provides best-effort quality with minimal overhead
> while staying within the bounds users set for their trade-off.
>
> Signed-off-by: SeongJae Park <sjpark@...zon.de>
Really minor comments inline.
> ---
> mm/damon.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++---
> 1 file changed, 144 insertions(+), 7 deletions(-)
>
> diff --git a/mm/damon.c b/mm/damon.c
> index 6bdeb84d89af..1c8bb71bbce9 100644
> --- a/mm/damon.c
> +++ b/mm/damon.c
> @@ -67,6 +67,7 @@ struct damon_ctx {
> unsigned long sample_interval;
> unsigned long aggr_interval;
> unsigned long min_nr_regions;
> + unsigned long max_nr_regions;
>
> struct timespec64 last_aggregation;
>
> @@ -389,9 +390,12 @@ static int damon_three_regions_of(struct damon_task *t,
> * regions is wasteful. That said, because we can deal with small noises,
> * tracking every mapping is not strictly required but could even incur a high
> * overhead if the mapping frequently changes or the number of mappings is
> - * high. Nonetheless, this may seems very weird. DAMON's dynamic regions
> - * adjustment mechanism, which will be implemented with following commit will
> - * make this more sense.
> + * high. The adaptive regions adjustment mechanism will further help to deal
> + * with the noise by simply identifying the unmapped areas as a region that
> + * has no access. Moreover, tracking the real mappings, which would contain
> + * many unmapped areas inside, would make the adaptive mechanism quite
> + * complex. That said, too large unmapped areas inside the monitoring target
> + * should be removed so that the adaptive mechanism does not waste time on them.
> *
> * For the reason, we convert the complex mappings to three distinct regions
> * that cover every mapped areas of the address space. Also the two gaps
> @@ -550,6 +554,123 @@ static void kdamond_flush_aggregated(struct damon_ctx *c)
> }
> }
>
> +#define sz_damon_region(r) ((r)->vm_end - (r)->vm_start)
> +
> +/*
> + * Merge two adjacent regions into one region
> + */
> +static void damon_merge_two_regions(struct damon_region *l,
> + struct damon_region *r)
> +{
> + l->nr_accesses = (l->nr_accesses * sz_damon_region(l) +
> + r->nr_accesses * sz_damon_region(r)) /
> + (sz_damon_region(l) + sz_damon_region(r));
> + l->vm_end = r->vm_end;
> + damon_destroy_region(r);
> +}
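Not a problem with the patch, but for anyone checking the math: the
merged nr_accesses is a size-weighted average, so a small hot region
cannot dominate a large cold one. A standalone userspace sketch with
made-up sizes and counts (illustration only, not DAMON code):

	#include <stdio.h>

	int main(void)
	{
		/* hypothetical regions: 4 KiB seen 10 times, 12 KiB seen twice */
		unsigned long sz_l = 4096, sz_r = 12288;
		unsigned long acc_l = 10, acc_r = 2;
		unsigned long merged = (acc_l * sz_l + acc_r * sz_r) /
				       (sz_l + sz_r);

		printf("merged nr_accesses = %lu\n", merged);	/* prints 4 */
		return 0;
	}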
> +
> +#define diff_of(a, b) ((a) > (b) ? (a) - (b) : (b) - (a))
> +
> +/*
> + * Merge adjacent regions having similar access frequencies
> + *
> + * t      the task whose regions will be merged
> + * thres  merge regions whose '->nr_accesses' diff is not larger than this
> + */
> +static void damon_merge_regions_of(struct damon_task *t, unsigned int thres)
> +{
> + struct damon_region *r, *prev = NULL, *next;
> +
> + damon_for_each_region_safe(r, next, t) {
> + if (!prev || prev->vm_end != r->vm_start)
> + goto next;
> + if (diff_of(prev->nr_accesses, r->nr_accesses) > thres)
> + goto next;
if (!prev || prev->vm_end != r->vm_start ||
diff_of(prev->nr_accesses, r->nr_accesses) > thres) {
prev = r;
continue;
}
Seems more logical to me, though maybe it's just me. A goto inside a
loop isn't pretty to my mind.
> + damon_merge_two_regions(prev, r);
> + continue;
> +next:
> + prev = r;
> + }
> +}
> +
> +/*
> + * Merge adjacent regions having similar access frequencies
> + *
> + * threshold  merge regions whose nr_accesses diff is not larger than this
> + *
> + * This function merges monitoring target regions which are adjacent and
> + * whose access frequencies are similar. This is for minimizing the
> + * monitoring overhead under a dynamically changing access pattern. If a
> + * merge was unnecessarily made, later 'kdamond_split_regions()' will revert it.
> + */
> +static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold)
> +{
> + struct damon_task *t;
> +
> + damon_for_each_task(c, t)
> + damon_merge_regions_of(t, threshold);
> +}
> +
> +/*
> + * Split a region into two small regions
> + *
> + * r the region to be split
> + * sz_r size of the first sub-region that will be made
> + */
> +static void damon_split_region_at(struct damon_ctx *ctx,
> + struct damon_region *r, unsigned long sz_r)
> +{
> + struct damon_region *new;
> +
> + new = damon_new_region(ctx, r->vm_start + sz_r, r->vm_end);
> + r->vm_end = new->vm_start;
> +
> + damon_add_region(new, r, damon_next_region(r));
> +}
> +
> +static void damon_split_regions_of(struct damon_ctx *ctx, struct damon_task *t)
> +{
> + struct damon_region *r, *next;
> + unsigned long sz_left_region;
> +
> + damon_for_each_region_safe(r, next, t) {
> + /*
> + * Randomly select size of left sub-region to be at least
> + * 10% and at most 90% of the original region
> + */
> + sz_left_region = (prandom_u32_state(&ctx->rndseed) % 9 + 1) *
> + (r->vm_end - r->vm_start) / 10;
> + /* Do not allow blank region */
> + if (sz_left_region == 0)
> + continue;
> + damon_split_region_at(ctx, r, sz_left_region);
> + }
> +}
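The selection above maps the PRNG output to one of nine split points
(1/10th .. 9/10th of the region). A quick userspace equivalent, with
rand() standing in for prandom_u32_state() and a made-up region
(illustration only, not the kernel code):

	#include <stdio.h>
	#include <stdlib.h>

	int main(void)
	{
		/* hypothetical 40 KiB region */
		unsigned long vm_start = 0x1000, vm_end = 0xb000;
		unsigned long sz = vm_end - vm_start;
		/* pick 1..9 tenths of the region as the left part */
		unsigned long sz_left = (rand() % 9 + 1) * sz / 10;

		printf("split at %#lx (left part %lu bytes)\n",
		       vm_start + sz_left, sz_left);
		return 0;
	}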
> +
> +/*
> + * Split every target region into two randomly-sized regions
> + *
> + * This function splits every target region into two random-sized regions if
> + * the current total number of regions is not larger than half of the
> + * user-specified maximum number of regions. This is for maximizing the
> + * monitoring accuracy under dynamically changing access patterns. If a
> + * split was unnecessarily made, later 'kdamond_merge_regions()' will revert
> + * it.
> + */
> +static void kdamond_split_regions(struct damon_ctx *ctx)
> +{
> + struct damon_task *t;
> + unsigned int nr_regions = 0;
> +
> + damon_for_each_task(ctx, t)
> + nr_regions += nr_damon_regions(t);
> + if (nr_regions > ctx->max_nr_regions / 2)
> + return;
> +
> + damon_for_each_task(ctx, t)
> + damon_split_regions_of(ctx, t);
> +}
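For what it's worth, this guard is what keeps the region count bounded:
a split pass doubles the count, and doubling from at most max/2 cannot
exceed max. A toy simulation with hypothetical numbers:

	#include <stdio.h>

	int main(void)
	{
		unsigned int nr = 10, max = 1000, i;

		for (i = 0; i < 10; i++) {
			if (nr > max / 2)	/* the guard above */
				break;
			nr *= 2;		/* every region splits in two */
		}
		printf("nr_regions = %u (max %u)\n", nr, max);	/* 640 */
		return 0;
	}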
> +
> /*
> * Check whether current monitoring should be stopped
> *
> @@ -590,21 +711,29 @@ static int kdamond_fn(void *data)
> struct damon_task *t;
> struct damon_region *r, *next;
> struct mm_struct *mm;
> + unsigned long max_nr_accesses;
>
> pr_info("kdamond (%d) starts\n", ctx->kdamond->pid);
> kdamond_init_regions(ctx);
> while (!kdamond_need_stop(ctx)) {
> + max_nr_accesses = 0;
> damon_for_each_task(ctx, t) {
> mm = damon_get_mm(t);
> if (!mm)
> continue;
> - damon_for_each_region(r, t)
> + damon_for_each_region(r, t) {
> kdamond_check_access(ctx, mm, r);
> + if (r->nr_accesses > max_nr_accesses)
> + max_nr_accesses = r->nr_accesses;
max_nr_accesses = max(r->nr_accesses, max_nr_accesses);
> + }
> mmput(mm);
> }
>
> - if (kdamond_aggregate_interval_passed(ctx))
> + if (kdamond_aggregate_interval_passed(ctx)) {
> + kdamond_merge_regions(ctx, max_nr_accesses / 10);
> kdamond_flush_aggregated(ctx);
> + kdamond_split_regions(ctx);
> + }
>
> usleep_range(ctx->sample_interval, ctx->sample_interval + 1);
> }
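Also just an observation, no change needed: with the max_nr_accesses / 10
threshold, if the hottest region saw e.g. 30 accesses in this interval,
adjacent regions whose counts differ by at most 3 get merged; in a fully
idle interval (max_nr_accesses == 0) only regions with identical counts
are merged.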
> @@ -692,24 +821,32 @@ static int damon_set_pids(struct damon_ctx *ctx,
> * sample_int time interval between samplings
> * aggr_int time interval between aggregations
> * min_nr_reg minimal number of regions
> + * max_nr_reg maximum number of regions
> *
> * This function should not be called while the kdamond is running.
> * Every time interval is in micro-seconds.
> *
> * Returns 0 on success, negative error code otherwise.
> */
> -static int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
> - unsigned long aggr_int, unsigned long min_nr_reg)
> +static int damon_set_attrs(struct damon_ctx *ctx,
> + unsigned long sample_int, unsigned long aggr_int,
> + unsigned long min_nr_reg, unsigned long max_nr_reg)
> {
> if (min_nr_reg < 3) {
> pr_err("min_nr_regions (%lu) should be bigger than 2\n",
> min_nr_reg);
> return -EINVAL;
> }
> +	if (min_nr_reg >= max_nr_reg) {
> + pr_err("invalid nr_regions. min (%lu) >= max (%lu)\n",
> + min_nr_reg, max_nr_reg);
> + return -EINVAL;
> + }
>
> ctx->sample_interval = sample_int;
> ctx->aggr_interval = aggr_int;
> ctx->min_nr_regions = min_nr_reg;
> + ctx->max_nr_regions = max_nr_reg;
> return 0;
> }
>
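For readers unfamiliar with the units, a hypothetical call (values made
up; all intervals are in microseconds per the comment above):

	/* sample every 5 ms, aggregate every 100 ms, keep 10..1000 regions */
	if (damon_set_attrs(ctx, 5000, 100000, 10, 1000))
		pr_err("damon_set_attrs() failed\n");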