Message-ID: <20170516052605.GA5429@bbox>
Date: Tue, 16 May 2017 14:26:05 +0900
From: Minchan Kim <minchan@...nel.org>
To: Sergey Senozhatsky <sergey.senozhatsky.work@...il.com>
Cc: Andrew Morton <akpm@...ux-foundation.org>,
linux-kernel@...r.kernel.org, Joonsoo Kim <iamjoonsoo.kim@....com>,
Sergey Senozhatsky <sergey.senozhatsky@...il.com>,
kernel-team <kernel-team@....com>
Subject: Re: [PATCH 2/2] zram: do not count duplicated pages as compressed
On Tue, May 16, 2017 at 11:36:15AM +0900, Sergey Senozhatsky wrote:
> > > > @@ -794,7 +801,15 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
> > > >  	entry = zram_dedup_find(zram, page, &checksum);
> > > >  	if (entry) {
> > > >  		comp_len = entry->len;
> > > > -		goto found_dup;
> > > > +		zram_slot_lock(zram, index);
> > > > +		zram_free_page(zram, index);
> > > > +		zram_set_flag(zram, index, ZRAM_DUP);
> > > > +		zram_set_entry(zram, index, entry);
> > > > +		zram_set_obj_size(zram, index, comp_len);
> > > > +		zram_slot_unlock(zram, index);
> > > > +		atomic64_add(comp_len, &zram->stats.dup_data_size);
> > > > +		atomic64_inc(&zram->stats.pages_stored);
> > > > +		return 0;
> > >
> > > hm. that's somewhat big code duplication, isn't it?
> >
> > Yup, three parts: the part above, zram_same_page_write() and the tail of __zram_bvec_write().
>
> hmm... good question... I can hardly think of anything significantly
> better; zram object handling is now a mix of flags, entries,
> ref_counters, etc. etc. maybe we could merge some of those ops if we
> kept the slot locked through the entire __zram_bvec_write(), but
> that does not look attractive.
>
> something ABSOLUTELY untested and incomplete. not even compile tested (!).
> 99% broken and stupid (!). but there is one thing that it has revealed,
> hence the "incomplete". see below:
>
> ---
>
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index 372602c7da49..b31543c40d54 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -509,11 +509,8 @@ static bool zram_same_page_write(struct zram *zram, u32 index,
>  	if (page_same_filled(mem, &element)) {
>  		kunmap_atomic(mem);
>  		/* Free memory associated with this sector now. */
> -		zram_slot_lock(zram, index);
> -		zram_free_page(zram, index);
>  		zram_set_flag(zram, index, ZRAM_SAME);
>  		zram_set_element(zram, index, element);
> -		zram_slot_unlock(zram, index);
> 
>  		atomic64_inc(&zram->stats.same_pages);
>  		return true;
> @@ -778,7 +775,7 @@ static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
> 
>  static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
>  {
> -	int ret;
> +	int ret = 0;
>  	struct zram_entry *entry;
>  	unsigned int comp_len;
>  	void *src, *dst;
> @@ -786,12 +783,20 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
>  	struct page *page = bvec->bv_page;
>  	u32 checksum;
> 
> +	/*
> +	 * Free memory associated with this sector
> +	 * before overwriting unused sectors.
> +	 */
> +	zram_slot_lock(zram, index);
> +	zram_free_page(zram, index);
Hmm, zram_free_page() should happen only if the write is done
successfully. Otherwise, we lose the valid old data even though the
write IO failed.
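
Something like this (just a sketch, not even compile tested) would keep
your single-lock flow but defer the free until the new object is ready:

	zram_slot_lock(zram, index);

	ret = zram_compress(zram, &zstrm, page, &entry, &comp_len);
	if (ret) {
		zcomp_stream_put(zram->comp);
		/* the old object is untouched, reads still see valid data */
		goto out_unlock;
	}

	/* ... copy the compressed data into @entry as before ... */

	/* the new object is ready, only now drop the old one */
	zram_free_page(zram, index);
	zram_set_entry(zram, index, entry);
	zram_set_obj_size(zram, index, comp_len);

out_unlock:
	zram_slot_unlock(zram, index);
	return ret;
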
> +
>  	if (zram_same_page_write(zram, index, page))
> -		return 0;
> +		goto out_unlock;
> 
>  	entry = zram_dedup_find(zram, page, &checksum);
>  	if (entry) {
>  		comp_len = entry->len;
> +		zram_set_flag(zram, index, ZRAM_DUP);
In case of a dedup hit, we shouldn't increase compr_data_size.
If we fix the above two problems, do you think it's still cleaner?
(I don't mean to push back on your suggestion. It's a genuine
question, I'd like to know your thoughts. :)
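
For the stats, the tail would then also need something like this (again,
just a sketch) so that a dedup hit is accounted to dup_data_size rather
than compr_data_size:

	/* Update stats */
	if (zram_test_flag(zram, index, ZRAM_DUP))
		atomic64_add(comp_len, &zram->stats.dup_data_size);
	else
		atomic64_add(comp_len, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);
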
>  		goto found_dup;
>  	}
> 
> @@ -799,7 +804,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
>  	ret = zram_compress(zram, &zstrm, page, &entry, &comp_len);
>  	if (ret) {
>  		zcomp_stream_put(zram->comp);
> -		return ret;
> +		goto out_unlock;
>  	}
> 
>  	dst = zs_map_object(zram->mem_pool,
> @@ -817,20 +822,16 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
>  	zram_dedup_insert(zram, entry, checksum);
> 
>  found_dup:
> -	/*
> -	 * Free memory associated with this sector
> -	 * before overwriting unused sectors.
> -	 */
> -	zram_slot_lock(zram, index);
> -	zram_free_page(zram, index);
>  	zram_set_entry(zram, index, entry);
>  	zram_set_obj_size(zram, index, comp_len);
> -	zram_slot_unlock(zram, index);
> 
>  	/* Update stats */
>  	atomic64_add(comp_len, &zram->stats.compr_data_size);
>  	atomic64_inc(&zram->stats.pages_stored);
> -	return 0;
> +
> +out_unlock:
> +	zram_slot_unlock(zram, index);
> +	return ret;
>  }
>
> ---
>
>
> namely,
> that zram_compress() error return path from __zram_bvec_write().
>
> currently, we touch the existing compressed object and overwrite it only
> when we have successfully compressed a new object. when zram_compress()
> fails we propagate the error, but never touch the old object. so all
> reads that hit that index later will read stale data. and probably it
> would make more sense to fail those reads as well; IOW, to free the old
> page regardless of zram_compress()'s result.
>
> what do you think?
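
If I read you correctly, you mean something like this in the error path
(sketch):

	ret = zram_compress(zram, &zstrm, page, &entry, &comp_len);
	if (ret) {
		zcomp_stream_put(zram->comp);
		/*
		 * drop the stale object so that later reads for this
		 * index fail instead of returning old data
		 */
		zram_free_page(zram, index);
		goto out_unlock;
	}
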
>
> -ss