lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 16 May 2017 11:36:15 +0900
From:   Sergey Senozhatsky <sergey.senozhatsky.work@...il.com>
To:     Minchan Kim <minchan@...nel.org>
Cc:     Sergey Senozhatsky <sergey.senozhatsky.work@...il.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        linux-kernel@...r.kernel.org, Joonsoo Kim <iamjoonsoo.kim@....com>,
        Sergey Senozhatsky <sergey.senozhatsky@...il.com>,
        kernel-team <kernel-team@....com>
Subject: Re: [PATCH 2/2] zram: do not count duplicated pages as compressed

Hello Minchan,

On (05/16/17 10:59), Minchan Kim wrote:
> Hi Sergey,
>

[..]
> You mean this?
> 
> static void zram_free_page(..) {
>         if (zram_test_flag(zram, index, ZRAM_SAME))
>                 ...
> 
>         if (!entry)
>                 return;
> 
>         if (zram_dedup_enabled(zram) && xxxx)) {
>                 zram_clear_flag(ZRAM_DUP);
>                 atomic64_sub(entry->len, &zram->stats.dup_data_size);
>         } else {
>                 atomic64_sub(zram_get_obj_size(zram, index),
>                                         &zram->stats.compr_dat_size);
>         }
> 
>         zram_entry_free
>         zram_set_entry
>         zram_set_obj_size
> }

yeah, something like this.

> > > @@ -794,7 +801,15 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
> > >  	entry = zram_dedup_find(zram, page, &checksum);
> > >  	if (entry) {
> > >  		comp_len = entry->len;
> > > -		goto found_dup;
> > > +		zram_slot_lock(zram, index);
> > > +		zram_free_page(zram, index);
> > > +		zram_set_flag(zram, index, ZRAM_DUP);
> > > +		zram_set_entry(zram, index, entry);
> > > +		zram_set_obj_size(zram, index, comp_len);
> > > +		zram_slot_unlock(zram, index);
> > > +		atomic64_add(comp_len, &zram->stats.dup_data_size);
> > > +		atomic64_inc(&zram->stats.pages_stored);
> > > +		return 0;
> > 
> > hm. that's a somewhat big code duplication. isn't it?
> 
> Yub. 3 parts. above part,  zram_same_page_write and tail of __zram_bvec_write.

hmm... good question... I can hardly think of anything significantly
better; zram object handling is now a mix of flags, entries,
ref_counters, etc. etc. maybe we could merge some of those ops if we
kept the slot locked through the entire __zram_bvec_write(), but
that does not look attractive.

something ABSOLUTELY untested and incomplete. not even compile tested (!).
99% broken and stupid (!). but it has revealed one interesting thing, which
is why I'm posting it despite being incomplete. see below:

---

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 372602c7da49..b31543c40d54 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -509,11 +509,8 @@ static bool zram_same_page_write(struct zram *zram, u32 index,
        if (page_same_filled(mem, &element)) {
                kunmap_atomic(mem);
                /* Free memory associated with this sector now. */
-               zram_slot_lock(zram, index);
-               zram_free_page(zram, index);
                zram_set_flag(zram, index, ZRAM_SAME);
                zram_set_element(zram, index, element);
-               zram_slot_unlock(zram, index);
 
                atomic64_inc(&zram->stats.same_pages);
                return true;
@@ -778,7 +775,7 @@ static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
 
 static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
 {
-       int ret;
+       int ret = 0;
        struct zram_entry *entry;
        unsigned int comp_len;
        void *src, *dst;
@@ -786,12 +783,20 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
        struct page *page = bvec->bv_page;
        u32 checksum;
 
+       /*
+        * Free memory associated with this sector
+        * before overwriting unused sectors.
+        */
+       zram_slot_lock(zram, index);
+       zram_free_page(zram, index);
+
        if (zram_same_page_write(zram, index, page))
-               return 0;
+               goto out_unlock;
 
        entry = zram_dedup_find(zram, page, &checksum);
        if (entry) {
                comp_len = entry->len;
+               zram_set_flag(zram, index, ZRAM_DUP);
                goto found_dup;
        }
 
@@ -799,7 +804,7 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
        ret = zram_compress(zram, &zstrm, page, &entry, &comp_len);
        if (ret) {
                zcomp_stream_put(zram->comp);
-               return ret;
+               goto out_unlock;
        }
 
        dst = zs_map_object(zram->mem_pool,
@@ -817,20 +822,16 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
        zram_dedup_insert(zram, entry, checksum);
 
 found_dup:
-       /*
-        * Free memory associated with this sector
-        * before overwriting unused sectors.
-        */
-       zram_slot_lock(zram, index);
-       zram_free_page(zram, index);
        zram_set_entry(zram, index, entry);
        zram_set_obj_size(zram, index, comp_len);
-       zram_slot_unlock(zram, index);
 
        /* Update stats */
        atomic64_add(comp_len, &zram->stats.compr_data_size);
        atomic64_inc(&zram->stats.pages_stored);
-       return 0;
+
+out_unlock:
+       zram_slot_unlock(zram, index);
+       return ret;
 }

---


namely,
that zram_compress() error return path from __zram_bvec_write().

currently, we touch the existing compressed object and overwrite it only
when we have successfully compressed a new object. when zram_compress() fails
we propagate the error, but never touch the old object. so all reads that
hit that index later will read stale data. it would probably
make more sense to fail those reads as well; IOW to free the old page
regardless of zram_compress()'s outcome.

what do you think?

	-ss

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ