Message-ID: <CAGsJ_4zFfnwc1kstNO53gdeUzzon5_tamDcC-mTUUS_PQEjF0A@mail.gmail.com>
Date: Wed, 7 Aug 2024 21:48:24 +1200
From: Barry Song <21cnbao@...il.com>
To: David Hildenbrand <david@...hat.com>
Cc: akpm@...ux-foundation.org, linux-mm@...ck.org, justinjiang@...o.com, 
	chrisl@...nel.org, hughd@...gle.com, kaleshsingh@...gle.com, 
	kasong@...cent.com, linux-kernel@...r.kernel.org, ryan.roberts@....com, 
	v-songbaohua@...o.com, ying.huang@...el.com
Subject: Re: [PATCH v2 2/2] mm: attempt to batch free swap entries for zap_pte_range()

On Wed, Aug 7, 2024 at 9:29 PM David Hildenbrand <david@...hat.com> wrote:
>
> >   mm/swapfile.c | 78 +++++++++++++++++++++++++++++++++++++++++++--------
> >   1 file changed, 67 insertions(+), 11 deletions(-)
> >
> > diff --git a/mm/swapfile.c b/mm/swapfile.c
> > index 35cb58373493..25c3f98fa8d5 100644
> > --- a/mm/swapfile.c
> > +++ b/mm/swapfile.c
> > @@ -156,6 +156,25 @@ static bool swap_is_has_cache(struct swap_info_struct *si,
> >       return true;
> >   }
> >
> > +static bool swap_is_last_map(struct swap_info_struct *si,
> > +                           unsigned long offset, int nr_pages,
> > +                           bool *has_cache)
>
> Please use double tabs for indenting parameters on the 2nd line in
> new/changed code:
>
>                 unsigned long offset, int nr_pages, bool *has_cache)
>
> That results in less churn when renaming functions, and we can
> frequently avoid extra lines.

ack.
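
For the record, the before/after as I read the request (illustration
only, same prototype as above):

/* before: continuation aligned with the open parenthesis */
static bool swap_is_last_map(struct swap_info_struct *si,
			     unsigned long offset, int nr_pages,
			     bool *has_cache)

/* after: two tabs; fewer lines, no churn if the function is renamed */
static bool swap_is_last_map(struct swap_info_struct *si,
		unsigned long offset, int nr_pages, bool *has_cache)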

>
> > +{
> > +     unsigned char *map = si->swap_map + offset;
> > +     unsigned char *map_end = map + nr_pages;
> > +     bool cached = false;
> > +
> > +     do {
> > +             if ((*map & ~SWAP_HAS_CACHE) != 1)
> > +                     return false;
> > +             if (*map & SWAP_HAS_CACHE)
> > +                     cached = true;
> > +     } while (++map < map_end);
> > +
> > +     *has_cache = cached;
> > +     return true;
> > +}
> > +
> >   /*
> >    * returns number of pages in the folio that backs the swap entry. If positive,
> >    * the folio was reclaimed. If negative, the folio was not reclaimed. If 0, no
> > @@ -1469,6 +1488,53 @@ static unsigned char __swap_entry_free(struct swap_info_struct *si,
> >       return usage;
> >   }
> >
> > +static bool __swap_entries_free(struct swap_info_struct *si,
> > +                             swp_entry_t entry, int nr)
>
> Ditto.

ack.

>
> > +{
> > +     unsigned long offset = swp_offset(entry);
> > +     unsigned int type = swp_type(entry);
> > +     struct swap_cluster_info *ci;
> > +     bool has_cache = false;
> > +     unsigned char count;
> > +     bool can_batch;
> > +     int i;
> > +
> > +     if (nr <= 1 || swap_count(data_race(si->swap_map[offset])) != 1)
> > +             goto fallback;
> > +     /* cross into another cluster */
> > +     if (nr > SWAPFILE_CLUSTER - offset % SWAPFILE_CLUSTER)
> > +             goto fallback;
> > +
> > +     ci = lock_cluster_or_swap_info(si, offset);
> > +     can_batch = swap_is_last_map(si, offset, nr, &has_cache);
> > +     if (can_batch) {
> > +             for (i = 0; i < nr; i++)
> > +                     WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);
> > +     }
> > +     unlock_cluster_or_swap_info(si, ci);
> > +
> > +     if (!can_batch)
> > +             goto fallback;
>
> I'd avoid "can_batch" and just do:
>
> ci = lock_cluster_or_swap_info(si, offset);
> if (!swap_is_last_map(si, offset, nr, &has_cache)) {
>         unlock_cluster_or_swap_info(si, ci);
>         goto fallback;
> }
> for (i = 0; i < nr; i++)
>         WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);
> unlock_cluster_or_swap_info(si, ci);

ack.

>
> > +     if (!has_cache) {
> > +             spin_lock(&si->lock);
>
> I'm no expert on that code, but we might drop the cluster lock, then
> take the swap_info lock, and then retake the cluster lock. I assume
> there are no races we are worrying about here, right?

I suppose so. Even the original single-entry code follows the same pattern:

static unsigned char __swap_entry_free(struct swap_info_struct *p,
		swp_entry_t entry)
{
	struct swap_cluster_info *ci;
	unsigned long offset = swp_offset(entry);
	unsigned char usage;

	ci = lock_cluster_or_swap_info(p, offset);
	usage = __swap_entry_free_locked(p, offset, 1);
	unlock_cluster_or_swap_info(p, ci);
	if (!usage)
		free_swap_slot(entry);

	return usage;
}

I assume that once we mark them as SWAP_HAS_CACHE, no one but us
will touch them.
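
To make that concrete, here is a toy userspace model of the handoff as
I understand it (a sketch only: pthread mutexes stand in for the
cluster lock and si->lock, and all names are hypothetical, not the
actual patch code):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

#define HAS_CACHE 0x40	/* stand-in for SWAP_HAS_CACHE */

/* four slots with count == 1 and no cache bit, like the batched case */
static unsigned char map[4] = { 1, 1, 1, 1 };
static pthread_mutex_t cluster_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t device_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Under the cluster lock: verify we hold the last reference to every
 * slot, then convert all of them to the bare ownership marker.
 */
static bool take_last_refs(int off, int nr)
{
	pthread_mutex_lock(&cluster_lock);
	for (int i = 0; i < nr; i++) {
		if ((map[off + i] & ~HAS_CACHE) != 1) {
			pthread_mutex_unlock(&cluster_lock);
			return false;	/* someone else still has a ref */
		}
	}
	for (int i = 0; i < nr; i++)
		map[off + i] = HAS_CACHE;	/* we are now the sole owner */
	pthread_mutex_unlock(&cluster_lock);
	return true;
}

int main(void)
{
	if (take_last_refs(0, 4)) {
		/*
		 * No lock is held here, but the marker keeps everyone
		 * else away, so taking the bigger device lock to free
		 * the range cannot race.
		 */
		pthread_mutex_lock(&device_lock);
		for (int i = 0; i < 4; i++)
			map[i] = 0;	/* release the range */
		pthread_mutex_unlock(&device_lock);
	}
	printf("released: %d %d %d %d\n", map[0], map[1], map[2], map[3]);
	return 0;
}

The analogy is loose since the kernel reclaim paths also inspect these
map bytes, but it shows why no race window opens between dropping one
lock and taking the other: the marker written under the first lock is
the ownership handoff.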

>
> > +             swap_entry_range_free(si, entry, nr);
> > +             spin_unlock(&si->lock);
> > +     }
> > +     return has_cache;
> > +
> > +fallback:
> > +     for (i = 0; i  < nr; i++) {
>
> One space too many before the "<".

ack.

>
> > +             if (data_race(si->swap_map[offset + i])) {
> > +                     count = __swap_entry_free(si, swp_entry(type, offset + i));
> > +                     if (count == SWAP_HAS_CACHE)
> > +                             has_cache = true;
> > +             } else {
> > +                     WARN_ON_ONCE(1);
> > +             }
> > +     }
> > +     return has_cache;
> > +}
> > +
>
> --
> Cheers,
>
> David / dhildenb
>

Thanks
Barry
