lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Date:   Tue, 29 Nov 2022 08:40:33 -0800
From:   Rob Clark <robdclark@...il.com>
To:     Dmitry Osipenko <dmitry.osipenko@...labora.com>
Cc:     David Airlie <airlied@...il.com>,
        Gerd Hoffmann <kraxel@...hat.com>,
        Gurchetan Singh <gurchetansingh@...omium.org>,
        Chia-I Wu <olvaffe@...il.com>, Daniel Vetter <daniel@...ll.ch>,
        Daniel Almeida <daniel.almeida@...labora.com>,
        Gustavo Padovan <gustavo.padovan@...labora.com>,
        Daniel Stone <daniel@...ishbar.org>,
        Tomeu Vizoso <tomeu.vizoso@...labora.com>,
        Maarten Lankhorst <maarten.lankhorst@...ux.intel.com>,
        Maxime Ripard <mripard@...nel.org>,
        Thomas Zimmermann <tzimmermann@...e.de>,
        Sumit Semwal <sumit.semwal@...aro.org>,
        Christian König <christian.koenig@....com>,
        Qiang Yu <yuq825@...il.com>,
        Steven Price <steven.price@....com>,
        Alyssa Rosenzweig <alyssa.rosenzweig@...labora.com>,
        Rob Herring <robh@...nel.org>, Sean Paul <sean@...rly.run>,
        Dmitry Baryshkov <dmitry.baryshkov@...aro.org>,
        Abhinav Kumar <quic_abhinavk@...cinc.com>,
        dri-devel@...ts.freedesktop.org, linux-kernel@...r.kernel.org,
        kernel@...labora.com, virtualization@...ts.linux-foundation.org
Subject: Re: [PATCH v9 01/11] drm/msm/gem: Prevent blocking within shrinker loop

On Tue, Nov 22, 2022 at 7:00 PM Dmitry Osipenko
<dmitry.osipenko@...labora.com> wrote:
>
> Consider this scenario:
>
> 1. APP1 continuously creates lots of small GEMs
> 2. APP2 triggers `drop_caches`
> 3. Shrinker starts to evict APP1 GEMs, while APP1 produces new purgeable
>    GEMs
> 4. msm_gem_shrinker_scan() returns non-zero number of freed pages
>    and causes shrinker to try shrink more
> 5. msm_gem_shrinker_scan() returns non-zero number of freed pages again,
>    goto 4
> 6. The APP2 is blocked in `drop_caches` until APP1 stops producing
>    purgeable GEMs
>
> To prevent this blocking scenario, check number of remaining pages
> that GPU shrinker couldn't release due to a GEM locking contention
> or shrinking rejection. If there are no remaining pages left to shrink,
> then there is no need to free up more pages and shrinker may break out
> from the loop.
>
> This problem was found during shrinker/madvise IOCTL testing of
> virtio-gpu driver. The MSM driver is affected in the same way.
>
> Signed-off-by: Dmitry Osipenko <dmitry.osipenko@...labora.com>

Reviewed-by: Rob Clark <robdclark@...il.com>

> ---
>  drivers/gpu/drm/drm_gem.c              | 9 +++++++--
>  drivers/gpu/drm/msm/msm_gem_shrinker.c | 8 ++++++--
>  include/drm/drm_gem.h                  | 4 +++-
>  3 files changed, 16 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
> index b8db675e7fb5..299bca1390aa 100644
> --- a/drivers/gpu/drm/drm_gem.c
> +++ b/drivers/gpu/drm/drm_gem.c
> @@ -1375,10 +1375,13 @@ EXPORT_SYMBOL(drm_gem_lru_move_tail);
>   *
>   * @lru: The LRU to scan
>   * @nr_to_scan: The number of pages to try to reclaim
> + * @remaining: The number of pages left to reclaim
>   * @shrink: Callback to try to shrink/reclaim the object.
>   */
>  unsigned long
> -drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned nr_to_scan,
> +drm_gem_lru_scan(struct drm_gem_lru *lru,
> +                unsigned int nr_to_scan,
> +                unsigned long *remaining,
>                  bool (*shrink)(struct drm_gem_object *obj))
>  {
>         struct drm_gem_lru still_in_lru;
> @@ -1417,8 +1420,10 @@ drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned nr_to_scan,
>                  * hit shrinker in response to trying to get backing pages
>                  * for this obj (ie. while it's lock is already held)
>                  */
> -               if (!dma_resv_trylock(obj->resv))
> +               if (!dma_resv_trylock(obj->resv)) {
> +                       *remaining += obj->size >> PAGE_SHIFT;
>                         goto tail;
> +               }
>
>                 if (shrink(obj)) {
>                         freed += obj->size >> PAGE_SHIFT;
> diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c
> index 1de14e67f96b..4c8b0ab61ce4 100644
> --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c
> +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c
> @@ -116,12 +116,14 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
>         };
>         long nr = sc->nr_to_scan;
>         unsigned long freed = 0;
> +       unsigned long remaining = 0;
>
>         for (unsigned i = 0; (nr > 0) && (i < ARRAY_SIZE(stages)); i++) {
>                 if (!stages[i].cond)
>                         continue;
>                 stages[i].freed =
> -                       drm_gem_lru_scan(stages[i].lru, nr, stages[i].shrink);
> +                       drm_gem_lru_scan(stages[i].lru, nr, &remaining,
> +                                        stages[i].shrink);
>                 nr -= stages[i].freed;
>                 freed += stages[i].freed;
>         }
> @@ -132,7 +134,7 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
>                                      stages[3].freed);
>         }
>
> -       return (freed > 0) ? freed : SHRINK_STOP;
> +       return (freed > 0 && remaining > 0) ? freed : SHRINK_STOP;
>  }
>
>  #ifdef CONFIG_DEBUG_FS
> @@ -182,10 +184,12 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
>                 NULL,
>         };
>         unsigned idx, unmapped = 0;
> +       unsigned long remaining = 0;
>
>         for (idx = 0; lrus[idx] && unmapped < vmap_shrink_limit; idx++) {
>                 unmapped += drm_gem_lru_scan(lrus[idx],
>                                              vmap_shrink_limit - unmapped,
> +                                            &remaining,
>                                              vmap_shrink);
>         }
>
> diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
> index a17c2f903f81..b46ade812443 100644
> --- a/include/drm/drm_gem.h
> +++ b/include/drm/drm_gem.h
> @@ -475,7 +475,9 @@ int drm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev,
>  void drm_gem_lru_init(struct drm_gem_lru *lru, struct mutex *lock);
>  void drm_gem_lru_remove(struct drm_gem_object *obj);
>  void drm_gem_lru_move_tail(struct drm_gem_lru *lru, struct drm_gem_object *obj);
> -unsigned long drm_gem_lru_scan(struct drm_gem_lru *lru, unsigned nr_to_scan,
> +unsigned long drm_gem_lru_scan(struct drm_gem_lru *lru,
> +                              unsigned int nr_to_scan,
> +                              unsigned long *remaining,
>                                bool (*shrink)(struct drm_gem_object *obj));
>
>  #endif /* __DRM_GEM_H__ */
> --
> 2.38.1
>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ