linux-kernel - Re: [PATCH] habanalabs: handle MMU cache invalidation timeout

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <CAFCwf11T=xe3qqYFCC3Ma3ukff2OizDoUWR03Lhs7Wpp6zdN6A@mail.gmail.com>
Date:   Mon, 25 May 2020 08:19:05 +0300
From:   Oded Gabbay <oded.gabbay@...il.com>
To:     Omer Shpigelman <oshpigelman@...ana.ai>
Cc:     "Linux-Kernel@...r. Kernel. Org" <linux-kernel@...r.kernel.org>
Subject: Re: [PATCH] habanalabs: handle MMU cache invalidation timeout

On Sun, May 24, 2020 at 11:07 PM Omer Shpigelman <oshpigelman@...ana.ai> wrote:
>
> MMU cache invalidation timeout indicates that the device is unstable and
> therefore unusable.
> Hence in such case do hard reset and return an error to the user if was
> called from ioctl.
> In addition, change the print to error level and rephrase its text.
>
> Signed-off-by: Omer Shpigelman <oshpigelman@...ana.ai>
> ---
>  drivers/misc/habanalabs/gaudi/gaudi.c | 36 ++++++++++++++++-----------
>  drivers/misc/habanalabs/goya/goya.c   | 34 +++++++++++++++----------
>  drivers/misc/habanalabs/habanalabs.h  |  8 +++---
>  drivers/misc/habanalabs/memory.c      | 35 ++++++++++++++++++++------
>  4 files changed, 75 insertions(+), 38 deletions(-)
>
> diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
> index 92a5130f06fb..61f88e9884ce 100644
> --- a/drivers/misc/habanalabs/gaudi/gaudi.c
> +++ b/drivers/misc/habanalabs/gaudi/gaudi.c
> @@ -5975,7 +5975,7 @@ static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate,
>         return gaudi->events_stat;
>  }
>
> -static void gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
> +static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
>                                         u32 flags)
>  {
>         struct gaudi_device *gaudi = hdev->asic_specific;
> @@ -5984,15 +5984,15 @@ static void gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
>
>         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
>                 hdev->hard_reset_pending)
> -               return;
> -
> -       mutex_lock(&hdev->mmu_cache_lock);
> +               return 0;
>
>         if (hdev->pldm)
>                 timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
>         else
>                 timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
>
> +       mutex_lock(&hdev->mmu_cache_lock);
> +
>         /* L0 & L1 invalidation */
>         WREG32(mmSTLB_INV_PS, 2);
>
> @@ -6006,14 +6006,18 @@ static void gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
>
>         WREG32(mmSTLB_INV_SET, 0);
>
> -       if (rc)
> -               dev_notice_ratelimited(hdev->dev,
> -                       "Timeout when waiting for MMU cache invalidation\n");
> -
>         mutex_unlock(&hdev->mmu_cache_lock);
> +
> +       if (rc) {
> +               dev_err_ratelimited(hdev->dev,
> +                                       "MMU cache invalidation timeout\n");
> +               hl_device_reset(hdev, true, false);
> +       }
> +
> +       return rc;
>  }
>
> -static void gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
> +static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
>                                 bool is_hard, u32 asid, u64 va, u64 size)
>  {
>         struct gaudi_device *gaudi = hdev->asic_specific;
> @@ -6024,7 +6028,7 @@ static void gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
>
>         if (!(gaudi->hw_cap_initialized & HW_CAP_MMU) ||
>                 hdev->hard_reset_pending)
> -               return;
> +               return 0;
>
>         mutex_lock(&hdev->mmu_cache_lock);
>
> @@ -6055,11 +6059,15 @@ static void gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
>                 1000,
>                 timeout_usec);
>
> -       if (rc)
> -               dev_notice_ratelimited(hdev->dev,
> -                       "Timeout when waiting for MMU cache invalidation\n");
> -
>         mutex_unlock(&hdev->mmu_cache_lock);
> +
> +       if (rc) {
> +               dev_err_ratelimited(hdev->dev,
> +                                       "MMU cache invalidation timeout\n");
> +               hl_device_reset(hdev, true, false);
> +       }
> +
> +       return rc;
>  }
>
>  static int gaudi_mmu_update_asid_hop0_addr(struct hl_device *hdev,
> diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
> index 152418dfe20c..0d2952bb58df 100644
> --- a/drivers/misc/habanalabs/goya/goya.c
> +++ b/drivers/misc/habanalabs/goya/goya.c
> @@ -4884,7 +4884,7 @@ static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
>                 goya_mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
>  }
>
> -static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
> +static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
>                                         u32 flags)
>  {
>         struct goya_device *goya = hdev->asic_specific;
> @@ -4893,11 +4893,11 @@ static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
>
>         if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
>                 hdev->hard_reset_pending)
> -               return;
> +               return 0;
>
>         /* no need in L1 only invalidation in Goya */
>         if (!is_hard)
> -               return;
> +               return 0;
>
>         if (hdev->pldm)
>                 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
> @@ -4919,13 +4919,17 @@ static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
>
>         mutex_unlock(&hdev->mmu_cache_lock);
>
> -       if (rc)
> -               dev_notice_ratelimited(hdev->dev,
> -                       "Timeout when waiting for MMU cache invalidation\n");
> +       if (rc) {
> +               dev_err_ratelimited(hdev->dev,
> +                                       "MMU cache invalidation timeout\n");
> +               hl_device_reset(hdev, true, false);
> +       }
> +
> +       return rc;
>  }
>
> -static void goya_mmu_invalidate_cache_range(struct hl_device *hdev,
> -               bool is_hard, u32 asid, u64 va, u64 size)
> +static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
> +                               bool is_hard, u32 asid, u64 va, u64 size)
>  {
>         struct goya_device *goya = hdev->asic_specific;
>         u32 status, timeout_usec, inv_data, pi;
> @@ -4933,11 +4937,11 @@ static void goya_mmu_invalidate_cache_range(struct hl_device *hdev,
>
>         if (!(goya->hw_cap_initialized & HW_CAP_MMU) ||
>                 hdev->hard_reset_pending)
> -               return;
> +               return 0;
>
>         /* no need in L1 only invalidation in Goya */
>         if (!is_hard)
> -               return;
> +               return 0;
>
>         if (hdev->pldm)
>                 timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
> @@ -4970,9 +4974,13 @@ static void goya_mmu_invalidate_cache_range(struct hl_device *hdev,
>
>         mutex_unlock(&hdev->mmu_cache_lock);
>
> -       if (rc)
> -               dev_notice_ratelimited(hdev->dev,
> -                       "Timeout when waiting for MMU cache invalidation\n");
> +       if (rc) {
> +               dev_err_ratelimited(hdev->dev,
> +                                       "MMU cache invalidation timeout\n");
> +               hl_device_reset(hdev, true, false);
> +       }
> +
> +       return rc;
>  }
>
>  int goya_send_heartbeat(struct hl_device *hdev)
> diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
> index 0f0691875298..1ecdcf8b763a 100644
> --- a/drivers/misc/habanalabs/habanalabs.h
> +++ b/drivers/misc/habanalabs/habanalabs.h
> @@ -675,9 +675,9 @@ struct hl_asic_funcs {
>                                 u32 *size);
>         u64 (*read_pte)(struct hl_device *hdev, u64 addr);
>         void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val);
> -       void (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
> +       int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
>                                         u32 flags);
> -       void (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
> +       int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
>                         u32 asid, u64 va, u64 size);
>         int (*send_heartbeat)(struct hl_device *hdev);
>         void (*enable_clock_gating)(struct hl_device *hdev);
> @@ -755,8 +755,8 @@ struct hl_va_range {
>   *                      with huge pages.
>   * @dram_va_range: holds available virtual addresses for DRAM mappings.
>   * @mem_hash_lock: protects the mem_hash.
> - * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifing the
> - *            MMU hash or walking the PGT requires talking this lock
> + * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
> + *            MMU hash or walking the PGT requires talking this lock.
>   * @debugfs_list: node in debugfs list of contexts.
>   * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
>   *                     to user so user could inquire about CS. It is used as
> diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
> index a72f766ca470..4b8eed1ca513 100644
> --- a/drivers/misc/habanalabs/memory.c
> +++ b/drivers/misc/habanalabs/memory.c
> @@ -886,6 +886,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
>
>                 vm_type = (enum vm_type_t *) userptr;
>                 hint_addr = args->map_host.hint_addr;
> +               handle = phys_pg_pack->handle;
>         } else {
>                 handle = lower_32_bits(args->map_device.handle);
>
> @@ -954,10 +955,17 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
>                 goto map_err;
>         }
>
> -       hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type);
> +       rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, false, *vm_type);
>
>         mutex_unlock(&ctx->mmu_lock);
>
> +       if (rc) {
> +               dev_err(hdev->dev,
> +                       "mapping handle %u failed due to MMU cache invalidation\n",
> +                       handle);
> +               goto map_err;
> +       }
> +
>         ret_vaddr += phys_pg_pack->offset;
>
>         hnode->ptr = vm_type;
> @@ -1083,21 +1091,34 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
>          * at the loop end rather than for each iteration
>          */
>         if (!ctx_free)
> -               hdev->asic_funcs->mmu_invalidate_cache(hdev, true, *vm_type);
> +               rc = hdev->asic_funcs->mmu_invalidate_cache(hdev, true,
> +                                                               *vm_type);
>
>         mutex_unlock(&ctx->mmu_lock);
>
>         /*
> -        * No point in maintaining the free VA block list if the context is
> -        * closing as the list will be freed anyway
> +        * If the context is closing we don't need to check for the MMU cache
> +        * invalidation return code and update the VA free list as in this flow
> +        * we invalidate the MMU cache outside of this unmap function and the VA
> +        * free list will be freed anyway.
>          */
>         if (!ctx_free) {
> -               rc = add_va_block(hdev, va_range, vaddr,
> -                                       vaddr + phys_pg_pack->total_size - 1);
> +               int tmp_rc;
> +
>                 if (rc)
> +                       dev_err(hdev->dev,
> +                               "unmapping vaddr 0x%llx failed due to MMU cache invalidation\n",
> +                               vaddr);
> +
> +               tmp_rc = add_va_block(hdev, va_range, vaddr,
> +                                       vaddr + phys_pg_pack->total_size - 1);
> +               if (tmp_rc) {
>                         dev_warn(hdev->dev,
>                                         "add va block failed for vaddr: 0x%llx\n",
>                                         vaddr);
> +                       if (!rc)
> +                               rc = tmp_rc;
> +               }
>         }
>
>         atomic_dec(&phys_pg_pack->mapping_cnt);
> @@ -1108,7 +1129,7 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
>                 dma_unmap_host_va(hdev, userptr);
>         }
>
> -       return 0;
> +       return rc;
>
>  mapping_cnt_err:
>         if (is_userptr)
> --
> 2.17.1
>

This patch is:
Reviewed-by: Oded Gabbay <oded.gabbay@...il.com>