[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20221109223257.GB34247@hpe.com>
Date: Wed, 9 Nov 2022 16:32:57 -0600
From: Dimitri Sivanich <sivanich@....com>
To: Zheng Wang <zyytlz.wz@....com>
Cc: gregkh@...uxfoundation.org, zhengyejian1@...wei.com,
dimitri.sivanich@....com, arnd@...db.de,
linux-kernel@...r.kernel.org, hackerzheng666@...il.com,
alex000young@...il.com, security@...nel.org, lkp@...el.com
Subject: Re: [PATCH v9] misc: sgi-gru: fix use-after-free error in
gru_set_context_option, gru_fault and gru_handle_user_call_os
On Wed, Nov 09, 2022 at 10:17:01PM +0800, Zheng Wang wrote:
> In some bad situations, the gts may be freed when the
> gru_check_chiplet_assignment check fails. The call chain can be
> gru_unload_context->gru_free_gru_context->gts_drop and finally kfree.
> However, the caller does not know whether the gts has been freed and
> uses it afterwards. This will trigger a use-after-free bug.
>
> Fix it by introducing a return value that indicates whether the error
> path was taken. Free the gts in the caller if the
> gru_check_chiplet_assignment check failed.
>
> Fixes: 55484c45dbec ("gru: allow users to specify gru chiplet 2")
> Signed-off-by: Zheng Wang <zyytlz.wz@....com>
> Acked-by: Dimitri Sivanich <sivanich@....com>
> ---
> v9:
> - rewrite changelog and add comment in the code to make it more clear
>
> v8:
> - remove tested-by tag suggested by Greg
>
> v7:
> - fix some spelling problems suggested by Greg, change kernel test robot from reported-by tag to tested-by tag
>
> v6:
> - remove unused var checked by kernel test robot
>
> v5:
> - fix logical issue and remove unnecessary variable suggested by Dimitri Sivanich
>
> v4:
> - use VM_FAULT_NOPAGE as failure code in gru_fault and -EINVAL in other functions suggested by Yejian
>
> v3:
> - add preempt_enable and use VM_FAULT_NOPAGE as failure code suggested by Yejian
>
> v2:
> - commit message changes suggested by Greg
>
> v1: https://lore.kernel.org/lkml/CAJedcCzY72jqgF-pCPtx66vXXwdPn-KMagZnqrxcpWw1NxTLaA@mail.gmail.com/
> ---
> drivers/misc/sgi-gru/grufault.c | 14 ++++++++++++--
> drivers/misc/sgi-gru/grumain.c | 22 ++++++++++++++++++----
> drivers/misc/sgi-gru/grutables.h | 2 +-
> 3 files changed, 31 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
> index d7ef61e602ed..bdd515d33225 100644
> --- a/drivers/misc/sgi-gru/grufault.c
> +++ b/drivers/misc/sgi-gru/grufault.c
> @@ -656,7 +656,9 @@ int gru_handle_user_call_os(unsigned long cb)
> if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE)
> goto exit;
>
> - gru_check_context_placement(gts);
> + ret = gru_check_context_placement(gts);
Below we do not want to skip over the rest of the logic in either return case.
You will have to do the gru_find_lock_gts again after unloading the gru context,
then check the gts value, then return EINVAL if not set (same as earlier in the
function).
> + if (ret)
> + goto err;
>
> /*
> * CCH may contain stale data if ts_force_cch_reload is set.
> @@ -677,6 +679,10 @@ int gru_handle_user_call_os(unsigned long cb)
> exit:
> gru_unlock_gts(gts);
> return ret;
> +err:
> + gru_unlock_gts(gts);
> + gru_unload_context(gts, 1);
> + return -EINVAL;
> }
>
> /*
> @@ -874,7 +880,11 @@ int gru_set_context_option(unsigned long arg)
> } else {
> gts->ts_user_blade_id = req.val1;
> gts->ts_user_chiplet_id = req.val0;
> - gru_check_context_placement(gts);
> + if (gru_check_context_placement(gts)) {
> + gru_unlock_gts(gts);
> + gru_unload_context(gts, 1);
Looking at this again, I think we should return 0, as we originally would
have done in this case anyway.
> + return -EINVAL;
> + }
> }
> break;
> case sco_gseg_owner:
> diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
> index 6706ef3c5977..816def778078 100644
> --- a/drivers/misc/sgi-gru/grumain.c
> +++ b/drivers/misc/sgi-gru/grumain.c
> @@ -716,9 +716,10 @@ static int gru_check_chiplet_assignment(struct gru_state *gru,
> * chiplet. Misassignment can occur if the process migrates to a different
> * blade or if the user changes the selected blade/chiplet.
> */
> -void gru_check_context_placement(struct gru_thread_state *gts)
> +int gru_check_context_placement(struct gru_thread_state *gts)
> {
> struct gru_state *gru;
> + int ret = 0;
>
> /*
> * If the current task is the context owner, verify that the
> @@ -726,15 +727,23 @@ void gru_check_context_placement(struct gru_thread_state *gts)
> * references. Pthread apps use non-owner references to the CBRs.
> */
> gru = gts->ts_gru;
> + /*
> + * If gru or gts->ts_tgid_owner isn't initialized properly, returning
> + * success is fine, for it's not a fatal error. The related variables
> + * can be reconfigured in other functions. The caller is responsible
> + * for their inspection, and reinitialization if needed.
> + */
How about this instead?
"If gru or gts->ts_tgid_owner isn't initialized properly, return
success to indicate that the caller does not need to unload the gru
context. The caller is responsible for their inspection and
reinitialization if needed."
> if (!gru || gts->ts_tgid_owner != current->tgid)
> - return;
> + return ret;
>
> if (!gru_check_chiplet_assignment(gru, gts)) {
> STAT(check_context_unload);
> - gru_unload_context(gts, 1);
> + ret = -EINVAL;
> } else if (gru_retarget_intr(gts)) {
> STAT(check_context_retarget_intr);
> }
> +
> + return ret;
> }
>
>
> @@ -934,7 +943,12 @@ vm_fault_t gru_fault(struct vm_fault *vmf)
> mutex_lock(&gts->ts_ctxlock);
> preempt_disable();
>
> - gru_check_context_placement(gts);
> + if (gru_check_context_placement(gts)) {
> + preempt_enable();
> + mutex_unlock(&gts->ts_ctxlock);
> + gru_unload_context(gts, 1);
> + return VM_FAULT_NOPAGE;
> + }
>
> if (!gts->ts_gru) {
> STAT(load_user_context);
> diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
> index 8c52776db234..640daf1994df 100644
> --- a/drivers/misc/sgi-gru/grutables.h
> +++ b/drivers/misc/sgi-gru/grutables.h
> @@ -632,7 +632,7 @@ extern int gru_user_flush_tlb(unsigned long arg);
> extern int gru_user_unload_context(unsigned long arg);
> extern int gru_get_exception_detail(unsigned long arg);
> extern int gru_set_context_option(unsigned long address);
> -extern void gru_check_context_placement(struct gru_thread_state *gts);
> +extern int gru_check_context_placement(struct gru_thread_state *gts);
> extern int gru_cpu_fault_map_id(void);
> extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
> extern void gru_flush_all_tlb(struct gru_state *gru);
> --
> 2.25.1
Powered by blists - more mailing lists