lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <0646275c-0ab9-2fad-8db1-7098656d6e1d@intel.com>
Date:   Fri, 2 Dec 2022 14:33:14 -0800
From:   Dave Hansen <dave.hansen@...el.com>
To:     Kristen Carlson Accardi <kristen@...ux.intel.com>,
        jarkko@...nel.org, dave.hansen@...ux.intel.com, tj@...nel.org,
        linux-kernel@...r.kernel.org, linux-sgx@...r.kernel.org,
        cgroups@...r.kernel.org, Thomas Gleixner <tglx@...utronix.de>,
        Ingo Molnar <mingo@...hat.com>, Borislav Petkov <bp@...en8.de>,
        x86@...nel.org, "H. Peter Anvin" <hpa@...or.com>
Cc:     zhiquan1.li@...el.com, Sean Christopherson <seanjc@...gle.com>
Subject: Re: [PATCH v2 07/18] x86/sgx: Use a list to track to-be-reclaimed
 pages during reclaim

On 12/2/22 10:36, Kristen Carlson Accardi wrote:
> From: Sean Christopherson <sean.j.christopherson@...el.com>
> 
> Change sgx_reclaim_pages() to use a list rather than an array for
> storing the epc_pages which will be reclaimed. This change is needed
> to transition to the LRU implementation for EPC cgroup support.
> 
> This change requires keeping track of whether newly recorded
> EPC pages are pages for VA Arrays, or for Enclave data. In addition,
> helper functions are added to move pages from one list to another and
> enforce a consistent queue like behavior for the LRU lists.

More changelog nit: Please use imperative voice, not passive voice.
Move from:

	In addition, helper functions are added

to:

	In addition, add helper functions

> diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
> index 4683da9ef4f1..9ee306ac2a8e 100644
> --- a/arch/x86/kernel/cpu/sgx/encl.c
> +++ b/arch/x86/kernel/cpu/sgx/encl.c
> @@ -252,7 +252,7 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
>  		epc_page = sgx_encl_eldu(&encl->secs, NULL);
>  		if (IS_ERR(epc_page))
>  			return ERR_CAST(epc_page);
> -		sgx_record_epc_page(epc_page, 0);
> +		sgx_record_epc_page(epc_page, SGX_EPC_PAGE_ENCLAVE);
>  	}

This is one of those patches where the first hunk seems like it is
entirely disconnected from what the changelog made me expect I would see.

I don't see sgx_reclaim_pages(), or lists or arrays.

If you need to pass additional data down into a function, then do *that*
in a separate patch.

I'm glad it eventually got fixed up, but I don't really ever like to see
bare integers that don't have obvious meaning:

	sgx_record_epc_page(epc_page, 0);

Even if you had:

#define SGX_EPC_PAGE_RECLAIMER_UNTRACKED 0

	sgx_record_epc_page(epc_page, SGX_EPC_PAGE_RECLAIMER_UNTRACKED);

makes a *LOT* of sense compared to other callers that do

	sgx_record_epc_page(epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);

>  	epc_page = sgx_encl_eldu(entry, encl->secs.epc_page);
> @@ -260,7 +260,8 @@ static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
>  		return ERR_CAST(epc_page);
>  
>  	encl->secs_child_cnt++;
> -	sgx_record_epc_page(entry->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
> +	sgx_record_epc_page(entry->epc_page,
> +			    (SGX_EPC_PAGE_ENCLAVE | SGX_EPC_PAGE_RECLAIMER_TRACKED));
>  
>  	return entry;
>  }
> @@ -1221,7 +1222,7 @@ struct sgx_epc_page *sgx_alloc_va_page(struct sgx_encl *encl, bool reclaim)
>  		sgx_encl_free_epc_page(epc_page);
>  		return ERR_PTR(-EFAULT);
>  	}
> -	sgx_record_epc_page(epc_page, 0);
> +	sgx_record_epc_page(epc_page, SGX_EPC_PAGE_VERSION_ARRAY);
>  
>  	return epc_page;
>  }
> diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c
> index aca80a3f38a1..c3a9bffbc37e 100644
> --- a/arch/x86/kernel/cpu/sgx/ioctl.c
> +++ b/arch/x86/kernel/cpu/sgx/ioctl.c
> @@ -114,7 +114,7 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
>  	encl->attributes = secs->attributes;
>  	encl->attributes_mask = SGX_ATTR_DEBUG | SGX_ATTR_MODE64BIT | SGX_ATTR_KSS;
>  
> -	sgx_record_epc_page(encl->secs.epc_page, 0);
> +	sgx_record_epc_page(encl->secs.epc_page, SGX_EPC_PAGE_ENCLAVE);
>  
>  	/* Set only after completion, as encl->lock has not been taken. */
>  	set_bit(SGX_ENCL_CREATED, &encl->flags);
> @@ -325,7 +325,8 @@ static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
>  			goto err_out;
>  	}
>  
> -	sgx_record_epc_page(encl_page->epc_page, SGX_EPC_PAGE_RECLAIMER_TRACKED);
> +	sgx_record_epc_page(encl_page->epc_page,
> +			    (SGX_EPC_PAGE_ENCLAVE | SGX_EPC_PAGE_RECLAIMER_TRACKED));
>  	mutex_unlock(&encl->lock);
>  	mmap_read_unlock(current->mm);
>  	return ret;
> diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c
> index bad72498b0a7..83aaf5cea7b9 100644
> --- a/arch/x86/kernel/cpu/sgx/main.c
> +++ b/arch/x86/kernel/cpu/sgx/main.c
> @@ -288,37 +288,43 @@ static void sgx_reclaimer_write(struct sgx_epc_page *epc_page,
>   */
>  static void __sgx_reclaim_pages(void)
>  {
> -	struct sgx_epc_page *chunk[SGX_NR_TO_SCAN];
>  	struct sgx_backing backing[SGX_NR_TO_SCAN];
> +	struct sgx_epc_page *epc_page, *tmp;
>  	struct sgx_encl_page *encl_page;
> -	struct sgx_epc_page *epc_page;
>  	pgoff_t page_index;
> -	int cnt = 0;
> +	LIST_HEAD(iso);
>  	int ret;
>  	int i;
>  
>  	spin_lock(&sgx_global_lru.lock);
>  	for (i = 0; i < SGX_NR_TO_SCAN; i++) {
> -		epc_page = sgx_epc_pop_reclaimable(&sgx_global_lru);
> +		epc_page = sgx_epc_peek_reclaimable(&sgx_global_lru);
>  		if (!epc_page)
>  			break;
>  
>  		encl_page = epc_page->encl_owner;
>  
> +		if (WARN_ON_ONCE(!(epc_page->flags & SGX_EPC_PAGE_ENCLAVE)))
> +			continue;
> +
>  		if (kref_get_unless_zero(&encl_page->encl->refcount) != 0) {
>  			epc_page->flags |= SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
> -			chunk[cnt++] = epc_page;
> +			list_move_tail(&epc_page->list, &iso);
>  		} else {
> -			/* The owner is freeing the page. No need to add the
> -			 * page back to the list of reclaimable pages.
> +			/* The owner is freeing the page, remove it from the
> +			 * LRU list
>  			 */
>  			epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> +			list_del_init(&epc_page->list);
>  		}
>  	}
>  	spin_unlock(&sgx_global_lru.lock);
>  
> -	for (i = 0; i < cnt; i++) {
> -		epc_page = chunk[i];
> +	if (list_empty(&iso))
> +		return;
> +
> +	i = 0;
> +	list_for_each_entry_safe(epc_page, tmp, &iso, list) {
>  		encl_page = epc_page->encl_owner;
>  
>  		if (!sgx_reclaimer_age(epc_page))
> @@ -333,6 +339,7 @@ static void __sgx_reclaim_pages(void)
>  			goto skip;
>  		}
>  
> +		i++;
>  		encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED;
>  		mutex_unlock(&encl_page->encl->lock);
>  		continue;
> @@ -340,31 +347,25 @@ static void __sgx_reclaim_pages(void)
>  skip:
>  		spin_lock(&sgx_global_lru.lock);
>  		epc_page->flags &= ~SGX_EPC_PAGE_RECLAIM_IN_PROGRESS;
> -		sgx_epc_push_reclaimable(&sgx_global_lru, epc_page);
> +		sgx_epc_move_reclaimable(&sgx_global_lru, epc_page);
>  		spin_unlock(&sgx_global_lru.lock);
>  
>  		kref_put(&encl_page->encl->refcount, sgx_encl_release);
> -
> -		chunk[i] = NULL;
>  	}
>  
> -	for (i = 0; i < cnt; i++) {
> -		epc_page = chunk[i];
> -		if (epc_page)
> -			sgx_reclaimer_block(epc_page);
> -	}
> -
> -	for (i = 0; i < cnt; i++) {
> -		epc_page = chunk[i];
> -		if (!epc_page)
> -			continue;
> -
> +	list_for_each_entry(epc_page, &iso, list)
> +		sgx_reclaimer_block(epc_page);
> + 
> +	i = 0;
> +	list_for_each_entry_safe(epc_page, tmp, &iso, list) {
>  		encl_page = epc_page->encl_owner;
> -		sgx_reclaimer_write(epc_page, &backing[i]);
> +		sgx_reclaimer_write(epc_page, &backing[i++]);
>  
>  		kref_put(&encl_page->encl->refcount, sgx_encl_release);
>  		epc_page->flags &= ~(SGX_EPC_PAGE_RECLAIMER_TRACKED |
> -				     SGX_EPC_PAGE_RECLAIM_IN_PROGRESS);
> +				     SGX_EPC_PAGE_RECLAIM_IN_PROGRESS |
> +				     SGX_EPC_PAGE_ENCLAVE |
> +				     SGX_EPC_PAGE_VERSION_ARRAY);
>  
>  		sgx_free_epc_page(epc_page);
>  	}
> @@ -505,6 +506,7 @@ struct sgx_epc_page *__sgx_alloc_epc_page(void)
>  /**
>   * sgx_record_epc_page() - Add a page to the LRU tracking
>   * @page:	EPC page
> + * @flags:	Reclaim flags for the page.
>   *
>   * Mark a page with the specified flags and add it to the appropriate
>   * (un)reclaimable list.
> @@ -535,18 +537,19 @@ void sgx_record_epc_page(struct sgx_epc_page *page, unsigned long flags)
>  int sgx_drop_epc_page(struct sgx_epc_page *page)
>  {
>  	spin_lock(&sgx_global_lru.lock);
> -	if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) {
> -		/* The page is being reclaimed. */
> -		if (page->flags & SGX_EPC_PAGE_RECLAIM_IN_PROGRESS) {
> -			spin_unlock(&sgx_global_lru.lock);
> -			return -EBUSY;
> -		}
> -
> -		page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED;
> +	if ((page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) &&
> +	    (page->flags & SGX_EPC_PAGE_RECLAIM_IN_PROGRESS)) {
> +		spin_unlock(&sgx_global_lru.lock);
> +		return -EBUSY;
>  	}
>  	list_del(&page->list);
>  	spin_unlock(&sgx_global_lru.lock);
>  
> +	page->flags &= ~(SGX_EPC_PAGE_RECLAIMER_TRACKED |
> +			 SGX_EPC_PAGE_RECLAIM_IN_PROGRESS |
> +			 SGX_EPC_PAGE_ENCLAVE |
> +			 SGX_EPC_PAGE_VERSION_ARRAY);
> +
>  	return 0;
>  }
>  
> diff --git a/arch/x86/kernel/cpu/sgx/sgx.h b/arch/x86/kernel/cpu/sgx/sgx.h
> index 37d66bc6ca27..ec8d567cd975 100644
> --- a/arch/x86/kernel/cpu/sgx/sgx.h
> +++ b/arch/x86/kernel/cpu/sgx/sgx.h
> @@ -32,6 +32,8 @@
>  #define SGX_EPC_PAGE_KVM_GUEST		BIT(2)
>  /* page flag to indicate reclaim is in progress */
>  #define SGX_EPC_PAGE_RECLAIM_IN_PROGRESS BIT(3)
> +#define SGX_EPC_PAGE_ENCLAVE		BIT(4)
> +#define SGX_EPC_PAGE_VERSION_ARRAY	BIT(5)

Could you please spend some time to clearly document what each bit means?

> +static inline void __sgx_epc_page_list_move(struct list_head *list, struct sgx_epc_page *page)
> +{
> +	list_move_tail(&page->list, list);
> +}

I'm not sure I get the point of a helper like this.  Why not just have
the caller call list_move() directly?

>  /*
>   * Must be called with queue lock acquired
>   */
> @@ -157,6 +167,38 @@ static inline void sgx_epc_push_unreclaimable(struct sgx_epc_lru_lists *lrus,
>  	__sgx_epc_page_list_push(&(lrus)->unreclaimable, page);
>  }
>  
> +/*
> + * Must be called with queue lock acquired
> + */
> +static inline struct sgx_epc_page * __sgx_epc_page_list_peek(struct list_head *list)
> +{
> +	struct sgx_epc_page *epc_page;
> +
> +	if (list_empty(list))
> +		return NULL;
> +
> +	epc_page = list_first_entry(list, struct sgx_epc_page, list);
> +	return epc_page;
> +}

list_first_entry_or_null() perhaps?

> +static inline struct sgx_epc_page *
> +sgx_epc_peek_reclaimable(struct sgx_epc_lru_lists *lrus)
> +{
> +	return __sgx_epc_page_list_peek(&(lrus)->reclaimable);
> +}
> +
> +static inline void sgx_epc_move_reclaimable(struct sgx_epc_lru_lists *lru,
> +					    struct sgx_epc_page *page)
> +{
> +	__sgx_epc_page_list_move(&(lru)->reclaimable, page);
> +}
> +
> +static inline struct sgx_epc_page *
> +sgx_epc_peek_unreclaimable(struct sgx_epc_lru_lists *lrus)
> +{
> +	return __sgx_epc_page_list_peek(&(lrus)->unreclaimable);
> +}

In general, I'm not becoming more fond of these helpers as the series
goes along.  My worry is that they're an abstraction where we don't
*really* need one.  I don't seem them growing much functionality as the
series goes along.

I'll reserve judgement until the end though.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ