lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <AADFC41AFE54684AB9EE6CBC0274A5D1912F2C24@SHSMSX101.ccr.corp.intel.com>
Date:   Thu, 6 Sep 2018 03:11:50 +0000
From:   "Tian, Kevin" <kevin.tian@...el.com>
To:     Lu Baolu <baolu.lu@...ux.intel.com>,
        Joerg Roedel <joro@...tes.org>,
        "David Woodhouse" <dwmw2@...radead.org>
CC:     "Raj, Ashok" <ashok.raj@...el.com>,
        "Kumar, Sanjay K" <sanjay.k.kumar@...el.com>,
        "Pan, Jacob jun" <jacob.jun.pan@...el.com>,
        "Liu, Yi L" <yi.l.liu@...el.com>, "Sun, Yi Y" <yi.y.sun@...el.com>,
        "peterx@...hat.com" <peterx@...hat.com>,
        Jean-Philippe Brucker <jean-philippe.brucker@....com>,
        "iommu@...ts.linux-foundation.org" <iommu@...ts.linux-foundation.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        Jacob Pan <jacob.jun.pan@...ux.intel.com>
Subject: RE: [PATCH v2 06/12] iommu/vt-d: Add second level page table
 interface

> From: Lu Baolu [mailto:baolu.lu@...ux.intel.com]
> Sent: Thursday, August 30, 2018 9:35 AM
> 
> This adds the interfaces to setup or tear down the structures
> for second level page table translations. This includes types
> of second level only translation and pass through.
> 
> Cc: Ashok Raj <ashok.raj@...el.com>
> Cc: Jacob Pan <jacob.jun.pan@...ux.intel.com>
> Cc: Kevin Tian <kevin.tian@...el.com>
> Cc: Liu Yi L <yi.l.liu@...el.com>
> Signed-off-by: Sanjay Kumar <sanjay.k.kumar@...el.com>
> Signed-off-by: Lu Baolu <baolu.lu@...ux.intel.com>
> Reviewed-by: Ashok Raj <ashok.raj@...el.com>
> ---
>  drivers/iommu/intel-iommu.c |   2 +-
>  drivers/iommu/intel-pasid.c | 246
> ++++++++++++++++++++++++++++++++++++
>  drivers/iommu/intel-pasid.h |   7 +
>  include/linux/intel-iommu.h |   3 +
>  4 files changed, 257 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
> index 562da10bf93e..de6b909bb47a 100644
> --- a/drivers/iommu/intel-iommu.c
> +++ b/drivers/iommu/intel-iommu.c
> @@ -1232,7 +1232,7 @@ static void iommu_set_root_entry(struct
> intel_iommu *iommu)
>  	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
>  }
> 
> -static void iommu_flush_write_buffer(struct intel_iommu *iommu)
> +void iommu_flush_write_buffer(struct intel_iommu *iommu)
>  {
>  	u32 val;
>  	unsigned long flag;
> diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c
> index d6e90cd5b062..edcea1d8b9fc 100644
> --- a/drivers/iommu/intel-pasid.c
> +++ b/drivers/iommu/intel-pasid.c
> @@ -9,6 +9,7 @@
> 
>  #define pr_fmt(fmt)	"DMAR: " fmt
> 
> +#include <linux/bitops.h>
>  #include <linux/dmar.h>
>  #include <linux/intel-iommu.h>
>  #include <linux/iommu.h>
> @@ -291,3 +292,248 @@ void intel_pasid_clear_entry(struct device *dev,
> int pasid)
> 
>  	pasid_clear_entry(pe);
>  }
> +
> +static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
> +{
> +	u64 old;
> +
> +	old = READ_ONCE(*ptr);
> +	WRITE_ONCE(*ptr, (old & ~mask) | bits);
> +}
> +
> +/*
> + * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
> + * PASID entry.
> + */
> +static inline void
> +pasid_set_domain_id(struct pasid_entry *pe, u64 value)
> +{
> +	pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
> +}
> +
> +/*
> + * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63)
> + * of a scalable mode PASID entry.
> + */
> +static inline void
> +pasid_set_address_root(struct pasid_entry *pe, u64 value)

Is address_root too general a name? Especially since the entry could contain
both first-level and second-level page table pointers.

> +{
> +	pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
> +}
> +
> +/*
> + * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID
> + * entry.
> + */
> +static inline void
> +pasid_set_address_width(struct pasid_entry *pe, u64 value)
> +{
> +	pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
> +}
> +
> +/*
> + * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8)
> + * of a scalable mode PASID entry.
> + */
> +static inline void
> +pasid_set_translation_type(struct pasid_entry *pe, u64 value)
> +{
> +	pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
> +}
> +
> +/*
> + * Enable fault processing by clearing the FPD(Fault Processing
> + * Disable) field (Bit 1) of a scalable mode PASID entry.
> + */
> +static inline void pasid_set_fault_enable(struct pasid_entry *pe)
> +{
> +	pasid_set_bits(&pe->val[0], 1 << 1, 0);
> +}
> +
> +/*
> + * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
> + * scalable mode PASID entry.
> + */
> +static inline void pasid_set_sre(struct pasid_entry *pe)
> +{
> +	pasid_set_bits(&pe->val[2], 1 << 0, 1);
> +}
> +
> +/*
> + * Setup the P(Present) field (Bit 0) of a scalable mode PASID
> + * entry.
> + */
> +static inline void pasid_set_present(struct pasid_entry *pe)
> +{
> +	pasid_set_bits(&pe->val[0], 1 << 0, 1);
> +}

It's a long list, and there could be more in the future. What about
defining a macro to reduce the LOC, e.g.

#define PASID_SET(name, i, m, b)				\
static inline void pasid_set_##name(struct pasid_entry *pe)	\
{								\
	pasid_set_bits(&pe->val[i], m, b);			\
}

PASID_SET(present, 0, 1<<0, 1);
PASID_SET(sre, 2, 1<<0, 1);
...

> +
> +/*
> + * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID
> + * entry.
> + */
> +static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
> +{
> +	pasid_set_bits(&pe->val[1], 1 << 23, value);
> +}
> +
> +static void
> +pasid_based_pasid_cache_invalidation(struct intel_iommu *iommu,
> +				     int did, int pasid)

Suggested name: pasid_cache_invalidation_with_pasid

> +{
> +	struct qi_desc desc;
> +
> +	desc.qw0 = QI_PC_DID(did) | QI_PC_PASID_SEL |
> QI_PC_PASID(pasid);
> +	desc.qw1 = 0;
> +	desc.qw2 = 0;
> +	desc.qw3 = 0;
> +
> +	qi_submit_sync(&desc, iommu);
> +}
> +
> +static void
> +pasid_based_iotlb_cache_invalidation(struct intel_iommu *iommu,
> +				     u16 did, u32 pasid)

Suggested name: iotlb_invalidation_with_pasid

> +{
> +	struct qi_desc desc;
> +
> +	desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
> +			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
> QI_EIOTLB_TYPE;
> +	desc.qw1 = 0;
> +	desc.qw2 = 0;
> +	desc.qw3 = 0;
> +
> +	qi_submit_sync(&desc, iommu);
> +}
> +
> +static void
> +pasid_based_dev_iotlb_cache_invalidation(struct intel_iommu *iommu,
> +					 struct device *dev, int pasid)

Suggested name: devtlb_invalidation_with_pasid

> +{
> +	struct device_domain_info *info;
> +	u16 sid, qdep, pfsid;
> +
> +	info = dev->archdata.iommu;
> +	if (!info || !info->ats_enabled)
> +		return;
> +
> +	sid = info->bus << 8 | info->devfn;
> +	qdep = info->ats_qdep;
> +	pfsid = info->pfsid;
> +
> +	qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 -
> VTD_PAGE_SHIFT);
> +}
> +
> +static void tear_down_one_pasid_entry(struct intel_iommu *iommu,
> +				      struct device *dev, u16 did,
> +				      int pasid)
> +{
> +	struct pasid_entry *pte;

Suggested name: ptep

> +
> +	intel_pasid_clear_entry(dev, pasid);
> +
> +	if (!ecap_coherent(iommu->ecap)) {
> +		pte = intel_pasid_get_entry(dev, pasid);
> +		clflush_cache_range(pte, sizeof(*pte));
> +	}
> +
> +	pasid_based_pasid_cache_invalidation(iommu, did, pasid);
> +	pasid_based_iotlb_cache_invalidation(iommu, did, pasid);
> +
> +	/* Device IOTLB doesn't need to be flushed in caching mode. */
> +	if (!cap_caching_mode(iommu->cap))
> +		pasid_based_dev_iotlb_cache_invalidation(iommu, dev,
> pasid);

Can you elaborate, or point to a spec reference?

> +}
> +
> +/*
> + * Set up the scalable mode pasid table entry for second only or
> + * passthrough translation type.
> + */
> +int intel_pasid_setup_second_level(struct intel_iommu *iommu,

second_level doesn't imply passthrough. What about
intel_pasid_setup_common, which is then invoked by SL or PT
individually (or even FL)?

> +				   struct dmar_domain *domain,
> +				   struct device *dev, int pasid,
> +				   bool pass_through)
> +{
> +	struct pasid_entry *pte;
> +	struct dma_pte *pgd;
> +	u64 pgd_val;
> +	int agaw;
> +	u16 did;
> +
> +	/*
> +	 * If hardware advertises no support for second level translation,
> +	 * we only allow pass through translation setup.
> +	 */
> +	if (!(ecap_slts(iommu->ecap) || pass_through)) {
> +		pr_err("No first level translation support on %s, only pass-

s/first/second/ -- the message should say "No second level translation support".

> through mode allowed\n",
> +		       iommu->name);
> +		return -EINVAL;
> +	}
> +
> +	/*
> +	 * Skip top levels of page tables for iommu which has less agaw

skip doesn't mean error

> +	 * than default. Unnecessary for PT mode.
> +	 */
> +	pgd = domain->pgd;
> +	if (!pass_through) {
> +		for (agaw = domain->agaw; agaw != iommu->agaw; agaw--)
> {
> +			pgd = phys_to_virt(dma_pte_addr(pgd));
> +			if (!dma_pte_present(pgd)) {
> +				dev_err(dev, "Invalid domain page table\n");
> +				return -EINVAL;
> +			}
> +		}
> +	}
> +	pgd_val = pass_through ? 0 : virt_to_phys(pgd);
> +	did = pass_through ? FLPT_DEFAULT_DID :
> +			domain->iommu_did[iommu->seq_id];
> +
> +	pte = intel_pasid_get_entry(dev, pasid);
> +	if (!pte) {
> +		dev_err(dev, "Failed to get pasid entry of PASID %d\n",
> pasid);
> +		return -ENODEV;
> +	}
> +
> +	pasid_clear_entry(pte);
> +	pasid_set_domain_id(pte, did);
> +
> +	if (!pass_through)
> +		pasid_set_address_root(pte, pgd_val);
> +
> +	pasid_set_address_width(pte, iommu->agaw);
> +	pasid_set_translation_type(pte, pass_through ? 4 : 2);
> +	pasid_set_fault_enable(pte);
> +	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
> +
> +	/*
> +	 * Since it is a second level only translation setup, we should
> +	 * set SRE bit as well (addresses are expected to be GPAs).
> +	 */
> +	pasid_set_sre(pte);
> +	pasid_set_present(pte);
> +
> +	if (!ecap_coherent(iommu->ecap))
> +		clflush_cache_range(pte, sizeof(*pte));
> +
> +	if (cap_caching_mode(iommu->cap)) {
> +		pasid_based_pasid_cache_invalidation(iommu, did, pasid);
> +		pasid_based_iotlb_cache_invalidation(iommu, did, pasid);
> +	} else {
> +		iommu_flush_write_buffer(iommu);
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Tear down the scalable mode pasid table entry for second only or
> + * passthrough translation type.
> + */
> +void intel_pasid_tear_down_second_level(struct intel_iommu *iommu,
> +					struct dmar_domain *domain,
> +					struct device *dev, int pasid)
> +{
> +	u16 did = domain->iommu_did[iommu->seq_id];
> +
> +	tear_down_one_pasid_entry(iommu, dev, did, pasid);
> +}
> diff --git a/drivers/iommu/intel-pasid.h b/drivers/iommu/intel-pasid.h
> index 03c1612d173c..85b158a1826a 100644
> --- a/drivers/iommu/intel-pasid.h
> +++ b/drivers/iommu/intel-pasid.h
> @@ -49,5 +49,12 @@ struct pasid_table *intel_pasid_get_table(struct
> device *dev);
>  int intel_pasid_get_dev_max_id(struct device *dev);
>  struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid);
>  void intel_pasid_clear_entry(struct device *dev, int pasid);
> +int intel_pasid_setup_second_level(struct intel_iommu *iommu,
> +				   struct dmar_domain *domain,
> +				   struct device *dev, int pasid,
> +				   bool pass_through);
> +void intel_pasid_tear_down_second_level(struct intel_iommu *iommu,
> +					struct dmar_domain *domain,
> +					struct device *dev, int pasid);
> 
>  #endif /* __INTEL_PASID_H */
> diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
> index 72aff482b293..d77d23dfd221 100644
> --- a/include/linux/intel-iommu.h
> +++ b/include/linux/intel-iommu.h
> @@ -115,6 +115,8 @@
>   * Extended Capability Register
>   */
> 
> +#define ecap_smpwc(e)		(((e) >> 48) & 0x1)
> +#define ecap_slts(e)		(((e) >> 46) & 0x1)
>  #define ecap_smts(e)		(((e) >> 43) & 0x1)
>  #define ecap_dit(e)		((e >> 41) & 0x1)
>  #define ecap_pasid(e)		((e >> 40) & 0x1)
> @@ -571,6 +573,7 @@ void free_pgtable_page(void *vaddr);
>  struct intel_iommu *domain_get_iommu(struct dmar_domain *domain);
>  int for_each_device_domain(int (*fn)(struct device_domain_info *info,
>  				     void *data), void *data);
> +void iommu_flush_write_buffer(struct intel_iommu *iommu);
> 
>  #ifdef CONFIG_INTEL_IOMMU_SVM
>  int intel_svm_init(struct intel_iommu *iommu);
> --
> 2.17.1

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ