lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20091127162918G.fujita.tomonori@lab.ntt.co.jp>
Date:	Fri, 27 Nov 2009 16:29:32 +0900
From:	FUJITA Tomonori <fujita.tomonori@....ntt.co.jp>
To:	yinghai@...nel.org
Cc:	fujita.tomonori@....ntt.co.jp, mingo@...e.hu,
	linux-kernel@...r.kernel.org
Subject: Re: [PATCH -tip] x86: fix iommu=soft boot option

On Wed, 25 Nov 2009 14:33:27 -0800
Yinghai Lu <yinghai@...nel.org> wrote:

> in aperture_64.c::gart_iommu_hole_init()
>         printk(KERN_INFO  "Checking aperture...\n");
> 
>         if (!fallback_aper_force)
>                 agp_aper_base = search_agp_bridge(&agp_aper_order, &valid_agp);
> 
>         fix = 0;
>         node = 0;
>         for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
>                 int bus;
>                 int dev_base, dev_limit;
> 
>                 bus = bus_dev_ranges[i].bus;
>                 dev_base = bus_dev_ranges[i].dev_base;
>                 dev_limit = bus_dev_ranges[i].dev_limit;
> 
>                 for (slot = dev_base; slot < dev_limit; slot++) {
> 
>                         if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
>                                 continue;
> 
>                         iommu_detected = 1;
>                         gart_iommu_aperture = 1;
> 
>                         aper_order = (read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL) >> 1) & 7;
>                         aper_size = (32 * 1024 * 1024) << aper_order;
>                         aper_base = read_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE) & 0x7fff;
>                         aper_base <<= 25;
> 
>                         printk(KERN_INFO "Node %d: aperture @ %Lx size %u MB\n",
>                                         node, aper_base, aper_size >> 20);
>                         node++;
> 
>                         if (!aperture_valid(aper_base, aper_size, 64<<20)) {
>                                 if (valid_agp && agp_aper_base &&
>                                     agp_aper_base == aper_base &&
>                                     agp_aper_order == aper_order) {
>                                         /* the same between two setting from NB and agp */
>                                         if (!no_iommu &&
>                                             max_pfn > MAX_DMA32_PFN &&
>                                             !printed_gart_size_msg) {
>                                                 printk(KERN_ERR "you are using iommu with agp, but GART size is less than 64M\n");
>                                                 printk(KERN_ERR "please increase GART size in your BIOS setup\n");
>                                                 printk(KERN_ERR "if BIOS doesn't have that option, contact your HW vendor!\n");
>                                                 printed_gart_size_msg = 1;
>                                         }
>                                 } else {
> POINT A:
>                                         fix = 1;
>                                         goto out;
>                                 }
>                         }
> 
>                         if ((last_aper_order && aper_order != last_aper_order) ||
>                             (last_aper_base && aper_base != last_aper_base)) {
>                                 fix = 1;
>                                 goto out;
>                         }
>                         last_aper_order = aper_order;
>                         last_aper_base = aper_base;
>                 }
>         }
> 
> out:
>         if (!fix && !fallback_aper_force) {
>                 if (last_aper_base) {
>                         unsigned long n = (32 * 1024 * 1024) << last_aper_order;
> 
>                         insert_aperture_resource((u32)last_aper_base, n);
>                 }
>                 return;
>         }
> 
>         if (!fallback_aper_force) {
>                 aper_alloc = agp_aper_base;
>                 aper_order = agp_aper_order;
>         }
> 
>         if (aper_alloc) {
>                 /* Got the aperture from the AGP bridge */
>         } else if (swiotlb && !valid_agp) {
> POINT: B
>                 /* Do nothing */
>         } else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
>                    force_iommu ||
>                    valid_agp ||
>                    fallback_aper_force) {
> POINT: C
>                 printk(KERN_INFO
>                         "Your BIOS doesn't leave a aperture memory hole\n");
>                 printk(KERN_INFO
>                         "Please enable the IOMMU option in the BIOS setup\n");
>                 printk(KERN_INFO
>                         "This costs you %d MB of RAM\n",
>                                 32 << fallback_aper_order);
> 
>                 aper_order = fallback_aper_order;
>                 aper_alloc = allocate_aperture();
>                 if (!aper_alloc) {
>                         /*
>                          * Could disable AGP and IOMMU here, but it's
>                          * probably not worth it. But the later users
>                          * cannot deal with bad apertures and turning
>                          * on the aperture over memory causes very
>                          * strange problems, so it's better to panic
>                          * early.
>                          */
>                         panic("Not enough memory for aperture");
>                 }
>         } else {
>                 return;
>         }
> 
> POINT X:
> 
>         /* Fix up the north bridges */
>         for (i = 0; i < ARRAY_SIZE(bus_dev_ranges); i++) {
>                 int bus;
>                 int dev_base, dev_limit;
> 
>                 bus = bus_dev_ranges[i].bus;
>                 dev_base = bus_dev_ranges[i].dev_base;
>                 dev_limit = bus_dev_ranges[i].dev_limit;
>                 for (slot = dev_base; slot < dev_limit; slot++) {
>                         if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
>                                 continue;
> 
>                         /* Don't enable translation yet. That is done later.
>                            Assume this BIOS didn't initialise the GART so
>                            just overwrite all previous bits */
>                         write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, aper_order << 1);
>                         write_pci_config(bus, slot, 3, AMD64_GARTAPERTUREBASE, aper_alloc >> 25);
>                 }
>         }
> 
> when AMD64 mem > 4g, no AGP, BIOS set all gart iommu on all nodes size to 32M, and enable bit are set.

I have such machine (with sane BIOS).

But if BIOS is broken, early_gart_iommu_check() disables GART_EN bit
(bits 0)?

> 1. iommu=soft, will go through POINT A and POINT B

Not always. if aperture_valid() is true, it doesn't go POINT A. My
GART machine doesn't go.


> 2. no "iommu=soft", will go through POINT A and POINT C

As I said, it's not true about POINT A.


> and all will reach POINT X to make sure ENABLE bit is not set.

POINT X doesn't look make sure ENABLE bit is not set. It just fixes up
(sets) the address and its size.

And with 2.6.31, if I use iommu=soft, my GART machine uses swiotlb but
it still keeps GART_EN bit enabled.

So for what does 'iommu=soft' go to POINT B and POINT X? That is, why
we can't skip them with 'iommu=soft'?


> please check
> 
> [PATCH] x86: fix gart iommu using for amd 64 bit system -v3
> 
> after
> |commit 75f1cdf1dda92cae037ec848ae63690d91913eac
> |Author: FUJITA Tomonori <fujita.tomonori@....ntt.co.jp>
> |Date:   Tue Nov 10 19:46:20 2009 +0900
> |
> |    x86: Handle HW IOMMU initialization failure gracefully
> |    
> |    If HW IOMMU initialization fails (Intel VT-d often does this,
> |    typically due to BIOS bugs), we fall back to nommu. It doesn't
> |    work for the majority since nowadays we have more than 4GB
> |    memory so we must use swiotlb instead of nommu.
> ...
> 
> amd 64 systems that
> 1. do not have  AGP
> 2. do not have IOMMU
> 3. mem > 4g
> 4. BIOS do not allocate  correct gart in NB.
> will leave them to use SWIOTLB forcely.

What should such system use in this case?


> also in pci_iommu_alloc()
> pci_swiotlb_init is stealing the preallocate range that is for
> gart_iommu_hole workaround.
> 
> so restore the sequence...
> 
> will get back
> [    0.000000] Your BIOS doesn't leave a aperture memory hole
> [    0.000000] Please enable the IOMMU option in the BIOS setup
> [    0.000000] This costs you 64 MB of RAM
> [    0.000000] Mapping aperture over 65536 KB of RAM @ 20000000
> ...
> [   12.702822] calling  pci_iommu_init+0x0/0x31 @ 1
> [   12.708728] PCI-DMA: Disabling AGP.
> [   12.712812] PCI-DMA: aperture base @ 20000000 size 65536 KB
> [   12.718384] PCI-DMA: using GART IOMMU.
> [   12.722139] PCI-DMA: Reserving 64MB of IOMMU area in the AGP aperture
> [   12.736944] initcall pci_iommu_init+0x0/0x31 returned 0 after 28802 usecs
> 
> -v2: call shutdown when no agp is there
> -v3: add check_store to restore state for swiotlb
>      separate pci_swiotlb_detect and pci_swiotlb_init from FUJITA
> 
> Signed-off-by: Yinghai Lu <yinghai@...nel.org>
> 
> ---
>  arch/x86/include/asm/swiotlb.h |    8 ++++++--
>  arch/x86/kernel/aperture_64.c  |   17 ++++++++++++++---
>  arch/x86/kernel/pci-dma.c      |   11 +++++++++--
>  arch/x86/kernel/pci-gart_64.c  |    3 ++-
>  arch/x86/kernel/pci-swiotlb.c  |   11 +++++++----
>  5 files changed, 38 insertions(+), 12 deletions(-)
> 
> Index: linux-2.6/arch/x86/kernel/aperture_64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/aperture_64.c
> +++ linux-2.6/arch/x86/kernel/aperture_64.c
> @@ -375,6 +375,9 @@ void __init gart_iommu_hole_init(void)
>  	u64 aper_base, last_aper_base = 0;
>  	int fix, slot, valid_agp = 0;
>  	int i, node;
> +	int iommu_detected_old = iommu_detected;
> +	int gart_iommu_aperture_old = gart_iommu_aperture;
> +	int (*iommu_init_old)(void) = x86_init.iommu.iommu_init;

This 'setting first and restoring later' code is hacky. It's better to
set only when we necessary.


>  	if (gart_iommu_aperture_disabled || !fix_aperture ||
>  	    !early_pci_allowed())
> @@ -448,7 +451,7 @@ out:
>  
>  			insert_aperture_resource((u32)last_aper_base, n);
>  		}
> -		return;
> +		goto check_restore;
>  	}
>  
>  	if (!fallback_aper_force) {
> @@ -458,7 +461,7 @@ out:
>  
>  	if (aper_alloc) {
>  		/* Got the aperture from the AGP bridge */
> -	} else if (!valid_agp) {
> +	} else if (swiotlb && !valid_agp) {
>  		/* Do nothing */
>  	} else if ((!no_iommu && max_pfn > MAX_DMA32_PFN) ||
>  		   force_iommu ||
> @@ -486,7 +489,7 @@ out:
>  			panic("Not enough memory for aperture");
>  		}
>  	} else {
> -		return;
> +		goto check_restore;
>  	}
>  
>  	/* Fix up the north bridges */
> @@ -510,4 +513,12 @@ out:
>  	}
>  
>  	set_up_gart_resume(aper_order, aper_alloc);
> +
> +check_restore:
> +	if (swiotlb) {
> +		/* restore the setting */
> +		iommu_detected = iommu_detected_old;
> +		gart_iommu_aperture = gart_iommu_aperture_old;
> +		x86_init.iommu.iommu_init = iommu_init_old;
> +	}
>  }
> Index: linux-2.6/arch/x86/kernel/pci-dma.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/pci-dma.c
> +++ linux-2.6/arch/x86/kernel/pci-dma.c
> @@ -120,21 +120,28 @@ static void __init dma32_free_bootmem(vo
>  
>  void __init pci_iommu_alloc(void)
>  {
> +	int ret;
> +
>  #ifdef CONFIG_X86_64
>  	/* free the range so iommu could get some range less than 4G */
>  	dma32_free_bootmem();
>  #endif
> -	if (pci_swiotlb_init())
> -		return;
> +	ret = pci_swiotlb_detect();
>  
>  	gart_iommu_hole_init();
>  
> +	if (ret)
> +		goto out;
> +
>  	detect_calgary();
>  
>  	detect_intel_iommu();
>  
>  	/* needs to be called after gart_iommu_hole_init */
>  	amd_iommu_detect();
> +
> +out:
> +	pci_swiotlb_init();
>  }
>  
>  void *dma_generic_alloc_coherent(struct device *dev, size_t size,
> Index: linux-2.6/arch/x86/kernel/pci-gart_64.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/pci-gart_64.c
> +++ linux-2.6/arch/x86/kernel/pci-gart_64.c
> @@ -710,7 +710,8 @@ static void gart_iommu_shutdown(void)
>  	struct pci_dev *dev;
>  	int i;
>  
> -	if (no_agp)
> +	/* don't shutdown it if there is AGP installed */
> +	if (!no_agp)
>  		return;
>  
>  	for (i = 0; i < num_k8_northbridges; i++) {
> Index: linux-2.6/arch/x86/include/asm/swiotlb.h
> ===================================================================
> --- linux-2.6.orig/arch/x86/include/asm/swiotlb.h
> +++ linux-2.6/arch/x86/include/asm/swiotlb.h
> @@ -5,13 +5,17 @@
>  
>  #ifdef CONFIG_SWIOTLB
>  extern int swiotlb;
> -extern int pci_swiotlb_init(void);
> +int pci_swiotlb_detect(void);
> +void pci_swiotlb_init(void);
>  #else
>  #define swiotlb 0
> -static inline int pci_swiotlb_init(void)
> +static inline int pci_swiotlb_detect(void)
>  {
>  	return 0;
>  }
> +static inline void pci_swiotlb_init(void)
> +{
> +}
>  #endif
>  
>  static inline void dma_mark_clean(void *addr, size_t size) {}
> Index: linux-2.6/arch/x86/kernel/pci-swiotlb.c
> ===================================================================
> --- linux-2.6.orig/arch/x86/kernel/pci-swiotlb.c
> +++ linux-2.6/arch/x86/kernel/pci-swiotlb.c
> @@ -43,12 +43,12 @@ static struct dma_map_ops swiotlb_dma_op
>  };
>  
>  /*
> - * pci_swiotlb_init - initialize swiotlb if necessary
> + * pci_swiotlb_detect - initialize swiotlb if necessary
>   *
>   * This returns non-zero if we are forced to use swiotlb (by the boot
>   * option).
>   */
> -int __init pci_swiotlb_init(void)
> +int __init pci_swiotlb_detect(void)
>  {
>  	int use_swiotlb = swiotlb | swiotlb_force;
>  
> @@ -60,10 +60,13 @@ int __init pci_swiotlb_init(void)
>  	if (swiotlb_force)
>  		swiotlb = 1;
>  
> +	return use_swiotlb;
> +}
> +
> +void __init pci_swiotlb_init(void)
> +{
>  	if (swiotlb) {
>  		swiotlb_init(0);
>  		dma_ops = &swiotlb_dma_ops;
>  	}
> -
> -	return use_swiotlb;
>  }
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ