When a hw IOMMU is detected during pci_iommu_alloc(), it disables the
swiotlb setup.  If the subsequent hw IOMMU initialization in
pci_iommu_init() fails, the box may be left in an unusable state.  The
swiotlb is normally allocated early from bootmem to ensure a large
(64MB) contiguous allocation.  This patch adds logic to allocate the
swiotlb despite the presence of a hw IOMMU, and to later free the
swiotlb if it is not needed or enable it if it is.  Because
pci_iommu_init() is called after bootmem has been released to the page
allocator, we use free_bootmem_late(), a new mechanism for freeing
pages directly back to the page allocator.

This patch relies on (iommu_detected && !dma_ops) being true as the
indication that hw IOMMU initialization failed.  This will not work
with the AMD IOMMU in passthrough mode.

https://bugzilla.redhat.com/show_bug.cgi?id=524808

Cc: David Woodhouse
Cc: Joerg Roedel
Signed-off-by: Chris Wright
---
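
A note for reviewers, not part of the change: free_bootmem_late() is a
newly added helper, and the sketch below is only a rough illustration
of what it is assumed to do here, namely hand every page fully
contained in the range straight back to the page allocator, since the
bootmem allocator itself has already been torn down.  The real
implementation lives in mm/bootmem.c.

        /*
         * Rough sketch only; relies on the mm-internal helper
         * __free_pages_bootmem() (declared in mm/internal.h), so it
         * belongs in mm/.  Uses PFN_UP/PFN_DOWN from <linux/pfn.h> and
         * totalram_pages from <linux/mm.h>.
         */
        void __init free_bootmem_late(unsigned long addr, unsigned long size)
        {
                unsigned long cursor = PFN_UP(addr);       /* first fully contained page */
                unsigned long end = PFN_DOWN(addr + size); /* one past the last one */

                for (; cursor < end; cursor++) {
                        /* release the page straight to the buddy allocator */
                        __free_pages_bootmem(pfn_to_page(cursor), 0);
                        totalram_pages++;
                }
        }

This is what allows pci_swiotlb_init() to drop the 64MB bootmem
allocation this late in boot.
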
 arch/x86/include/asm/swiotlb.h |    4 ++++
 arch/x86/kernel/pci-dma.c      |    4 +++-
 arch/x86/kernel/pci-swiotlb.c  |   27 +++++++++++++++++++++------
 include/linux/swiotlb.h        |    3 +++
 lib/swiotlb.c                  |   10 ++++++++++
 5 files changed, 41 insertions(+), 7 deletions(-)

--- a/arch/x86/include/asm/swiotlb.h
+++ b/arch/x86/include/asm/swiotlb.h
@@ -9,9 +9,13 @@ extern int swiotlb_force;
 
 #ifdef CONFIG_SWIOTLB
 extern int swiotlb;
+extern void pci_swiotlb_alloc(void);
 extern void pci_swiotlb_init(void);
 #else
 #define swiotlb 0
+static inline void pci_swiotlb_alloc(void)
+{
+}
 static inline void pci_swiotlb_init(void)
 {
 }
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -141,7 +141,7 @@ void __init pci_iommu_alloc(void)
 
         amd_iommu_detect();
 
-        pci_swiotlb_init();
+        pci_swiotlb_alloc();
 }
 
 void *dma_generic_alloc_coherent(struct device *dev, size_t size,
@@ -300,6 +300,8 @@ static int __init pci_iommu_init(void)
 
         gart_iommu_init();
 
+        pci_swiotlb_init();
+
         no_iommu_init();
         return 0;
 }
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -42,18 +42,33 @@ static struct dma_map_ops swiotlb_dma_op
         .dma_supported = NULL,
 };
 
-void __init pci_swiotlb_init(void)
+static int swiotlb_try_init;
+
+void __init pci_swiotlb_alloc(void)
 {
         /* don't initialize swiotlb if iommu=off (no_iommu=1) */
 #ifdef CONFIG_X86_64
-        if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN))
-                swiotlb = 1;
+        if (!no_iommu && max_pfn > MAX_DMA32_PFN) {
+                if (!iommu_detected)
+                        swiotlb = 1;
+                else
+                        swiotlb_try_init = 1;
+        }
 #endif
         if (swiotlb_force)
                 swiotlb = 1;
-        if (swiotlb) {
-                printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
+        if (swiotlb || swiotlb_try_init)
                 swiotlb_init();
+}
+
+void __init pci_swiotlb_init(void)
+{
+        if (!swiotlb && !swiotlb_try_init)
+                return;
+
+        if (iommu_detected && !dma_ops) {
+                pr_info("PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
                 dma_ops = &swiotlb_dma_ops;
-        }
+        } else
+                swiotlb_free();
 }
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -24,6 +24,9 @@ extern void
 swiotlb_init(void);
 
 extern void
+swiotlb_free(void);
+
+extern void
 *swiotlb_alloc_coherent(struct device *hwdev, size_t size,
                         dma_addr_t *dma_handle, gfp_t flags);
 
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -181,6 +181,16 @@ swiotlb_init_with_default_size(size_t de
 }
 
 void __init
+swiotlb_free(void)
+{
+
+        free_bootmem_late(__pa(io_tlb_overflow_buffer), io_tlb_overflow);
+        free_bootmem_late(__pa(io_tlb_orig_addr), io_tlb_nslabs * sizeof(phys_addr_t));
+        free_bootmem_late(__pa(io_tlb_list), io_tlb_nslabs * sizeof(int));
+        free_bootmem_late(__pa(io_tlb_start), io_tlb_nslabs << IO_TLB_SHIFT);
+}
+
+void __init
 swiotlb_init(void)
 {
         swiotlb_init_with_default_size(64 * (1<<20));   /* default to 64MB */