diff -Naurp --exclude-from /home/muli/w/dontdiff quilt/arch/x86_64/Kconfig dmaops/arch/x86_64/Kconfig --- quilt/arch/x86_64/Kconfig 2005-12-18 12:17:31.000000000 +0200 +++ dmaops/arch/x86_64/Kconfig 2005-12-18 12:32:41.000000000 +0200 @@ -369,15 +369,6 @@ config SWIOTLB depends on GART_IOMMU default y -config DUMMY_IOMMU - bool - depends on !GART_IOMMU && !SWIOTLB - default y - help - Don't use IOMMU code. This will cause problems when you have more than 4GB - of memory and any 32-bit devices. Don't turn on unless you know what you - are doing. - config X86_MCE bool "Machine check support" if EMBEDDED default y diff -Naurp --exclude-from /home/muli/w/dontdiff quilt/arch/x86_64/kernel/Makefile dmaops/arch/x86_64/kernel/Makefile --- quilt/arch/x86_64/kernel/Makefile 2005-12-18 12:17:31.000000000 +0200 +++ dmaops/arch/x86_64/kernel/Makefile 2005-12-18 12:33:14.000000000 +0200 @@ -8,7 +8,7 @@ obj-y := process.o signal.o entry.o trap ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ x8664_ksyms.o i387.o syscall.o vsyscall.o \ setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ - dmi_scan.o + dmi_scan.o pci-nommu.o obj-$(CONFIG_X86_MCE) += mce.o obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o @@ -28,7 +28,6 @@ obj-$(CONFIG_SOFTWARE_SUSPEND) += suspen obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o -obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o diff -Naurp --exclude-from /home/muli/w/dontdiff quilt/arch/x86_64/kernel/pci-dma.c dmaops/arch/x86_64/kernel/pci-dma.c --- quilt/arch/x86_64/kernel/pci-dma.c 2005-06-17 22:48:29.000000000 +0300 +++ dmaops/arch/x86_64/kernel/pci-dma.c 1970-01-01 02:00:00.000000000 +0200 @@ -1,60 +0,0 @@ -/* - * Dynamic DMA mapping support. 
- */ - -#include -#include -#include -#include -#include -#include - -/* Map a set of buffers described by scatterlist in streaming - * mode for DMA. This is the scatter-gather version of the - * above pci_map_single interface. Here the scatter gather list - * elements are each tagged with the appropriate dma address - * and length. They are obtained via sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for pci_map_single are - * the same here. - */ -int dma_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction) -{ - int i; - - BUG_ON(direction == DMA_NONE); - for (i = 0; i < nents; i++ ) { - struct scatterlist *s = &sg[i]; - BUG_ON(!s->page); - s->dma_address = virt_to_bus(page_address(s->page) +s->offset); - s->dma_length = s->length; - } - return nents; -} - -EXPORT_SYMBOL(dma_map_sg); - -/* Unmap a set of streaming mode DMA translations. - * Again, cpu read rules concerning calls here are the same as for - * pci_unmap_single() above. 
- */ -void dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, int dir) -{ - int i; - for (i = 0; i < nents; i++) { - struct scatterlist *s = &sg[i]; - BUG_ON(s->page == NULL); - BUG_ON(s->dma_address == 0); - dma_unmap_single(dev, s->dma_address, s->dma_length, dir); - } -} - -EXPORT_SYMBOL(dma_unmap_sg); diff -Naurp --exclude-from /home/muli/w/dontdiff quilt/arch/x86_64/kernel/pci-gart.c dmaops/arch/x86_64/kernel/pci-gart.c --- quilt/arch/x86_64/kernel/pci-gart.c 2005-12-18 12:17:32.000000000 +0200 +++ dmaops/arch/x86_64/kernel/pci-gart.c 2005-12-18 12:37:20.000000000 +0200 @@ -30,8 +30,7 @@ #include #include #include - -dma_addr_t bad_dma_address; +#include unsigned long iommu_bus_base; /* GART remapping area (physical) */ static unsigned long iommu_size; /* size of remapping area bytes */ @@ -39,8 +38,7 @@ static unsigned long iommu_pages; /* .. u32 *iommu_gatt_base; /* Remapping table */ -int no_iommu; -static int no_agp; +int no_iommu; #ifdef CONFIG_IOMMU_DEBUG int panic_on_overflow = 1; int force_iommu = 1; @@ -48,8 +46,6 @@ int force_iommu = 1; int panic_on_overflow = 0; int force_iommu = 0; #endif -int iommu_merge = 1; -int iommu_sac_force = 0; /* If this is disabled the IOMMU will use an optimized flushing strategy of only flushing when an mapping is reused. With it true the GART is flushed @@ -58,10 +54,6 @@ int iommu_sac_force = 0; also seen with Qlogic at least). */ int iommu_fullflush = 1; -/* This tells the BIO block layer to assume merging. Default to off - because we cannot guarantee merging later. */ -int iommu_bio_merge = 0; - #define MAX_NB 8 /* Allocation bitmap for the remapping area */ @@ -203,8 +195,8 @@ static void *dma_alloc_pages(struct devi * Allocate memory for a coherent mapping. 
*/ void * -dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp) +gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp) { void *memory; unsigned long dma_mask = 0; @@ -273,27 +265,11 @@ dma_alloc_coherent(struct device *dev, s error: if (panic_on_overflow) - panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", size); + panic("gart_alloc_coherent: IOMMU overflow by %lu bytes\n", size); free_pages((unsigned long)memory, get_order(size)); return NULL; } -/* - * Unmap coherent memory. - * The caller must ensure that the device has finished accessing the mapping. - */ -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t bus) -{ - if (swiotlb) { - swiotlb_free_coherent(dev, size, vaddr, bus); - return; - } - - dma_unmap_single(dev, bus, size, 0); - free_pages((unsigned long)vaddr, get_order(size)); -} - #ifdef CONFIG_IOMMU_LEAK #define SET_LEAK(x) if (iommu_leak_tab) \ @@ -409,14 +385,12 @@ static dma_addr_t dma_map_area(struct de } /* Map a single area into the IOMMU */ -dma_addr_t dma_map_single(struct device *dev, void *addr, size_t size, int dir) +dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir) { unsigned long phys_mem, bus; BUG_ON(dir == DMA_NONE); - if (swiotlb) - return swiotlb_map_single(dev,addr,size,dir); if (!dev) dev = &fallback_dev; @@ -427,7 +401,22 @@ dma_addr_t dma_map_single(struct device bus = dma_map_area(dev, phys_mem, size, dir, 1); flush_gart(dev); return bus; -} +} + +/* + * Wrapper for pci_unmap_single working with scatterlists. 
+ */ +void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) +{ + int i; + + for (i = 0; i < nents; i++) { + struct scatterlist *s = &sg[i]; + if (!s->dma_length || !s->length) + break; + dma_unmap_single(dev, s->dma_address, s->dma_length, dir); + } +} /* Fallback for dma_map_sg in case of overflow */ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, @@ -446,7 +435,7 @@ static int dma_map_sg_nonforce(struct de addr = dma_map_area(dev, addr, s->length, dir, 0); if (addr == bad_dma_address) { if (i > 0) - dma_unmap_sg(dev, sg, i, dir); + gart_unmap_sg(dev, sg, i, dir); nents = 0; sg[0].dma_length = 0; break; @@ -515,7 +504,7 @@ static inline int dma_map_cont(struct sc * DMA map all entries in a scatterlist. * Merge chunks that have page aligned sizes into a continuous mapping. */ -int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) +int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) { int i; int out; @@ -527,8 +516,6 @@ int dma_map_sg(struct device *dev, struc if (nents == 0) return 0; - if (swiotlb) - return swiotlb_map_sg(dev,sg,nents,dir); if (!dev) dev = &fallback_dev; @@ -571,7 +558,7 @@ int dma_map_sg(struct device *dev, struc error: flush_gart(NULL); - dma_unmap_sg(dev, sg, nents, dir); + gart_unmap_sg(dev, sg, nents, dir); /* When it was forced try again unforced */ if (force_iommu) return dma_map_sg_nonforce(dev, sg, nents, dir); @@ -586,18 +573,13 @@ error: /* * Free a DMA mapping. 
*/ -void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, +void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, int direction) { unsigned long iommu_page; int npages; int i; - if (swiotlb) { - swiotlb_unmap_single(dev,dma_addr,size,direction); - return; - } - if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || dma_addr >= iommu_bus_base + iommu_size) return; @@ -611,24 +593,17 @@ void dma_unmap_single(struct device *dev } /* - * Wrapper for pci_unmap_single working with scatterlists. - */ -void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) + * Unmap coherent memory. + * The caller must ensure that the device has finished accessing the mapping. + */ +void gart_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t bus) { - int i; - if (swiotlb) { - swiotlb_unmap_sg(dev,sg,nents,dir); - return; - } - for (i = 0; i < nents; i++) { - struct scatterlist *s = &sg[i]; - if (!s->dma_length || !s->length) - break; - dma_unmap_single(dev, s->dma_address, s->dma_length, dir); - } + gart_unmap_single(dev, bus, size, 0); + free_pages((unsigned long)vaddr, get_order(size)); } -int dma_supported(struct device *dev, u64 mask) +int gart_dma_supported(struct device *dev, u64 mask) { /* Copied from i386. Doesn't make much sense, because it will only work for pci_alloc_coherent. 
@@ -654,24 +629,10 @@ int dma_supported(struct device *dev, u6 return 1; } -int dma_get_cache_alignment(void) -{ - return boot_cpu_data.x86_clflush_size; -} - -EXPORT_SYMBOL(dma_unmap_sg); -EXPORT_SYMBOL(dma_map_sg); -EXPORT_SYMBOL(dma_map_single); -EXPORT_SYMBOL(dma_unmap_single); -EXPORT_SYMBOL(dma_supported); EXPORT_SYMBOL(no_iommu); -EXPORT_SYMBOL(force_iommu); -EXPORT_SYMBOL(bad_dma_address); -EXPORT_SYMBOL(iommu_bio_merge); -EXPORT_SYMBOL(iommu_sac_force); -EXPORT_SYMBOL(dma_get_cache_alignment); -EXPORT_SYMBOL(dma_alloc_coherent); -EXPORT_SYMBOL(dma_free_coherent); +EXPORT_SYMBOL(force_iommu); + +static int no_agp; static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) { @@ -778,6 +739,23 @@ static __init int init_k8_gatt(struct ag extern int agp_amd64_init(void); +static struct dma_mapping_ops gart_dma_ops = { + .mapping_error = NULL, + .alloc_coherent = gart_alloc_coherent, + .free_coherent = gart_free_coherent, + .map_single = gart_map_single, + .unmap_single = gart_unmap_single, + .sync_single_for_cpu = NULL, + .sync_single_for_device = NULL, + .sync_single_range_for_cpu = NULL, + .sync_single_range_for_device = NULL, + .sync_sg_for_cpu = NULL, + .sync_sg_for_device = NULL, + .map_sg = gart_map_sg, + .unmap_sg = gart_unmap_sg, + .dma_supported = gart_dma_supported, +}; + static int __init pci_iommu_init(void) { struct agp_kern_info info; @@ -799,7 +777,6 @@ static int __init pci_iommu_init(void) if (swiotlb) { no_iommu = 1; - printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); return -1; } @@ -807,8 +784,8 @@ static int __init pci_iommu_init(void) (!force_iommu && (end_pfn-1) < 0xffffffff>>PAGE_SHIFT) || !iommu_aperture || (no_agp && init_k8_gatt(&info) < 0)) { - printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); no_iommu = 1; + no_iommu_init(); return -1; } @@ -885,6 +862,9 @@ static int __init pci_iommu_init(void) flush_gart(NULL); + printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); + dma_ops = 
&gart_dma_ops; + return 0; } @@ -916,6 +896,8 @@ __init int iommu_setup(char *p) { int arg; + iommu_merge = 1; + while (*p) { if (!strncmp(p,"noagp",5)) no_agp = 1; diff -Naurp --exclude-from /home/muli/w/dontdiff quilt/arch/x86_64/kernel/pci-nommu.c dmaops/arch/x86_64/kernel/pci-nommu.c --- quilt/arch/x86_64/kernel/pci-nommu.c 2005-12-18 12:12:27.000000000 +0200 +++ dmaops/arch/x86_64/kernel/pci-nommu.c 2005-12-18 12:32:41.000000000 +0200 @@ -7,12 +7,15 @@ #include #include +/* these are defined here because pci-nommu.c is always compiled in */ int iommu_merge = 0; EXPORT_SYMBOL(iommu_merge); dma_addr_t bad_dma_address; EXPORT_SYMBOL(bad_dma_address); +/* This tells the BIO block layer to assume merging. Default to off + because we cannot guarantee merging later. */ int iommu_bio_merge = 0; EXPORT_SYMBOL(iommu_bio_merge); @@ -23,8 +26,8 @@ EXPORT_SYMBOL(iommu_sac_force); * Dummy IO MMU functions */ -void *dma_alloc_coherent(struct device *hwdev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) +void *nommu_alloc_coherent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) { void *ret; u64 mask; @@ -50,45 +53,69 @@ void *dma_alloc_coherent(struct device * memset(ret, 0, size); return ret; } -EXPORT_SYMBOL(dma_alloc_coherent); +EXPORT_SYMBOL(nommu_alloc_coherent); -void dma_free_coherent(struct device *hwdev, size_t size, +void nommu_free_coherent(struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle) { free_pages((unsigned long)vaddr, get_order(size)); } -EXPORT_SYMBOL(dma_free_coherent); +EXPORT_SYMBOL(nommu_free_coherent); -int dma_supported(struct device *hwdev, u64 mask) +/* Map a set of buffers described by scatterlist in streaming + * mode for DMA. This is the scatter-gather version of the + * above pci_map_single interface. Here the scatter gather list + * elements are each tagged with the appropriate dma address + * and length. They are obtained via sg_dma_{address,length}(SG). 
+ * + * NOTE: An implementation may be able to use a smaller number of + * DMA address/length pairs than there are SG table elements. + * (for example via virtual mapping capabilities) + * The routine returns the number of addr/length pairs actually + * used, at most nents. + * + * Device ownership issues as mentioned above for pci_map_single are + * the same here. + */ +int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, + int nents, int direction) { - /* - * we fall back to GFP_DMA when the mask isn't all 1s, - * so we can't guarantee allocations that must be - * within a tighter range than GFP_DMA.. - * RED-PEN this won't work for pci_map_single. Caller has to - * use GFP_DMA in the first place. - */ - if (mask < 0x00ffffff) - return 0; - - return 1; -} -EXPORT_SYMBOL(dma_supported); + int i; -int dma_get_cache_alignment(void) -{ - return boot_cpu_data.x86_clflush_size; + BUG_ON(direction == DMA_NONE); + for (i = 0; i < nents; i++ ) { + struct scatterlist *s = &sg[i]; + BUG_ON(!s->page); + s->dma_address = virt_to_bus(page_address(s->page) +s->offset); + s->dma_length = s->length; + } + return nents; } -EXPORT_SYMBOL(dma_get_cache_alignment); +EXPORT_SYMBOL(nommu_map_sg); -static int __init check_ram(void) -{ - if (end_pfn >= 0xffffffff>>PAGE_SHIFT) { - printk( - KERN_ERR "WARNING more than 4GB of memory but IOMMU not compiled in.\n" - KERN_ERR "WARNING 32bit PCI may malfunction.\n"); +/* Unmap a set of streaming mode DMA translations. + * Again, cpu read rules concerning calls here are the same as for + * pci_unmap_single() above. 
+ */ +void nommu_unmap_sg(struct device *dev, struct scatterlist *sg, + int nents, int dir) +{ + int i; + for (i = 0; i < nents; i++) { + struct scatterlist *s = &sg[i]; + BUG_ON(s->page == NULL); + BUG_ON(s->dma_address == 0); + dma_unmap_single(dev, s->dma_address, s->dma_length, dir); } - return 0; -} -__initcall(check_ram); +} +EXPORT_SYMBOL(nommu_unmap_sg); +void __init no_iommu_init(void) +{ + printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); + dma_ops = NULL; + if (end_pfn >= 0xffffffff>>PAGE_SHIFT) { + printk(KERN_ERR "WARNING more than 4GB of memory but IOMMU disabled.\n" + KERN_ERR "WARNING 32bit PCI may malfunction.\n"); + } +} diff -Naurp --exclude-from /home/muli/w/dontdiff quilt/arch/x86_64/kernel/setup.c dmaops/arch/x86_64/kernel/setup.c --- quilt/arch/x86_64/kernel/setup.c 2005-12-18 12:17:31.000000000 +0200 +++ dmaops/arch/x86_64/kernel/setup.c 2005-12-18 12:52:47.000000000 +0200 @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -61,7 +62,9 @@ #include #include #include +#include #include +#include /* * Machine setup.. 
@@ -88,6 +91,25 @@ unsigned long saved_video_mode; #ifdef CONFIG_SWIOTLB int swiotlb; + +struct dma_mapping_ops swiotlb_dma_ops = { + .mapping_error = swiotlb_dma_mapping_error, + .alloc_coherent = gart_alloc_coherent, /* FIXME: we are called via gart_alloc_coherent */ + .free_coherent = swiotlb_free_coherent, + .map_single = swiotlb_map_single, + .unmap_single = swiotlb_unmap_single, + .sync_single_for_cpu = swiotlb_sync_single_for_cpu, + .sync_single_for_device = swiotlb_sync_single_for_device, + .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, + .sync_single_range_for_device = swiotlb_sync_single_range_for_device, + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = swiotlb_sync_sg_for_device, + .map_sg = swiotlb_map_sg, + .unmap_sg = swiotlb_unmap_sg, + /* FIXME: historically we used gart_dma_supported, keep it the same way */ + .dma_supported = gart_dma_supported, +}; + EXPORT_SYMBOL(swiotlb); #endif diff -Naurp --exclude-from /home/muli/w/dontdiff quilt/arch/x86_64/mm/init.c dmaops/arch/x86_64/mm/init.c --- quilt/arch/x86_64/mm/init.c 2005-12-18 12:17:32.000000000 +0200 +++ dmaops/arch/x86_64/mm/init.c 2005-12-18 12:43:55.000000000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -38,11 +39,16 @@ #include #include #include +#include +#include #ifndef Dprintk #define Dprintk(x...) 
#endif +struct dma_mapping_ops* dma_ops; +EXPORT_SYMBOL(dma_ops); + static unsigned long dma_reserve __initdata; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -423,11 +429,17 @@ void __init mem_init(void) long codesize, reservedpages, datasize, initsize; #ifdef CONFIG_SWIOTLB - if (!iommu_aperture && + /* don't initialize swiotlb if iommu=off (no_iommu=1) */ + if (!iommu_aperture && !no_iommu && ((end_pfn-1) >= 0xffffffff>>PAGE_SHIFT || force_iommu)) swiotlb = 1; - if (swiotlb) - swiotlb_init(); + if (swiotlb) { + swiotlb_init(); + printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); + dma_ops = &swiotlb_dma_ops; + } +#else /* no SWIOTLB implies no HW IOMMU */ + no_iommu_init(); #endif /* How many end-of-memory variables you have, grandma! */ diff -Naurp --exclude-from /home/muli/w/dontdiff quilt/include/asm-x86_64/dma-mapping.h dmaops/include/asm-x86_64/dma-mapping.h --- quilt/include/asm-x86_64/dma-mapping.h 2005-12-18 12:12:41.000000000 +0200 +++ dmaops/include/asm-x86_64/dma-mapping.h 2005-12-18 12:32:41.000000000 +0200 @@ -11,143 +11,256 @@ #include #include #include +#include + +struct dma_mapping_ops { + int (*mapping_error)(dma_addr_t dma_addr); + void* (*alloc_coherent)(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp); + void (*free_coherent)(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle); + dma_addr_t (*map_single)(struct device *hwdev, void *ptr, + size_t size, int direction); + void (*unmap_single)(struct device *dev, dma_addr_t addr, + size_t size, int direction); + void (*sync_single_for_cpu)(struct device *hwdev, + dma_addr_t dma_handle, size_t size, + int direction); + void (*sync_single_for_device)(struct device *hwdev, + dma_addr_t dma_handle, size_t size, + int direction); + void (*sync_single_range_for_cpu)(struct device *hwdev, + dma_addr_t dma_handle, unsigned long offset, + size_t size, int direction); + void (*sync_single_range_for_device)(struct device *hwdev, + 
dma_addr_t dma_handle, unsigned long offset, + size_t size, int direction); + void (*sync_sg_for_cpu)(struct device *hwdev, + struct scatterlist *sg, int nelems, + int direction); + void (*sync_sg_for_device)(struct device *hwdev, + struct scatterlist *sg, int nelems, + int direction); + int (*map_sg)(struct device *hwdev, struct scatterlist *sg, + int nents, int direction); + void (*unmap_sg)(struct device *hwdev, + struct scatterlist *sg, int nents, + int direction); + int (*dma_supported)(struct device *hwdev, u64 mask); +}; extern dma_addr_t bad_dma_address; -#define dma_mapping_error(x) \ - (swiotlb ? swiotlb_dma_mapping_error(x) : ((x) == bad_dma_address)) +extern struct dma_mapping_ops* dma_ops; +extern int iommu_merge; -void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp); -void dma_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle); +#define have_iommu (unlikely(dma_ops != NULL)) -#ifdef CONFIG_GART_IOMMU +static inline int dma_mapping_error(dma_addr_t dma_addr) +{ + if (have_iommu && dma_ops->mapping_error) + return dma_ops->mapping_error(dma_addr); -extern dma_addr_t dma_map_single(struct device *hwdev, void *ptr, size_t size, - int direction); -extern void dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size, - int direction); + return (dma_addr == bad_dma_address); +} -#else +static inline void* +dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp) +{ + if (have_iommu && dma_ops->alloc_coherent) + return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); -/* No IOMMU */ + return nommu_alloc_coherent(dev, size, dma_handle, gfp); +} -static inline dma_addr_t dma_map_single(struct device *hwdev, void *ptr, - size_t size, int direction) +static inline void +dma_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle) { - dma_addr_t addr; + if (have_iommu && dma_ops->free_coherent) { + 
dma_ops->free_coherent(dev, size, vaddr, dma_handle); + return; + } - if (direction == DMA_NONE) - out_of_line_bug(); - addr = virt_to_bus(ptr); - - if ((addr+size) & ~*hwdev->dma_mask) - out_of_line_bug(); - return addr; + nommu_free_coherent(dev, size, vaddr, dma_handle); } -static inline void dma_unmap_single(struct device *hwdev, dma_addr_t dma_addr, - size_t size, int direction) +static inline dma_addr_t +dma_map_single(struct device *hwdev, void *ptr, size_t size, + int direction) { - if (direction == DMA_NONE) - out_of_line_bug(); - /* Nothing to do */ + if (have_iommu && dma_ops->map_single) + return dma_ops->map_single(hwdev, ptr, size, direction); + + return nommu_map_single(hwdev, ptr, size, direction); } -#endif +static inline void +dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size, + int direction) +{ + if (have_iommu && dma_ops->unmap_single) { + dma_ops->unmap_single(dev, addr, size, direction); + return; + } + + nommu_unmap_single(dev, addr, size, direction); +} #define dma_map_page(dev,page,offset,size,dir) \ dma_map_single((dev), page_address(page)+(offset), (size), (dir)) -static inline void dma_sync_single_for_cpu(struct device *hwdev, - dma_addr_t dma_handle, - size_t size, int direction) +#define dma_unmap_page dma_unmap_single + +static inline void +dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, + size_t size, int direction) { + void (*f)(struct device *hwdev, dma_addr_t dma_handle, + size_t size, int direction); + if (direction == DMA_NONE) out_of_line_bug(); - if (swiotlb) - return swiotlb_sync_single_for_cpu(hwdev,dma_handle,size,direction); + if (have_iommu && dma_ops->sync_single_for_cpu) { + f = dma_ops->sync_single_for_cpu; + f(hwdev, dma_handle, size, direction); + return; + } flush_write_buffers(); } -static inline void dma_sync_single_for_device(struct device *hwdev, - dma_addr_t dma_handle, - size_t size, int direction) +static inline void +dma_sync_single_for_device(struct device *hwdev, 
dma_addr_t dma_handle, + size_t size, int direction) { - if (direction == DMA_NONE) + void (*f)(struct device *hwdev, dma_addr_t dma_handle, + size_t size, int direction); + + if (direction == DMA_NONE) out_of_line_bug(); - if (swiotlb) - return swiotlb_sync_single_for_device(hwdev,dma_handle,size,direction); + if (have_iommu && dma_ops->sync_single_for_device) { + f = dma_ops->sync_single_for_device; + f(hwdev, dma_handle, size, direction); + return; + } flush_write_buffers(); } -static inline void dma_sync_single_range_for_cpu(struct device *hwdev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, int direction) +static inline void +dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, + unsigned long offset, size_t size, int direction) { + void (*f)(struct device *hwdev, dma_addr_t dma_handle, + unsigned long offset, size_t size, int direction); + if (direction == DMA_NONE) out_of_line_bug(); - if (swiotlb) - return swiotlb_sync_single_range_for_cpu(hwdev,dma_handle,offset,size,direction); + if (have_iommu && dma_ops->sync_single_range_for_cpu) { + f = dma_ops->sync_single_range_for_cpu; + f(hwdev, dma_handle, offset, size, direction); + return; + } flush_write_buffers(); } -static inline void dma_sync_single_range_for_device(struct device *hwdev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, int direction) +static inline void +dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, + unsigned long offset, size_t size, int direction) { - if (direction == DMA_NONE) + void (*f)(struct device *hwdev, dma_addr_t dma_handle, + unsigned long offset, size_t size, int direction); + + if (direction == DMA_NONE) out_of_line_bug(); - if (swiotlb) - return swiotlb_sync_single_range_for_device(hwdev,dma_handle,offset,size,direction); + if (have_iommu && dma_ops->sync_single_range_for_device) { + f = dma_ops->sync_single_range_for_device; + f(hwdev, dma_handle, offset, size, direction); + return; + } 
flush_write_buffers(); } -static inline void dma_sync_sg_for_cpu(struct device *hwdev, - struct scatterlist *sg, - int nelems, int direction) +static inline void +dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, + int nelems, int direction) { + void (*f)(struct device *hwdev, struct scatterlist *sg, + int nelems, int direction); + if (direction == DMA_NONE) out_of_line_bug(); - if (swiotlb) - return swiotlb_sync_sg_for_cpu(hwdev,sg,nelems,direction); + if (have_iommu && dma_ops->sync_sg_for_cpu) { + f = dma_ops->sync_sg_for_cpu; + f(hwdev, sg, nelems, direction); + return; + } flush_write_buffers(); } -static inline void dma_sync_sg_for_device(struct device *hwdev, - struct scatterlist *sg, - int nelems, int direction) +static inline void +dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, + int nelems, int direction) { + void (*f)(struct device *hwdev, struct scatterlist *sg, + int nelems, int direction); + if (direction == DMA_NONE) out_of_line_bug(); - if (swiotlb) - return swiotlb_sync_sg_for_device(hwdev,sg,nelems,direction); + if (have_iommu && dma_ops->sync_sg_for_device) { + f = dma_ops->sync_sg_for_device; + f(hwdev, sg, nelems, direction); + return; + } flush_write_buffers(); } -extern int dma_map_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction); -extern void dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, - int nents, int direction); +static inline int +dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction) +{ + if (have_iommu && dma_ops->map_sg) + return dma_ops->map_sg(hwdev, sg, nents, direction); -#define dma_unmap_page dma_unmap_single + return nommu_map_sg(hwdev, sg, nents, direction); +} + +static inline void +dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, + int direction) +{ + if (have_iommu && dma_ops->unmap_sg) { + dma_ops->unmap_sg(hwdev, sg, nents, direction); + return; + } + + nommu_unmap_sg(hwdev, sg, nents, 
direction); +} + +static inline int dma_supported(struct device *hwdev, u64 mask) +{ + if (have_iommu && dma_ops->dma_supported) + return dma_ops->dma_supported(hwdev, mask); + + return nommu_dma_supported(hwdev, mask); +} + +/* same for gart, swiotlb, and nommu */ +static inline int dma_get_cache_alignment(void) +{ + return boot_cpu_data.x86_clflush_size; +} -extern int dma_supported(struct device *hwdev, u64 mask); -extern int dma_get_cache_alignment(void); #define dma_is_consistent(h) 1 static inline int dma_set_mask(struct device *dev, u64 mask) @@ -158,9 +271,10 @@ static inline int dma_set_mask(struct de return 0; } -static inline void dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir) +static inline void +dma_cache_sync(void *vaddr, size_t size, enum dma_data_direction dir) { flush_write_buffers(); } -#endif +#endif /* _X8664_DMA_MAPPING_H */ diff -Naurp --exclude-from /home/muli/w/dontdiff quilt/include/asm-x86_64/gart-mapping.h dmaops/include/asm-x86_64/gart-mapping.h --- quilt/include/asm-x86_64/gart-mapping.h 1970-01-01 02:00:00.000000000 +0200 +++ dmaops/include/asm-x86_64/gart-mapping.h 2005-12-18 12:32:41.000000000 +0200 @@ -0,0 +1,16 @@ +#ifndef _X8664_GART_MAPPING_H +#define _X8664_GART_MAPPING_H 1 + +#include +#include + +struct device; + +extern void* +gart_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp); + +extern int +gart_dma_supported(struct device *hwdev, u64 mask); + +#endif /* _X8664_GART_MAPPING_H */ diff -Naurp --exclude-from /home/muli/w/dontdiff quilt/include/asm-x86_64/nommu-mapping.h dmaops/include/asm-x86_64/nommu-mapping.h --- quilt/include/asm-x86_64/nommu-mapping.h 1970-01-01 02:00:00.000000000 +0200 +++ dmaops/include/asm-x86_64/nommu-mapping.h 2005-12-18 12:32:41.000000000 +0200 @@ -0,0 +1,65 @@ +#ifndef _ASM_NOMMU_MAPPING_H +#define _ASM_NOMMU_MAPPING_H 1 + +#include + +/* NOMMU DMA mapping implementation */ +extern void* +nommu_alloc_coherent(struct device *dev, size_t 
size, + dma_addr_t *dma_handle, gfp_t gfp); + +extern void +nommu_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle); + +static inline dma_addr_t +nommu_map_single(struct device *hwdev, void *ptr, size_t size, int direction) +{ + dma_addr_t addr; + + if (direction == DMA_NONE) + out_of_line_bug(); + addr = virt_to_bus(ptr); + + if ((addr+size) & ~*hwdev->dma_mask) + out_of_line_bug(); + + return addr; +} + +static inline void +nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size, + int direction) +{ + if (direction == DMA_NONE) + out_of_line_bug(); + /* Nothing to do */ +} + +extern int +nommu_map_sg(struct device *hwdev, struct scatterlist *sg, + int nents, int direction); + +extern void +nommu_unmap_sg(struct device *hwdev, struct scatterlist *sg, + int nents, int direction); + +static inline int +nommu_dma_supported(struct device *hwdev, u64 mask) +{ + /* + * we fall back to GFP_DMA when the mask isn't all 1s, + * so we can't guarantee allocations that must be + * within a tighter range than GFP_DMA.. + * RED-PEN this won't work for pci_map_single. Caller has to + * use GFP_DMA in the first place. + */ + if (mask < 0x00ffffff) + return 0; + + return 1; +} + +extern void __init no_iommu_init(void); + +#endif /* _ASM_NOMMU_MAPPING_H */ diff -Naurp --exclude-from /home/muli/w/dontdiff quilt/include/asm-x86_64/pci.h dmaops/include/asm-x86_64/pci.h --- quilt/include/asm-x86_64/pci.h 2005-12-18 12:07:10.000000000 +0200 +++ dmaops/include/asm-x86_64/pci.h 2005-12-18 12:32:41.000000000 +0200 @@ -42,18 +42,20 @@ int pcibios_set_irq_routing(struct pci_d #include #include #include +#include /* for have_iommu */ extern int iommu_setup(char *opt); -#ifdef CONFIG_GART_IOMMU /* The PCI address space does equal the physical memory * address space. 
The networking and block device layers use * this boolean for bounce buffer decisions * - * On AMD64 it mostly equals, but we set it to zero to tell some subsystems - * that an IOMMU is available. + * On AMD64 it mostly equals, but we set it to zero if a hardware + * IOMMU (gart) or software IOMMU (swiotlb) is available. */ -#define PCI_DMA_BUS_IS_PHYS (no_iommu ? 1 : 0) +#define PCI_DMA_BUS_IS_PHYS (have_iommu ? 0 : 1) + +#ifdef CONFIG_GART_IOMMU /* * x86-64 always supports DAC, but sometimes it is useful to force @@ -79,7 +81,6 @@ extern int iommu_sac_force; #else /* No IOMMU */ -#define PCI_DMA_BUS_IS_PHYS 1 #define pci_dac_dma_supported(pci_dev, mask) 1 #define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) diff -Naurp --exclude-from /home/muli/w/dontdiff quilt/include/asm-x86_64/swiotlb.h dmaops/include/asm-x86_64/swiotlb.h --- quilt/include/asm-x86_64/swiotlb.h 2005-12-18 12:12:41.000000000 +0200 +++ dmaops/include/asm-x86_64/swiotlb.h 2005-12-18 12:32:41.000000000 +0200 @@ -3,6 +3,8 @@ #include +#include + /* SWIOTLB interface */ extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, @@ -38,6 +40,9 @@ extern void *swiotlb_alloc_coherent (str dma_addr_t *dma_handle, gfp_t flags); extern void swiotlb_free_coherent (struct device *hwdev, size_t size, void *vaddr, dma_addr_t dma_handle); +extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); + +extern struct dma_mapping_ops swiotlb_dma_ops; #ifdef CONFIG_SWIOTLB extern int swiotlb; @@ -45,4 +50,4 @@ extern int swiotlb; #define swiotlb 0 #endif -#endif +#endif /* _ASM_SWIOTLB_H */