/*
* Dynamic DMA mapping support for AMD Hammer.
*
* Use the integrated AGP GART in the Hammer northbridge as an IOMMU for PCI.
 * This allows the use of PCI devices that only support 32bit addresses
 * on systems with more than 4GB of memory.
*
* See Documentation/DMA-mapping.txt for the interface specification.
*
* Copyright 2002 Andi Kleen, SuSE Labs.
*/
#include <linux/config.h>
#include <linux/types.h>
#include <linux/ctype.h>
#include <linux/agp_backend.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/topology.h>
#include <linux/interrupt.h>
#include <linux/bitops.h>
#include <asm/atomic.h>
#include <asm/io.h>
#include <asm/mtrr.h>
#include <asm/pgtable.h>
#include <asm/proto.h>
#include <asm/cacheflush.h>
#include <asm/kdebug.h>
dma_addr_t bad_dma_address;
unsigned long iommu_bus_base; /* GART remapping area (physical) */
static unsigned long iommu_size; /* size of remapping area in bytes */
static unsigned long iommu_pages; /* .. and in pages */
u32 *iommu_gatt_base; /* Remapping table */
int no_iommu;
static int no_agp;
#ifdef CONFIG_IOMMU_DEBUG
int panic_on_overflow = 1;
int force_iommu = 1;
#else
int panic_on_overflow = 0;
int force_iommu = 0;
#endif
int iommu_merge = 1;
int iommu_sac_force = 0;
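/* These defaults (and those above) are normally overridden at boot via the
   iommu= kernel command line option, parsed later in this file. */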
/* If this is disabled the IOMMU will use an optimized flushing strategy
   of only flushing when a mapping is reused. With it enabled the GART is
   flushed for every mapping. The problem is that the lazy flush seems to
   trigger bugs with some popular PCI cards, in particular 3ware (but it
   has also been seen with at least QLogic). */
int iommu_fullflush = 1;
/* This tells the BIO block layer to assume merging. Default to off
because we cannot guarantee merging later. */
int iommu_bio_merge = 0;
#define MAX_NB 8
/* Allocation bitmap for the remapping area */
static DEFINE_SPINLOCK(iommu_bitmap_lock);
static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */
static u32 gart_unmapped_entry;
#define GPTE_VALID 1
#define GPTE_COHERENT 2
#define GPTE_ENCODE(x) \
(((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT)
#define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28))
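/*
 * Worked example (illustrative): for the physical address 0x123456000
 * (just above 4GB), GPTE_ENCODE gives
 *	0x23456000 | (0x1 << 4) | GPTE_VALID | GPTE_COHERENT = 0x23456013
 * i.e. address bits 12-31 stay in PTE bits 12-31 and address bits 32-39
 * move down into PTE bits 4-11. GPTE_DECODE(0x23456013) shifts bits 4-11
 * back up by 28 and recovers 0x123456000.
 */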
#define to_pages(addr,size) \
(round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT)
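/*
 * Example: to_pages(0x1001, 0x2000) covers bytes 0x1001..0x3000, which
 * touch three 4K pages: (0x1 + 0x2000) rounded up to 0x3000, then
 * shifted right by PAGE_SHIFT, yields 3.
 */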
#define for_all_nb(dev) \
dev = NULL; \
	while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev)) != NULL)\
if (dev->bus->number == 0 && \
(PCI_SLOT(dev->devfn) >= 24) && (PCI_SLOT(dev->devfn) <= 31))
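/*
 * for_all_nb() walks the K8 northbridge miscellaneous control devices
 * (AMD device id 0x1103), which sit on bus 0 at slots 24-31 (one per
 * node). A minimal usage sketch, modeled on the flush-word caching this
 * file does at init time:
 *
 *	struct pci_dev *dev;
 *	for_all_nb(dev) {
 *		u32 flag;
 *		pci_read_config_dword(dev, 0x9c, &flag);
 *		...
 *	}
 */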
static struct pci_dev *northbridges[MAX_NB];
static u32 northbridge_flush_word[MAX_NB];
#define EMERGENCY_PAGES 32 /* = 128KB */
#ifdef CONFIG_AGP
#define AGPEXTERN extern
#else
#define AGPEXTERN
#endif
/* backdoor interface to AGP driver */
AGPEXTERN int agp_memory_reserved;
AGPEXTERN __u32 *agp_gatt_table;
static unsigned long next_bit; /* protected by iommu_bitmap_lock */
static int need_flush; /* global flush state. Set on each GART wraparound. */
static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem,
size_t size, int dir, int do_panic);
/* Dummy device used for NULL arguments (normally ISA). A smaller DMA mask
   would probably be better, but this is bug-for-bug compatible with i386. */
static struct device fallback_dev = {
.bus_id = "fallback device",
.coherent_dma_mask = 0xffffffff,
.dma_mask = &fallback_dev.coherent_dma_mask,
};
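/*
 * alloc_iommu() hands out a contiguous range of aperture pages. Expected
 * calling pattern (a sketch, modeled on dma_map_area() declared above):
 * the returned offset indexes both the bitmap and the GATT, and -1 means
 * the aperture is exhausted.
 *
 *	iommu_page = alloc_iommu(npages);
 *	if (iommu_page == -1)
 *		return bad_dma_address;
 *	for (i = 0; i < npages; i++)
 *		iommu_gatt_base[iommu_page + i] =
 *			GPTE_ENCODE(phys_mem + i*PAGE_SIZE);
 *	dma_addr = iommu_bus_base + iommu_page*PAGE_SIZE +
 *			(phys_mem & ~PAGE_MASK);
 */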
static unsigned long alloc_iommu(int size)
{
unsigned long offset, flags;
spin_lock_irqsave(&iommu_bitmap_lock, flags);
	offset = find_next_zero_string(iommu_gart_bitmap, next_bit, iommu_pages, size);
if (offset == -1) {
need_flush = 1;
		offset = find_next_zero_string(iommu_gart_bitmap, 0, next_bit, size);
}
if (offset != -1) {
set_bit_string(iommu_gart_bitmap, offset, size);
		next_bit = offset + size;
if (next_bit >= iommu_pages) {
next_bit = 0;
need_flush = 1;
}
}
if (iommu_fullflush)
need_flush = 1;
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
return offset;
}
static void free_iommu(unsigned long offset, int size)
{
unsigned long flags;
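	/* A single bit can be cleared atomically by clear_bit(), so the
	   common single-page case avoids taking the bitmap lock. */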
if (size == 1) {
clear_bit(offset, iommu_gart_bitmap);
return;
}
spin_lock_irqsave(&iommu_bitmap_lock, flags);
__clear_bit_string(iommu_gart_bitmap, offset, size);
spin_unlock_irqrestore(&iommu_bitmap_lock, flags);
}
/*
 * Use the global flush state to avoid races with multiple flushers.
*/
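/*
 * Config space offset 0x9c in the K8 misc control function is the GART
 * cache control word: writing it back with bit 0 set starts a GART TLB
 * flush, and the hardware clears bit 0 again when the flush has
 * completed, which is what the wait loop below polls for.
 */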
static void flush_gart(struct device *dev)
{
unsigned long flags;
int flushed = 0;
int i, max;
spin_lock_irqsave(&iommu_bitmap_lock, flags);
if (need_flush) {
max = 0;
for (i = 0; i < MAX_NB; i++) {
if (!northbridges[i])
continue;
pci_write_config_dword(northbridges[i], 0x9c,
northbridge_flush_word[i] | 1);
flushed++;
max = i;
}
for (i = 0; i <= max