diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /drivers/parisc/sba_iommu.c |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'drivers/parisc/sba_iommu.c')
-rw-r--r-- | drivers/parisc/sba_iommu.c | 2165 |
1 files changed, 2165 insertions, 0 deletions
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c new file mode 100644 index 00000000000..82ea68b55df --- /dev/null +++ b/drivers/parisc/sba_iommu.c @@ -0,0 +1,2165 @@ +/* +** System Bus Adapter (SBA) I/O MMU manager +** +** (c) Copyright 2000-2004 Grant Grundler <grundler @ parisc-linux x org> +** (c) Copyright 2004 Naresh Kumar Inna <knaresh at india x hp x com> +** (c) Copyright 2000-2004 Hewlett-Packard Company +** +** Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code) +** +** This program is free software; you can redistribute it and/or modify +** it under the terms of the GNU General Public License as published by +** the Free Software Foundation; either version 2 of the License, or +** (at your option) any later version. +** +** +** This module initializes the IOC (I/O Controller) found on B1000/C3000/ +** J5000/J7000/N-class/L-class machines and their successors. +** +** FIXME: add DMA hint support programming in both sba and lba modules. +*/ + +#include <linux/config.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/slab.h> +#include <linux/init.h> + +#include <linux/mm.h> +#include <linux/string.h> +#include <linux/pci.h> + +#include <asm/byteorder.h> +#include <asm/io.h> +#include <asm/dma.h> /* for DMA_CHUNK_SIZE */ + +#include <asm/hardware.h> /* for register_parisc_driver() stuff */ + +#include <linux/proc_fs.h> +#include <asm/runway.h> /* for proc_runway_root */ +#include <asm/pdc.h> /* for PDC_MODEL_* */ +#include <asm/pdcpat.h> /* for is_pdc_pat() */ +#include <asm/parisc-device.h> + + +/* declared in arch/parisc/kernel/setup.c */ +extern struct proc_dir_entry * proc_mckinley_root; + +#define MODULE_NAME "SBA" + +#ifdef CONFIG_PROC_FS +/* depends on proc fs support. But costs CPU performance */ +#undef SBA_COLLECT_STATS +#endif + +/* +** The number of debug flags is a clue - this code is fragile. +** Don't even think about messing with it unless you have +** plenty of 710's to sacrifice to the computer gods. :^) +*/ +#undef DEBUG_SBA_INIT +#undef DEBUG_SBA_RUN +#undef DEBUG_SBA_RUN_SG +#undef DEBUG_SBA_RESOURCE +#undef ASSERT_PDIR_SANITY +#undef DEBUG_LARGE_SG_ENTRIES +#undef DEBUG_DMB_TRAP + +#ifdef DEBUG_SBA_INIT +#define DBG_INIT(x...) printk(x) +#else +#define DBG_INIT(x...) +#endif + +#ifdef DEBUG_SBA_RUN +#define DBG_RUN(x...) printk(x) +#else +#define DBG_RUN(x...) +#endif + +#ifdef DEBUG_SBA_RUN_SG +#define DBG_RUN_SG(x...) printk(x) +#else +#define DBG_RUN_SG(x...) +#endif + + +#ifdef DEBUG_SBA_RESOURCE +#define DBG_RES(x...) printk(x) +#else +#define DBG_RES(x...) +#endif + +#if defined(__LP64__) && !defined(CONFIG_PDC_NARROW) +/* "low end" PA8800 machines use ZX1 chipset */ +#define ZX1_SUPPORT +#endif + +#define SBA_INLINE __inline__ + + +/* +** The number of pdir entries to "free" before issueing +** a read to PCOM register to flush out PCOM writes. +** Interacts with allocation granularity (ie 4 or 8 entries +** allocated and free'd/purged at a time might make this +** less interesting). +*/ +#define DELAYED_RESOURCE_CNT 16 + +#define DEFAULT_DMA_HINT_REG 0 + +#define ASTRO_RUNWAY_PORT 0x582 +#define IKE_MERCED_PORT 0x803 +#define REO_MERCED_PORT 0x804 +#define REOG_MERCED_PORT 0x805 +#define PLUTO_MCKINLEY_PORT 0x880 + +#define SBA_FUNC_ID 0x0000 /* function id */ +#define SBA_FCLASS 0x0008 /* function class, bist, header, rev... */ + +#define IS_ASTRO(id) ((id)->hversion == ASTRO_RUNWAY_PORT) +#define IS_IKE(id) ((id)->hversion == IKE_MERCED_PORT) +#define IS_PLUTO(id) ((id)->hversion == PLUTO_MCKINLEY_PORT) + +#define SBA_FUNC_SIZE 4096 /* SBA configuration function reg set */ + +#define ASTRO_IOC_OFFSET (32 * SBA_FUNC_SIZE) +#define PLUTO_IOC_OFFSET (1 * SBA_FUNC_SIZE) +/* Ike's IOC's occupy functions 2 and 3 */ +#define IKE_IOC_OFFSET(p) ((p+2) * SBA_FUNC_SIZE) + +#define IOC_CTRL 0x8 /* IOC_CTRL offset */ +#define IOC_CTRL_TC (1 << 0) /* TOC Enable */ +#define IOC_CTRL_CE (1 << 1) /* Coalesce Enable */ +#define IOC_CTRL_DE (1 << 2) /* Dillon Enable */ +#define IOC_CTRL_RM (1 << 8) /* Real Mode */ +#define IOC_CTRL_NC (1 << 9) /* Non Coherent Mode */ +#define IOC_CTRL_D4 (1 << 11) /* Disable 4-byte coalescing */ +#define IOC_CTRL_DD (1 << 13) /* Disable distr. LMMIO range coalescing */ + +#define MAX_IOC 2 /* per Ike. Pluto/Astro only have 1. */ + +#define ROPES_PER_IOC 8 /* per Ike half or Pluto/Astro */ + + +/* +** Offsets into MBIB (Function 0 on Ike and hopefully Astro) +** Firmware programs this stuff. Don't touch it. +*/ +#define LMMIO_DIRECT0_BASE 0x300 +#define LMMIO_DIRECT0_MASK 0x308 +#define LMMIO_DIRECT0_ROUTE 0x310 + +#define LMMIO_DIST_BASE 0x360 +#define LMMIO_DIST_MASK 0x368 +#define LMMIO_DIST_ROUTE 0x370 + +#define IOS_DIST_BASE 0x390 +#define IOS_DIST_MASK 0x398 +#define IOS_DIST_ROUTE 0x3A0 + +#define IOS_DIRECT_BASE 0x3C0 +#define IOS_DIRECT_MASK 0x3C8 +#define IOS_DIRECT_ROUTE 0x3D0 + +/* +** Offsets into I/O TLB (Function 2 and 3 on Ike) +*/ +#define ROPE0_CTL 0x200 /* "regbus pci0" */ +#define ROPE1_CTL 0x208 +#define ROPE2_CTL 0x210 +#define ROPE3_CTL 0x218 +#define ROPE4_CTL 0x220 +#define ROPE5_CTL 0x228 +#define ROPE6_CTL 0x230 +#define ROPE7_CTL 0x238 + +#define HF_ENABLE 0x40 + + +#define IOC_IBASE 0x300 /* IO TLB */ +#define IOC_IMASK 0x308 +#define IOC_PCOM 0x310 +#define IOC_TCNFG 0x318 +#define IOC_PDIR_BASE 0x320 + +/* AGP GART driver looks for this */ +#define SBA_IOMMU_COOKIE 0x0000badbadc0ffeeUL + + +/* +** IOC supports 4/8/16/64KB page sizes (see TCNFG register) +** It's safer (avoid memory corruption) to keep DMA page mappings +** equivalently sized to VM PAGE_SIZE. +** +** We really can't avoid generating a new mapping for each +** page since the Virtual Coherence Index has to be generated +** and updated for each page. +** +** PAGE_SIZE could be greater than IOVP_SIZE. But not the inverse. +*/ +#define IOVP_SIZE PAGE_SIZE +#define IOVP_SHIFT PAGE_SHIFT +#define IOVP_MASK PAGE_MASK + +#define SBA_PERF_CFG 0x708 /* Performance Counter stuff */ +#define SBA_PERF_MASK1 0x718 +#define SBA_PERF_MASK2 0x730 + + +/* +** Offsets into PCI Performance Counters (functions 12 and 13) +** Controlled by PERF registers in function 2 & 3 respectively. +*/ +#define SBA_PERF_CNT1 0x200 +#define SBA_PERF_CNT2 0x208 +#define SBA_PERF_CNT3 0x210 + + +struct ioc { + void __iomem *ioc_hpa; /* I/O MMU base address */ + char *res_map; /* resource map, bit == pdir entry */ + u64 *pdir_base; /* physical base address */ + unsigned long ibase; /* pdir IOV Space base - shared w/lba_pci */ + unsigned long imask; /* pdir IOV Space mask - shared w/lba_pci */ +#ifdef ZX1_SUPPORT + unsigned long iovp_mask; /* help convert IOVA to IOVP */ +#endif + unsigned long *res_hint; /* next avail IOVP - circular search */ + spinlock_t res_lock; + unsigned int res_bitshift; /* from the LEFT! */ + unsigned int res_size; /* size of resource map in bytes */ +#if SBA_HINT_SUPPORT +/* FIXME : DMA HINTs not used */ + unsigned long hint_mask_pdir; /* bits used for DMA hints */ + unsigned int hint_shift_pdir; +#endif +#if DELAYED_RESOURCE_CNT > 0 + int saved_cnt; + struct sba_dma_pair { + dma_addr_t iova; + size_t size; + } saved[DELAYED_RESOURCE_CNT]; +#endif + +#ifdef SBA_COLLECT_STATS +#define SBA_SEARCH_SAMPLE 0x100 + unsigned long avg_search[SBA_SEARCH_SAMPLE]; + unsigned long avg_idx; /* current index into avg_search */ + unsigned long used_pages; + unsigned long msingle_calls; + unsigned long msingle_pages; + unsigned long msg_calls; + unsigned long msg_pages; + unsigned long usingle_calls; + unsigned long usingle_pages; + unsigned long usg_calls; + unsigned long usg_pages; +#endif + + /* STUFF We don't need in performance path */ + unsigned int pdir_size; /* in bytes, determined by IOV Space size */ +}; + +struct sba_device { + struct sba_device *next; /* list of SBA's in system */ + struct parisc_device *dev; /* dev found in bus walk */ + struct parisc_device_id *iodc; /* data about dev from firmware */ + const char *name; + void __iomem *sba_hpa; /* base address */ + spinlock_t sba_lock; + unsigned int flags; /* state/functionality enabled */ + unsigned int hw_rev; /* HW revision of chip */ + + struct resource chip_resv; /* MMIO reserved for chip */ + struct resource iommu_resv; /* MMIO reserved for iommu */ + + unsigned int num_ioc; /* number of on-board IOC's */ + struct ioc ioc[MAX_IOC]; +}; + + +static struct sba_device *sba_list; + +static unsigned long ioc_needs_fdc = 0; + +/* global count of IOMMUs in the system */ +static unsigned int global_ioc_cnt = 0; + +/* PA8700 (Piranha 2.2) bug workaround */ +static unsigned long piranha_bad_128k = 0; + +/* Looks nice and keeps the compiler happy */ +#define SBA_DEV(d) ((struct sba_device *) (d)) + +#if SBA_AGP_SUPPORT +static int reserve_sba_gart = 1; +#endif + +#define ROUNDUP(x,y) ((x + ((y)-1)) & ~((y)-1)) + + +/************************************ +** SBA register read and write support +** +** BE WARNED: register writes are posted. +** (ie follow writes which must reach HW with a read) +** +** Superdome (in particular, REO) allows only 64-bit CSR accesses. +*/ +#define READ_REG32(addr) le32_to_cpu(__raw_readl(addr)) +#define READ_REG64(addr) le64_to_cpu(__raw_readq(addr)) +#define WRITE_REG32(val, addr) __raw_writel(cpu_to_le32(val), addr) +#define WRITE_REG64(val, addr) __raw_writeq(cpu_to_le64(val), addr) + +#ifdef __LP64__ +#define READ_REG(addr) READ_REG64(addr) +#define WRITE_REG(value, addr) WRITE_REG64(value, addr) +#else +#define READ_REG(addr) READ_REG32(addr) +#define WRITE_REG(value, addr) WRITE_REG32(value, addr) +#endif + +#ifdef DEBUG_SBA_INIT + +/* NOTE: When __LP64__ isn't defined, READ_REG64() is two 32-bit reads */ + +/** + * sba_dump_ranges - debugging only - print ranges assigned to this IOA + * @hpa: base address of the sba + * + * Print the MMIO and IO Port address ranges forwarded by an Astro/Ike/RIO + * IO Adapter (aka Bus Converter). + */ +static void +sba_dump_ranges(void __iomem *hpa) +{ + DBG_INIT("SBA at 0x%p\n", hpa); + DBG_INIT("IOS_DIST_BASE : %Lx\n", READ_REG64(hpa+IOS_DIST_BASE)); + DBG_INIT("IOS_DIST_MASK : %Lx\n", READ_REG64(hpa+IOS_DIST_MASK)); + DBG_INIT("IOS_DIST_ROUTE : %Lx\n", READ_REG64(hpa+IOS_DIST_ROUTE)); + DBG_INIT("\n"); + DBG_INIT("IOS_DIRECT_BASE : %Lx\n", READ_REG64(hpa+IOS_DIRECT_BASE)); + DBG_INIT("IOS_DIRECT_MASK : %Lx\n", READ_REG64(hpa+IOS_DIRECT_MASK)); + DBG_INIT("IOS_DIRECT_ROUTE: %Lx\n", READ_REG64(hpa+IOS_DIRECT_ROUTE)); +} + +/** + * sba_dump_tlb - debugging only - print IOMMU operating parameters + * @hpa: base address of the IOMMU + * + * Print the size/location of the IO MMU PDIR. + */ +static void sba_dump_tlb(void __iomem *hpa) +{ + DBG_INIT("IO TLB at 0x%p\n", hpa); + DBG_INIT("IOC_IBASE : 0x%Lx\n", READ_REG64(hpa+IOC_IBASE)); + DBG_INIT("IOC_IMASK : 0x%Lx\n", READ_REG64(hpa+IOC_IMASK)); + DBG_INIT("IOC_TCNFG : 0x%Lx\n", READ_REG64(hpa+IOC_TCNFG)); + DBG_INIT("IOC_PDIR_BASE: 0x%Lx\n", READ_REG64(hpa+IOC_PDIR_BASE)); + DBG_INIT("\n"); +} +#else +#define sba_dump_ranges(x) +#define sba_dump_tlb(x) +#endif + + +#ifdef ASSERT_PDIR_SANITY + +/** + * sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @msg: text to print ont the output line. + * @pide: pdir index. + * + * Print one entry of the IO MMU PDIR in human readable form. + */ +static void +sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide) +{ + /* start printing from lowest pde in rval */ + u64 *ptr = &(ioc->pdir_base[pide & (~0U * BITS_PER_LONG)]); + unsigned long *rptr = (unsigned long *) &(ioc->res_map[(pide >>3) & ~(sizeof(unsigned long) - 1)]); + uint rcnt; + + printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n", + msg, + rptr, pide & (BITS_PER_LONG - 1), *rptr); + + rcnt = 0; + while (rcnt < BITS_PER_LONG) { + printk(KERN_DEBUG "%s %2d %p %016Lx\n", + (rcnt == (pide & (BITS_PER_LONG - 1))) + ? " -->" : " ", + rcnt, ptr, *ptr ); + rcnt++; + ptr++; + } + printk(KERN_DEBUG "%s", msg); +} + + +/** + * sba_check_pdir - debugging only - consistency checker + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @msg: text to print ont the output line. + * + * Verify the resource map and pdir state is consistent + */ +static int +sba_check_pdir(struct ioc *ioc, char *msg) +{ + u32 *rptr_end = (u32 *) &(ioc->res_map[ioc->res_size]); + u32 *rptr = (u32 *) ioc->res_map; /* resource map ptr */ + u64 *pptr = ioc->pdir_base; /* pdir ptr */ + uint pide = 0; + + while (rptr < rptr_end) { + u32 rval = *rptr; + int rcnt = 32; /* number of bits we might check */ + + while (rcnt) { + /* Get last byte and highest bit from that */ + u32 pde = ((u32) (((char *)pptr)[7])) << 24; + if ((rval ^ pde) & 0x80000000) + { + /* + ** BUMMER! -- res_map != pdir -- + ** Dump rval and matching pdir entries + */ + sba_dump_pdir_entry(ioc, msg, pide); + return(1); + } + rcnt--; + rval <<= 1; /* try the next bit */ + pptr++; + pide++; + } + rptr++; /* look at next word of res_map */ + } + /* It'd be nice if we always got here :^) */ + return 0; +} + + +/** + * sba_dump_sg - debugging only - print Scatter-Gather list + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @startsg: head of the SG list + * @nents: number of entries in SG list + * + * print the SG list so we can verify it's correct by hand. + */ +static void +sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents) +{ + while (nents-- > 0) { + printk(KERN_DEBUG " %d : %08lx/%05x %p/%05x\n", + nents, + (unsigned long) sg_dma_address(startsg), + sg_dma_len(startsg), + sg_virt_addr(startsg), startsg->length); + startsg++; + } +} + +#endif /* ASSERT_PDIR_SANITY */ + + + + +/************************************************************** +* +* I/O Pdir Resource Management +* +* Bits set in the resource map are in use. +* Each bit can represent a number of pages. +* LSbs represent lower addresses (IOVA's). +* +***************************************************************/ +#define PAGES_PER_RANGE 1 /* could increase this to 4 or 8 if needed */ + +/* Convert from IOVP to IOVA and vice versa. */ + +#ifdef ZX1_SUPPORT +/* Pluto (aka ZX1) boxes need to set or clear the ibase bits appropriately */ +#define SBA_IOVA(ioc,iovp,offset,hint_reg) ((ioc->ibase) | (iovp) | (offset)) +#define SBA_IOVP(ioc,iova) ((iova) & (ioc)->iovp_mask) +#else +/* only support Astro and ancestors. Saves a few cycles in key places */ +#define SBA_IOVA(ioc,iovp,offset,hint_reg) ((iovp) | (offset)) +#define SBA_IOVP(ioc,iova) (iova) +#endif + +#define PDIR_INDEX(iovp) ((iovp)>>IOVP_SHIFT) + +#define RESMAP_MASK(n) (~0UL << (BITS_PER_LONG - (n))) +#define RESMAP_IDX_MASK (sizeof(unsigned long) - 1) + + +/** + * sba_search_bitmap - find free space in IO PDIR resource bitmap + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @bits_wanted: number of entries we need. + * + * Find consecutive free bits in resource bitmap. + * Each bit represents one entry in the IO Pdir. + * Cool perf optimization: search for log2(size) bits at a time. + */ +static SBA_INLINE unsigned long +sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted) +{ + unsigned long *res_ptr = ioc->res_hint; + unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]); + unsigned long pide = ~0UL; + + if (bits_wanted > (BITS_PER_LONG/2)) { + /* Search word at a time - no mask needed */ + for(; res_ptr < res_end; ++res_ptr) { + if (*res_ptr == 0) { + *res_ptr = RESMAP_MASK(bits_wanted); + pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); + pide <<= 3; /* convert to bit address */ + break; + } + } + /* point to the next word on next pass */ + res_ptr++; + ioc->res_bitshift = 0; + } else { + /* + ** Search the resource bit map on well-aligned values. + ** "o" is the alignment. + ** We need the alignment to invalidate I/O TLB using + ** SBA HW features in the unmap path. + */ + unsigned long o = 1 << get_order(bits_wanted << PAGE_SHIFT); + uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o); + unsigned long mask; + + if (bitshiftcnt >= BITS_PER_LONG) { + bitshiftcnt = 0; + res_ptr++; + } + mask = RESMAP_MASK(bits_wanted) >> bitshiftcnt; + + DBG_RES("%s() o %ld %p", __FUNCTION__, o, res_ptr); + while(res_ptr < res_end) + { + DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr); + WARN_ON(mask == 0); + if(((*res_ptr) & mask) == 0) { + *res_ptr |= mask; /* mark resources busy! */ + pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map); + pide <<= 3; /* convert to bit address */ + pide += bitshiftcnt; + break; + } + mask >>= o; + bitshiftcnt += o; + if (mask == 0) { + mask = RESMAP_MASK(bits_wanted); + bitshiftcnt=0; + res_ptr++; + } + } + /* look in the same word on the next pass */ + ioc->res_bitshift = bitshiftcnt + bits_wanted; + } + + /* wrapped ? */ + if (res_end <= res_ptr) { + ioc->res_hint = (unsigned long *) ioc->res_map; + ioc->res_bitshift = 0; + } else { + ioc->res_hint = res_ptr; + } + return (pide); +} + + +/** + * sba_alloc_range - find free bits and mark them in IO PDIR resource bitmap + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @size: number of bytes to create a mapping for + * + * Given a size, find consecutive unmarked and then mark those bits in the + * resource bit map. + */ +static int +sba_alloc_range(struct ioc *ioc, size_t size) +{ + unsigned int pages_needed = size >> IOVP_SHIFT; +#ifdef SBA_COLLECT_STATS + unsigned long cr_start = mfctl(16); +#endif + unsigned long pide; + + pide = sba_search_bitmap(ioc, pages_needed); + if (pide >= (ioc->res_size << 3)) { + pide = sba_search_bitmap(ioc, pages_needed); + if (pide >= (ioc->res_size << 3)) + panic("%s: I/O MMU @ %p is out of mapping resources\n", + __FILE__, ioc->ioc_hpa); + } + +#ifdef ASSERT_PDIR_SANITY + /* verify the first enable bit is clear */ + if(0x00 != ((u8 *) ioc->pdir_base)[pide*sizeof(u64) + 7]) { + sba_dump_pdir_entry(ioc, "sba_search_bitmap() botched it?", pide); + } +#endif + + DBG_RES("%s(%x) %d -> %lx hint %x/%x\n", + __FUNCTION__, size, pages_needed, pide, + (uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map), + ioc->res_bitshift ); + +#ifdef SBA_COLLECT_STATS + { + unsigned long cr_end = mfctl(16); + unsigned long tmp = cr_end - cr_start; + /* check for roll over */ + cr_start = (cr_end < cr_start) ? -(tmp) : (tmp); + } + ioc->avg_search[ioc->avg_idx++] = cr_start; + ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1; + + ioc->used_pages += pages_needed; +#endif + + return (pide); +} + + +/** + * sba_free_range - unmark bits in IO PDIR resource bitmap + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @iova: IO virtual address which was previously allocated. + * @size: number of bytes to create a mapping for + * + * clear bits in the ioc's resource map + */ +static SBA_INLINE void +sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size) +{ + unsigned long iovp = SBA_IOVP(ioc, iova); + unsigned int pide = PDIR_INDEX(iovp); + unsigned int ridx = pide >> 3; /* convert bit to byte address */ + unsigned long *res_ptr = (unsigned long *) &((ioc)->res_map[ridx & ~RESMAP_IDX_MASK]); + + int bits_not_wanted = size >> IOVP_SHIFT; + + /* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */ + unsigned long m = RESMAP_MASK(bits_not_wanted) >> (pide & (BITS_PER_LONG - 1)); + + DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", + __FUNCTION__, (uint) iova, size, + bits_not_wanted, m, pide, res_ptr, *res_ptr); + +#ifdef SBA_COLLECT_STATS + ioc->used_pages -= bits_not_wanted; +#endif + + *res_ptr &= ~m; +} + + +/************************************************************** +* +* "Dynamic DMA Mapping" support (aka "Coherent I/O") +* +***************************************************************/ + +#if SBA_HINT_SUPPORT +#define SBA_DMA_HINT(ioc, val) ((val) << (ioc)->hint_shift_pdir) +#endif + +typedef unsigned long space_t; +#define KERNEL_SPACE 0 + +/** + * sba_io_pdir_entry - fill in one IO PDIR entry + * @pdir_ptr: pointer to IO PDIR entry + * @sid: process Space ID - currently only support KERNEL_SPACE + * @vba: Virtual CPU address of buffer to map + * @hint: DMA hint set to use for this mapping + * + * SBA Mapping Routine + * + * Given a virtual address (vba, arg2) and space id, (sid, arg1) + * sba_io_pdir_entry() loads the I/O PDIR entry pointed to by + * pdir_ptr (arg0). + * Using the bass-ackwards HP bit numbering, Each IO Pdir entry + * for Astro/Ike looks like: + * + * + * 0 19 51 55 63 + * +-+---------------------+----------------------------------+----+--------+ + * |V| U | PPN[43:12] | U | VI | + * +-+---------------------+----------------------------------+----+--------+ + * + * Pluto is basically identical, supports fewer physical address bits: + * + * 0 23 51 55 63 + * +-+------------------------+-------------------------------+----+--------+ + * |V| U | PPN[39:12] | U | VI | + * +-+------------------------+-------------------------------+----+--------+ + * + * V == Valid Bit (Most Significant Bit is bit 0) + * U == Unused + * PPN == Physical Page Number + * VI == Virtual Index (aka Coherent Index) + * + * LPA instruction output is put into PPN field. + * LCI (Load Coherence Index) instruction provides the "VI" bits. + * + * We pre-swap the bytes since PCX-W is Big Endian and the + * IOMMU uses little endian for the pdir. + */ + +void SBA_INLINE +sba_io_pdir_entry(u64 *pdir_ptr, space_t sid, unsigned long vba, + unsigned long hint) +{ + u64 pa; /* physical address */ + register unsigned ci; /* coherent index */ + + pa = virt_to_phys(vba); + pa &= IOVP_MASK; + + mtsp(sid,1); + asm("lci 0(%%sr1, %1), %0" : "=r" (ci) : "r" (vba)); + pa |= (ci >> 12) & 0xff; /* move CI (8 bits) into lowest byte */ + + pa |= 0x8000000000000000ULL; /* set "valid" bit */ + *pdir_ptr = cpu_to_le64(pa); /* swap and store into I/O Pdir */ + + /* + * If the PDC_MODEL capabilities has Non-coherent IO-PDIR bit set + * (bit #61, big endian), we have to flush and sync every time + * IO-PDIR is changed in Ike/Astro. + */ + if (ioc_needs_fdc) { + asm volatile("fdc 0(%%sr1,%0)\n\tsync" : : "r" (pdir_ptr)); + } +} + + +/** + * sba_mark_invalid - invalidate one or more IO PDIR entries + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @iova: IO Virtual Address mapped earlier + * @byte_cnt: number of bytes this mapping covers. + * + * Marking the IO PDIR entry(ies) as Invalid and invalidate + * corresponding IO TLB entry. The Ike PCOM (Purge Command Register) + * is to purge stale entries in the IO TLB when unmapping entries. + * + * The PCOM register supports purging of multiple pages, with a minium + * of 1 page and a maximum of 2GB. Hardware requires the address be + * aligned to the size of the range being purged. The size of the range + * must be a power of 2. The "Cool perf optimization" in the + * allocation routine helps keep that true. + */ +static SBA_INLINE void +sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) +{ + u32 iovp = (u32) SBA_IOVP(ioc,iova); + + /* Even though this is a big-endian machine, the entries + ** in the iopdir are little endian. That's why we clear the byte + ** at +7 instead of at +0. + */ + int off = PDIR_INDEX(iovp)*sizeof(u64)+7; + +#ifdef ASSERT_PDIR_SANITY + /* Assert first pdir entry is set */ + if (0x80 != (((u8 *) ioc->pdir_base)[off])) { + sba_dump_pdir_entry(ioc,"sba_mark_invalid()", PDIR_INDEX(iovp)); + } +#endif + + if (byte_cnt <= IOVP_SIZE) + { + iovp |= IOVP_SHIFT; /* set "size" field for PCOM */ + + /* + ** clear I/O PDIR entry "valid" bit + ** Do NOT clear the rest - save it for debugging. + ** We should only clear bits that have previously + ** been enabled. + */ + ((u8 *)(ioc->pdir_base))[off] = 0; + } else { + u32 t = get_order(byte_cnt) + PAGE_SHIFT; + + iovp |= t; + do { + /* clear I/O Pdir entry "valid" bit first */ + ((u8 *)(ioc->pdir_base))[off] = 0; + off += sizeof(u64); + byte_cnt -= IOVP_SIZE; + } while (byte_cnt > 0); + } + + WRITE_REG( SBA_IOVA(ioc, iovp, 0, 0), ioc->ioc_hpa+IOC_PCOM); +} + +/** + * sba_dma_supported - PCI driver can query DMA support + * @dev: instance of PCI owned by the driver that's asking + * @mask: number of address bits this PCI device can handle + * + * See Documentation/DMA-mapping.txt + */ +static int sba_dma_supported( struct device *dev, u64 mask) +{ + struct ioc *ioc; + if (dev == NULL) { + printk(KERN_ERR MODULE_NAME ": EISA/ISA/et al not supported\n"); + BUG(); + return(0); + } + + ioc = GET_IOC(dev); + + /* check if mask is > than the largest IO Virt Address */ + + return((int) (mask >= (ioc->ibase + + (ioc->pdir_size / sizeof(u64) * IOVP_SIZE) ))); +} + + +/** + * sba_map_single - map one buffer and return IOVA for DMA + * @dev: instance of PCI owned by the driver that's asking. + * @addr: driver buffer to map. + * @size: number of bytes to map in driver buffer. + * @direction: R/W or both. + * + * See Documentation/DMA-mapping.txt + */ +static dma_addr_t +sba_map_single(struct device *dev, void *addr, size_t size, + enum dma_data_direction direction) +{ + struct ioc *ioc; + unsigned long flags; + dma_addr_t iovp; + dma_addr_t offset; + u64 *pdir_start; + int pide; + + ioc = GET_IOC(dev); + + /* save offset bits */ + offset = ((dma_addr_t) (long) addr) & ~IOVP_MASK; + + /* round up to nearest IOVP_SIZE */ + size = (size + offset + ~IOVP_MASK) & IOVP_MASK; + + spin_lock_irqsave(&ioc->res_lock, flags); +#ifdef ASSERT_PDIR_SANITY + sba_check_pdir(ioc,"Check before sba_map_single()"); +#endif + +#ifdef SBA_COLLECT_STATS + ioc->msingle_calls++; + ioc->msingle_pages += size >> IOVP_SHIFT; +#endif + pide = sba_alloc_range(ioc, size); + iovp = (dma_addr_t) pide << IOVP_SHIFT; + + DBG_RUN("%s() 0x%p -> 0x%lx\n", + __FUNCTION__, addr, (long) iovp | offset); + + pdir_start = &(ioc->pdir_base[pide]); + + while (size > 0) { + sba_io_pdir_entry(pdir_start, KERNEL_SPACE, (unsigned long) addr, 0); + + DBG_RUN(" pdir 0x%p %02x%02x%02x%02x%02x%02x%02x%02x\n", + pdir_start, + (u8) (((u8 *) pdir_start)[7]), + (u8) (((u8 *) pdir_start)[6]), + (u8) (((u8 *) pdir_start)[5]), + (u8) (((u8 *) pdir_start)[4]), + (u8) (((u8 *) pdir_start)[3]), + (u8) (((u8 *) pdir_start)[2]), + (u8) (((u8 *) pdir_start)[1]), + (u8) (((u8 *) pdir_start)[0]) + ); + + addr += IOVP_SIZE; + size -= IOVP_SIZE; + pdir_start++; + } + /* form complete address */ +#ifdef ASSERT_PDIR_SANITY + sba_check_pdir(ioc,"Check after sba_map_single()"); +#endif + spin_unlock_irqrestore(&ioc->res_lock, flags); + return SBA_IOVA(ioc, iovp, offset, DEFAULT_DMA_HINT_REG); +} + + +/** + * sba_unmap_single - unmap one IOVA and free resources + * @dev: instance of PCI owned by the driver that's asking. + * @iova: IOVA of driver buffer previously mapped. + * @size: number of bytes mapped in driver buffer. + * @direction: R/W or both. + * + * See Documentation/DMA-mapping.txt + */ +static void +sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, + enum dma_data_direction direction) +{ + struct ioc *ioc; +#if DELAYED_RESOURCE_CNT > 0 + struct sba_dma_pair *d; +#endif + unsigned long flags; + dma_addr_t offset; + + DBG_RUN("%s() iovp 0x%lx/%x\n", __FUNCTION__, (long) iova, size); + + ioc = GET_IOC(dev); + offset = iova & ~IOVP_MASK; + iova ^= offset; /* clear offset bits */ + size += offset; + size = ROUNDUP(size, IOVP_SIZE); + + spin_lock_irqsave(&ioc->res_lock, flags); + +#ifdef SBA_COLLECT_STATS + ioc->usingle_calls++; + ioc->usingle_pages += size >> IOVP_SHIFT; +#endif + + sba_mark_invalid(ioc, iova, size); + +#if DELAYED_RESOURCE_CNT > 0 + /* Delaying when we re-use a IO Pdir entry reduces the number + * of MMIO reads needed to flush writes to the PCOM register. + */ + d = &(ioc->saved[ioc->saved_cnt]); + d->iova = iova; + d->size = size; + if (++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT) { + int cnt = ioc->saved_cnt; + while (cnt--) { + sba_free_range(ioc, d->iova, d->size); + d--; + } + ioc->saved_cnt = 0; + READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ + } +#else /* DELAYED_RESOURCE_CNT == 0 */ + sba_free_range(ioc, iova, size); + READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ +#endif /* DELAYED_RESOURCE_CNT == 0 */ + spin_unlock_irqrestore(&ioc->res_lock, flags); + + /* XXX REVISIT for 2.5 Linux - need syncdma for zero-copy support. + ** For Astro based systems this isn't a big deal WRT performance. + ** As long as 2.4 kernels copyin/copyout data from/to userspace, + ** we don't need the syncdma. The issue here is I/O MMU cachelines + ** are *not* coherent in all cases. May be hwrev dependent. + ** Need to investigate more. + asm volatile("syncdma"); + */ +} + + +/** + * sba_alloc_consistent - allocate/map shared mem for DMA + * @hwdev: instance of PCI owned by the driver that's asking. + * @size: number of bytes mapped in driver buffer. + * @dma_handle: IOVA of new buffer. + * + * See Documentation/DMA-mapping.txt + */ +static void *sba_alloc_consistent(struct device *hwdev, size_t size, + dma_addr_t *dma_handle, int gfp) +{ + void *ret; + + if (!hwdev) { + /* only support PCI */ + *dma_handle = 0; + return 0; + } + + ret = (void *) __get_free_pages(gfp, get_order(size)); + + if (ret) { + memset(ret, 0, size); + *dma_handle = sba_map_single(hwdev, ret, size, 0); + } + + return ret; +} + + +/** + * sba_free_consistent - free/unmap shared mem for DMA + * @hwdev: instance of PCI owned by the driver that's asking. + * @size: number of bytes mapped in driver buffer. + * @vaddr: virtual address IOVA of "consistent" buffer. + * @dma_handler: IO virtual address of "consistent" buffer. + * + * See Documentation/DMA-mapping.txt + */ +static void +sba_free_consistent(struct device *hwdev, size_t size, void *vaddr, + dma_addr_t dma_handle) +{ + sba_unmap_single(hwdev, dma_handle, size, 0); + free_pages((unsigned long) vaddr, get_order(size)); +} + + +/* +** Since 0 is a valid pdir_base index value, can't use that +** to determine if a value is valid or not. Use a flag to indicate +** the SG list entry contains a valid pdir index. +*/ +#define PIDE_FLAG 0x80000000UL + +#ifdef SBA_COLLECT_STATS +#define IOMMU_MAP_STATS +#endif +#include "iommu-helpers.h" + +#ifdef DEBUG_LARGE_SG_ENTRIES +int dump_run_sg = 0; +#endif + + +/** + * sba_map_sg - map Scatter/Gather list + * @dev: instance of PCI owned by the driver that's asking. + * @sglist: array of buffer/length pairs + * @nents: number of entries in list + * @direction: R/W or both. + * + * See Documentation/DMA-mapping.txt + */ +static int +sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, + enum dma_data_direction direction) +{ + struct ioc *ioc; + int coalesced, filled = 0; + unsigned long flags; + + DBG_RUN_SG("%s() START %d entries\n", __FUNCTION__, nents); + + ioc = GET_IOC(dev); + + /* Fast path single entry scatterlists. */ + if (nents == 1) { + sg_dma_address(sglist) = sba_map_single(dev, + (void *)sg_virt_addr(sglist), + sglist->length, direction); + sg_dma_len(sglist) = sglist->length; + return 1; + } + + spin_lock_irqsave(&ioc->res_lock, flags); + +#ifdef ASSERT_PDIR_SANITY + if (sba_check_pdir(ioc,"Check before sba_map_sg()")) + { + sba_dump_sg(ioc, sglist, nents); + panic("Check before sba_map_sg()"); + } +#endif + +#ifdef SBA_COLLECT_STATS + ioc->msg_calls++; +#endif + + /* + ** First coalesce the chunks and allocate I/O pdir space + ** + ** If this is one DMA stream, we can properly map using the + ** correct virtual address associated with each DMA page. + ** w/o this association, we wouldn't have coherent DMA! + ** Access to the virtual address is what forces a two pass algorithm. + */ + coalesced = iommu_coalesce_chunks(ioc, sglist, nents, sba_alloc_range); + + /* + ** Program the I/O Pdir + ** + ** map the virtual addresses to the I/O Pdir + ** o dma_address will contain the pdir index + ** o dma_len will contain the number of bytes to map + ** o address contains the virtual address. + */ + filled = iommu_fill_pdir(ioc, sglist, nents, 0, sba_io_pdir_entry); + +#ifdef ASSERT_PDIR_SANITY + if (sba_check_pdir(ioc,"Check after sba_map_sg()")) + { + sba_dump_sg(ioc, sglist, nents); + panic("Check after sba_map_sg()\n"); + } +#endif + + spin_unlock_irqrestore(&ioc->res_lock, flags); + + DBG_RUN_SG("%s() DONE %d mappings\n", __FUNCTION__, filled); + + return filled; +} + + +/** + * sba_unmap_sg - unmap Scatter/Gather list + * @dev: instance of PCI owned by the driver that's asking. + * @sglist: array of buffer/length pairs |