diff options
Diffstat (limited to 'arch/s390/mm')
-rw-r--r-- | arch/s390/mm/Makefile | 7 | ||||
-rw-r--r-- | arch/s390/mm/cmm.c | 443 | ||||
-rw-r--r-- | arch/s390/mm/extmem.c | 588 | ||||
-rw-r--r-- | arch/s390/mm/fault.c | 586 | ||||
-rw-r--r-- | arch/s390/mm/init.c | 310 | ||||
-rw-r--r-- | arch/s390/mm/ioremap.c | 138 | ||||
-rw-r--r-- | arch/s390/mm/mmap.c | 86 |
7 files changed, 2158 insertions, 0 deletions
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile new file mode 100644 index 00000000000..aa9a42b6e62 --- /dev/null +++ b/arch/s390/mm/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the linux s390-specific parts of the memory manager. +# + +obj-y := init.o fault.o ioremap.o extmem.o mmap.o +obj-$(CONFIG_CMM) += cmm.o + diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c new file mode 100644 index 00000000000..d30cdb4248a --- /dev/null +++ b/arch/s390/mm/cmm.c @@ -0,0 +1,443 @@ +/* + * arch/s390/mm/cmm.c + * + * S390 version + * Copyright (C) 2003 IBM Deutschland Entwicklung GmbH, IBM Corporation + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + * + * Collaborative memory management interface. + */ + +#include <linux/config.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/sysctl.h> +#include <linux/ctype.h> + +#include <asm/pgalloc.h> +#include <asm/uaccess.h> + +#include "../../../drivers/s390/net/smsgiucv.h" + +#define CMM_NR_PAGES ((PAGE_SIZE / sizeof(unsigned long)) - 2) + +struct cmm_page_array { + struct cmm_page_array *next; + unsigned long index; + unsigned long pages[CMM_NR_PAGES]; +}; + +static long cmm_pages = 0; +static long cmm_timed_pages = 0; +static volatile long cmm_pages_target = 0; +static volatile long cmm_timed_pages_target = 0; +static long cmm_timeout_pages = 0; +static long cmm_timeout_seconds = 0; + +static struct cmm_page_array *cmm_page_list = 0; +static struct cmm_page_array *cmm_timed_page_list = 0; + +static unsigned long cmm_thread_active = 0; +static struct work_struct cmm_thread_starter; +static wait_queue_head_t cmm_thread_wait; +static struct timer_list cmm_timer; + +static void cmm_timer_fn(unsigned long); +static void cmm_set_timer(void); + +static long +cmm_strtoul(const char *cp, char **endp) +{ + unsigned int base = 10; + + if (*cp == '0') { + base = 8; + cp++; + if ((*cp == 'x' || *cp == 'X') && isxdigit(cp[1])) 
{ + base = 16; + cp++; + } + } + return simple_strtoul(cp, endp, base); +} + +static long +cmm_alloc_pages(long pages, long *counter, struct cmm_page_array **list) +{ + struct cmm_page_array *pa; + unsigned long page; + + pa = *list; + while (pages) { + page = __get_free_page(GFP_NOIO); + if (!page) + break; + if (!pa || pa->index >= CMM_NR_PAGES) { + /* Need a new page for the page list. */ + pa = (struct cmm_page_array *) + __get_free_page(GFP_NOIO); + if (!pa) { + free_page(page); + break; + } + pa->next = *list; + pa->index = 0; + *list = pa; + } + diag10(page); + pa->pages[pa->index++] = page; + (*counter)++; + pages--; + } + return pages; +} + +static void +cmm_free_pages(long pages, long *counter, struct cmm_page_array **list) +{ + struct cmm_page_array *pa; + unsigned long page; + + pa = *list; + while (pages) { + if (!pa || pa->index <= 0) + break; + page = pa->pages[--pa->index]; + if (pa->index == 0) { + pa = pa->next; + free_page((unsigned long) *list); + *list = pa; + } + free_page(page); + (*counter)--; + pages--; + } +} + +static int +cmm_thread(void *dummy) +{ + int rc; + + daemonize("cmmthread"); + while (1) { + rc = wait_event_interruptible(cmm_thread_wait, + (cmm_pages != cmm_pages_target || + cmm_timed_pages != cmm_timed_pages_target)); + if (rc == -ERESTARTSYS) { + /* Got kill signal. End thread. 
*/ + clear_bit(0, &cmm_thread_active); + cmm_pages_target = cmm_pages; + cmm_timed_pages_target = cmm_timed_pages; + break; + } + if (cmm_pages_target > cmm_pages) { + if (cmm_alloc_pages(1, &cmm_pages, &cmm_page_list)) + cmm_pages_target = cmm_pages; + } else if (cmm_pages_target < cmm_pages) { + cmm_free_pages(1, &cmm_pages, &cmm_page_list); + } + if (cmm_timed_pages_target > cmm_timed_pages) { + if (cmm_alloc_pages(1, &cmm_timed_pages, + &cmm_timed_page_list)) + cmm_timed_pages_target = cmm_timed_pages; + } else if (cmm_timed_pages_target < cmm_timed_pages) { + cmm_free_pages(1, &cmm_timed_pages, + &cmm_timed_page_list); + } + if (cmm_timed_pages > 0 && !timer_pending(&cmm_timer)) + cmm_set_timer(); + } + return 0; +} + +static void +cmm_start_thread(void) +{ + kernel_thread(cmm_thread, 0, 0); +} + +static void +cmm_kick_thread(void) +{ + if (!test_and_set_bit(0, &cmm_thread_active)) + schedule_work(&cmm_thread_starter); + wake_up(&cmm_thread_wait); +} + +static void +cmm_set_timer(void) +{ + if (cmm_timed_pages_target <= 0 || cmm_timeout_seconds <= 0) { + if (timer_pending(&cmm_timer)) + del_timer(&cmm_timer); + return; + } + if (timer_pending(&cmm_timer)) { + if (mod_timer(&cmm_timer, jiffies + cmm_timeout_seconds*HZ)) + return; + } + cmm_timer.function = cmm_timer_fn; + cmm_timer.data = 0; + cmm_timer.expires = jiffies + cmm_timeout_seconds*HZ; + add_timer(&cmm_timer); +} + +static void +cmm_timer_fn(unsigned long ignored) +{ + long pages; + + pages = cmm_timed_pages_target - cmm_timeout_pages; + if (pages < 0) + cmm_timed_pages_target = 0; + else + cmm_timed_pages_target = pages; + cmm_kick_thread(); + cmm_set_timer(); +} + +void +cmm_set_pages(long pages) +{ + cmm_pages_target = pages; + cmm_kick_thread(); +} + +long +cmm_get_pages(void) +{ + return cmm_pages; +} + +void +cmm_add_timed_pages(long pages) +{ + cmm_timed_pages_target += pages; + cmm_kick_thread(); +} + +long +cmm_get_timed_pages(void) +{ + return cmm_timed_pages; +} + +void 
+cmm_set_timeout(long pages, long seconds) +{ + cmm_timeout_pages = pages; + cmm_timeout_seconds = seconds; + cmm_set_timer(); +} + +static inline int +cmm_skip_blanks(char *cp, char **endp) +{ + char *str; + + for (str = cp; *str == ' ' || *str == '\t'; str++); + *endp = str; + return str != cp; +} + +#ifdef CONFIG_CMM_PROC +/* These will someday get removed. */ +#define VM_CMM_PAGES 1111 +#define VM_CMM_TIMED_PAGES 1112 +#define VM_CMM_TIMEOUT 1113 + +static struct ctl_table cmm_table[]; + +static int +cmm_pages_handler(ctl_table *ctl, int write, struct file *filp, + void *buffer, size_t *lenp, loff_t *ppos) +{ + char buf[16], *p; + long pages; + int len; + + if (!*lenp || (*ppos && !write)) { + *lenp = 0; + return 0; + } + + if (write) { + len = *lenp; + if (copy_from_user(buf, buffer, + len > sizeof(buf) ? sizeof(buf) : len)) + return -EFAULT; + buf[sizeof(buf) - 1] = '\0'; + cmm_skip_blanks(buf, &p); + pages = cmm_strtoul(p, &p); + if (ctl == &cmm_table[0]) + cmm_set_pages(pages); + else + cmm_add_timed_pages(pages); + } else { + if (ctl == &cmm_table[0]) + pages = cmm_get_pages(); + else + pages = cmm_get_timed_pages(); + len = sprintf(buf, "%ld\n", pages); + if (len > *lenp) + len = *lenp; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + } + *lenp = len; + *ppos += len; + return 0; +} + +static int +cmm_timeout_handler(ctl_table *ctl, int write, struct file *filp, + void *buffer, size_t *lenp, loff_t *ppos) +{ + char buf[64], *p; + long pages, seconds; + int len; + + if (!*lenp || (*ppos && !write)) { + *lenp = 0; + return 0; + } + + if (write) { + len = *lenp; + if (copy_from_user(buf, buffer, + len > sizeof(buf) ? 
sizeof(buf) : len)) + return -EFAULT; + buf[sizeof(buf) - 1] = '\0'; + cmm_skip_blanks(buf, &p); + pages = cmm_strtoul(p, &p); + cmm_skip_blanks(p, &p); + seconds = cmm_strtoul(p, &p); + cmm_set_timeout(pages, seconds); + } else { + len = sprintf(buf, "%ld %ld\n", + cmm_timeout_pages, cmm_timeout_seconds); + if (len > *lenp) + len = *lenp; + if (copy_to_user(buffer, buf, len)) + return -EFAULT; + } + *lenp = len; + *ppos += len; + return 0; +} + +static struct ctl_table cmm_table[] = { + { + .ctl_name = VM_CMM_PAGES, + .procname = "cmm_pages", + .mode = 0600, + .proc_handler = &cmm_pages_handler, + }, + { + .ctl_name = VM_CMM_TIMED_PAGES, + .procname = "cmm_timed_pages", + .mode = 0600, + .proc_handler = &cmm_pages_handler, + }, + { + .ctl_name = VM_CMM_TIMEOUT, + .procname = "cmm_timeout", + .mode = 0600, + .proc_handler = &cmm_timeout_handler, + }, + { .ctl_name = 0 } +}; + +static struct ctl_table cmm_dir_table[] = { + { + .ctl_name = CTL_VM, + .procname = "vm", + .maxlen = 0, + .mode = 0555, + .child = cmm_table, + }, + { .ctl_name = 0 } +}; +#endif + +#ifdef CONFIG_CMM_IUCV +#define SMSG_PREFIX "CMM" +static void +cmm_smsg_target(char *msg) +{ + long pages, seconds; + + if (!cmm_skip_blanks(msg + strlen(SMSG_PREFIX), &msg)) + return; + if (strncmp(msg, "SHRINK", 6) == 0) { + if (!cmm_skip_blanks(msg + 6, &msg)) + return; + pages = cmm_strtoul(msg, &msg); + cmm_skip_blanks(msg, &msg); + if (*msg == '\0') + cmm_set_pages(pages); + } else if (strncmp(msg, "RELEASE", 7) == 0) { + if (!cmm_skip_blanks(msg + 7, &msg)) + return; + pages = cmm_strtoul(msg, &msg); + cmm_skip_blanks(msg, &msg); + if (*msg == '\0') + cmm_add_timed_pages(pages); + } else if (strncmp(msg, "REUSE", 5) == 0) { + if (!cmm_skip_blanks(msg + 5, &msg)) + return; + pages = cmm_strtoul(msg, &msg); + if (!cmm_skip_blanks(msg, &msg)) + return; + seconds = cmm_strtoul(msg, &msg); + cmm_skip_blanks(msg, &msg); + if (*msg == '\0') + cmm_set_timeout(pages, seconds); + } +} +#endif + +struct 
ctl_table_header *cmm_sysctl_header; + +static int +cmm_init (void) +{ +#ifdef CONFIG_CMM_PROC + cmm_sysctl_header = register_sysctl_table(cmm_dir_table, 1); +#endif +#ifdef CONFIG_CMM_IUCV + smsg_register_callback(SMSG_PREFIX, cmm_smsg_target); +#endif + INIT_WORK(&cmm_thread_starter, (void *) cmm_start_thread, 0); + init_waitqueue_head(&cmm_thread_wait); + init_timer(&cmm_timer); + return 0; +} + +static void +cmm_exit(void) +{ + cmm_free_pages(cmm_pages, &cmm_pages, &cmm_page_list); + cmm_free_pages(cmm_timed_pages, &cmm_timed_pages, &cmm_timed_page_list); +#ifdef CONFIG_CMM_PROC + unregister_sysctl_table(cmm_sysctl_header); +#endif +#ifdef CONFIG_CMM_IUCV + smsg_unregister_callback(SMSG_PREFIX, cmm_smsg_target); +#endif +} + +module_init(cmm_init); +module_exit(cmm_exit); + +EXPORT_SYMBOL(cmm_set_pages); +EXPORT_SYMBOL(cmm_get_pages); +EXPORT_SYMBOL(cmm_add_timed_pages); +EXPORT_SYMBOL(cmm_get_timed_pages); +EXPORT_SYMBOL(cmm_set_timeout); + +MODULE_LICENSE("GPL"); diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c new file mode 100644 index 00000000000..648deed17e2 --- /dev/null +++ b/arch/s390/mm/extmem.c @@ -0,0 +1,588 @@ +/* + * File...........: arch/s390/mm/extmem.c + * Author(s)......: Carsten Otte <cotte@de.ibm.com> + * Rob M van der Heij <rvdheij@nl.ibm.com> + * Steven Shultz <shultzss@us.ibm.com> + * Bugreports.to..: <Linux390@de.ibm.com> + * (C) IBM Corporation 2002-2004 + */ + +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/slab.h> +#include <linux/module.h> +#include <linux/bootmem.h> +#include <asm/page.h> +#include <asm/ebcdic.h> +#include <asm/errno.h> +#include <asm/extmem.h> +#include <asm/cpcmd.h> +#include <linux/ctype.h> + +#define DCSS_DEBUG /* Debug messages on/off */ + +#define DCSS_NAME "extmem" +#ifdef DCSS_DEBUG +#define PRINT_DEBUG(x...) printk(KERN_DEBUG DCSS_NAME " debug:" x) +#else +#define PRINT_DEBUG(x...) 
do {} while (0) +#endif +#define PRINT_INFO(x...) printk(KERN_INFO DCSS_NAME " info:" x) +#define PRINT_WARN(x...) printk(KERN_WARNING DCSS_NAME " warning:" x) +#define PRINT_ERR(x...) printk(KERN_ERR DCSS_NAME " error:" x) + + +#define DCSS_LOADSHR 0x00 +#define DCSS_LOADNSR 0x04 +#define DCSS_PURGESEG 0x08 +#define DCSS_FINDSEG 0x0c +#define DCSS_LOADNOLY 0x10 +#define DCSS_SEGEXT 0x18 +#define DCSS_FINDSEGA 0x0c + +struct qrange { + unsigned int start; // 3byte start address, 1 byte type + unsigned int end; // 3byte end address, 1 byte reserved +}; + +struct qout64 { + int segstart; + int segend; + int segcnt; + int segrcnt; + struct qrange range[6]; +}; + +struct qin64 { + char qopcode; + char rsrv1[3]; + char qrcode; + char rsrv2[3]; + char qname[8]; + unsigned int qoutptr; + short int qoutlen; +}; + +struct dcss_segment { + struct list_head list; + char dcss_name[8]; + unsigned long start_addr; + unsigned long end; + atomic_t ref_count; + int do_nonshared; + unsigned int vm_segtype; + struct qrange range[6]; + int segcnt; +}; + +static DEFINE_SPINLOCK(dcss_lock); +static struct list_head dcss_list = LIST_HEAD_INIT(dcss_list); +static char *segtype_string[] = { "SW", "EW", "SR", "ER", "SN", "EN", "SC", + "EW/EN-MIXED" }; + +extern struct { + unsigned long addr, size, type; +} memory_chunk[MEMORY_CHUNKS]; + +/* + * Create the 8 bytes, ebcdic VM segment name from + * an ascii name. + */ +static void inline +dcss_mkname(char *name, char *dcss_name) +{ + int i; + + for (i = 0; i < 8; i++) { + if (name[i] == '\0') + break; + dcss_name[i] = toupper(name[i]); + }; + for (; i < 8; i++) + dcss_name[i] = ' '; + ASCEBC(dcss_name, 8); +} + + +/* + * search all segments in dcss_list, and return the one + * namend *name. If not found, return NULL. 
+ */
+static struct dcss_segment *
+segment_by_name (char *name)
+{
+	char ebcdic_name[9];
+	struct list_head *pos;
+	struct dcss_segment *seg;
+
+	assert_spin_locked(&dcss_lock);
+	/* list entries carry the 8 byte name produced by dcss_mkname */
+	dcss_mkname (name, ebcdic_name);
+	list_for_each (pos, &dcss_list) {
+		seg = list_entry (pos, struct dcss_segment, list);
+		if (memcmp(seg->dcss_name, ebcdic_name, 8) == 0)
+			return seg;
+	}
+	return NULL;
+}
+
+
+/*
+ * Issue diagnose 0x64 with function code func on the block/name
+ * pointed to by parameter.
+ * The contents of the two registers after the diagnose are handed
+ * back through ret1 and ret2, the value extracted by ipm/srl
+ * (the condition code) is the return value.
+ */
+static inline int
+dcss_diag (__u8 func, void *parameter,
+		unsigned long *ret1, unsigned long *ret2)
+{
+	unsigned long rx = (unsigned long) parameter;
+	unsigned long ry = (unsigned long) func;
+	int rc;
+
+	__asm__ __volatile__(
+#ifdef CONFIG_ARCH_S390X
+		" sam31\n" // switch to 31 bit
+		" diag %0,%1,0x64\n"
+		" sam64\n" // switch back to 64 bit
+#else
+		" diag %0,%1,0x64\n"
+#endif
+		" ipm %2\n"
+		" srl %2,28\n"
+		: "+d" (rx), "+d" (ry), "=d" (rc) : : "cc" );
+	*ret1 = rx;
+	*ret2 = ry;
+	return rc;
+}
+
+/*
+ * Map the return code of a failed diagnose to an errno value:
+ * 44 becomes -ENOENT, everything else -EIO.
+ */
+static inline int
+dcss_diag_translate_rc (int vm_rc) {
+	return (vm_rc == 44) ? -ENOENT : -EIO;
+}
+
+
+/* do a diag to get info about a segment.
+ * fills start_address, end and vm_segtype fields
+ */
+static int
+query_segment_type (struct dcss_segment *seg)
+{
+	struct qin64 *query = kmalloc (sizeof(struct qin64), GFP_DMA);
+	struct qout64 *reply = kmalloc (sizeof(struct qout64), GFP_DMA);
+	unsigned long unused, vmrc;
+	unsigned long next_page;
+	unsigned int type;
+	int rc, i;
+
+	if (!query || !reply) {
+		rc = -ENOMEM;
+		goto out_free;
+	}
+
+	/* set up the request block for the query */
+	query->qopcode = DCSS_FINDSEGA;
+	query->qoutptr = (unsigned long) reply;
+	query->qoutlen = sizeof(struct qout64);
+	memcpy (query->qname, seg->dcss_name, 8);
+
+	if (dcss_diag (DCSS_SEGEXT, query, &unused, &vmrc) > 1) {
+		rc = dcss_diag_translate_rc (vmrc);
+		goto out_free;
+	}
+
+	/* every rejection below is reported as "not supported" */
+	rc = -ENOTSUPP;
+	if (reply->segcnt > 6)
+		goto out_free;
+
+	if (reply->segcnt == 1) {
+		/* the type lives in the low byte of the range start */
+		seg->vm_segtype = reply->range[0].start & 0xff;
+	} else {
+		/* multi-part segment. only one type supported here:
+		   - all parts are contiguous
+		   - all parts are either EW or EN type
+		   - maximum 6 parts allowed */
+		next_page = reply->segstart >> PAGE_SHIFT;
+		for (i = 0; i < reply->segcnt; i++) {
+			type = reply->range[i].start & 0xff;
+			if (type != SEG_TYPE_EW && type != SEG_TYPE_EN)
+				goto out_free;
+			/* each part has to begin right after its predecessor */
+			if (next_page != reply->range[i].start >> PAGE_SHIFT)
+				goto out_free;
+			next_page = (reply->range[i].end >> PAGE_SHIFT) + 1;
+		}
+		seg->vm_segtype = SEG_TYPE_EWEN;
+	}
+
+	/* copy the query result into the segment descriptor */
+	seg->start_addr = reply->segstart;
+	seg->end = reply->segend;
+	memcpy (seg->range, reply->range, 6*sizeof(struct qrange));
+	seg->segcnt = reply->segcnt;
+	rc = 0;
+
+ out_free:
+	/* kfree copes with NULL, so a failed allocation needs no guard */
+	kfree(query);
+	kfree(reply);
+	return rc;
+}
+
+/*
+ * check if the given segment collides with guest storage.
+ * returns 1 if this is the case, 0 if no collision was found
+ */
+static int
+segment_overlaps_storage(struct dcss_segment *seg)
+{
+	int i;
+
+	/* walk the chunk table until the first empty (size 0) entry */
+	for (i=0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+		/* only chunks of type 0 are checked against the segment */
+		if (memory_chunk[i].type != 0)
+			continue;
+		/* all comparisons are done in units of 1MB (>> 20) */
+		if ((memory_chunk[i].addr >> 20) > (seg->end >> 20))
+			continue;
+		if (((memory_chunk[i].addr + memory_chunk[i].size - 1) >> 20)
+				< (seg->start_addr >> 20))
+			continue;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * check if segment collides with other segments that are currently loaded
+ * returns 1 if this is the case, 0 if no collision was found
+ */
+static int
+segment_overlaps_others (struct dcss_segment *seg)
+{
+	struct list_head *l;
+	struct dcss_segment *tmp;
+
+	assert_spin_locked(&dcss_lock);
+	list_for_each(l, &dcss_list) {
+		tmp = list_entry(l, struct dcss_segment, list);
+		/* all comparisons are done in units of 1MB (>> 20) */
+		if ((tmp->start_addr >> 20) > (seg->end >> 20))
+			continue;
+		if ((tmp->end >> 20) < (seg->start_addr >> 20))
+			continue;
+		/* a segment does not collide with itself */
+		if (seg == tmp)
+			continue;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * check if segment exceeds the kernel mapping range (detected or set via mem=)
+ * returns 1 if this is the case, 0 if segment fits into the range
+ */
+static inline int
+segment_exceeds_range (struct dcss_segment *seg)
+{
+	/*
+	 * seg->end is an unsigned long; keep the page frame number in the
+	 * same type so that it is neither truncated nor sign-extended
+	 * before it is compared with max_pfn.
+	 */
+	unsigned long seg_last_pfn = seg->end >> PAGE_SHIFT;
+
+	return seg_last_pfn > max_pfn;
+}
+
+/*
+ * get info about a segment
+ * possible return values:
+ * -ENOSYS : we are not running on VM
+ * -EIO : could not perform query diagnose
+ * -ENOENT : no such segment
+ * -ENOTSUPP: multi-part segment cannot be used with linux
+ * -ENOSPC : segment cannot be used (overlaps with storage)
+ * -ENOMEM : out of memory
+ * 0 ..
6 : type of segment as defined in include/asm-s390/extmem.h
+ */
+int
+segment_type (char* name)
+{
+	int rc;
+	struct dcss_segment seg;
+
+	if (!MACHINE_IS_VM)
+		return -ENOSYS;
+
+	/* only the name of the on-stack descriptor is needed for the query */
+	dcss_mkname(name, seg.dcss_name);
+	rc = query_segment_type (&seg);
+	if (rc < 0)
+		return rc;
+	return seg.vm_segtype;
+}
+
+/*
+ * real segment loading function, called from segment_load
+ * (which holds dcss_lock around the call).
+ * Queries the segment, rejects it if it exceeds the kernel mapping
+ * range or overlaps storage or another loaded segment, loads it and
+ * puts it on dcss_list with a reference count of 1.
+ * Returns the segment type on success, a negative errno otherwise.
+ */
+static int
+__segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long *end)
+{
+	struct dcss_segment *seg = kmalloc(sizeof(struct dcss_segment),
+			GFP_DMA);
+	const char *type_name;
+	int dcss_command, rc, diag_cc;
+
+	if (seg == NULL) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	dcss_mkname (name, seg->dcss_name);
+	rc = query_segment_type (seg);
+	if (rc < 0)
+		goto out_free;
+	if (segment_exceeds_range(seg)) {
+		PRINT_WARN ("segment_load: not loading segment %s - exceeds"
+				" kernel mapping range\n",name);
+		rc = -ERANGE;
+		goto out_free;
+	}
+	if (segment_overlaps_storage(seg)) {
+		PRINT_WARN ("segment_load: not loading segment %s - overlaps"
+				" storage\n",name);
+		rc = -ENOSPC;
+		goto out_free;
+	}
+	if (segment_overlaps_others(seg)) {
+		PRINT_WARN ("segment_load: not loading segment %s - overlaps"
+				" other segments\n",name);
+		rc = -EBUSY;
+		goto out_free;
+	}
+	if (do_nonshared)
+		dcss_command = DCSS_LOADNSR;
+	else
+		dcss_command = DCSS_LOADNOLY;
+
+	diag_cc = dcss_diag(dcss_command, seg->dcss_name,
+			&seg->start_addr, &seg->end);
+	if (diag_cc > 1) {
+		PRINT_WARN ("segment_load: could not load segment %s - "
+				"diag returned error (%ld)\n",name,seg->end);
+		/* take the error code before the purge overwrites seg->end */
+		rc = dcss_diag_translate_rc (seg->end);
+		dcss_diag(DCSS_PURGESEG, seg->dcss_name,
+				&seg->start_addr, &seg->end);
+		goto out_free;
+	}
+	seg->do_nonshared = do_nonshared;
+	atomic_set(&seg->ref_count, 1);
+	list_add(&seg->list, &dcss_list);
+	rc = seg->vm_segtype;
+	*addr = seg->start_addr;
+	*end = seg->end;
+	/*
+	 * For single-part segments vm_segtype is the unchecked low byte
+	 * reported by the query; do not index past the name table.
+	 */
+	if (seg->vm_segtype < ARRAY_SIZE(segtype_string))
+		type_name = segtype_string[seg->vm_segtype];
+	else
+		type_name = "unknown";
+	PRINT_INFO ("segment_load: loaded segment %s range %p .. %p "
+			"type %s in %s mode\n", name,
+			(void*)seg->start_addr, (void*)seg->end,
+			type_name, do_nonshared ? "non-shared" : "shared");
+	goto out;
+ out_free:
+	kfree (seg);
+ out:
+	return rc;
+}
+
+/*
+ * this function loads a DCSS segment
+ * name : name of the DCSS
+ * do_nonshared : 0 indicates that the dcss should be shared with other linux images
+ * 1 indicates that the dcss should be exclusive for this linux image
+ * addr : will be filled with start address of the segment
+ * end : will be filled with end address of the segment
+ * return values:
+ * -ENOSYS : we are not running on VM
+ * -EIO : could not perform query or load diagnose
+ * -ENOENT : no such segment
+ * -ENOTSUPP: multi-part segment cannot be used with linux
+ * -ENOSPC : segment cannot be used (overlaps with storage)
+ * -EBUSY : segment can temporarily not be used (overlaps with dcss)
+ * -ERANGE : segment cannot be used (exceeds kernel mapping range)
+ * -EPERM : segment is currently loaded with incompatible permissions
+ * -ENOMEM : out of memory
+ * 0 .. 6 : type of segment as defined in include/asm-s390/extmem.h
+ */
+int
+segment_load (char *name, int do_nonshared, unsigned long *addr,
+		unsigned long *end)
+{
+	struct dcss_segment *seg;
+	int rc;
+
+	if (!MACHINE_IS_VM)
+		return -ENOSYS;
+
+	spin_lock (&dcss_lock);
+	seg = segment_by_name (name);
+	if (seg == NULL)
+		rc = __segment_load (name, do_nonshared, addr, end);
+	else {
+		/* already loaded: share it only if the mode matches */
+		if (do_nonshared == seg->do_nonshared) {
+			atomic_inc(&seg->ref_count);
+			*addr = seg->start_addr;
+			*end = seg->end;
+			rc = seg->vm_segtype;
+		} else {
+			*addr = *end = 0;
+			rc = -EPERM;
+		}
+	}
+	spin_unlock (&dcss_lock);
+	return rc;
+}
+
+/*
+ * this function modifies the shared state of a DCSS segment.
note that
+ * name : name of the DCSS
+ * do_nonshared : 0 indicates that the dcss should be shared with other linux images
+ * 1 indicates that the dcss should be exclusive for this linux image
+ * return values:
+ * -EIO : could not perform load diagnose (segment gone!)
+ * -ENOENT : no such segment (segment gone!)
+ * -EAGAIN : segment is in use by other exploiters, try later
+ * -EINVAL : no segment with the given name is currently loaded - name invalid
+ * 0 : operation succeeded
+ */
+int
+segment_modify_shared (char *name, int do_nonshared)
+{
+	struct dcss_segment *seg;
+	unsigned long ignored;
+	int load_func, cc;
+	int rc = 0;
+
+	spin_lock (&dcss_lock);
+	seg = segment_by_name (name);
+	if (!seg) {
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+	if (seg->do_nonshared == do_nonshared) {
+		/* nothing to do, rc stays 0 */
+		PRINT_INFO ("segment_modify_shared: not reloading segment %s"
+				" - already in requested mode\n",name);
+		goto out_unlock;
+	}
+	if (atomic_read (&seg->ref_count) != 1) {
+		PRINT_WARN ("segment_modify_shared: not reloading segment %s - "
+				"segment is in use by other driver(s)\n",name);
+		rc = -EAGAIN;
+		goto out_unlock;
+	}
+
+	/* purge the segment, then load it again in the requested mode */
+	dcss_diag(DCSS_PURGESEG, seg->dcss_name, &ignored, &ignored);
+	load_func = do_nonshared ? DCSS_LOADNSR : DCSS_LOADNOLY;
+	cc = dcss_diag(load_func, seg->dcss_name,
+			&seg->start_addr, &seg->end);
+	if (cc <= 1) {
+		seg->do_nonshared = do_nonshared;
+		goto out_unlock;
+	}
+
+	/* reload failed: forget about the segment altogether */
+	PRINT_WARN ("segment_modify_shared: could not reload segment %s"
+			" - diag returned error (%ld)\n",name,seg->end);
+	rc = dcss_diag_translate_rc (seg->end);
+	list_del(&seg->list);
+	dcss_diag(DCSS_PURGESEG, seg->dcss_name, &ignored, &ignored);
+	kfree (seg);
+ out_unlock:
+	spin_unlock(&dcss_lock);
+	return rc;
+}
+
+/*
+ * Decrease the use count of a DCSS segment and remove
+ * it from the address space if nobody is using it
+ * any longer.
+ */
+void
+segment_unload(char *name)
+{
+	unsigned long dummy;
+	struct dcss_segment *seg;
+
+	if (!MACHINE_IS_VM)
+		return;
+
+	spin_lock(&dcss_lock);
+	seg = segment_by_name (name);
+	if (seg == NULL) {
+		PRINT_ERR ("could not find segment %s in segment_unload, "
+				"please report to linux390@de.ibm.com\n",name);
+		goto out_unlock;
+	}
+	/* last user gone: unlink, purge and free the segment */
+	if (atomic_dec_return(&seg->ref_count) == 0) {
+		list_del(&seg->list);
+		dcss_diag(DCSS_PURGESEG, seg->dcss_name,
+				&dummy, &dummy);
+		kfree(seg);
+	}
+out_unlock:
+	spin_unlock(&dcss_lock);
+}
+
+/*
+ * save segment content permanently
+ *
+ * Builds a "DEFSEG <name> <start>-<end> <type> ..." command from the
+ * ranges recorded for the segment and a "SAVESEG <name>" command and
+ * passes both to cpcmd. Nothing is issued if the segment is not
+ * loaded or if the commands cannot be built completely.
+ */
+void
+segment_save(char *name)
+{
+	struct dcss_segment *seg;
+	char cmd1[160];
+	char cmd2[80];
+	unsigned int type;
+	int i, len;
+
+	if (!MACHINE_IS_VM)
+		return;
+
+	spin_lock(&dcss_lock);
+	seg = segment_by_name (name);
+
+	if (seg == NULL) {
+		PRINT_ERR ("could not find segment %s in segment_save, please report to linux390@de.ibm.com\n",name);
+		/* dcss_lock is held, leave through the unlock path */
+		goto out_unlock;
+	}
+
+	/*
+	 * name is the caller's string and only its first 8 characters
+	 * were matched, so bound every write into the command buffers.
+	 */
+	len = snprintf(cmd1, sizeof(cmd1), "DEFSEG %s", name);
+	for (i = 0; i < seg->segcnt && len < (int) sizeof(cmd1); i++) {
+		type = seg->range[i].start & 0xff;
+		/* the type byte must name an entry of segtype_string */
+		if (type >= ARRAY_SIZE(segtype_string))
+			goto out_invalid;
+		len += snprintf(cmd1 + len, sizeof(cmd1) - len, " %X-%X %s",
+				seg->range[i].start >> PAGE_SHIFT,
+				seg->range[i].end >> PAGE_SHIFT,
+				segtype_string[type]);
+	}
+	/* a truncated command would describe a different segment */
+	if (len >= (int) sizeof(cmd1) ||
+	    snprintf(cmd2, sizeof(cmd2), "SAVESEG %s", name)
+			>= (int) sizeof(cmd2))
+		goto out_invalid;
+	cpcmd(cmd1, NULL, 0);
+	cpcmd(cmd2, NULL, 0);
+	goto out_unlock;
+out_invalid:
+	PRINT_ERR ("segment_save: cannot build save commands for segment %s\n",
+			name);
+out_unlock:
+	spin_unlock(&dcss_lock);
+}
+
+EXPORT_SYMBOL(segment_load);
+EXPORT_SYMBOL(segment_unload);
+EXPORT_SYMBOL(segment_save);
+EXPORT_SYMBOL(segment_type);
+EXPORT_SYMBOL(segment_modify_shared);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
new file mode 100644
index 00000000000..80306bc8c79
--- /dev/null
+++ b/arch/s390/mm/fault.c
@@ -0,0 +1,586 @@
+/*
+ * arch/s390/mm/fault.c
+ *
+ * S390 version
+ * Copyright (C) 1999 IBM Deutschland Entwicklung GmbH, IBM Corporation
+ * Author(s): Hartmut Penner (hp@de.ibm.com)
+ * Ulrich Weigand (uweigand@de.ibm.com)
+ *
+ * Derived from "arch/i386/mm/fault.c"
+ * Copyright (C)
1995 Linus Torvalds
+ */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <linux/module.h>
+#include <linux/hardirq.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+
+#ifndef CONFIG_ARCH_S390X
+#define __FAIL_ADDR_MASK 0x7ffff000
+#define __FIXUP_MASK 0x7fffffff
+#define __SUBCODE_MASK 0x0200
+#define __PF_RES_FIELD 0ULL
+#else /* CONFIG_ARCH_S390X */
+#define __FAIL_ADDR_MASK -4096L
+#define __FIXUP_MASK ~0L
+#define __SUBCODE_MASK 0x0600
+#define __PF_RES_FIELD 0x8000000000000000ULL
+#endif /* CONFIG_ARCH_S390X */
+
+#ifdef CONFIG_SYSCTL
+extern int sysctl_userprocess_debug;
+#endif
+
+extern void die(const char *,struct pt_regs *,long);
+
+extern spinlock_t timerlist_lock;
+
+/*
+ * Called with yes != 0 when an oops starts and with yes == 0 once
+ * the message has been printed: the first call only raises
+ * oops_in_progress, the second one unblanks the console, clears the
+ * flag again and pushes one more printk through at raised loglevel.
+ */
+void bust_spinlocks(int yes)
+{
+	int saved_loglevel;
+
+	if (yes) {
+		oops_in_progress = 1;
+		return;
+	}
+
+	saved_loglevel = console_loglevel;
+	console_unblank();
+	oops_in_progress = 0;
+	/*
+	 * The message is out by now. This printk runs with
+	 * oops_in_progress cleared so that klogd gets
+	 * a poke.
+	 */
+	console_loglevel = 15;
+	printk(" ");
+	console_loglevel = saved_loglevel;
+}
+
+/*
+ * Check which address space is addressed by the access
+ * register in S390_lowcore.exc_access_id.
+ * Returns 1 for user space and 0 for kernel space.
+ */
+static int __check_access_register(struct pt_regs *regs, int error_code)
+{
+	int areg = S390_lowcore.exc_access_id;
+
+	/* Access via access register 0 -> kernel address */
+	if (areg == 0)
+		return 0;
+
+	/* bring current->thread.acrs up to date before looking at it */
+	save_access_regs(current->thread.acrs);
+	if (!regs || areg >= NUM_ACRS || current->thread.acrs[areg] > 1) {
+		/* Something unhealthy was done with the access registers... */
+		die("page fault via unknown access register", regs, error_code);
+		do_exit(SIGKILL);
+		return 0;
+	}
+	/*
+	 * access register contains 0 -> kernel address,
+	 * access register contains 1 -> user space address
+	 */
+	return current->thread.acrs[areg];
+}
+
+/*
+ * Check which address space the address belongs to.
+ * Returns 1 for user space and 0 for kernel space.
+ */
+static inline int check_user_space(struct pt_regs *regs, int error_code)
+{
+	/*
+	 * The lowest two bits of S390_lowcore.trans_exc_code indicate
+	 * which paging table was used:
+	 * 0: Primary Segment Table Descriptor
+	 * 1: STD determined via access register
+	 * 2: Secondary Segment Table Descriptor
+	 * 3: Home Segment Table Descriptor
+	 */
+	int std = S390_lowcore.trans_exc_code & 3;
+
+	if (unlikely(std == 1))
+		return __check_access_register(regs, error_code);
+	/* secondary: ask mm_segment; primary -> 0, home -> 1 */
+	return (std == 2) ? current->thread.mm_segment.ar4 : (std != 0);
+}
+
+/*
+ * Send SIGSEGV to task. This is an external routine
+ * to keep the stack usage of do_page_fault small.
+ */
+static void do_sigsegv(struct pt_regs *regs, unsigned long error_code,
+		int si_code, unsigned long address)
+{
+	struct siginfo si;
+
+	/*
+	 * Optional diagnostics: with CONFIG_SYSCTL they are printed only
+	 * when sysctl_userprocess_debug is set, with CONFIG_PROCESS_DEBUG
+	 * alone they are printed unconditionally.
+	 */
+#if defined(CONFIG_SYSCTL) || defined(CONFIG_PROCESS_DEBUG)
+#if defined(CONFIG_SYSCTL)
+	if (sysctl_userprocess_debug)
+#endif
+	{
+		printk("User process fault: interruption code 0x%lX\n",
+				error_code);
+		printk("failing address: %lX\n", address);
+		show_regs(regs);
+	}
+#endif
+	si.si_signo = SIGSEGV;
+	/*
+	 * si lives on the stack; set si_errno as well so that no
+	 * uninitialised stack content is delivered with the signal.
+	 */
+	si.si_errno = 0;
+	si.si_code = si_code;
+	si.si_addr = (void *) address;
+	force_sig_info(SIGSEGV, &si, current);
+}
+
+/*
+ * This routine handles page faults. It determines the address,
+ * and the problem, and then passes it off to one of the appropriate
+ * routines. |