aboutsummaryrefslogtreecommitdiff
path: root/Documentation/ia64
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/ia64')
-rw-r--r--Documentation/ia64/.gitignore1
-rw-r--r--Documentation/ia64/Makefile8
-rw-r--r--Documentation/ia64/aliasing-test.c263
-rw-r--r--Documentation/ia64/aliasing.txt221
-rw-r--r--Documentation/ia64/efirtc.txt2
-rw-r--r--Documentation/ia64/err_inject.txt1068
-rw-r--r--Documentation/ia64/fsys.txt2
-rw-r--r--Documentation/ia64/kvm.txt83
-rw-r--r--Documentation/ia64/mca.txt194
-rw-r--r--Documentation/ia64/paravirt_ops.txt137
-rw-r--r--Documentation/ia64/serial.txt9
-rw-r--r--Documentation/ia64/xen.txt183
12 files changed, 2168 insertions, 3 deletions
diff --git a/Documentation/ia64/.gitignore b/Documentation/ia64/.gitignore
new file mode 100644
index 00000000000..ab806edc873
--- /dev/null
+++ b/Documentation/ia64/.gitignore
@@ -0,0 +1 @@
+aliasing-test
diff --git a/Documentation/ia64/Makefile b/Documentation/ia64/Makefile
new file mode 100644
index 00000000000..b75db69ec48
--- /dev/null
+++ b/Documentation/ia64/Makefile
@@ -0,0 +1,8 @@
+# kbuild trick to avoid linker error. Can be omitted if a module is built.
+obj- := dummy.o
+
+# List of programs to build
+hostprogs-y := aliasing-test
+
+# Tell kbuild to always build the programs
+always := $(hostprogs-y)
diff --git a/Documentation/ia64/aliasing-test.c b/Documentation/ia64/aliasing-test.c
new file mode 100644
index 00000000000..62a190d45f3
--- /dev/null
+++ b/Documentation/ia64/aliasing-test.c
@@ -0,0 +1,263 @@
+/*
+ * Exercise /dev/mem mmap cases that have been troublesome in the past
+ *
+ * (c) Copyright 2007 Hewlett-Packard Development Company, L.P.
+ * Bjorn Helgaas <bjorn.helgaas@hp.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/pci.h>
+
+int sum;
+
+static int map_mem(char *path, off_t offset, size_t length, int touch)
+{
+ int fd, rc;
+ void *addr;
+ int *c;
+
+ fd = open(path, O_RDWR);
+ if (fd == -1) {
+ perror(path);
+ return -1;
+ }
+
+ if (fnmatch("/proc/bus/pci/*", path, 0) == 0) {
+ rc = ioctl(fd, PCIIOC_MMAP_IS_MEM);
+ if (rc == -1)
+ perror("PCIIOC_MMAP_IS_MEM ioctl");
+ }
+
+ addr = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_SHARED, fd, offset);
+ if (addr == MAP_FAILED)
+ return 1;
+
+ if (touch) {
+ c = (int *) addr;
+ while (c < (int *) (addr + length))
+ sum += *c++;
+ }
+
+ rc = munmap(addr, length);
+ if (rc == -1) {
+ perror("munmap");
+ return -1;
+ }
+
+ close(fd);
+ return 0;
+}
+
+static int scan_tree(char *path, char *file, off_t offset, size_t length, int touch)
+{
+ struct dirent **namelist;
+ char *name, *path2;
+ int i, n, r, rc = 0, result = 0;
+ struct stat buf;
+
+ n = scandir(path, &namelist, 0, alphasort);
+ if (n < 0) {
+ perror("scandir");
+ return -1;
+ }
+
+ for (i = 0; i < n; i++) {
+ name = namelist[i]->d_name;
+
+ if (fnmatch(".", name, 0) == 0)
+ goto skip;
+ if (fnmatch("..", name, 0) == 0)
+ goto skip;
+
+ path2 = malloc(strlen(path) + strlen(name) + 3);
+ strcpy(path2, path);
+ strcat(path2, "/");
+ strcat(path2, name);
+
+ if (fnmatch(file, name, 0) == 0) {
+ rc = map_mem(path2, offset, length, touch);
+ if (rc == 0)
+ fprintf(stderr, "PASS: %s 0x%lx-0x%lx is %s\n", path2, offset, offset + length, touch ? "readable" : "mappable");
+ else if (rc > 0)
+ fprintf(stderr, "PASS: %s 0x%lx-0x%lx not mappable\n", path2, offset, offset + length);
+ else {
+ fprintf(stderr, "FAIL: %s 0x%lx-0x%lx not accessible\n", path2, offset, offset + length);
+ return rc;
+ }
+ } else {
+ r = lstat(path2, &buf);
+ if (r == 0 && S_ISDIR(buf.st_mode)) {
+ rc = scan_tree(path2, file, offset, length, touch);
+ if (rc < 0)
+ return rc;
+ }
+ }
+
+ result |= rc;
+ free(path2);
+
+skip:
+ free(namelist[i]);
+ }
+ free(namelist);
+ return result;
+}
+
+char buf[1024];
+
+static int read_rom(char *path)
+{
+ int fd, rc;
+ size_t size = 0;
+
+ fd = open(path, O_RDWR);
+ if (fd == -1) {
+ perror(path);
+ return -1;
+ }
+
+ rc = write(fd, "1", 2);
+ if (rc <= 0) {
+ close(fd);
+ perror("write");
+ return -1;
+ }
+
+ do {
+ rc = read(fd, buf, sizeof(buf));
+ if (rc > 0)
+ size += rc;
+ } while (rc > 0);
+
+ close(fd);
+ return size;
+}
+
+static int scan_rom(char *path, char *file)
+{
+ struct dirent **namelist;
+ char *name, *path2;
+ int i, n, r, rc = 0, result = 0;
+ struct stat buf;
+
+ n = scandir(path, &namelist, 0, alphasort);
+ if (n < 0) {
+ perror("scandir");
+ return -1;
+ }
+
+ for (i = 0; i < n; i++) {
+ name = namelist[i]->d_name;
+
+ if (fnmatch(".", name, 0) == 0)
+ goto skip;
+ if (fnmatch("..", name, 0) == 0)
+ goto skip;
+
+ path2 = malloc(strlen(path) + strlen(name) + 3);
+ strcpy(path2, path);
+ strcat(path2, "/");
+ strcat(path2, name);
+
+ if (fnmatch(file, name, 0) == 0) {
+ rc = read_rom(path2);
+
+ /*
+ * It's OK if the ROM is unreadable. Maybe there
+ * is no ROM, or some other error occurred. The
+ * important thing is that no MCA happened.
+ */
+ if (rc > 0)
+ fprintf(stderr, "PASS: %s read %d bytes\n", path2, rc);
+ else {
+ fprintf(stderr, "PASS: %s not readable\n", path2);
+ return rc;
+ }
+ } else {
+ r = lstat(path2, &buf);
+ if (r == 0 && S_ISDIR(buf.st_mode)) {
+ rc = scan_rom(path2, file);
+ if (rc < 0)
+ return rc;
+ }
+ }
+
+ result |= rc;
+ free(path2);
+
+skip:
+ free(namelist[i]);
+ }
+ free(namelist);
+ return result;
+}
+
+int main(void)
+{
+ int rc;
+
+ if (map_mem("/dev/mem", 0, 0xA0000, 1) == 0)
+ fprintf(stderr, "PASS: /dev/mem 0x0-0xa0000 is readable\n");
+ else
+ fprintf(stderr, "FAIL: /dev/mem 0x0-0xa0000 not accessible\n");
+
+ /*
+ * It's not safe to blindly read the VGA frame buffer. If you know
+ * how to poke the card the right way, it should respond, but it's
+ * not safe in general. Many machines, e.g., Intel chipsets, cover
+ * up a non-responding card by just returning -1, but others will
+ * report the failure as a machine check.
+ */
+ if (map_mem("/dev/mem", 0xA0000, 0x20000, 0) == 0)
+ fprintf(stderr, "PASS: /dev/mem 0xa0000-0xc0000 is mappable\n");
+ else
+ fprintf(stderr, "FAIL: /dev/mem 0xa0000-0xc0000 not accessible\n");
+
+ if (map_mem("/dev/mem", 0xC0000, 0x40000, 1) == 0)
+ fprintf(stderr, "PASS: /dev/mem 0xc0000-0x100000 is readable\n");
+ else
+ fprintf(stderr, "FAIL: /dev/mem 0xc0000-0x100000 not accessible\n");
+
+ /*
+ * Often you can map all the individual pieces above (0-0xA0000,
+ * 0xA0000-0xC0000, and 0xC0000-0x100000), but can't map the whole
+ * thing at once. This is because the individual pieces use different
+ * attributes, and there's no single attribute supported over the
+ * whole region.
+ */
+ rc = map_mem("/dev/mem", 0, 1024*1024, 0);
+ if (rc == 0)
+ fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 is mappable\n");
+ else if (rc > 0)
+ fprintf(stderr, "PASS: /dev/mem 0x0-0x100000 not mappable\n");
+ else
+ fprintf(stderr, "FAIL: /dev/mem 0x0-0x100000 not accessible\n");
+
+ scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 0xA0000, 1);
+ scan_tree("/sys/class/pci_bus", "legacy_mem", 0xA0000, 0x20000, 0);
+ scan_tree("/sys/class/pci_bus", "legacy_mem", 0xC0000, 0x40000, 1);
+ scan_tree("/sys/class/pci_bus", "legacy_mem", 0, 1024*1024, 0);
+
+ scan_rom("/sys/devices", "rom");
+
+ scan_tree("/proc/bus/pci", "??.?", 0, 0xA0000, 1);
+ scan_tree("/proc/bus/pci", "??.?", 0xA0000, 0x20000, 0);
+ scan_tree("/proc/bus/pci", "??.?", 0xC0000, 0x40000, 1);
+ scan_tree("/proc/bus/pci", "??.?", 0, 1024*1024, 0);
+
+ return rc;
+}
diff --git a/Documentation/ia64/aliasing.txt b/Documentation/ia64/aliasing.txt
new file mode 100644
index 00000000000..5a4dea6abeb
--- /dev/null
+++ b/Documentation/ia64/aliasing.txt
@@ -0,0 +1,221 @@
+ MEMORY ATTRIBUTE ALIASING ON IA-64
+
+ Bjorn Helgaas
+ <bjorn.helgaas@hp.com>
+ May 4, 2006
+
+
+MEMORY ATTRIBUTES
+
+ Itanium supports several attributes for virtual memory references.
+ The attribute is part of the virtual translation, i.e., it is
+ contained in the TLB entry. The ones of most interest to the Linux
+ kernel are:
+
+ WB Write-back (cacheable)
+ UC Uncacheable
+ WC Write-coalescing
+
+ System memory typically uses the WB attribute. The UC attribute is
+ used for memory-mapped I/O devices. The WC attribute is uncacheable
+ like UC is, but writes may be delayed and combined to increase
+ performance for things like frame buffers.
+
+ The Itanium architecture requires that we avoid accessing the same
+ page with both a cacheable mapping and an uncacheable mapping[1].
+
+ The design of the chipset determines which attributes are supported
+ on which regions of the address space. For example, some chipsets
+ support either WB or UC access to main memory, while others support
+ only WB access.
+
+MEMORY MAP
+
+ Platform firmware describes the physical memory map and the
+ supported attributes for each region. At boot-time, the kernel uses
+ the EFI GetMemoryMap() interface. ACPI can also describe memory
+ devices and the attributes they support, but Linux/ia64 currently
+ doesn't use this information.
+
+ The kernel uses the efi_memmap table returned from GetMemoryMap() to
+ learn the attributes supported by each region of physical address
+ space. Unfortunately, this table does not completely describe the
+ address space because some machines omit some or all of the MMIO
+ regions from the map.
+
+ The kernel maintains another table, kern_memmap, which describes the
+ memory Linux is actually using and the attribute for each region.
+ This contains only system memory; it does not contain MMIO space.
+
+ The kern_memmap table typically contains only a subset of the system
+ memory described by the efi_memmap. Linux/ia64 can't use all memory
+ in the system because of constraints imposed by the identity mapping
+ scheme.
+
+ The efi_memmap table is preserved unmodified because the original
+ boot-time information is required for kexec.
+
+KERNEL IDENTITY MAPPINGS
+
+ Linux/ia64 identity mappings are done with large pages, currently
+ either 16MB or 64MB, referred to as "granules." Cacheable mappings
+ are speculative[2], so the processor can read any location in the
+ page at any time, independent of the programmer's intentions. This
+ means that to avoid attribute aliasing, Linux can create a cacheable
+ identity mapping only when the entire granule supports cacheable
+ access.
+
+ Therefore, kern_memmap contains only full granule-sized regions that
+ can referenced safely by an identity mapping.
+
+ Uncacheable mappings are not speculative, so the processor will
+ generate UC accesses only to locations explicitly referenced by
+ software. This allows UC identity mappings to cover granules that
+ are only partially populated, or populated with a combination of UC
+ and WB regions.
+
+USER MAPPINGS
+
+ User mappings are typically done with 16K or 64K pages. The smaller
+ page size allows more flexibility because only 16K or 64K has to be
+ homogeneous with respect to memory attributes.
+
+POTENTIAL ATTRIBUTE ALIASING CASES
+
+ There are several ways the kernel creates new mappings:
+
+ mmap of /dev/mem
+
+ This uses remap_pfn_range(), which creates user mappings. These
+ mappings may be either WB or UC. If the region being mapped
+ happens to be in kern_memmap, meaning that it may also be mapped
+ by a kernel identity mapping, the user mapping must use the same
+ attribute as the kernel mapping.
+
+ If the region is not in kern_memmap, the user mapping should use
+ an attribute reported as being supported in the EFI memory map.
+
+ Since the EFI memory map does not describe MMIO on some
+ machines, this should use an uncacheable mapping as a fallback.
+
+ mmap of /sys/class/pci_bus/.../legacy_mem
+
+ This is very similar to mmap of /dev/mem, except that legacy_mem
+ only allows mmap of the one megabyte "legacy MMIO" area for a
+ specific PCI bus. Typically this is the first megabyte of
+ physical address space, but it may be different on machines with
+ several VGA devices.
+
+ "X" uses this to access VGA frame buffers. Using legacy_mem
+ rather than /dev/mem allows multiple instances of X to talk to
+ different VGA cards.
+
+ The /dev/mem mmap constraints apply.
+
+ mmap of /proc/bus/pci/.../??.?
+
+ This is an MMIO mmap of PCI functions, which additionally may or
+ may not be requested as using the WC attribute.
+
+ If WC is requested, and the region in kern_memmap is either WC
+ or UC, and the EFI memory map designates the region as WC, then
+ the WC mapping is allowed.
+
+ Otherwise, the user mapping must use the same attribute as the
+ kernel mapping.
+
+ read/write of /dev/mem
+
+ This uses copy_from_user(), which implicitly uses a kernel
+ identity mapping. This is obviously safe for things in
+ kern_memmap.
+
+ There may be corner cases of things that are not in kern_memmap,
+ but could be accessed this way. For example, registers in MMIO
+ space are not in kern_memmap, but could be accessed with a UC
+ mapping. This would not cause attribute aliasing. But
+ registers typically can be accessed only with four-byte or
+ eight-byte accesses, and the copy_from_user() path doesn't allow
+ any control over the access size, so this would be dangerous.
+
+ ioremap()
+
+ This returns a mapping for use inside the kernel.
+
+ If the region is in kern_memmap, we should use the attribute
+ specified there.
+
+ If the EFI memory map reports that the entire granule supports
+ WB, we should use that (granules that are partially reserved
+ or occupied by firmware do not appear in kern_memmap).
+
+ If the granule contains non-WB memory, but we can cover the
+ region safely with kernel page table mappings, we can use
+ ioremap_page_range() as most other architectures do.
+
+ Failing all of the above, we have to fall back to a UC mapping.
+
+PAST PROBLEM CASES
+
+ mmap of various MMIO regions from /dev/mem by "X" on Intel platforms
+
+ The EFI memory map may not report these MMIO regions.
+
+ These must be allowed so that X will work. This means that
+ when the EFI memory map is incomplete, every /dev/mem mmap must
+ succeed. It may create either WB or UC user mappings, depending
+ on whether the region is in kern_memmap or the EFI memory map.
+
+ mmap of 0x0-0x9FFFF /dev/mem by "hwinfo" on HP sx1000 with VGA enabled
+
+ The EFI memory map reports the following attributes:
+ 0x00000-0x9FFFF WB only
+ 0xA0000-0xBFFFF UC only (VGA frame buffer)
+ 0xC0000-0xFFFFF WB only
+
+ This mmap is done with user pages, not kernel identity mappings,
+ so it is safe to use WB mappings.
+
+ The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000,
+ which uses a granule-sized UC mapping. This granule will cover some
+ WB-only memory, but since UC is non-speculative, the processor will
+ never generate an uncacheable reference to the WB-only areas unless
+ the driver explicitly touches them.
+
+ mmap of 0x0-0xFFFFF legacy_mem by "X"
+
+ If the EFI memory map reports that the entire range supports the
+ same attributes, we can allow the mmap (and we will prefer WB if
+ supported, as is the case with HP sx[12]000 machines with VGA
+ disabled).
+
+ If EFI reports the range as partly WB and partly UC (as on sx[12]000
+ machines with VGA enabled), we must fail the mmap because there's no
+ safe attribute to use.
+
+ If EFI reports some of the range but not all (as on Intel firmware
+ that doesn't report the VGA frame buffer at all), we should fail the
+ mmap and force the user to map just the specific region of interest.
+
+ mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled
+
+ The EFI memory map reports the following attributes:
+ 0x00000-0xFFFFF WB only (no VGA MMIO hole)
+
+ This is a special case of the previous case, and the mmap should
+ fail for the same reason as above.
+
+ read of /sys/devices/.../rom
+
+ For VGA devices, this may cause an ioremap() of 0xC0000. This
+ used to be done with a UC mapping, because the VGA frame buffer
+ at 0xA0000 prevents use of a WB granule. The UC mapping causes
+ an MCA on HP sx[12]000 chipsets.
+
+ We should use WB page table mappings to avoid covering the VGA
+ frame buffer.
+
+NOTES
+
+ [1] SDM rev 2.2, vol 2, sec 4.4.1.
+ [2] SDM rev 2.2, vol 2, sec 4.4.6.
diff --git a/Documentation/ia64/efirtc.txt b/Documentation/ia64/efirtc.txt
index ede2c1e51cd..057e6bebda8 100644
--- a/Documentation/ia64/efirtc.txt
+++ b/Documentation/ia64/efirtc.txt
@@ -26,7 +26,7 @@ to initialize the system view of the time during boot.
Because we wanted to minimize the impact on existing user-level apps using
the CMOS clock, we decided to expose an API that was very similar to the one
used today with the legacy RTC driver (driver/char/rtc.c). However, because
-EFI provides a simpler services, not all all ioctl() are available. Also
+EFI provides a simpler services, not all ioctl() are available. Also
new ioctl()s have been introduced for things that EFI provides but not the
legacy.
diff --git a/Documentation/ia64/err_inject.txt b/Documentation/ia64/err_inject.txt
new file mode 100644
index 00000000000..9f651c18142
--- /dev/null
+++ b/Documentation/ia64/err_inject.txt
@@ -0,0 +1,1068 @@
+
+IPF Machine Check (MC) error inject tool
+========================================
+
+IPF Machine Check (MC) error inject tool is used to inject MC
+errors from Linux. The tool is a test bed for IPF MC work flow including
+hardware correctable error handling, OS recoverable error handling, MC
+event logging, etc.
+
+The tool includes two parts: a kernel driver and a user application
+sample. The driver provides interface to PAL to inject error
+and query error injection capabilities. The driver code is in
+arch/ia64/kernel/err_inject.c. The application sample (shown below)
+provides a combination of various errors and calls the driver's interface
+(sysfs interface) to inject errors or query error injection capabilities.
+
+The tool can be used to test Intel IPF machine MC handling capabilities.
+It's especially useful for people who can not access hardware MC injection
+tool to inject error. It's also very useful to integrate with other
+software test suits to do stressful testing on IPF.
+
+Below is a sample application as part of the whole tool. The sample
+can be used as a working test tool. Or it can be expanded to include
+more features. It also can be a integrated into a library or other user
+application to have more thorough test.
+
+The sample application takes err.conf as error configuration input. GCC
+compiles the code. After you install err_inject driver, you can run
+this sample application to inject errors.
+
+Errata: Itanium 2 Processors Specification Update lists some errata against
+the pal_mc_error_inject PAL procedure. The following err.conf has been tested
+on latest Montecito PAL.
+
+err.conf:
+
+#This is configuration file for err_inject_tool.
+#The format of the each line is:
+#cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer
+#where
+# cpu: logical cpu number the error will be inject in.
+# loop: times the error will be injected.
+# interval: In second. every so often one error is injected.
+# err_type_info, err_struct_info: PAL parameters.
+#
+#Note: All values are hex w/o or w/ 0x prefix.
+
+
+#On cpu2, inject only total 0x10 errors, interval 5 seconds
+#corrected, data cache, hier-2, physical addr(assigned by tool code).
+#working on Montecito latest PAL.
+2, 10, 5, 4101, 95
+
+#On cpu4, inject and consume total 0x10 errors, interval 5 seconds
+#corrected, data cache, hier-2, physical addr(assigned by tool code).
+#working on Montecito latest PAL.
+4, 10, 5, 4109, 95
+
+#On cpu15, inject and consume total 0x10 errors, interval 5 seconds
+#recoverable, DTR0, hier-2.
+#working on Montecito latest PAL.
+0xf, 0x10, 5, 4249, 15
+
+The sample application source code:
+
+err_injection_tool.c:
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Copyright (C) 2006 Intel Co
+ * Fenghua Yu <fenghua.yu@intel.com>
+ *
+ */
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <sched.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/ipc.h>
+#include <sys/sem.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <sys/shm.h>
+
+#define MAX_FN_SIZE 256
+#define MAX_BUF_SIZE 256
+#define DATA_BUF_SIZE 256
+#define NR_CPUS 512
+#define MAX_TASK_NUM 2048
+#define MIN_INTERVAL 5 // seconds
+#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte.
+#define PARA_FIELD_NUM 5
+#define MASK_SIZE (NR_CPUS/64)
+#define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/"
+
+int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask);
+
+int verbose;
+#define vbprintf if (verbose) printf
+
+int log_info(int cpu, const char *fmt, ...)
+{
+ FILE *log;
+ char fn[MAX_FN_SIZE];
+ char buf[MAX_BUF_SIZE];
+ va_list args;
+
+ sprintf(fn, "%d.log", cpu);
+ log=fopen(fn, "a+");
+ if (log==NULL) {
+ perror("Error open:");
+ return -1;
+ }
+
+ va_start(args, fmt);
+ vprintf(fmt, args);
+ memset(buf, 0, MAX_BUF_SIZE);
+ vsprintf(buf, fmt, args);
+ va_end(args);
+
+ fwrite(buf, sizeof(buf), 1, log);
+ fclose(log);
+
+ return 0;
+}
+
+typedef unsigned long u64;
+typedef unsigned int u32;
+
+typedef union err_type_info_u {
+ struct {
+ u64 mode : 3, /* 0-2 */
+ err_inj : 3, /* 3-5 */
+ err_sev : 2, /* 6-7 */
+ err_struct : 5, /* 8-12 */
+ struct_hier : 3, /* 13-15 */
+ reserved : 48; /* 16-63 */
+ } err_type_info_u;
+ u64 err_type_info;
+} err_type_info_t;
+
+typedef union err_struct_info_u {
+ struct {
+ u64 siv : 1, /* 0 */
+ c_t : 2, /* 1-2 */
+ cl_p : 3, /* 3-5 */
+ cl_id : 3, /* 6-8 */
+ cl_dp : 1, /* 9 */
+ reserved1 : 22, /* 10-31 */
+ tiv : 1, /* 32 */
+ trigger : 4, /* 33-36 */
+ trigger_pl : 3, /* 37-39 */
+ reserved2 : 24; /* 40-63 */
+ } err_struct_info_cache;
+ struct {
+ u64 siv : 1, /* 0 */
+ tt : 2, /* 1-2 */
+ tc_tr : 2, /* 3-4 */
+ tr_slot : 8, /* 5-12 */
+ reserved1 : 19, /* 13-31 */
+ tiv : 1, /* 32 */
+ trigger : 4, /* 33-36 */
+ trigger_pl : 3, /* 37-39 */
+ reserved2 : 24; /* 40-63 */
+ } err_struct_info_tlb;
+ struct {
+ u64 siv : 1, /* 0 */
+ regfile_id : 4, /* 1-4 */
+ reg_num : 7, /* 5-11 */
+ reserved1 : 20, /* 12-31 */
+ tiv : 1, /* 32 */
+ trigger : 4, /* 33-36 */
+ trigger_pl : 3, /* 37-39 */
+ reserved2 : 24; /* 40-63 */
+ } err_struct_info_register;
+ struct {
+ u64 reserved;
+ } err_struct_info_bus_processor_interconnect;
+ u64 err_struct_info;
+} err_struct_info_t;
+
+typedef union err_data_buffer_u {
+ struct {
+ u64 trigger_addr; /* 0-63 */
+ u64 inj_addr; /* 64-127 */
+ u64 way : 5, /* 128-132 */
+ index : 20, /* 133-152 */
+ : 39; /* 153-191 */
+ } err_data_buffer_cache;
+ struct {
+ u64 trigger_addr; /* 0-63 */
+ u64 inj_addr; /* 64-127 */
+ u64 way : 5, /* 128-132 */
+ index : 20, /* 133-152 */
+ reserved : 39; /* 153-191 */
+ } err_data_buffer_tlb;
+ struct {
+ u64 trigger_addr; /* 0-63 */
+ } err_data_buffer_register;
+ struct {
+ u64 reserved; /* 0-63 */
+ } err_data_buffer_bus_processor_interconnect;
+ u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
+} err_data_buffer_t;
+
+typedef union capabilities_u {
+ struct {
+ u64 i : 1,
+ d : 1,
+ rv : 1,
+ tag : 1,
+ data : 1,
+ mesi : 1,
+ dp : 1,
+ reserved1 : 3,
+ pa : 1,
+ va : 1,
+ wi : 1,
+ reserved2 : 20,
+ trigger : 1,
+ trigger_pl : 1,
+ reserved3 : 30;
+ } capabilities_cache;
+ struct {
+ u64 d : 1,
+ i : 1,
+ rv : 1,
+ tc : 1,
+ tr : 1,
+ reserved1 : 27,
+ trigger : 1,
+ trigger_pl : 1,
+ reserved2 : 30;
+ } capabilities_tlb;
+ struct {
+ u64 gr_b0 : 1,
+ gr_b1 : 1,
+ fr : 1,
+ br : 1,
+ pr : 1,
+ ar : 1,
+ cr : 1,
+ rr : 1,
+ pkr : 1,
+ dbr : 1,
+ ibr : 1,
+ pmc : 1,
+ pmd : 1,
+ reserved1 : 3,
+ regnum : 1,
+ reserved2 : 15,
+ trigger : 1,
+ trigger_pl : 1,
+ reserved3 : 30;
+ } capabilities_register;
+ struct {
+ u64 reserved;
+ } capabilities_bus_processor_interconnect;
+} capabilities_t;
+
+typedef struct resources_s {
+ u64 ibr0 : 1,
+ ibr2 : 1,
+ ibr4 : 1,
+ ibr6 : 1,
+ dbr0 : 1,
+ dbr2 : 1,
+ dbr4 : 1,
+ dbr6 : 1,
+ reserved : 48;
+} resources_t;
+
+
+long get_page_size(void)
+{
+ long page_size=sysconf(_SC_PAGESIZE);
+ return page_size;
+}
+
+#define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size())
+#define SHM_SIZE (2*PAGE_SIZE*NR_CPUS)
+#define SHM_VA 0x2000000100000000
+
+int shmid;
+void *shmaddr;
+
+int create_shm(void)
+{
+ key_t key;
+ char fn[MAX_FN_SIZE];
+
+ /* cpu0 is always existing */
+ sprintf(fn, PATH_FORMAT, 0);
+ if ((key = ftok(fn, 's')) == -1) {
+ perror("ftok");
+ return -1;
+ }
+
+ shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT);
+ if (shmid == -1) {
+ if (errno==EEXIST) {
+ shmid = shmget(key, SHM_SIZE, 0);
+ if (shmid == -1) {
+ perror("shmget");
+ return -1;
+ }
+ }
+ else {
+ perror("shmget");
+ return -1;
+ }
+ }
+ vbprintf("shmid=%d", shmid);
+
+ /* connect to the segment: */
+ shmaddr = shmat(shmid, (void *)SHM_VA, 0);
+ if (shmaddr == (void*)-1) {
+ perror("shmat");
+ return -1;
+ }
+
+ memset(shmaddr, 0, SHM_SIZE);
+ mlock(shmaddr, SHM_SIZE);
+
+ return 0;
+}
+
+int free_shm()
+{
+ munlock(shmaddr, SHM_SIZE);
+ shmdt(shmaddr);
+ semctl(shmid, 0, IPC_RMID);
+
+ return 0;
+}
+
+#ifdef _SEM_SEMUN_UNDEFINED
+union semun
+{
+ int val;
+ struct semid_ds *buf;
+ unsigned short int *array;
+ struct seminfo *__buf;
+};
+#endif
+
+u32 mode=1; /* 1: physical mode; 2: virtual mode. */
+int one_lock=1;
+key_t key[NR_CPUS];
+int semid[NR_CPUS];
+
+int create_sem(int cpu)
+{
+ union semun arg;
+ char fn[MAX_FN_SIZE];
+ int sid;
+
+ sprintf(fn, PATH_FORMAT, cpu);
+ sprintf(fn, "%s/%s", fn, "err_type_info");
+ if ((key[cpu] = ftok(fn, 'e')) == -1) {
+ perror("ftok");
+ return -1;
+ }
+
+ if (semid[cpu]!=0)
+ return 0;
+
+ /* clear old semaphore */
+ if ((sid = semget(key[cpu], 1, 0)) != -1)
+ semctl(sid, 0, IPC_RMID);
+
+ /* get one semaphore */
+ if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) {
+ perror("semget");
+ printf("Please remove semaphore with key=0x%lx, then run the tool.\n",
+ (u64)key[cpu]);
+ return -1;
+ }
+
+ vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu,
+ (u64)key[cpu]);
+ /* initialize the semaphore to 1: */
+ arg.val = 1;
+ if (semctl(semid[cpu], 0, SETVAL, arg) == -1) {
+ perror("semctl");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int lock(int cpu)
+{
+ struct sembuf lock;
+
+ lock.sem_num = cpu;
+ lock.sem_op = 1;
+ semop(semid[cpu], &lock, 1);
+
+ return 0;
+}
+
+static int unlock(int cpu)
+{
+ struct sembuf unlock;
+
+ unlock.sem_num = cpu;
+ unlock.sem_op = -1;
+ semop(semid[cpu], &unlock, 1);
+
+ return 0;
+}
+
+void free_sem(int cpu)
+{
+ semctl(semid[cpu], 0, IPC_RMID);
+}
+
+int wr_multi(char *fn, unsigned long *data, int size)
+{
+ int fd;
+ char buf[MAX_BUF_SIZE];
+ int ret;
+
+ if (size==1)
+ sprintf(buf, "%lx", *data);
+ else if (size==3)
+ sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]);
+ else {
+ fprintf(stderr,"write to file with wrong size!\n");
+ return -1;
+ }
+
+ fd=open(fn, O_RDWR);
+ if (!fd) {
+ perror("Error:");
+ return -1;
+ }
+ ret=write(fd, buf, sizeof(buf));
+ close(fd);
+ return ret;
+}
+
+int wr(char *fn, unsigned long data)
+{
+ return wr_multi(fn, &data, 1);
+}
+
+int rd(char *fn, unsigned long *data)
+{
+ int fd;
+ char buf[MAX_BUF_SIZE];
+
+ fd=open(fn, O_RDONLY);
+ if (fd<0) {
+ perror("Error:");
+ return -1;
+ }
+ read(fd, buf, MAX_BUF_SIZE);
+ *data=strtoul(buf, NULL, 16);
+ close(fd);
+ return 0;
+}
+
+int rd_status(char *path, int *status)
+{
+ char fn[MAX_FN_SIZE];
+ sprintf(fn, "%s/status", path);
+ if (rd(fn, (u64*)status)<0) {
+ perror("status reading error.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+int rd_capabilities(char *path, u64 *capabilities)
+{
+ char fn[MAX_FN_SIZE];
+ sprintf(fn, "%s/capabilities", path);
+ if (rd(fn, capabilities)<0) {
+ perror("capabilities reading error.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+int rd_all(char *path)
+{
+ unsigned long err_type_info, err_struct_info, err_data_buffer;
+ int status;
+ unsigned long capabilities, resources;
+ char fn[MAX_FN_SIZE];
+
+ sprintf(fn, "%s/err_type_info", path);
+ if (rd(fn, &err_type_info)<0) {
+ perror("err_type_info reading error.\n");
+ return -1;
+ }
+ printf("err_type_info=%lx\n", err_type_info);
+
+ sprintf(fn, "%s/err_struct_info", path);
+ if (rd(fn, &err_struct_info)<0) {
+ perror("err_struct_info reading error.\n");
+ return -1;
+ }
+ printf("err_struct_info=%lx\n", err_struct_info);
+
+ sprintf(fn, "%s/err_data_buffer", path);
+ if (rd(fn, &err_data_buffer)<0) {
+ perror("err_data_buffer reading error.\n");
+ return -1;
+ }
+ printf("err_data_buffer=%lx\n", err_data_buffer);
+
+ sprintf(fn, "%s/status", path);
+ if (rd("status", (u64*)&status)<0) {
+ perror("status reading error.\n");
+ return -1;
+ }
+ printf("status=%d\n", status);
+
+ sprintf(fn, "%s/capabilities", path);
+ if (rd(fn,&capabilities)<0) {
+ perror("capabilities reading error.\n");
+ return -1;
+ }
+ printf("capabilities=%lx\n", capabilities);
+
+ sprintf(fn, "%s/resources", path);
+ if (rd(fn, &resources)<0) {
+ perror("resources reading error.\n");
+ return -1;
+ }
+ printf("resources=%lx\n", resources);
+
+ return 0;
+}
+
+int query_capabilities(char *path, err_type_info_t err_type_info,
+ u64 *capabilities)
+{
+ char fn[MAX_FN_SIZE];
+ err_struct_info_t err_struct_info;
+ err_data_buffer_t err_data_buffer;
+
+ err_struct_info.err_struct_info=0;
+ memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8);
+
+ sprintf(fn, "%s/err_type_info", path);
+ wr(fn, err_type_info.err_type_info);
+ sprintf(fn, "%s/err_struct_info", path);
+ wr(fn, 0x0);
+ sprintf(fn, "%s/err_data_buffer", path);
+ wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
+
+ // Fire pal_mc_error_inject procedure.
+ sprintf(fn, "%s/call_start", path);
+ wr(fn, mode);
+
+ if (rd_capabilities(path, capabilities)<0)
+ return -1;
+
+ return 0;
+}
+
+int query_all_capabilities()
+{
+ int status;
+ err_type_info_t err_type_info;
+ int err_sev, err_struct, struct_hier;
+ int cap=0;
+ u64 capabilities;
+ char path[MAX_FN_SIZE];
+
+ err_type_info.err_type_info=0; // Initial
+ err_type_info.err_type_info_u.mode=0; // Query mode;
+ err_type_info.err_type_info_u.err_inj=0;
+
+ printf("All capabilities implemented in pal_mc_error_inject:\n");
+ sprintf(path, PATH_FORMAT ,0);
+ for (err_sev=0;err_sev<3;err_sev++)
+ for (err_struct=0;err_struct<5;err_struct++)
+ for (struct_hier=0;struct_hier<5;struct_hier++)
+ {
+ status=-1;
+ capabilities=0;
+ err_type_info.err_type_info_u.err_sev=err_sev;
+ err_type_info.err_type_info_u.err_struct=err_struct;
+ err_type_info.err_type_info_u.struct_hier=struct_hier;
+
+ if (query_capabilities(path, err_type_info, &capabilities)<0)
+ continue;
+
+ if (rd_status(path, &status)<0)
+ continue;
+
+ if (status==0) {
+ cap=1;
+ printf("For err_sev=%d, err_struct=%d, struct_hier=%d: ",
+ err_sev, err_struct, struct_hier);
+ printf("capabilities 0x%lx\n", capabilities);
+ }
+ }
+ if (!cap) {
+ printf("No capabilities supported.\n");
+ return 0;
+ }
+
+ return 0;
+}
+
+int err_inject(int cpu, char *path, err_type_info_t err_type_info,
+ err_struct_info_t err_struct_info,
+ err_data_buffer_t err_data_buffer)
+{
+ int status;
+ char fn[MAX_FN_SIZE];
+
+ log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ",
+ err_type_info.err_type_info,
+ err_struct_info.err_struct_info);
+ log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n",
+ err_data_buffer.err_data_buffer[0],
+ err_data_buffer.err_data_buffer[1],
+ err_data_buffer.err_data_buffer[2]);
+ sprintf(fn, "%s/err_type_info", path);
+ wr(fn, err_type_info.err_type_info);
+ sprintf(fn, "%s/err_struct_info", path);
+ wr(fn, err_struct_info.err_struct_info);
+ sprintf(fn, "%s/err_data_buffer", path);
+ wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE);
+
+ // Fire pal_mc_error_inject procedure.
+ sprintf(fn, "%s/call_start", path);
+ wr(fn,mode);
+
+ if (rd_status(path, &status)<0) {
+ vbprintf("fail: read status\n");
+ return -100;
+ }
+
+ if (status!=0) {
+ log_info(cpu, "fail: status=%d\n", status);
+ return status;
+ }
+
+ return status;
+}
+
+static int construct_data_buf(char *path, err_type_info_t err_type_info,
+ err_struct_info_t err_struct_info,
+ err_data_buffer_t *err_data_buffer,
+ void *va1)
+{
+ char fn[MAX_FN_SIZE];
+ u64 virt_addr=0, phys_addr=0;
+
+ vbprintf("va1=%lx\n", (u64)va1);
+ memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8);
+
+ switch (err_type_info.err_type_info_u.err_struct) {
+ case 1: // Cache
+ switch (err_struct_info.err_struct_info_cache.cl_id) {
+ case 1: //Virtual addr
+ err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1;
+ break;
+ case 2: //Phys addr
+ sprintf(fn, "%s/virtual_to_phys", path);
+ virt_addr=(u64)va1;
+ if (wr(fn,virt_addr)<0)
+ return -1;
+ rd(fn, &phys_addr);
+ err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr;
+ break;
+ default:
+ printf("Not supported cl_id\n");
+ break;
+ }
+ break;
+ case 2: // TLB
+ break;
+ case 3: // Register file
+ break;
+ case 4: // Bus/system interconnect
+ default:
+ printf("Not supported err_struct\n");
+ break;
+ }
+
+ return 0;
+}
+
+typedef struct {
+ u64 cpu;
+ u64 loop;
+ u64 interval;
+ u64 err_type_info;
+ u64 err_struct_info;
+ u64 err_data_buffer[ERR_DATA_BUFFER_SIZE];
+} parameters_t;
+
+parameters_t line_para;
+int para;
+
+static int empty_data_buffer(u64 *err_data_buffer)
+{
+ int empty=1;
+ int i;
+
+ for (i=0;i<ERR_DATA_BUFFER_SIZE; i++)
+ if (err_data_buffer[i]!=-1)
+ empty=0;
+
+ return empty;
+}
+
+int err_inj()
+{
+ err_type_info_t err_type_info;
+ err_struct_info_t err_struct_info;
+ err_data_buffer_t err_data_buffer;
+ int count;
+ FILE *fp;
+ unsigned long cpu, loop, interval, err_type_info_conf, err_struct_info_conf;
+ u64 err_data_buffer_conf[ERR_DATA_BUFFER_SIZE];
+ int num;
+ int i;
+ char path[MAX_FN_SIZE];
+ parameters_t parameters[MAX_TASK_NUM]={};
+ pid_t child_pid[MAX_TASK_NUM];
+ time_t current_time;
+ int status;
+
+ if (!para) {
+ fp=fopen("err.conf", "r");
+ if (fp==NULL) {
+ perror("Error open err.conf");
+ return -1;
+ }
+
+ num=0;
+ while (!feof(fp)) {
+ char buf[256];
+ memset(buf,0,256);
+ fgets(buf, 256, fp);
+ count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
+ &cpu, &loop, &interval,&err_type_info_conf,
+ &err_struct_info_conf,
+ &err_data_buffer_conf[0],
+ &err_data_buffer_conf[1],
+ &err_data_buffer_conf[2]);
+ if (count!=PARA_FIELD_NUM+3) {
+ err_data_buffer_conf[0]=-1;
+ err_data_buffer_conf[1]=-1;
+ err_data_buffer_conf[2]=-1;
+ count=sscanf(buf, "%lx, %lx, %lx, %lx, %lx\n",
+ &cpu, &loop, &interval,&err_type_info_conf,
+ &err_struct_info_conf);
+ if (count!=PARA_FIELD_NUM)
+ continue;
+ }
+
+ parameters[num].cpu=cpu;
+ parameters[num].loop=loop;
+ parameters[num].interval= interval>MIN_INTERVAL
+ ?interval:MIN_INTERVAL;
+ parameters[num].err_type_info=err_type_info_conf;
+ parameters[num].err_struct_info=err_struct_info_conf;
+ memcpy(parameters[num++].err_data_buffer,
+ err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ;
+
+ if (num>=MAX_TASK_NUM)
+ break;
+ }
+ }
+ else {
+ parameters[0].cpu=line_para.cpu;
+ parameters[0].loop=line_para.loop;
+ parameters[0].interval= line_para.interval>MIN_INTERVAL
+ ?line_para.interval:MIN_INTERVAL;
+ parameters[0].err_type_info=line_para.err_type_info;
+ parameters[0].err_struct_info=line_para.err_struct_info;
+ memcpy(parameters[0].err_data_buffer,
+ line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ;
+
+ num=1;
+ }
+
+ /* Create semaphore: If one_lock, one semaphore for all processors.
+ Otherwise, one semaphore for each processor. */
+ if (one_lock) {
+ if (create_sem(0)) {
+ printf("Can not create semaphore...exit\n");
+ free_sem(0);
+ return -1;
+ }
+ }
+ else {
+ for (i=0;i<num;i++) {
+ if (create_sem(parameters[i].cpu)) {
+ printf("Can not create semaphore for cpu%d...exit\n",i);
+ free_sem(parameters[num].cpu);
+ return -1;
+ }
+ }
+ }
+
+ /* Create a shm segment which will be used to inject/consume errors on.*/
+ if (create_shm()==-1) {
+ printf("Error to create shm...exit\n");
+ return -1;
+ }
+
+ for (i=0;i<num;i++) {
+ pid_t pid;
+
+ current_time=time(NULL);
+ log_info(parameters[i].cpu, "\nBegine at %s", ctime(&current_time));
+ log_info(parameters[i].cpu, "Configurations:\n");
+ log_info(parameters[i].cpu,"On cpu%ld: loop=%lx, interval=%lx(s)",
+ parameters[i].cpu,
+ parameters[i].loop,
+ parameters[i].interval);
+ log_info(parameters[i].cpu," err_type_info=%lx,err_struct_info=%lx\n",
+ parameters[i].err_type_info,
+ parameters[i].err_struct_info);
+
+ sprintf(path, PATH_FORMAT, (int)parameters[i].cpu);
+ err_type_info.err_type_info=parameters[i].err_type_info;
+ err_struct_info.err_struct_info=parameters[i].err_struct_info;
+ memcpy(err_data_buffer.err_data_buffer,
+ parameters[i].err_data_buffer,
+ ERR_DATA_BUFFER_SIZE*8);
+
+ pid=fork();
+ if (pid==0) {
+ unsigned long mask[MASK_SIZE];
+ int j, k;
+
+ void *va1, *va2;
+
+ /* Allocate two memory areas va1 and va2 in shm */
+ va1=shmaddr+parameters[i].cpu*PAGE_SIZE;
+ va2=shmaddr+parameters[i].cpu*PAGE_SIZE+PAGE_SIZE;
+
+ vbprintf("va1=%lx, va2=%lx\n", (u64)va1, (u64)va2);
+ memset(va1, 0x1, PAGE_SIZE);
+ memset(va2, 0x2, PAGE_SIZE);
+
+ if (empty_data_buffer(err_data_buffer.err_data_buffer))
+ /* If not specified yet, construct data buffer
+ * with va1
+ */
+ construct_data_buf(path, err_type_info,
+ err_struct_info, &err_data_buffer,va1);
+
+ for (j=0;j<MASK_SIZE;j++)
+ mask[j]=0;
+
+ cpu=parameters[i].cpu;
+ k = cpu%64;
+ j = cpu/64;
+ mask[j] = 1UL << k;
+
+ if (sched_setaffinity(0, MASK_SIZE*8, mask)==-1) {
+ perror("Error sched_setaffinity:");
+ return -1;
+ }
+
+ for (j=0; j<parameters[i].loop; j++) {
+ log_info(parameters[i].cpu,"Injection ");
+ log_info(parameters[i].cpu,"on cpu%ld: #%d/%ld ",
+
+ parameters[i].cpu,j+1, parameters[i].loop);
+
+ /* Hold the lock */
+ if (one_lock)
+ lock(0);
+ else
+ /* Hold lock on this cpu */
+ lock(parameters[i].cpu);
+
+ if ((status=err_inject(parameters[i].cpu,
+ path, err_type_info,
+ err_struct_info, err_data_buffer))
+ ==0) {
+ /* consume the error for "inject only"*/
+ memcpy(va2, va1, PAGE_SIZE);
+ memcpy(va1, va2, PAGE_SIZE);
+ log_info(parameters[i].cpu,
+ "successful\n");
+ }
+ else {
+ log_info(parameters[i].cpu,"fail:");
+ log_info(parameters[i].cpu,
+ "status=%d\n", status);
+ unlock(parameters[i].cpu);
+ break;
+ }
+ if (one_lock)
+ /* Release the lock */
+ unlock(0);
+ /* Release lock on this cpu */
+ else
+ unlock(parameters[i].cpu);
+
+ if (j < parameters[i].loop-1)
+ sleep(parameters[i].interval);
+ }
+ current_time=time(NULL);
+ log_info(parameters[i].cpu, "Done at %s", ctime(&current_time));
+ return 0;
+ }
+ else if (pid<0) {
+ perror("Error fork:");
+ continue;
+ }
+ child_pid[i]=pid;
+ }
+ for (i=0;i<num;i++)
+ waitpid(child_pid[i], NULL, 0);
+
+ if (one_lock)
+ free_sem(0);
+ else
+ for (i=0;i<num;i++)
+ free_sem(parameters[i].cpu);
+
+ printf("All done.\n");
+
+ return 0;
+}
+
+void help()
+{
+ printf("err_inject_tool:\n");
+ printf("\t-q: query all capabilities. default: off\n");
+ printf("\t-m: procedure mode. 1: physical 2: virtual. default: 1\n");
+ printf("\t-i: inject errors. default: off\n");
+ printf("\t-l: one lock per cpu. default: one lock for all\n");
+ printf("\t-e: error parameters:\n");
+ printf("\t\tcpu,loop,interval,err_type_info,err_struct_info[,err_data_buffer[0],err_data_buffer[1],err_data_buffer[2]]\n");
+ printf("\t\t cpu: logical cpu number the error will be inject in.\n");
+ printf("\t\t loop: times the error will be injected.\n");
+ printf("\t\t interval: In second. every so often one error is injected.\n");
+ printf("\t\t err_type_info, err_struct_info: PAL parameters.\n");
+ printf("\t\t err_data_buffer: PAL parameter. Optional. If not present,\n");
+ printf("\t\t it's constructed by tool automatically. Be\n");
+ printf("\t\t careful to provide err_data_buffer and make\n");
+ printf("\t\t sure it's working with the environment.\n");
+ printf("\t Note:no space between error parameters.\n");
+ printf("\t default: Take error parameters from err.conf instead of command line.\n");
+ printf("\t-v: verbose. default: off\n");
+ printf("\t-h: help\n\n");
+ printf("The tool will take err.conf file as ");
+ printf("input to inject single or multiple errors ");
+ printf("on one or multiple cpus in parallel.\n");
+}
+
+int main(int argc, char **argv)
+{
+ char c;
+ int do_err_inj=0;
+ int do_query_all=0;
+ int count;
+ u32 m;
+
+ /* Default one lock for all cpu's */
+ one_lock=1;
+ while ((c = getopt(argc, argv, "m:iqvhle:")) != EOF)
+ switch (c) {
+ case 'm': /* Procedure mode. 1: phys 2: virt */
+ count=sscanf(optarg, "%x", &m);
+ if (count!=1 || (m!=1 && m!=2)) {
+ printf("Wrong mode number.\n");
+ help();
+ return -1;
+ }
+ mode=m;
+ break;
+ case 'i': /* Inject errors */
+ do_err_inj=1;
+ break;
+ case 'q': /* Query */
+ do_query_all=1;
+ break;
+ case 'v': /* Verbose */
+ verbose=1;
+ break;
+ case 'l': /* One lock per cpu */
+ one_lock=0;
+ break;
+ case 'e': /* error arguments */
+ /* Take parameters:
+ * #cpu, loop, interval, err_type_info, err_struct_info[, err_data_buffer]
+ * err_data_buffer is optional. Recommend not to specify
+ * err_data_buffer. Better to use tool to generate it.
+ */
+ count=sscanf(optarg,
+ "%lx, %lx, %lx, %lx, %lx, %lx, %lx, %lx\n",
+ &line_para.cpu,
+ &line_para.loop,
+ &line_para.interval,
+ &line_para.err_type_info,
+ &line_para.err_struct_info,
+ &line_para.err_data_buffer[0],
+ &line_para.err_data_buffer[1],
+ &line_para.err_data_buffer[2]);
+ if (count!=PARA_FIELD_NUM+3) {
+ line_para.err_data_buffer[0]=-1,
+ line_para.err_data_buffer[1]=-1,
+ line_para.err_data_buffer[2]=-1;
+ count=sscanf(optarg, "%lx, %lx, %lx, %lx, %lx\n",
+ &line_para.cpu,
+ &line_para.loop,
+ &line_para.interval,
+ &line_para.err_type_info,
+ &line_para.err_struct_info);
+ if (count!=PARA_FIELD_NUM) {
+ printf("Wrong error arguments.\n");
+ help();
+ return -1;
+ }
+ }
+ para=1;
+ break;
+ continue;
+ break;
+ case 'h':
+ help();
+ return 0;
+ default:
+ break;
+ }
+
+ if (do_query_all)
+ query_all_capabilities();
+ if (do_err_inj)
+ err_inj();
+
+ if (!do_query_all && !do_err_inj)
+ help();
+
+ return 0;
+}
+
diff --git a/Documentation/ia64/fsys.txt b/Documentation/ia64/fsys.txt
index 28da181f996..59dd689d9b8 100644
--- a/Documentation/ia64/fsys.txt
+++ b/Documentation/ia64/fsys.txt
@@ -165,7 +165,7 @@ complicated cases.
* Signal handling
The delivery of (asynchronous) signals must be delayed until fsys-mode
-is exited. This is acomplished with the help of the lower-privilege
+is exited. This is accomplished with the help of the lower-privilege
transfer trap: arch/ia64/kernel/process.c:do_notify_resume_user()
checks whether the interrupted task was in fsys-mode and, if so, sets
PSR.lp and returns immediately. When fsys-mode is exited via the
diff --git a/Documentation/ia64/kvm.txt b/Documentation/ia64/kvm.txt
new file mode 100644
index 00000000000..ffb5c80bec3
--- /dev/null
+++ b/Documentation/ia64/kvm.txt
@@ -0,0 +1,83 @@
+Currently, kvm module is in EXPERIMENTAL stage on IA64. This means that
+interfaces are not stable enough to use. So, please don't run critical
+applications in virtual machine.
+We will try our best to improve it in future versions!
+
+ Guide: How to boot up guests on kvm/ia64
+
+This guide is to describe how to enable kvm support for IA-64 systems.
+
+1. Get the kvm source from git.kernel.org.
+ Userspace source:
+ git clone git://git.kernel.org/pub/scm/virt/kvm/kvm-userspace.git
+ Kernel Source:
+ git clone git://git.kernel.org/pub/scm/linux/kernel/git/xiantao/kvm-ia64.git
+
+2. Compile the source code.
+ 2.1 Compile userspace code:
+ (1)cd ./kvm-userspace
+ (2)./configure
+ (3)cd kernel
+ (4)make sync LINUX= $kernel_dir (kernel_dir is the directory of kernel source.)
+ (5)cd ..
+ (6)make qemu
+ (7)cd qemu; make install
+
+ 2.2 Compile kernel source code:
+ (1) cd ./$kernel_dir
+ (2) Make menuconfig
+ (3) Enter into virtualization option, and choose kvm.
+ (4) make
+ (5) Once (4) done, make modules_install
+ (6) Make initrd, and use new kernel to reboot up host machine.
+ (7) Once (6) done, cd $kernel_dir/arch/ia64/kvm
+ (8) insmod kvm.ko; insmod kvm-intel.ko
+
+Note: For step 2, please make sure that host page size == TARGET_PAGE_SIZE of qemu, otherwise, may fail.
+
+3. Get Guest Firmware named as Flash.fd, and put it under right place:
+ (1) If you have the guest firmware (binary) released by Intel Corp for Xen, use it directly.
+
+ (2) If you have no firmware at hand, Please download its source from
+ hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg
+ you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries.
+
+ (3) Rename the firmware you owned to Flash.fd, and copy it to /usr/local/share/qemu
+
+4. Boot up Linux or Windows guests:
+ 4.1 Create or install a image for guest boot. If you have xen experience, it should be easy.
+
+ 4.2 Boot up guests use the following command.
+ /usr/local/bin/qemu-system-ia64 -smp xx -m 512 -hda $your_image
+ (xx is the number of virtual processors for the guest, now the maximum value is 4)
+
+5. Known possible issue on some platforms with old Firmware.
+
+In the event of strange host crash issues, try to solve it through either of the following ways:
+
+(1): Upgrade your Firmware to the latest one.
+
+(2): Applying the below patch to kernel source.
+diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
+index 0b53344..f02b0f7 100644
+--- a/arch/ia64/kernel/pal.S
++++ b/arch/ia64/kernel/pal.S
+@@ -84,7 +84,8 @@ GLOBAL_ENTRY(ia64_pal_call_static)
+ mov ar.pfs = loc1
+ mov rp = loc0
+ ;;
+- srlz.d // serialize restoration of psr.l
++ srlz.i // serialize restoration of psr.l
++ ;;
+ br.ret.sptk.many b0
+ END(ia64_pal_call_static)
+
+6. Bug report:
+ If you found any issues when use kvm/ia64, Please post the bug info to kvm-ia64-devel mailing list.
+ https://lists.sourceforge.net/lists/listinfo/kvm-ia64-devel/
+
+Thanks for your interest! Let's work together, and make kvm/ia64 stronger and stronger!
+
+
+ Xiantao Zhang <xiantao.zhang@intel.com>
+ 2008.3.10
diff --git a/Documentation/ia64/mca.txt b/Documentation/ia64/mca.txt
new file mode 100644
index 00000000000..f097c60cba1
--- /dev/null
+++ b/Documentation/ia64/mca.txt
@@ -0,0 +1,194 @@
+An ad-hoc collection of notes on IA64 MCA and INIT processing. Feel
+free to update it with notes about any area that is not clear.
+
+---
+
+MCA/INIT are completely asynchronous. They can occur at any time, when
+the OS is in any state. Including when one of the cpus is already
+holding a spinlock. Trying to get any lock from MCA/INIT state is
+asking for deadlock. Also the state of structures that are protected
+by locks is indeterminate, including linked lists.
+
+---
+
+The complicated ia64 MCA process. All of this is mandated by Intel's
+specification for ia64 SAL, error recovery and unwind, it is not as
+if we have a choice here.
+
+* MCA occurs on one cpu, usually due to a double bit memory error.
+ This is the monarch cpu.
+
+* SAL sends an MCA rendezvous interrupt (which is a normal interrupt)
+ to all the other cpus, the slaves.
+
+* Slave cpus that receive the MCA interrupt call down into SAL, they
+ end up spinning disabled while the MCA is being serviced.
+
+* If any slave cpu was already spinning disabled when the MCA occurred
+ then it cannot service the MCA interrupt. SAL waits ~20 seconds then
+ sends an unmaskable INIT event to the slave cpus that have not
+ already rendezvoused.
+
+* Because MCA/INIT can be delivered at any time, including when the cpu
+ is down in PAL in physical mode, the registers at the time of the
+ event are _completely_ undefined. In particular the MCA/INIT
+ handlers cannot rely on the thread pointer, PAL physical mode can
+ (and does) modify TP. It is allowed to do that as long as it resets
+ TP on return. However MCA/INIT events expose us to these PAL
+ internal TP changes. Hence curr_task().
+
+* If an MCA/INIT event occurs while the kernel was running (not user
+ space) and the kernel has called PAL then the MCA/INIT handler cannot
+ assume that the kernel stack is in a fit state to be used. Mainly
+ because PAL may or may not maintain the stack pointer internally.
+ Because the MCA/INIT handlers cannot trust the kernel stack, they
+ have to use their own, per-cpu stacks. The MCA/INIT stacks are
+ preformatted with just enough task state to let the relevant handlers
+ do their job.
+
+* Unlike most other architectures, the ia64 struct task is embedded in
+ the kernel stack[1]. So switching to a new kernel stack means that
+ we switch to a new task as well. Because various bits of the kernel
+ assume that current points into the struct task, switching to a new
+ stack also means a new value for current.
+
+* Once all slaves have rendezvoused and are spinning disabled, the
+ monarch is entered. The monarch now tries to diagnose the problem
+ and decide if it can recover or not.
+
+* Part of the monarch's job is to look at the state of all the other
+ tasks. The only way to do that on ia64 is to call the unwinder,
+ as mandated by Intel.
+
+* The starting point for the unwind depends on whether a task is
+ running or not. That is, whether it is on a cpu or is blocked. The
+ monarch has to determine whether or not a task is on a cpu before it
+ knows how to start unwinding it. The tasks that received an MCA or
+ INIT event are no longer running, they have been converted to blocked
+ tasks. But (and its a big but), the cpus that received the MCA
+ rendezvous interrupt are still running on their normal kernel stacks!
+
+* To distinguish between these two cases, the monarch must know which
+ tasks are on a cpu and which are not. Hence each slave cpu that
+ switches to an MCA/INIT stack, registers its new stack using
+ set_curr_task(), so the monarch can tell that the _original_ task is
+ no longer running on that cpu. That gives us a decent chance of
+ getting a valid backtrace of the _original_ task.
+
+* MCA/INIT can be nested, to a depth of 2 on any cpu. In the case of a
+ nested error, we want diagnostics on the MCA/INIT handler that
+ failed, not on the task that was originally running. Again this
+ requires set_curr_task() so the MCA/INIT handlers can register their
+ own stack as running on that cpu. Then a recursive error gets a
+ trace of the failing handler's "task".
+
+[1] My (Keith Owens) original design called for ia64 to separate its
+ struct task and the kernel stacks. Then the MCA/INIT data would be
+ chained stacks like i386 interrupt stacks. But that required
+ radical surgery on the rest of ia64, plus extra hard wired TLB
+ entries with its associated performance degradation. David
+ Mosberger vetoed that approach. Which meant that separate kernel
+ stacks meant separate "tasks" for the MCA/INIT handlers.
+
+---
+
+INIT is less complicated than MCA. Pressing the nmi button or using
+the equivalent command on the management console sends INIT to all
+cpus. SAL picks one of the cpus as the monarch and the rest are
+slaves. All the OS INIT handlers are entered at approximately the same
+time. The OS monarch prints the state of all tasks and returns, after
+which the slaves return and the system resumes.
+
+At least that is what is supposed to happen. Alas there are broken
+versions of SAL out there. Some drive all the cpus as monarchs. Some
+drive them all as slaves. Some drive one cpu as monarch, wait for that
+cpu to return from the OS then drive the rest as slaves. Some versions
+of SAL cannot even cope with returning from the OS, they spin inside
+SAL on resume. The OS INIT code has workarounds for some of these
+broken SAL symptoms, but some simply cannot be fixed from the OS side.
+
+---
+
+The scheduler hooks used by ia64 (curr_task, set_curr_task) are layer
+violations. Unfortunately MCA/INIT start off as massive layer
+violations (can occur at _any_ time) and they build from there.
+
+At least ia64 makes an attempt at recovering from hardware errors, but
+it is a difficult problem because of the asynchronous nature of these
+errors. When processing an unmaskable interrupt we sometimes need
+special code to cope with our inability to take any locks.
+
+---
+
+How is ia64 MCA/INIT different from x86 NMI?
+
+* x86 NMI typically gets delivered to one cpu. MCA/INIT gets sent to
+ all cpus.
+
+* x86 NMI cannot be nested. MCA/INIT can be nested, to a depth of 2
+ per cpu.
+
+* x86 has a separate struct task which points to one of multiple kernel
+ stacks. ia64 has the struct task embedded in the single kernel
+ stack, so switching stack means switching task.
+
+* x86 does not call the BIOS so the NMI handler does not have to worry
+ about any registers having changed. MCA/INIT can occur while the cpu
+ is in PAL in physical mode, with undefined registers and an undefined
+ kernel stack.
+
+* i386 backtrace is not very sensitive to whether a process is running
+ or not. ia64 unwind is very, very sensitive to whether a process is
+ running or not.
+
+---
+
+What happens when MCA/INIT is delivered what a cpu is running user
+space code?
+
+The user mode registers are stored in the RSE area of the MCA/INIT on
+entry to the OS and are restored from there on return to SAL, so user
+mode registers are preserved across a recoverable MCA/INIT. Since the
+OS has no idea what unwind data is available for the user space stack,
+MCA/INIT never tries to backtrace user space. Which means that the OS
+does not bother making the user space process look like a blocked task,
+i.e. the OS does not copy pt_regs and switch_stack to the user space
+stack. Also the OS has no idea how big the user space RSE and memory
+stacks are, which makes it too risky to copy the saved state to a user
+mode stack.
+
+---
+
+How do we get a backtrace on the tasks that were running when MCA/INIT
+was delivered?
+
+mca.c:::ia64_mca_modify_original_stack(). That identifies and
+verifies the original kernel stack, copies the dirty registers from
+the MCA/INIT stack's RSE to the original stack's RSE, copies the
+skeleton struct pt_regs and switch_stack to the original stack, fills
+in the skeleton structures from the PAL minstate area and updates the
+original stack's thread.ksp. That makes the original stack look
+exactly like any other blocked task, i.e. it now appears to be
+sleeping. To get a backtrace, just start with thread.ksp for the
+original task and unwind like any other sleeping task.
+
+---
+
+How do we identify the tasks that were running when MCA/INIT was
+delivered?
+
+If the previous task has been verified and converted to a blocked
+state, then sos->prev_task on the MCA/INIT stack is updated to point to
+the previous task. You can look at that field in dumps or debuggers.
+To help distinguish between the handler and the original tasks,
+handlers have _TIF_MCA_INIT set in thread_info.flags.
+
+The sos data is always in the MCA/INIT handler stack, at offset
+MCA_SOS_OFFSET. You can get that value from mca_asm.h or calculate it
+as KERNEL_STACK_SIZE - sizeof(struct pt_regs) - sizeof(struct
+ia64_sal_os_state), with 16 byte alignment for all structures.
+
+Also the comm field of the MCA/INIT task is modified to include the pid
+of the original task, for humans to use. For example, a comm field of
+'MCA 12159' means that pid 12159 was running when the MCA was
+delivered.
diff --git a/Documentation/ia64/paravirt_ops.txt b/Documentation/ia64/paravirt_ops.txt
new file mode 100644
index 00000000000..39ded02ec33
--- /dev/null
+++ b/Documentation/ia64/paravirt_ops.txt
@@ -0,0 +1,137 @@
+Paravirt_ops on IA64
+====================
+ 21 May 2008, Isaku Yamahata <yamahata@valinux.co.jp>
+
+
+Introduction
+------------
+The aim of this documentation is to help with maintainability and/or to
+encourage people to use paravirt_ops/IA64.
+
+paravirt_ops (pv_ops in short) is a way for virtualization support of
+Linux kernel on x86. Several ways for virtualization support were
+proposed, paravirt_ops is the winner.
+On the other hand, now there are also several IA64 virtualization
+technologies like kvm/IA64, xen/IA64 and many other academic IA64
+hypervisors so that it is good to add generic virtualization
+infrastructure on Linux/IA64.
+
+
+What is paravirt_ops?
+---------------------
+It has been developed on x86 as virtualization support via API, not ABI.
+It allows each hypervisor to override operations which are important for
+hypervisors at API level. And it allows a single kernel binary to run on
+all supported execution environments including native machine.
+Essentially paravirt_ops is a set of function pointers which represent
+operations corresponding to low level sensitive instructions and high
+level functionalities in various area. But one significant difference
+from usual function pointer table is that it allows optimization with
+binary patch. It is because some of these operations are very
+performance sensitive and indirect call overhead is not negligible.
+With binary patch, indirect C function call can be transformed into
+direct C function call or in-place execution to eliminate the overhead.
+
+Thus, operations of paravirt_ops are classified into three categories.
+- simple indirect call
+ These operations correspond to high level functionality so that the
+ overhead of indirect call isn't very important.
+
+- indirect call which allows optimization with binary patch
+ Usually these operations correspond to low level instructions. They
+ are called frequently and performance critical. So the overhead is
+ very important.
+
+- a set of macros for hand written assembly code
+ Hand written assembly codes (.S files) also need paravirtualization
+ because they include sensitive instructions or some of code paths in
+ them are very performance critical.
+
+
+The relation to the IA64 machine vector
+---------------------------------------
+Linux/IA64 has the IA64 machine vector functionality which allows the
+kernel to switch implementations (e.g. initialization, ipi, dma api...)
+depending on executing platform.
+We can replace some implementations very easily defining a new machine
+vector. Thus another approach for virtualization support would be
+enhancing the machine vector functionality.
+But paravirt_ops approach was taken because
+- virtualization support needs wider support than machine vector does.
+ e.g. low level instruction paravirtualization. It must be
+ initialized very early before platform detection.
+
+- virtualization support needs more functionality like binary patch.
+ Probably the calling overhead might not be very large compared to the
+ emulation overhead of virtualization. However in the native case, the
+ overhead should be eliminated completely.
+ A single kernel binary should run on each environment including native,
+ and the overhead of paravirt_ops on native environment should be as
+ small as possible.
+
+- for full virtualization technology, e.g. KVM/IA64 or
+ Xen/IA64 HVM domain, the result would be
+ (the emulated platform machine vector. probably dig) + (pv_ops).
+ This means that the virtualization support layer should be under
+ the machine vector layer.
+
+Possibly it might be better to move some function pointers from
+paravirt_ops to machine vector. In fact, Xen domU case utilizes both
+pv_ops and machine vector.
+
+
+IA64 paravirt_ops
+-----------------
+In this section, the concrete paravirt_ops will be discussed.
+Because of the architecture difference between ia64 and x86, the
+resulting set of functions is very different from x86 pv_ops.
+
+- C function pointer tables
+They are not very performance critical so that simple C indirect
+function call is acceptable. The following structures are defined at
+this moment. For details see linux/include/asm-ia64/paravirt.h
+ - struct pv_info
+ This structure describes the execution environment.
+ - struct pv_init_ops
+ This structure describes the various initialization hooks.
+ - struct pv_iosapic_ops
+ This structure describes hooks to iosapic operations.
+ - struct pv_irq_ops
+ This structure describes hooks to irq related operations
+ - struct pv_time_op
+ This structure describes hooks to steal time accounting.
+
+- a set of indirect calls which need optimization
+Currently this class of functions correspond to a subset of IA64
+intrinsics. At this moment the optimization with binary patch isn't
+implemented yet.
+struct pv_cpu_op is defined. For details see
+linux/include/asm-ia64/paravirt_privop.h
+Mostly they correspond to ia64 intrinsics 1-to-1.
+Caveat: Now they are defined as C indirect function pointers, but in
+order to support binary patch optimization, they will be changed
+using GCC extended inline assembly code.
+
+- a set of macros for hand written assembly code (.S files)
+For maintenance purpose, the taken approach for .S files is single
+source code and compile multiple times with different macros definitions.
+Each pv_ops instance must define those macros to compile.
+The important thing here is that sensitive, but non-privileged
+instructions must be paravirtualized and that some privileged
+instructions also need paravirtualization for reasonable performance.
+Developers who modify .S files must be aware of that. At this moment
+an easy checker is implemented to detect paravirtualization breakage.
+But it doesn't cover all the cases.
+
+Sometimes this set of macros is called pv_cpu_asm_op. But there is no
+corresponding structure in the source code.
+Those macros mostly 1:1 correspond to a subset of privileged
+instructions. See linux/include/asm-ia64/native/inst.h.
+And some functions written in assembly also need to be overrided so
+that each pv_ops instance have to define some macros. Again see
+linux/include/asm-ia64/native/inst.h.
+
+
+Those structures must be initialized very early before start_kernel.
+Probably initialized in head.S using multi entry point or some other trick.
+For native case implementation see linux/arch/ia64/kernel/paravirt.c.
diff --git a/Documentation/ia64/serial.txt b/Documentation/ia64/serial.txt
index f51eb4bc2ff..6869c73de4e 100644
--- a/Documentation/ia64/serial.txt
+++ b/Documentation/ia64/serial.txt
@@ -124,9 +124,16 @@ TROUBLESHOOTING SERIAL CONSOLE PROBLEMS
- Add entry to /etc/securetty for console tty.
+ No ACPI serial devices found in 2.6.17 or later:
+ - Turn on CONFIG_PNP and CONFIG_PNPACPI. Prior to 2.6.17, ACPI
+ serial devices were discovered by 8250_acpi. In 2.6.17,
+ 8250_acpi was replaced by the combination of 8250_pnp and
+ CONFIG_PNPACPI.
-[1] http://www.dig64.org/specifications/DIG64_PCDPv20.pdf
+
+
+[1] http://www.dig64.org/specifications/agreement
The table was originally defined as the "HCDP" for "Headless
Console/Debug Port." The current version is the "PCDP" for
"Primary Console and Debug Port Devices."
diff --git a/Documentation/ia64/xen.txt b/Documentation/ia64/xen.txt
new file mode 100644
index 00000000000..c61a99f7c8b
--- /dev/null
+++ b/Documentation/ia64/xen.txt
@@ -0,0 +1,183 @@
+ Recipe for getting/building/running Xen/ia64 with pv_ops
+ --------------------------------------------------------
+
+This recipe describes how to get xen-ia64 source and build it,
+and run domU with pv_ops.
+
+============
+Requirements
+============
+
+ - python
+ - mercurial
+ it (aka "hg") is an open-source source code
+ management software. See the below.
+ http://www.selenic.com/mercurial/wiki/
+ - git
+ - bridge-utils
+
+=================================
+Getting and Building Xen and Dom0
+=================================
+
+ My environment is;
+ Machine : Tiger4
+ Domain0 OS : RHEL5
+ DomainU OS : RHEL5
+
+ 1. Download source
+ # hg clone http://xenbits.xensource.com/ext/ia64/xen-unstable.hg
+ # cd xen-unstable.hg
+ # hg clone http://xenbits.xensource.com/ext/ia64/linux-2.6.18-xen.hg
+
+ 2. # make world
+
+ 3. # make install-tools
+
+ 4. copy kernels and xen
+ # cp xen/xen.gz /boot/efi/efi/redhat/
+ # cp build-linux-2.6.18-xen_ia64/vmlinux.gz \
+ /boot/efi/efi/redhat/vmlinuz-2.6.18.8-xen
+
+ 5. make initrd for Dom0/DomU
+ # make -C linux-2.6.18-xen.hg ARCH=ia64 modules_install \
+ O=$(/bin/pwd)/build-linux-2.6.18-xen_ia64
+ # mkinitrd -f /boot/efi/efi/redhat/initrd-2.6.18.8-xen.img \
+ 2.6.18.8-xen --builtin mptspi --builtin mptbase \
+ --builtin mptscsih --builtin uhci-hcd --builtin ohci-hcd \
+ --builtin ehci-hcd
+
+================================
+Making a disk image for guest OS
+================================
+
+ 1. make file
+ # dd if=/dev/zero of=/root/rhel5.img bs=1M seek=4096 count=0
+ # mke2fs -F -j /root/rhel5.img
+ # mount -o loop /root/rhel5.img /mnt
+ # cp -ax /{dev,var,etc,usr,bin,sbin,lib} /mnt
+ # mkdir /mnt/{root,proc,sys,home,tmp}
+
+ Note: You may miss some device files. If so, please create them
+ with mknod. Or you can use tar instead of cp.
+
+ 2. modify DomU's fstab
+ # vi /mnt/etc/fstab
+ /dev/xvda1 / ext3 defaults 1 1
+ none /dev/pts devpts gid=5,mode=620 0 0
+ none /dev/shm tmpfs defaults 0 0
+ none /proc proc defaults 0 0
+ none /sys sysfs defaults 0 0
+
+ 3. modify inittab
+ set runlevel to 3 to avoid X trying to start
+ # vi /mnt/etc/inittab
+ id:3:initdefault:
+ Start a getty on the hvc0 console
+ X0:2345:respawn:/sbin/mingetty hvc0
+ tty1-6 mingetty can be commented out
+
+ 4. add hvc0 into /etc/securetty
+ # vi /mnt/etc/securetty (add hvc0)
+
+ 5. umount
+ # umount /mnt
+
+FYI, virt-manager can also make a disk image for guest OS.
+It's GUI tools and easy to make it.
+
+==================
+Boot Xen & Domain0
+==================
+
+ 1. replace elilo
+ elilo of RHEL5 can boot Xen and Dom0.
+ If you use old elilo (e.g RHEL4), please download from the below
+ http://elilo.sourceforge.net/cgi-bin/blosxom
+ and copy into /boot/efi/efi/redhat/
+ # cp elilo-3.6-ia64.efi /boot/efi/efi/redhat/elilo.efi
+
+ 2. modify elilo.conf (like the below)
+ # vi /boot/efi/efi/redhat/elilo.conf
+ prompt
+ timeout=20
+ default=xen
+ relocatable
+
+ image=vmlinuz-2.6.18.8-xen
+ label=xen
+ vmm=xen.gz
+ initrd=initrd-2.6.18.8-xen.img
+ read-only
+ append=" -- rhgb root=/dev/sda2"
+
+The append options before "--" are for xen hypervisor,
+the options after "--" are for dom0.
+
+FYI, your machine may need console options like
+"com1=19200,8n1 console=vga,com1". For example,
+append="com1=19200,8n1 console=vga,com1 -- rhgb console=tty0 \
+console=ttyS0 root=/dev/sda2"
+
+=====================================
+Getting and Building domU with pv_ops
+=====================================
+
+ 1. get pv_ops tree
+ # git clone http://people.valinux.co.jp/~yamahata/xen-ia64/linux-2.6-xen-ia64.git/
+
+ 2. git branch (if necessary)
+ # cd linux-2.6-xen-ia64/
+ # git checkout -b your_branch origin/xen-ia64-domu-minimal-2008may19
+ (Note: The current branch is xen-ia64-domu-minimal-2008may19.
+ But you would find the new branch. You can see with
+ "git branch -r" to get the branch lists.
+ http://people.valinux.co.jp/~yamahata/xen-ia64/for_eagl/linux-2.6-ia64-pv-ops.git/
+ is also available. The tree is based on
+ git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6 test)
+
+
+ 3. copy .config for pv_ops of domU
+ # cp arch/ia64/configs/xen_domu_wip_defconfig .config
+
+ 4. make kernel with pv_ops
+ # make oldconfig
+ # make
+
+ 5. install the kernel and initrd
+ # cp vmlinux.gz /boot/efi/efi/redhat/vmlinuz-2.6-pv_ops-xenU
+ # make modules_install
+ # mkinitrd -f /boot/efi/efi/redhat/initrd-2.6-pv_ops-xenU.img \
+ 2.6.26-rc3xen-ia64-08941-g1b12161 --builtin mptspi \
+ --builtin mptbase --builtin mptscsih --builtin uhci-hcd \
+ --builtin ohci-hcd --builtin ehci-hcd
+
+========================
+Boot DomainU with pv_ops
+========================
+
+ 1. make config of DomU
+ # vi /etc/xen/rhel5
+ kernel = "/boot/efi/efi/redhat/vmlinuz-2.6-pv_ops-xenU"
+ ramdisk = "/boot/efi/efi/redhat/initrd-2.6-pv_ops-xenU.img"
+ vcpus = 1
+ memory = 512
+ name = "rhel5"
+ disk = [ 'file:/root/rhel5.img,xvda1,w' ]
+ root = "/dev/xvda1 ro"
+ extra= "rhgb console=hvc0"
+
+ 2. After boot xen and dom0, start xend
+ # /etc/init.d/xend start
+ ( In the debugging case, # XEND_DEBUG=1 xend trace_start )
+
+ 3. start domU
+ # xm create -c rhel5
+
+=========
+Reference
+=========
+- Wiki of Xen/IA64 upstream merge
+ http://wiki.xensource.com/xenwiki/XenIA64/UpstreamMerge
+
+Written by Akio Takebe <takebe_akio@jp.fujitsu.com> on 28 May 2008