aboutsummaryrefslogtreecommitdiff
path: root/Documentation
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/cgroups/cgroups.txt11
-rw-r--r--Documentation/filesystems/ext3.txt16
-rw-r--r--Documentation/infiniband/user_mad.txt4
-rw-r--r--Documentation/infiniband/user_verbs.txt2
-rw-r--r--Documentation/isdn/INTERFACE.CAPI83
-rw-r--r--Documentation/kernel-parameters.txt1
-rw-r--r--Documentation/networking/pktgen.txt8
-rw-r--r--Documentation/scsi/hptiop.txt21
-rw-r--r--Documentation/vm/ksm.txt13
-rw-r--r--Documentation/vm/page-types.c304
-rw-r--r--Documentation/vm/pagemap.txt8
11 files changed, 345 insertions, 126 deletions
diff --git a/Documentation/cgroups/cgroups.txt b/Documentation/cgroups/cgroups.txt
index 455d4e6d346..0b33bfe7dde 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -227,7 +227,14 @@ as the path relative to the root of the cgroup file system.
Each cgroup is represented by a directory in the cgroup file system
containing the following files describing that cgroup:
- - tasks: list of tasks (by pid) attached to that cgroup
+ - tasks: list of tasks (by pid) attached to that cgroup. This list
+ is not guaranteed to be sorted. Writing a thread id into this file
+ moves the thread into this cgroup.
+ - cgroup.procs: list of tgids in the cgroup. This list is not
+ guaranteed to be sorted or free of duplicate tgids, and userspace
+ should sort/uniquify the list if this property is required.
+ Writing a tgid into this file moves all threads with that tgid into
+ this cgroup.
- notify_on_release flag: run the release agent on exit?
- release_agent: the path to use for release notifications (this file
exists in the top cgroup only)
@@ -374,7 +381,7 @@ Now you want to do something with this cgroup.
In this directory you can find several files:
# ls
-notify_on_release tasks
+cgroup.procs notify_on_release tasks
(plus whatever files added by the attached subsystems)
Now attach your shell to this cgroup:
diff --git a/Documentation/filesystems/ext3.txt b/Documentation/filesystems/ext3.txt
index 570f9bd9be2..05d5cf1d743 100644
--- a/Documentation/filesystems/ext3.txt
+++ b/Documentation/filesystems/ext3.txt
@@ -123,10 +123,18 @@ resuid=n The user ID which may use the reserved blocks.
sb=n Use alternate superblock at this location.
-quota
-noquota
-grpquota
-usrquota
+quota These options are ignored by the filesystem. They
+noquota are used only by quota tools to recognize volumes
+grpquota where quota should be turned on. See documentation
+usrquota in the quota-tools package for more details
+ (http://sourceforge.net/projects/linuxquota).
+
+jqfmt=<quota type> These options tell filesystem details about quota
+usrjquota=<file> so that quota information can be properly updated
+grpjquota=<file> during journal replay. They replace the above
+ quota options. See documentation in the quota-tools
+ package for more details
+ (http://sourceforge.net/projects/linuxquota).
bh (*) ext3 associates buffer heads to data pages to
nobh (a) cache disk block mapping information
diff --git a/Documentation/infiniband/user_mad.txt b/Documentation/infiniband/user_mad.txt
index 744687dd195..8a366959f5c 100644
--- a/Documentation/infiniband/user_mad.txt
+++ b/Documentation/infiniband/user_mad.txt
@@ -128,8 +128,8 @@ Setting IsSM Capability Bit
To create the appropriate character device files automatically with
udev, a rule like
- KERNEL="umad*", NAME="infiniband/%k"
- KERNEL="issm*", NAME="infiniband/%k"
+ KERNEL=="umad*", NAME="infiniband/%k"
+ KERNEL=="issm*", NAME="infiniband/%k"
can be used. This will create device nodes named
diff --git a/Documentation/infiniband/user_verbs.txt b/Documentation/infiniband/user_verbs.txt
index f847501e50b..afe3f8da901 100644
--- a/Documentation/infiniband/user_verbs.txt
+++ b/Documentation/infiniband/user_verbs.txt
@@ -58,7 +58,7 @@ Memory pinning
To create the appropriate character device files automatically with
udev, a rule like
- KERNEL="uverbs*", NAME="infiniband/%k"
+ KERNEL=="uverbs*", NAME="infiniband/%k"
can be used. This will create device nodes named
diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI
index 686e107923e..5fe8de5cc72 100644
--- a/Documentation/isdn/INTERFACE.CAPI
+++ b/Documentation/isdn/INTERFACE.CAPI
@@ -60,10 +60,9 @@ open() operation on regular files or character devices.
After a successful return from register_appl(), CAPI messages from the
application may be passed to the driver for the device via calls to the
-send_message() callback function. The CAPI message to send is stored in the
-data portion of an skb. Conversely, the driver may call Kernel CAPI's
-capi_ctr_handle_message() function to pass a received CAPI message to Kernel
-CAPI for forwarding to an application, specifying its ApplID.
+send_message() callback function. Conversely, the driver may call Kernel
+CAPI's capi_ctr_handle_message() function to pass a received CAPI message to
+Kernel CAPI for forwarding to an application, specifying its ApplID.
Deregistration requests (CAPI operation CAPI_RELEASE) from applications are
forwarded as calls to the release_appl() callback function, passing the same
@@ -142,6 +141,7 @@ u16 (*send_message)(struct capi_ctr *ctrlr, struct sk_buff *skb)
to accepting or queueing the message. Errors occurring during the
actual processing of the message should be signaled with an
appropriate reply message.
+ May be called in process or interrupt context.
Calls to this function are not serialized by Kernel CAPI, ie. it must
be prepared to be re-entered.
@@ -154,7 +154,8 @@ read_proc_t *ctr_read_proc
system entry, /proc/capi/controllers/<n>; will be called with a
pointer to the device's capi_ctr structure as the last (data) argument
-Note: Callback functions are never called in interrupt context.
+Note: Callback functions except send_message() are never called in interrupt
+context.
- to be filled in before calling capi_ctr_ready():
@@ -171,14 +172,40 @@ u8 serial[CAPI_SERIAL_LEN]
value to return for CAPI_GET_SERIAL
-4.3 The _cmsg Structure
+4.3 SKBs
+
+CAPI messages are passed between Kernel CAPI and the driver via send_message()
+and capi_ctr_handle_message(), stored in the data portion of a socket buffer
+(skb). Each skb contains a single CAPI message coded according to the CAPI 2.0
+standard.
+
+For the data transfer messages, DATA_B3_REQ and DATA_B3_IND, the actual
+payload data immediately follows the CAPI message itself within the same skb.
+The Data and Data64 parameters are not used for processing. The Data64
+parameter may be omitted by setting the length field of the CAPI message to 22
+instead of 30.
+
+
+4.4 The _cmsg Structure
(declared in <linux/isdn/capiutil.h>)
The _cmsg structure stores the contents of a CAPI 2.0 message in an easily
-accessible form. It contains members for all possible CAPI 2.0 parameters, of
-which only those appearing in the message type currently being processed are
-actually used. Unused members should be set to zero.
+accessible form. It contains members for all possible CAPI 2.0 parameters,
+including subparameters of the Additional Info and B Protocol structured
+parameters, with the following exceptions:
+
+* second Calling party number (CONNECT_IND)
+
+* Data64 (DATA_B3_REQ and DATA_B3_IND)
+
+* Sending complete (subparameter of Additional Info, CONNECT_REQ and INFO_REQ)
+
+* Global Configuration (subparameter of B Protocol, CONNECT_REQ, CONNECT_RESP
+ and SELECT_B_PROTOCOL_REQ)
+
+Only those parameters appearing in the message type currently being processed
+are actually used. Unused members should be set to zero.
Members are named after the CAPI 2.0 standard names of the parameters they
represent. See <linux/isdn/capiutil.h> for the exact spelling. Member data
@@ -190,18 +217,19 @@ u16 for CAPI parameters of type 'word'
u32 for CAPI parameters of type 'dword'
-_cstruct for CAPI parameters of type 'struct' not containing any
- variably-sized (struct) subparameters (eg. 'Called Party Number')
+_cstruct for CAPI parameters of type 'struct'
The member is a pointer to a buffer containing the parameter in
CAPI encoding (length + content). It may also be NULL, which will
be taken to represent an empty (zero length) parameter.
+ Subparameters are stored in encoded form within the content part.
-_cmstruct for CAPI parameters of type 'struct' containing 'struct'
- subparameters ('Additional Info' and 'B Protocol')
+_cmstruct alternative representation for CAPI parameters of type 'struct'
+ (used only for the 'Additional Info' and 'B Protocol' parameters)
The representation is a single byte containing one of the values:
- CAPI_DEFAULT: the parameter is empty
- CAPI_COMPOSE: the values of the subparameters are stored
- individually in the corresponding _cmsg structure members
+ CAPI_DEFAULT: The parameter is empty/absent.
+ CAPI_COMPOSE: The parameter is present.
+ Subparameter values are stored individually in the corresponding
+ _cmsg structure members.
Functions capi_cmsg2message() and capi_message2cmsg() are provided to convert
messages between their transport encoding described in the CAPI 2.0 standard
@@ -297,3 +325,26 @@ char *capi_cmd2str(u8 Command, u8 Subcommand)
be NULL if the command/subcommand is not one of those defined in the
CAPI 2.0 standard.
+
+7. Debugging
+
+The module kernelcapi has a module parameter showcapimsgs controlling some
+debugging output produced by the module. It can only be set when the module is
+loaded, via a parameter "showcapimsgs=<n>" to the modprobe command, either on
+the command line or in the configuration file.
+
+If the lowest bit of showcapimsgs is set, kernelcapi logs controller and
+application up and down events.
+
+In addition, every registered CAPI controller has an associated traceflag
+parameter controlling how CAPI messages sent from and to tha controller are
+logged. The traceflag parameter is initialized with the value of the
+showcapimsgs parameter when the controller is registered, but can later be
+changed via the MANUFACTURER_REQ command KCAPI_CMD_TRACE.
+
+If the value of traceflag is non-zero, CAPI messages are logged.
+DATA_B3 messages are only logged if the value of traceflag is > 2.
+
+If the lowest bit of traceflag is set, only the command/subcommand and message
+length are logged. Otherwise, kernelcapi logs a readable representation of
+the entire message.
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 6fa7292947e..9107b387e91 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -671,6 +671,7 @@ and is between 256 and 4096 characters. It is defined in the file
earlyprintk= [X86,SH,BLACKFIN]
earlyprintk=vga
earlyprintk=serial[,ttySn[,baudrate]]
+ earlyprintk=ttySn[,baudrate]
earlyprintk=dbgp[debugController#]
Append ",keep" to not disable it when the real console
diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt
index c6cf4a3c16e..61bb645d50e 100644
--- a/Documentation/networking/pktgen.txt
+++ b/Documentation/networking/pktgen.txt
@@ -90,6 +90,11 @@ Examples:
pgset "dstmac 00:00:00:00:00:00" sets MAC destination address
pgset "srcmac 00:00:00:00:00:00" sets MAC source address
+ pgset "queue_map_min 0" Sets the min value of tx queue interval
+ pgset "queue_map_max 7" Sets the max value of tx queue interval, for multiqueue devices
+ To select queue 1 of a given device,
+ use queue_map_min=1 and queue_map_max=1
+
pgset "src_mac_count 1" Sets the number of MACs we'll range through.
The 'minimum' MAC is what you set with srcmac.
@@ -101,6 +106,9 @@ Examples:
IPDST_RND, UDPSRC_RND,
UDPDST_RND, MACSRC_RND, MACDST_RND
MPLS_RND, VID_RND, SVID_RND
+ QUEUE_MAP_RND # queue map random
+ QUEUE_MAP_CPU # queue map mirrors smp_processor_id()
+
pgset "udp_src_min 9" set UDP source port min, If < udp_src_max, then
cycle through the port range.
diff --git a/Documentation/scsi/hptiop.txt b/Documentation/scsi/hptiop.txt
index a6eb4add1be..9605179711f 100644
--- a/Documentation/scsi/hptiop.txt
+++ b/Documentation/scsi/hptiop.txt
@@ -3,6 +3,25 @@ HIGHPOINT ROCKETRAID 3xxx/4xxx ADAPTER DRIVER (hptiop)
Controller Register Map
-------------------------
+For RR44xx Intel IOP based adapters, the controller IOP is accessed via PCI BAR0 and BAR2:
+
+ BAR0 offset Register
+ 0x11C5C Link Interface IRQ Set
+ 0x11C60 Link Interface IRQ Clear
+
+ BAR2 offset Register
+ 0x10 Inbound Message Register 0
+ 0x14 Inbound Message Register 1
+ 0x18 Outbound Message Register 0
+ 0x1C Outbound Message Register 1
+ 0x20 Inbound Doorbell Register
+ 0x24 Inbound Interrupt Status Register
+ 0x28 Inbound Interrupt Mask Register
+ 0x30 Outbound Interrupt Status Register
+ 0x34 Outbound Interrupt Mask Register
+ 0x40 Inbound Queue Port
+ 0x44 Outbound Queue Port
+
For Intel IOP based adapters, the controller IOP is accessed via PCI BAR0:
BAR0 offset Register
@@ -93,7 +112,7 @@ The driver exposes following sysfs attributes:
-----------------------------------------------------------------------------
-Copyright (C) 2006-2007 HighPoint Technologies, Inc. All Rights Reserved.
+Copyright (C) 2006-2009 HighPoint Technologies, Inc. All Rights Reserved.
This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
diff --git a/Documentation/vm/ksm.txt b/Documentation/vm/ksm.txt
index 72a22f65960..262d8e6793a 100644
--- a/Documentation/vm/ksm.txt
+++ b/Documentation/vm/ksm.txt
@@ -52,15 +52,15 @@ The KSM daemon is controlled by sysfs files in /sys/kernel/mm/ksm/,
readable by all but writable only by root:
max_kernel_pages - set to maximum number of kernel pages that KSM may use
- e.g. "echo 2000 > /sys/kernel/mm/ksm/max_kernel_pages"
+ e.g. "echo 100000 > /sys/kernel/mm/ksm/max_kernel_pages"
Value 0 imposes no limit on the kernel pages KSM may use;
but note that any process using MADV_MERGEABLE can cause
KSM to allocate these pages, unswappable until it exits.
- Default: 2000 (chosen for demonstration purposes)
+ Default: quarter of memory (chosen to not pin too much)
pages_to_scan - how many present pages to scan before ksmd goes to sleep
- e.g. "echo 200 > /sys/kernel/mm/ksm/pages_to_scan"
- Default: 200 (chosen for demonstration purposes)
+ e.g. "echo 100 > /sys/kernel/mm/ksm/pages_to_scan"
+ Default: 100 (chosen for demonstration purposes)
sleep_millisecs - how many milliseconds ksmd should sleep before next scan
e.g. "echo 20 > /sys/kernel/mm/ksm/sleep_millisecs"
@@ -70,7 +70,8 @@ run - set 0 to stop ksmd from running but keep merged pages,
set 1 to run ksmd e.g. "echo 1 > /sys/kernel/mm/ksm/run",
set 2 to stop ksmd and unmerge all pages currently merged,
but leave mergeable areas registered for next run
- Default: 1 (for immediate use by apps which register)
+ Default: 0 (must be changed to 1 to activate KSM,
+ except if CONFIG_SYSFS is disabled)
The effectiveness of KSM and MADV_MERGEABLE is shown in /sys/kernel/mm/ksm/:
@@ -86,4 +87,4 @@ pages_volatile embraces several different kinds of activity, but a high
proportion there would also indicate poor use of madvise MADV_MERGEABLE.
Izik Eidus,
-Hugh Dickins, 30 July 2009
+Hugh Dickins, 24 Sept 2009
diff --git a/Documentation/vm/page-types.c b/Documentation/vm/page-types.c
index fa1a30d9e9d..3ec4f2a2258 100644
--- a/Documentation/vm/page-types.c
+++ b/Documentation/vm/page-types.c
@@ -2,7 +2,10 @@
* page-types: Tool for querying page flags
*
* Copyright (C) 2009 Intel corporation
- * Copyright (C) 2009 Wu Fengguang <fengguang.wu@intel.com>
+ *
+ * Authors: Wu Fengguang <fengguang.wu@intel.com>
+ *
+ * Released under the General Public License (GPL).
*/
#define _LARGEFILE64_SOURCE
@@ -69,7 +72,9 @@
#define KPF_COMPOUND_TAIL 16
#define KPF_HUGE 17
#define KPF_UNEVICTABLE 18
+#define KPF_HWPOISON 19
#define KPF_NOPAGE 20
+#define KPF_KSM 21
/* [32-] kernel hacking assistances */
#define KPF_RESERVED 32
@@ -116,7 +121,9 @@ static char *page_flag_names[] = {
[KPF_COMPOUND_TAIL] = "T:compound_tail",
[KPF_HUGE] = "G:huge",
[KPF_UNEVICTABLE] = "u:unevictable",
+ [KPF_HWPOISON] = "X:hwpoison",
[KPF_NOPAGE] = "n:nopage",
+ [KPF_KSM] = "x:ksm",
[KPF_RESERVED] = "r:reserved",
[KPF_MLOCKED] = "m:mlocked",
@@ -152,9 +159,6 @@ static unsigned long opt_size[MAX_ADDR_RANGES];
static int nr_vmas;
static unsigned long pg_start[MAX_VMAS];
static unsigned long pg_end[MAX_VMAS];
-static unsigned long voffset;
-
-static int pagemap_fd;
#define MAX_BIT_FILTERS 64
static int nr_bit_filters;
@@ -163,9 +167,16 @@ static uint64_t opt_bits[MAX_BIT_FILTERS];
static int page_size;
-#define PAGES_BATCH (64 << 10) /* 64k pages */
+static int pagemap_fd;
static int kpageflags_fd;
+static int opt_hwpoison;
+static int opt_unpoison;
+
+static char *hwpoison_debug_fs = "/debug/hwpoison";
+static int hwpoison_inject_fd;
+static int hwpoison_forget_fd;
+
#define HASH_SHIFT 13
#define HASH_SIZE (1 << HASH_SHIFT)
#define HASH_MASK (HASH_SIZE - 1)
@@ -207,6 +218,74 @@ static void fatal(const char *x, ...)
exit(EXIT_FAILURE);
}
+int checked_open(const char *pathname, int flags)
+{
+ int fd = open(pathname, flags);
+
+ if (fd < 0) {
+ perror(pathname);
+ exit(EXIT_FAILURE);
+ }
+
+ return fd;
+}
+
+/*
+ * pagemap/kpageflags routines
+ */
+
+static unsigned long do_u64_read(int fd, char *name,
+ uint64_t *buf,
+ unsigned long index,
+ unsigned long count)
+{
+ long bytes;
+
+ if (index > ULONG_MAX / 8)
+ fatal("index overflow: %lu\n", index);
+
+ if (lseek(fd, index * 8, SEEK_SET) < 0) {
+ perror(name);
+ exit(EXIT_FAILURE);
+ }
+
+ bytes = read(fd, buf, count * 8);
+ if (bytes < 0) {
+ perror(name);
+ exit(EXIT_FAILURE);
+ }
+ if (bytes % 8)
+ fatal("partial read: %lu bytes\n", bytes);
+
+ return bytes / 8;
+}
+
+static unsigned long kpageflags_read(uint64_t *buf,
+ unsigned long index,
+ unsigned long pages)
+{
+ return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages);
+}
+
+static unsigned long pagemap_read(uint64_t *buf,
+ unsigned long index,
+ unsigned long pages)
+{
+ return do_u64_read(pagemap_fd, "/proc/pid/pagemap", buf, index, pages);
+}
+
+static unsigned long pagemap_pfn(uint64_t val)
+{
+ unsigned long pfn;
+
+ if (val & PM_PRESENT)
+ pfn = PM_PFRAME(val);
+ else
+ pfn = 0;
+
+ return pfn;
+}
+
/*
* page flag names
@@ -255,7 +334,8 @@ static char *page_flag_longname(uint64_t flags)
* page list and summary
*/
-static void show_page_range(unsigned long offset, uint64_t flags)
+static void show_page_range(unsigned long voffset,
+ unsigned long offset, uint64_t flags)
{
static uint64_t flags0;
static unsigned long voff;
@@ -281,7 +361,8 @@ static void show_page_range(unsigned long offset, uint64_t flags)
count = 1;
}
-static void show_page(unsigned long offset, uint64_t flags)
+static void show_page(unsigned long voffset,
+ unsigned long offset, uint64_t flags)
{
if (opt_pid)
printf("%lx\t", voffset);
@@ -362,6 +443,62 @@ static uint64_t well_known_flags(uint64_t flags)
return flags;
}
+static uint64_t kpageflags_flags(uint64_t flags)
+{
+ flags = expand_overloaded_flags(flags);
+
+ if (!opt_raw)
+ flags = well_known_flags(flags);
+
+ return flags;
+}
+
+/*
+ * page actions
+ */
+
+static void prepare_hwpoison_fd(void)
+{
+ char buf[100];
+
+ if (opt_hwpoison && !hwpoison_inject_fd) {
+ sprintf(buf, "%s/corrupt-pfn", hwpoison_debug_fs);
+ hwpoison_inject_fd = checked_open(buf, O_WRONLY);
+ }
+
+ if (opt_unpoison && !hwpoison_forget_fd) {
+ sprintf(buf, "%s/renew-pfn", hwpoison_debug_fs);
+ hwpoison_forget_fd = checked_open(buf, O_WRONLY);
+ }
+}
+
+static int hwpoison_page(unsigned long offset)
+{
+ char buf[100];
+ int len;
+
+ len = sprintf(buf, "0x%lx\n", offset);
+ len = write(hwpoison_inject_fd, buf, len);
+ if (len < 0) {
+ perror("hwpoison inject");
+ return len;
+ }
+ return 0;
+}
+
+static int unpoison_page(unsigned long offset)
+{
+ char buf[100];
+ int len;
+
+ len = sprintf(buf, "0x%lx\n", offset);
+ len = write(hwpoison_forget_fd, buf, len);
+ if (len < 0) {
+ perror("hwpoison forget");
+ return len;
+ }
+ return 0;
+}
/*
* page frame walker
@@ -394,104 +531,83 @@ static int hash_slot(uint64_t flags)
exit(EXIT_FAILURE);
}
-static void add_page(unsigned long offset, uint64_t flags)
+static void add_page(unsigned long voffset,
+ unsigned long offset, uint64_t flags)
{
- flags = expand_overloaded_flags(flags);
-
- if (!opt_raw)
- flags = well_known_flags(flags);
+ flags = kpageflags_flags(flags);
if (!bit_mask_ok(flags))
return;
+ if (opt_hwpoison)
+ hwpoison_page(offset);
+ if (opt_unpoison)
+ unpoison_page(offset);
+
if (opt_list == 1)
- show_page_range(offset, flags);
+ show_page_range(voffset, offset, flags);
else if (opt_list == 2)
- show_page(offset, flags);
+ show_page(voffset, offset, flags);
nr_pages[hash_slot(flags)]++;
total_pages++;
}
-static void walk_pfn(unsigned long index, unsigned long count)
+#define KPAGEFLAGS_BATCH (64 << 10) /* 64k pages */
+static void walk_pfn(unsigned long voffset,
+ unsigned long index,
+ unsigned long count)
{
+ uint64_t buf[KPAGEFLAGS_BATCH];
unsigned long batch;
- unsigned long n;
+ unsigned long pages;
unsigned long i;
- if (index > ULONG_MAX / KPF_BYTES)
- fatal("index overflow: %lu\n", index);
-
- lseek(kpageflags_fd, index * KPF_BYTES, SEEK_SET);
-
while (count) {
- uint64_t kpageflags_buf[KPF_BYTES * PAGES_BATCH];
-
- batch = min_t(unsigned long, count, PAGES_BATCH);
- n = read(kpageflags_fd, kpageflags_buf, batch * KPF_BYTES);
- if (n == 0)
+ batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH);
+ pages = kpageflags_read(buf, index, batch);
+ if (pages == 0)
break;
- if (n < 0) {
- perror(PROC_KPAGEFLAGS);
- exit(EXIT_FAILURE);
- }
- if (n % KPF_BYTES != 0)
- fatal("partial read: %lu bytes\n", n);
- n = n / KPF_BYTES;
+ for (i = 0; i < pages; i++)
+ add_page(voffset + i, index + i, buf[i]);
- for (i = 0; i < n; i++)
- add_page(index + i, kpageflags_buf[i]);
-
- index += batch;
- count -= batch;
+ index += pages;
+ count -= pages;
}
}
-
-#define PAGEMAP_BATCH 4096
-static unsigned long task_pfn(unsigned long pgoff)
+#define PAGEMAP_BATCH (64 << 10)
+static void walk_vma(unsigned long index, unsigned long count)
{
- static uint64_t buf[PAGEMAP_BATCH];
- static unsigned long start;
- static long count;
- uint64_t pfn;
+ uint64_t buf[PAGEMAP_BATCH];
+ unsigned long batch;
+ unsigned long pages;
+ unsigned long pfn;
+ unsigned long i;
- if (pgoff < start || pgoff >= start + count) {
- if (lseek64(pagemap_fd,
- (uint64_t)pgoff * PM_ENTRY_BYTES,
- SEEK_SET) < 0) {
- perror("pagemap seek");
- exit(EXIT_FAILURE);
- }
- count = read(pagemap_fd, buf, sizeof(buf));
- if (count == 0)
- return 0;
- if (count < 0) {
- perror("pagemap read");
- exit(EXIT_FAILURE);
- }
- if (count % PM_ENTRY_BYTES) {
- fatal("pagemap read not aligned.\n");
- exit(EXIT_FAILURE);
- }
- count /= PM_ENTRY_BYTES;
- start = pgoff;
- }
+ while (count) {
+ batch = min_t(unsigned long, count, PAGEMAP_BATCH);
+ pages = pagemap_read(buf, index, batch);
+ if (pages == 0)
+ break;
- pfn = buf[pgoff - start];
- if (pfn & PM_PRESENT)
- pfn = PM_PFRAME(pfn);
- else
- pfn = 0;
+ for (i = 0; i < pages; i++) {
+ pfn = pagemap_pfn(buf[i]);
+ if (pfn)
+ walk_pfn(index + i, pfn, 1);
+ }
- return pfn;
+ index += pages;
+ count -= pages;
+ }
}
static void walk_task(unsigned long index, unsigned long count)
{
- int i = 0;
const unsigned long end = index + count;
+ unsigned long start;
+ int i = 0;
while (index < end) {
@@ -501,15 +617,11 @@ static void walk_task(unsigned long index, unsigned long count)
if (pg_start[i] >= end)
return;
- voffset = max_t(unsigned long, pg_start[i], index);
- index = min_t(unsigned long, pg_end[i], end);
+ start = max_t(unsigned long, pg_start[i], index);
+ index = min_t(unsigned long, pg_end[i], end);
- assert(voffset < index);
- for (; voffset < index; voffset++) {
- unsigned long pfn = task_pfn(voffset);
- if (pfn)
- walk_pfn(pfn, 1);
- }
+ assert(start < index);
+ walk_vma(start, index - start);
}
}
@@ -527,18 +639,14 @@ static void walk_addr_ranges(void)
{
int i;
- kpageflags_fd = open(PROC_KPAGEFLAGS, O_RDONLY);
- if (kpageflags_fd < 0) {
- perror(PROC_KPAGEFLAGS);
- exit(EXIT_FAILURE);
- }
+ kpageflags_fd = checked_open(PROC_KPAGEFLAGS, O_RDONLY);
if (!nr_addr_ranges)
add_addr_range(0, ULONG_MAX);
for (i = 0; i < nr_addr_ranges; i++)
if (!opt_pid)
- walk_pfn(opt_offset[i], opt_size[i]);
+ walk_pfn(0, opt_offset[i], opt_size[i]);
else
walk_task(opt_offset[i], opt_size[i]);
@@ -575,6 +683,8 @@ static void usage(void)
" -l|--list Show page details in ranges\n"
" -L|--list-each Show page details one by one\n"
" -N|--no-summary Don't show summay info\n"
+" -X|--hwpoison hwpoison pages\n"
+" -x|--unpoison unpoison pages\n"
" -h|--help Show this usage message\n"
"addr-spec:\n"
" N one page at offset N (unit: pages)\n"
@@ -624,11 +734,7 @@ static void parse_pid(const char *str)
opt_pid = parse_number(str);
sprintf(buf, "/proc/%d/pagemap", opt_pid);
- pagemap_fd = open(buf, O_RDONLY);
- if (pagemap_fd < 0) {
- perror(buf);
- exit(EXIT_FAILURE);
- }
+ pagemap_fd = checked_open(buf, O_RDONLY);
sprintf(buf, "/proc/%d/maps", opt_pid);
file = fopen(buf, "r");
@@ -788,6 +894,8 @@ static struct option opts[] = {
{ "list" , 0, NULL, 'l' },
{ "list-each" , 0, NULL, 'L' },
{ "no-summary", 0, NULL, 'N' },
+ { "hwpoison" , 0, NULL, 'X' },
+ { "unpoison" , 0, NULL, 'x' },
{ "help" , 0, NULL, 'h' },
{ NULL , 0, NULL, 0 }
};
@@ -799,7 +907,7 @@ int main(int argc, char *argv[])
page_size = getpagesize();
while ((c = getopt_long(argc, argv,
- "rp:f:a:b:lLNh", opts, NULL)) != -1) {
+ "rp:f:a:b:lLNXxh", opts, NULL)) != -1) {
switch (c) {
case 'r':
opt_raw = 1;
@@ -825,6 +933,14 @@ int main(int argc, char *argv[])
case 'N':
opt_no_summary = 1;
break;
+ case 'X':
+ opt_hwpoison = 1;
+ prepare_hwpoison_fd();
+ break;
+ case 'x':
+ opt_unpoison = 1;
+ prepare_hwpoison_fd();
+ break;
case 'h':
usage();
exit(0);
@@ -844,7 +960,7 @@ int main(int argc, char *argv[])
walk_addr_ranges();
if (opt_list == 1)
- show_page_range(0, 0); /* drain the buffer */
+ show_page_range(0, 0, 0); /* drain the buffer */
if (opt_no_summary)
return 0;
diff --git a/Documentation/vm/pagemap.txt b/Documentation/vm/pagemap.txt
index 600a304a828..df09b9650a8 100644
--- a/Documentation/vm/pagemap.txt
+++ b/Documentation/vm/pagemap.txt
@@ -57,7 +57,9 @@ There are three components to pagemap:
16. COMPOUND_TAIL
16. HUGE
18. UNEVICTABLE
+ 19. HWPOISON
20. NOPAGE
+ 21. KSM
Short descriptions to the page flags:
@@ -86,9 +88,15 @@ Short descriptions to the page flags:
17. HUGE
this is an integral part of a HugeTLB page
+19. HWPOISON
+ hardware detected memory corruption on this page: don't touch the data!
+
20. NOPAGE
no page frame exists at the requested address
+21. KSM
+ identical memory pages dynamically shared between one or more processes
+
[IO related page flags]
1. ERROR IO error occurred
3. UPTODATE page has up-to-date data