Merge branch 'linus' into tracing/core

Conflicts: kernel/trace/trace_events_filter.c We use the tracing/core version. Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Ingo Molnar <mingo@elte.hu> 2009-08-11 14:19:09 +0200
committer: Ingo Molnar <mingo@elte.hu> 2009-08-11 14:19:09 +0200
commit: 89034bc2c7b839702c00a704e79d112737f98be0 (patch)
tree: e65b1f3d4c751baa840efc81bc4734f089379eb3 /Documentation
parent: fb82ad719831db58e9baa4c67015aae3fe27e7e3 (diff)
parent: 85dfd81dc57e8183a277ddd7a56aa65c96f3f487 (diff)
16 files changed, 568 insertions, 394 deletions
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index cbbd3e06994..5f3bedaf8e3 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -94,28 +94,37 @@ What:		/sys/block/<disk>/queue/physical_block_size
 Date:		May 2009
 Contact:	Martin K. Petersen <martin.petersen@oracle.com>
 Description:
-		This is the smallest unit the storage device can write
-		without resorting to read-modify-write operation.  It is
-		usually the same as the logical block size but may be
-		bigger.  One example is SATA drives with 4KB sectors
-		that expose a 512-byte logical block size to the
-		operating system.
+		This is the smallest unit a physical storage device can
+		write atomically.  It is usually the same as the logical
+		block size but may be bigger.  One example is SATA
+		drives with 4KB sectors that expose a 512-byte logical
+		block size to the operating system.  For stacked block
+		devices the physical_block_size variable contains the
+		maximum physical_block_size of the component devices.
 
 What:		/sys/block/<disk>/queue/minimum_io_size
 Date:		April 2009
 Contact:	Martin K. Petersen <martin.petersen@oracle.com>
 Description:
-		Storage devices may report a preferred minimum I/O size,
-		which is the smallest request the device can perform
-		without incurring a read-modify-write penalty.  For disk
-		drives this is often the physical block size.  For RAID
-		arrays it is often the stripe chunk size.
+		Storage devices may report a granularity or preferred
+		minimum I/O size which is the smallest request the
+		device can perform without incurring a performance
+		penalty.  For disk drives this is often the physical
+		block size.  For RAID arrays it is often the stripe
+		chunk size.  A properly aligned multiple of
+		minimum_io_size is the preferred request size for
+		workloads where a high number of I/O operations is
+		desired.
 
 What:		/sys/block/<disk>/queue/optimal_io_size
 Date:		April 2009
 Contact:	Martin K. Petersen <martin.petersen@oracle.com>
 Description:
 		Storage devices may report an optimal I/O size, which is
-		the device's preferred unit of receiving I/O.  This is
-		rarely reported for disk drives.  For RAID devices it is
-		usually the stripe width or the internal block size.
+		the device's preferred unit for sustained I/O.  This is
+		rarely reported for disk drives.  For RAID arrays it is
+		usually the stripe width or the internal track size.  A
+		properly aligned multiple of optimal_io_size is the
+		preferred request size for workloads where sustained
+		throughput is desired.  If no optimal I/O size is
+		reported this file contains 0.
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index a50d6cd5857..992e67e6be7 100644
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -449,8 +449,8 @@ printk(KERN_INFO "i = %u\n", i);
    </para>
 
    <programlisting>
-__u32 ipaddress;
-printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress));
+__be32 ipaddress;
+printk(KERN_INFO "my ip: %pI4\n", &amp;ipaddress);
    </programlisting>
 
    <para>
diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt
index 93cb28d05dc..18f9651ff23 100644
--- a/Documentation/RCU/rculist_nulls.txt
+++ b/Documentation/RCU/rculist_nulls.txt
@@ -83,11 +83,12 @@ not detect it missed following items in original chain.
 obj = kmem_cache_alloc(...);
 lock_chain(); // typically a spin_lock()
 obj->key = key;
-atomic_inc(&obj->refcnt);
 /*
  * we need to make sure obj->key is updated before obj->next
+ * or obj->refcnt
  */
 smp_wmb();
+atomic_set(&obj->refcnt, 1);
 hlist_add_head_rcu(&obj->obj_node, list);
 unlock_chain(); // typically a spin_unlock()
 
@@ -159,6 +160,10 @@ out:
 obj = kmem_cache_alloc(cachep);
 lock_chain(); // typically a spin_lock()
 obj->key = key;
+/*
+ * changes to obj->key must be visible before refcnt one
+ */
+smp_wmb();
 atomic_set(&obj->refcnt, 1);
 /*
  * insert obj in RCU way (readers might be traversing chain)
diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.txt
index 43cb1004d35..9d58c7c5edd 100644
--- a/Documentation/arm/memory.txt
+++ b/Documentation/arm/memory.txt
@@ -21,6 +21,8 @@ ffff8000	ffffffff	copy_user_page / clear_user_page use.
 				For SA11xx and Xscale, this is used to
 				setup a minicache mapping.
 
+ffff4000	ffffffff	cache aliasing on ARMv6 and later CPUs.
+
 ffff1000	ffff7fff	Reserved.
 				Platforms must not use this address range.
 
diff --git a/Documentation/connector/cn_test.c b/Documentation/connector/cn_test.c
index f688eba8770..6a5be5d5c8e 100644
--- a/Documentation/connector/cn_test.c
+++ b/Documentation/connector/cn_test.c
@@ -1,7 +1,7 @@
 /*
  * 	cn_test.c
  * 
- * 2004-2005 Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * 2004+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
  * All rights reserved.
  * 
  * This program is free software; you can redistribute it and/or modify
@@ -194,5 +194,5 @@ module_init(cn_test_init);
 module_exit(cn_test_fini);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
 MODULE_DESCRIPTION("Connector's test module");
diff --git a/Documentation/connector/ucon.c b/Documentation/connector/ucon.c
index d738cde2a8d..c5092ad0ce4 100644
--- a/Documentation/connector/ucon.c
+++ b/Documentation/connector/ucon.c
@@ -1,7 +1,7 @@
 /*
  * 	ucon.c
  *
- * Copyright (c) 2004+ Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004+ Evgeniy Polyakov <zbr@ioremap.net>
  *
  *
  * This program is free software; you can redistribute it and/or modify
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt
index 7e81e37c0b1..b245d524d56 100644
--- a/Documentation/filesystems/sysfs.txt
+++ b/Documentation/filesystems/sysfs.txt
@@ -23,7 +23,8 @@ interface.
 Using sysfs
 ~~~~~~~~~~~
 
-sysfs is always compiled in. You can access it by doing:
+sysfs is always compiled in if CONFIG_SYSFS is defined. You can access
+it by doing:
 
     mount -t sysfs sysfs /sys 
 
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 7bb0d934b6d..dbea4f95fc8 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -139,6 +139,7 @@ Code	Seq#	Include File		Comments
 'm'	all	linux/synclink.h	conflict!
 'm'	00-1F	net/irda/irmod.h	conflict!
 'n'	00-7F	linux/ncp_fs.h
+'n'	80-8F	linux/nilfs2_fs.h	NILFS2
 'n'	E0-FF	video/matrox.h          matroxfb
 'o'	00-1F	fs/ocfs2/ocfs2_fs.h	OCFS2
 'o'     00-03   include/mtd/ubi-user.h  conflict! (OCFS2 and UBI overlaps)
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index f2296ecedb8..e2ddcdeb61b 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -36,8 +36,6 @@ detailed description):
 	- Bluetooth enable and disable
 	- video output switching, expansion control
 	- ThinkLight on and off
-	- limited docking and undocking
-	- UltraBay eject
 	- CMOS/UCMS control
 	- LED control
 	- ACPI sounds
@@ -729,131 +727,6 @@ cannot be read or if it is unknown, thinkpad-acpi will report it as "off".
 It is impossible to know if the status returned through sysfs is valid.
 
 
-Docking / undocking -- /proc/acpi/ibm/dock
-------------------------------------------
-
-Docking and undocking (e.g. with the X4 UltraBase) requires some
-actions to be taken by the operating system to safely make or break
-the electrical connections with the dock.
-
-The docking feature of this driver generates the following ACPI events:
-
-	ibm/dock GDCK 00000003 00000001 -- eject request
-	ibm/dock GDCK 00000003 00000002 -- undocked
-	ibm/dock GDCK 00000000 00000003 -- docked
-
-NOTE: These events will only be generated if the laptop was docked
-when originally booted. This is due to the current lack of support for
-hot plugging of devices in the Linux ACPI framework. If the laptop was
-booted while not in the dock, the following message is shown in the
-logs:
-
-	Mar 17 01:42:34 aero kernel: thinkpad_acpi: dock device not present
-
-In this case, no dock-related events are generated but the dock and
-undock commands described below still work. They can be executed
-manually or triggered by Fn key combinations (see the example acpid
-configuration files included in the driver tarball package available
-on the web site).
-
-When the eject request button on the dock is pressed, the first event
-above is generated. The handler for this event should issue the
-following command:
-
-	echo undock > /proc/acpi/ibm/dock
-
-After the LED on the dock goes off, it is safe to eject the laptop.
-Note: if you pressed this key by mistake, go ahead and eject the
-laptop, then dock it back in. Otherwise, the dock may not function as
-expected.
-
-When the laptop is docked, the third event above is generated. The
-handler for this event should issue the following command to fully
-enable the dock:
-
-	echo dock > /proc/acpi/ibm/dock
-
-The contents of the /proc/acpi/ibm/dock file shows the current status
-of the dock, as provided by the ACPI framework.
-
-The docking support in this driver does not take care of enabling or
-disabling any other devices you may have attached to the dock. For
-example, a CD drive plugged into the UltraBase needs to be disabled or
-enabled separately. See the provided example acpid configuration files
-for how this can be accomplished.
-
-There is no support yet for PCI devices that may be attached to a
-docking station, e.g. in the ThinkPad Dock II. The driver currently
-does not recognize, enable or disable such devices. This means that
-the only docking stations currently supported are the X-series
-UltraBase docks and "dumb" port replicators like the Mini Dock (the
-latter don't need any ACPI support, actually).
-
-
-UltraBay eject -- /proc/acpi/ibm/bay
-------------------------------------
-
-Inserting or ejecting an UltraBay device requires some actions to be
-taken by the operating system to safely make or break the electrical
-connections with the device.
-
-This feature generates the following ACPI events:
-
-	ibm/bay MSTR 00000003 00000000 -- eject request
-	ibm/bay MSTR 00000001 00000000 -- eject lever inserted
-
-NOTE: These events will only be generated if the UltraBay was present
-when the laptop was originally booted (on the X series, the UltraBay
-is in the dock, so it may not be present if the laptop was undocked).
-This is due to the current lack of support for hot plugging of devices
-in the Linux ACPI framework. If the laptop was booted without the
-UltraBay, the following message is shown in the logs:
-
-	Mar 17 01:42:34 aero kernel: thinkpad_acpi: bay device not present
-
-In this case, no bay-related events are generated but the eject
-command described below still works. It can be executed manually or
-triggered by a hot key combination.
-
-Sliding the eject lever generates the first event shown above. The
-handler for this event should take whatever actions are necessary to
-shut down the device in the UltraBay (e.g. call idectl), then issue
-the following command:
-
-	echo eject > /proc/acpi/ibm/bay
-
-After the LED on the UltraBay goes off, it is safe to pull out the
-device.
-
-When the eject lever is inserted, the second event above is
-generated. The handler for this event should take whatever actions are
-necessary to enable the UltraBay device (e.g. call idectl).
-
-The contents of the /proc/acpi/ibm/bay file shows the current status
-of the UltraBay, as provided by the ACPI framework.
-
-EXPERIMENTAL warm eject support on the 600e/x, A22p and A3x (To use
-this feature, you need to supply the experimental=1 parameter when
-loading the module):
-
-These models do not have a button near the UltraBay device to request
-a hot eject but rather require the laptop to be put to sleep
-(suspend-to-ram) before the bay device is ejected or inserted).
-The sequence of steps to eject the device is as follows:
-
-	echo eject > /proc/acpi/ibm/bay
-	put the ThinkPad to sleep
-	remove the drive
-	resume from sleep
-	cat /proc/acpi/ibm/bay should show that the drive was removed
-
-On the A3x, both the UltraBay 2000 and UltraBay Plus devices are
-supported. Use "eject2" instead of "eject" for the second bay.
-
-Note: the UltraBay eject support on the 600e/x, A22p and A3x is
-EXPERIMENTAL and may not work as expected. USE WITH CAUTION!
-
-
 CMOS/UCMS control
 -----------------
 
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 9ebcd6ef361..950cde6d6e5 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -1,7 +1,9 @@
-/*P:100 This is the Launcher code, a simple program which lays out the
- * "physical" memory for the new Guest by mapping the kernel image and
- * the virtual devices, then opens /dev/lguest to tell the kernel
- * about the Guest and control it. :*/
+/*P:100
+ * This is the Launcher code, a simple program which lays out the "physical"
+ * memory for the new Guest by mapping the kernel image and the virtual
+ * devices, then opens /dev/lguest to tell the kernel about the Guest and
+ * control it.
+:*/
 #define _LARGEFILE64_SOURCE
 #define _GNU_SOURCE
 #include <stdio.h>
@@ -46,13 +48,15 @@
 #include "linux/virtio_rng.h"
 #include "linux/virtio_ring.h"
 #include "asm/bootparam.h"
-/*L:110 We can ignore the 39 include files we need for this program, but I do
- * want to draw attention to the use of kernel-style types.
+/*L:110
+ * We can ignore the 42 include files we need for this program, but I do want
+ * to draw attention to the use of kernel-style types.
  *
  * As Linus said, "C is a Spartan language, and so should your naming be."  I
  * like these abbreviations, so we define them here.  Note that u64 is always
  * unsigned long long, which works on all Linux systems: this means that we can
- * use %llu in printf for any u64. */
+ * use %llu in printf for any u64.
+ */
 typedef unsigned long long u64;
 typedef uint32_t u32;
 typedef uint16_t u16;
@@ -69,8 +73,10 @@ typedef uint8_t u8;
 /* This will occupy 3 pages: it must be a power of 2. */
 #define VIRTQUEUE_NUM 256
 
-/*L:120 verbose is both a global flag and a macro.  The C preprocessor allows
- * this, and although I wouldn't recommend it, it works quite nicely here. */
+/*L:120
+ * verbose is both a global flag and a macro.  The C preprocessor allows
+ * this, and although I wouldn't recommend it, it works quite nicely here.
+ */
 static bool verbose;
 #define verbose(args...) \
 	do { if (verbose) printf(args); } while(0)
@@ -87,8 +93,7 @@ static int lguest_fd;
 static unsigned int __thread cpu_id;
 
 /* This is our list of devices. */
-struct device_list
-{
+struct device_list {
 	/* Counter to assign interrupt numbers. */
 	unsigned int next_irq;
 
@@ -100,8 +105,7 @@ struct device_list
 
 	/* A single linked list of devices. */
 	struct device *dev;
-	/* And a pointer to the last device for easy append and also for
-	 * configuration appending. */
+	/* And a pointer to the last device for easy append. */
 	struct device *lastdev;
 };
 
@@ -109,8 +113,7 @@ struct device_list
 static struct device_list devices;
 
 /* The device structure describes a single device. */
-struct device
-{
+struct device {
 	/* The linked-list pointer. */
 	struct device *next;
 
@@ -135,8 +138,7 @@ struct device
 };
 
 /* The virtqueue structure describes a queue attached to a device. */
-struct virtqueue
-{
+struct virtqueue {
 	struct virtqueue *next;
 
 	/* Which device owns me. */
@@ -168,20 +170,24 @@ static char **main_args;
 /* The original tty settings to restore on exit. */
 static struct termios orig_term;
 
-/* We have to be careful with barriers: our devices are all run in separate
+/*
+ * We have to be careful with barriers: our devices are all run in separate
  * threads and so we need to make sure that changes visible to the Guest happen
- * in precise order. */
+ * in precise order.
+ */
 #define wmb() __asm__ __volatile__("" : : : "memory")
 #define mb() __asm__ __volatile__("" : : : "memory")
 
-/* Convert an iovec element to the given type.
+/*
+ * Convert an iovec element to the given type.
  *
  * This is a fairly ugly trick: we need to know the size of the type and
  * alignment requirement to check the pointer is kosher.  It's also nice to
  * have the name of the type in case we report failure.
  *
  * Typing those three things all the time is cumbersome and error prone, so we
- * have a macro which sets them all up and passes to the real function. */
+ * have a macro which sets them all up and passes to the real function.
+ */
 #define convert(iov, type) \
 	((type *)_convert((iov), sizeof(type), __alignof__(type), #type))
 
@@ -198,8 +204,10 @@ static void *_convert(struct iovec *iov, size_t size, size_t align,
 /* Wrapper for the last available index.  Makes it easier to change. */
 #define lg_last_avail(vq)	((vq)->last_avail_idx)
 
-/* The virtio configuration space is defined to be little-endian.  x86 is
- * little-endian too, but it's nice to be explicit so we have these helpers. */
+/*
+ * The virtio configuration space is defined to be little-endian.  x86 is
+ * little-endian too, but it's nice to be explicit so we have these helpers.
+ */
 #define cpu_to_le16(v16) (v16)
 #define cpu_to_le32(v32) (v32)
 #define cpu_to_le64(v64) (v64)
@@ -241,11 +249,12 @@ static u8 *get_feature_bits(struct device *dev)
 		+ dev->num_vq * sizeof(struct lguest_vqconfig);
 }
 
-/*L:100 The Launcher code itself takes us out into userspace, that scary place
- * where pointers run wild and free!  Unfortunately, like most userspace
- * programs, it's quite boring (which is why everyone likes to hack on the
- * kernel!).  Perhaps if you make up an Lguest Drinking Game at this point, it
- * will get you through this section.  Or, maybe not.
+/*L:100
+ * The Launcher code itself takes us out into userspace, that scary place where
+ * pointers run wild and free!  Unfortunately, like most userspace programs,
+ * it's quite boring (which is why everyone likes to hack on the kernel!).
+ * Perhaps if you make up an Lguest Drinking Game at this point, it will get
+ * you through this section.  Or, maybe not.
  *
  * The Launcher sets up a big chunk of memory to be the Guest's "physical"
  * memory and stores it in "guest_base".  In other words, Guest physical ==
@@ -253,7 +262,8 @@ static u8 *get_feature_bits(struct device *dev)
  *
  * This can be tough to get your head around, but usually it just means that we
  * use these trivial conversion functions when the Guest gives us it's
- * "physical" addresses: */
+ * "physical" addresses:
+ */
 static void *from_guest_phys(unsigned long addr)
 {
 	return guest_base + addr;
@@ -268,7 +278,8 @@ static unsigned long to_guest_phys(const void *addr)
  * Loading the Kernel.
  *
  * We start with couple of simple helper routines.  open_or_die() avoids
- * error-checking code cluttering the callers: */
+ * error-checking code cluttering the callers:
+ */
 static int open_or_die(const char *name, int flags)
 {
 	int fd = open(name, flags);
@@ -283,12 +294,19 @@ static void *map_zeroed_pages(unsigned int num)
 	int fd = open_or_die("/dev/zero", O_RDONLY);
 	void *addr;
 
-	/* We use a private mapping (ie. if we write to the page, it will be
-	 * copied). */
+	/*
+	 * We use a private mapping (ie. if we write to the page, it will be
+	 * copied).
+	 */
 	addr = mmap(NULL, getpagesize() * num,
 		    PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, fd, 0);
 	if (addr == MAP_FAILED)
 		err(1, "Mmaping %u pages of /dev/zero", num);
+
+	/*
+	 * One neat mmap feature is that you can close the fd, and it
+	 * stays mapped.
+	 */
 	close(fd);
 
 	return addr;
@@ -305,20 +323,24 @@ static void *get_pages(unsigned int num)
 	return addr;
 }
 
-/* This routine is used to load the kernel or initrd.  It tries mmap, but if
+/*
+ * This routine is used to load the kernel or initrd.  It tries mmap, but if
  * that fails (Plan 9's kernel file isn't nicely aligned on page boundaries),
- * it falls back to reading the memory in. */
+ * it falls back to reading the memory in.
+ */
 static void map_at(int fd, void *addr, unsigned long offset, unsigned long len)
 {
 	ssize_t r;
 
-	/* We map writable even though for some segments are marked read-only.
+	/*
+	 * We map writable even though for some segments are marked read-only.
 	 * The kernel really wants to be writable: it patches its own
 	 * instructions.
 	 *
 	 * MAP_PRIVATE means that the page won't be copied until a write is
 	 * done to it.  This allows us to share untouched memory between
-	 * Guests. */
+	 * Guests.
+	 */
 	if (mmap(addr, len, PROT_READ|PROT_WRITE|PROT_EXEC,
 		 MAP_FIXED|MAP_PRIVATE, fd, offset) != MAP_FAILED)
 		return;
@@ -329,7 +351,8 @@ static void map_at(int fd, void *addr, unsigned long offset, unsigned long len)
 		err(1, "Reading offset %lu len %lu gave %zi", offset, len, r);
 }
 
-/* This routine takes an open vmlinux image, which is in ELF, and maps it into
+/*
+ * This routine takes an open vmlinux image, which is in ELF, and maps it into
  * the Guest memory.  ELF = Embedded Linking Format, which is the format used
  * by all modern binaries on Linux including the kernel.
  *
@@ -337,23 +360,28 @@ static void map_at(int fd, void *addr, unsigned long offset, unsigned long len)
  * address.  We use the physical address; the Guest will map itself to the
  * virtual address.
  *
- * We return the starting address. */
+ * We return the starting address.
+ */
 static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr)
 {
 	Elf32_Phdr phdr[ehdr->e_phnum];
 	unsigned int i;
 
-	/* Sanity checks on the main ELF header: an x86 executable with a
-	 * reasonable number of correctly-sized program headers. */
+	/*
+	 * Sanity checks on the main ELF header: an x86 executable with a
+	 * reasonable number of correctly-sized program headers.
+	 */
 	if (ehdr->e_type != ET_EXEC
 	    || ehdr->e_machine != EM_386
 	    || ehdr->e_phentsize != sizeof(Elf32_Phdr)
 	    || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr))
 		errx(1, "Malformed elf header");
 
-	/* An ELF executable contains an ELF header and a number of "program"
+	/*
+	 * An ELF executable contains an ELF header and a number of "program"
 	 * headers which indicate which parts ("segments") of the program to
-	 * load where. */
+	 * load where.
+	 */
 
 	/* We read in all the program headers at once: */
 	if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0)
@@ -361,8 +389,10 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr)
 	if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr))
 		err(1, "Reading program headers");
 
-	/* Try all the headers: there are usually only three.  A read-only one,
-	 * a read-write one, and a "note" section which we don't load. */
+	/*
+	 * Try all the headers: there are usually only three.  A read-only one,
+	 * a read-write one, and a "note" section which we don't load.
+	 */
 	for (i = 0; i < ehdr->e_phnum; i++) {
 		/* If this isn't a loadable segment, we ignore it */
 		if (phdr[i].p_type != PT_LOAD)
@@ -380,13 +410,15 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr)
 	return ehdr->e_entry;
 }
 
-/*L:150 A bzImage, unlike an ELF file, is not meant to be loaded.  You're
- * supposed to jump into it and it will unpack itself.  We used to have to
- * perform some hairy magic because the unpacking code scared me.
+/*L:150
+ * A bzImage, unlike an ELF file, is not meant to be loaded.  You're supposed
+ * to jump into it and it will unpack itself.  We used to have to perform some
+ * hairy magic because the unpacking code scared me.
  *
  * Fortunately, Jeremy Fitzhardinge convinced me it wasn't that hard and wrote
  * a small patch to jump over the tricky bits in the Guest, so now we just read
- * the funky header so we know where in the file to load, and away we go! */
+ * the funky header so we know where in the file to load, and away we go!
+ */
 static unsigned long load_bzimage(int fd)
 {
 	struct boot_params boot;
@@ -394,8 +426,10 @@ static unsigned long load_bzimage(int fd)
 	/* Modern bzImages get loaded at 1M. */
 	void *p = from_guest_phys(0x100000);
 
-	/* Go back to the start of the file and read the header.  It should be
-	 * a Linux boot header (see Documentation/x86/i386/boot.txt) */
+	/*
+	 * Go back to the start of the file and read the header.  It should be
+	 * a Linux boot header (see Documentation/x86/i386/boot.txt)
+	 */
 	lseek(fd, 0, SEEK_SET);
 	read(fd, &boot, sizeof(boot));
 
@@ -414,9 +448,11 @@ static unsigned long load_bzimage(int fd)
 	return boot.hdr.code32_start;
 }
 
-/*L:140 Loading the kernel is easy when it's a "vmlinux", but most kernels
+/*L:140
+ * Loading the kernel is easy when it's a "vmlinux", but most kernels
  * come wrapped up in the self-decompressing "bzImage" format.  With a little
- * work, we can load those, too. */
+ * work, we can load those, too.
+ */
 static unsigned long load_kernel(int fd)
 {
 	Elf32_Ehdr hdr;
@@ -433,24 +469,28 @@ static unsigned long load_kernel(int fd)
 	return load_bzimage(fd);
 }
 
-/* This is a trivial little helper to align pages.  Andi Kleen hated it because
+/*
+ * This is a trivial little helper to align pages.  Andi Kleen hated it because
  * it calls getpagesize() twice: "it's dumb code."
  *
  * Kernel guys get really het up about optimization, even when it's not
- * necessary.  I leave this code as a reaction against that. */
+ * necessary.  I leave this code as a reaction against that.
+ */
 static inline unsigned long page_align(unsigned long addr)
 {
 	/* Add upwards and truncate downwards. */
 	return ((addr + getpagesize()-1) & ~(getpagesize()-1));
 }
 
-/*L:180 An "initial ram disk" is a disk image loaded into memory along with
- * the kernel which the kernel can use to boot from without needing any
- * drivers.  Most distributions now use this as standard: the initrd contains
- * the code to load the appropriate driver modules for the current machine.
+/*L:180
+ * An "initial ram disk" is a disk image loaded into memory along with the
+ * kernel which the kernel can use to boot from without needing any drivers.
+ * Most distributions now use this as standard: the initrd contains the code to
+ * load the appropriate driver modules for the current machine.
  *
  * Importantly, James Morris works for RedHat, and Fedora uses initrds for its
- * kernels.  He sent me this (and tells me when I break it). */
+ * kernels.  He sent me this (and tells me when I break it).
+ */
 static unsigned long load_initrd(const char *name, unsigned long mem)
 {
 	int ifd;
@@ -462,12 +502,16 @@ static unsigned long load_initrd(const char *name, unsigned long mem)
 	if (fstat(ifd, &st) < 0)
 		err(1, "fstat() on initrd '%s'", name);
 
-	/* We map the initrd at the top of memory, but mmap wants it to be
-	 * page-aligned, so we round the size up for that. */
+	/*
+	 * We map the initrd at the top of memory, but mmap wants it to be
+	 * page-aligned, so we round the size up for that.
+	 */
 	len = page_align(st.st_size);
 	map_at(ifd, from_guest_phys(mem - len), 0, st.st_size);
-	/* Once a file is mapped, you can close the file descriptor.  It's a
-	 * little odd, but quite useful. */
+	/*
+	 * Once a file is mapped, you can close the file descriptor.  It's a
+	 * little odd, but quite useful.
+	 */
 	close(ifd);
 	verbose("mapped initrd %s size=%lu @ %p\n", name, len, (void*)mem-len);
 
@@ -476,8 +520,10 @@ static unsigned long load_initrd(const char *name, unsigned long mem)
 }
 /*:*/
 
-/* Simple routine to roll all the commandline arguments together with spaces
- * between them. */
+/*
+ * Simple routine to roll all the commandline arguments together with spaces
+ * between them.
+ */
 static void concat(char *dst, char *args[])
 {
 	unsigned int i, len = 0;
@@ -494,10 +540,12 @@ static void concat(char *dst, char *args[])
 	dst[len] = '\0';
 }
 
-/*L:185 This is where we actually tell the kernel to initialize the Guest.  We
+/*L:185
+ * This is where we actually tell the kernel to initialize the Guest.  We
  * saw the arguments it expects when we looked at initialize() in lguest_user.c:
  * the base of Guest "physical" memory, the top physical page to allow and the
- * entry point for the Guest. */
+ * entry point for the Guest.
+ */
 static void tell_kernel(unsigned long start)
 {
 	unsigned long args[] = { LHREQ_INITIALIZE,
@@ -511,7 +559,7 @@ static void tell_kernel(unsigned long start)
 }
 /*:*/
 
-/*
+/*L:200
  * Device Handling.
  *
  * When the Guest gives us a buffer, it sends an array of addresses and sizes.
@@ -522,20 +570,26 @@ static void tell_kernel(unsigned long start)
 static void *_check_pointer(unsigned long addr, unsigned int size,
 			    unsigned int line)
 {
-	/* We have to separately check addr and addr+size, because size could
-	 * be huge and addr + size might wrap around. */
+	/*
+	 * We have to separately check addr and addr+size, because size could
+	 * be huge and addr + size might wrap around.
+	 */
 	if (addr >= guest_limit || addr + size >= guest_limit)
 		errx(1, "%s:%i: Invalid address %#lx", __FILE__, line, addr);
-	/* We return a pointer for the caller's convenience, now we know it's
-	 * safe to use. */
+	/*
+	 * We return a pointer for the caller's convenience, now we know it's
+	 * safe to use.
+	 */
 	return from_guest_phys(addr);
 }
 /* A macro which transparently hands the line number to the real function. */
 #define check_pointer(addr,size) _check_pointer(addr, size, __LINE__)
 
-/* Each buffer in the virtqueues is actually a chain of descriptors.  This
+/*
+ * Each buffer in the virtqueues is actually a chain of descriptors.  This
  * function returns the next descriptor in the chain, or vq->vring.num if we're
- * at the end. */
+ * at the end.
+ */
 static unsigned next_desc(struct vring_desc *desc,
 			  unsigned int i, unsigned int max)
 {
@@ -556,7 +610,10 @@ static unsigned next_desc(struct vring_desc *desc,
 	return next;
 }
 
-/* This actually sends the interrupt for this virtqueue */
+/*
+ * This actually sends the interrupt for this virtqueue, if we've used a
+ * buffer.
+ */
 static void trigger_irq(struct virtqueue *vq)
 {
 	unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
@@ -576,12 +633,14 @@ static void trigger_irq(struct virtqueue *vq)
 		err(1, "Triggering irq %i", vq->config.irq);
 }
 
-/* This looks in the virtqueue and for the first available buffer, and converts
+/*
+ * This looks in the virtqueue for the first available buffer, and converts
  * it to an iovec for convenient access.  Since descriptors consist of some
  * number of output then some number of input descriptors, it's actually two
  * iovecs, but we pack them into one and note how many of each there were.
  *
- * This function returns the descriptor number found. */
+ * This function waits if necessary, and returns the descriptor number found.
+ */
 static unsigned wait_for_vq_desc(struct virtqueue *vq,
 				 struct iovec iov[],
 				 unsigned int *out_num, unsigned int *in_num)
@@ -590,17 +649,23 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq,
 	struct vring_desc *desc;
 	u16 last_avail = lg_last_avail(vq);
 
+	/* There's nothing available? */
 	while (last_avail == vq->vring.avail->idx) {
 		u64 event;
 
-		/* OK, tell Guest about progress up to now. */
+		/*
+		 * Since we're about to sleep, now is a good time to tell the
+		 * Guest about what we've used up to now.
+		 */
 		trigger_irq(vq);
 
 		/* OK, now we need to know about added descriptors. */
 		vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
 
-		/* They could have slipped one in as we were doing that: make
-		 * sure it's written, then check again. */
+		/*
+		 * They could have slipped one in as we were doing that: make
+		 * sure it's written, then c
author	Ingo Molnar <mingo@elte.hu>	2009-08-11 14:19:09 +0200
committer	Ingo Molnar <mingo@elte.hu>	2009-08-11 14:19:09 +0200
commit	89034bc2c7b839702c00a704e79d112737f98be0 (patch)
tree	e65b1f3d4c751baa840efc81bc4734f089379eb3 /Documentation
parent	fb82ad719831db58e9baa4c67015aae3fe27e7e3 (diff)
parent	85dfd81dc57e8183a277ddd7a56aa65c96f3f487 (diff)