aboutsummaryrefslogtreecommitdiff
path: root/Documentation
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-08-11 14:19:09 +0200
committerIngo Molnar <mingo@elte.hu>2009-08-11 14:19:09 +0200
commit89034bc2c7b839702c00a704e79d112737f98be0 (patch)
treee65b1f3d4c751baa840efc81bc4734f089379eb3 /Documentation
parentfb82ad719831db58e9baa4c67015aae3fe27e7e3 (diff)
parent85dfd81dc57e8183a277ddd7a56aa65c96f3f487 (diff)
Merge branch 'linus' into tracing/core
Conflicts: kernel/trace/trace_events_filter.c We use the tracing/core version. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'Documentation')
-rw-r--r--Documentation/ABI/testing/sysfs-block37
-rw-r--r--Documentation/DocBook/kernel-hacking.tmpl4
-rw-r--r--Documentation/RCU/rculist_nulls.txt7
-rw-r--r--Documentation/arm/memory.txt2
-rw-r--r--Documentation/connector/cn_test.c4
-rw-r--r--Documentation/connector/ucon.c2
-rw-r--r--Documentation/filesystems/sysfs.txt3
-rw-r--r--Documentation/ioctl/ioctl-number.txt1
-rw-r--r--Documentation/laptops/thinkpad-acpi.txt127
-rw-r--r--Documentation/lguest/lguest.c721
-rw-r--r--Documentation/lockdep-design.txt6
-rw-r--r--Documentation/networking/6pack.txt2
-rw-r--r--Documentation/sound/alsa/Procfile.txt5
-rw-r--r--Documentation/sysrq.txt7
-rw-r--r--Documentation/video4linux/CARDLIST.em28xx2
-rw-r--r--Documentation/video4linux/gspca.txt32
16 files changed, 568 insertions, 394 deletions
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index cbbd3e06994..5f3bedaf8e3 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -94,28 +94,37 @@ What: /sys/block/<disk>/queue/physical_block_size
Date: May 2009
Contact: Martin K. Petersen <martin.petersen@oracle.com>
Description:
- This is the smallest unit the storage device can write
- without resorting to read-modify-write operation. It is
- usually the same as the logical block size but may be
- bigger. One example is SATA drives with 4KB sectors
- that expose a 512-byte logical block size to the
- operating system.
+ This is the smallest unit a physical storage device can
+ write atomically. It is usually the same as the logical
+ block size but may be bigger. One example is SATA
+ drives with 4KB sectors that expose a 512-byte logical
+ block size to the operating system. For stacked block
+ devices the physical_block_size variable contains the
+ maximum physical_block_size of the component devices.
What: /sys/block/<disk>/queue/minimum_io_size
Date: April 2009
Contact: Martin K. Petersen <martin.petersen@oracle.com>
Description:
- Storage devices may report a preferred minimum I/O size,
- which is the smallest request the device can perform
- without incurring a read-modify-write penalty. For disk
- drives this is often the physical block size. For RAID
- arrays it is often the stripe chunk size.
+ Storage devices may report a granularity or preferred
+ minimum I/O size which is the smallest request the
+ device can perform without incurring a performance
+ penalty. For disk drives this is often the physical
+ block size. For RAID arrays it is often the stripe
+ chunk size. A properly aligned multiple of
+ minimum_io_size is the preferred request size for
+ workloads where a high number of I/O operations is
+ desired.
What: /sys/block/<disk>/queue/optimal_io_size
Date: April 2009
Contact: Martin K. Petersen <martin.petersen@oracle.com>
Description:
Storage devices may report an optimal I/O size, which is
- the device's preferred unit of receiving I/O. This is
- rarely reported for disk drives. For RAID devices it is
- usually the stripe width or the internal block size.
+ the device's preferred unit for sustained I/O. This is
+ rarely reported for disk drives. For RAID arrays it is
+ usually the stripe width or the internal track size. A
+ properly aligned multiple of optimal_io_size is the
+ preferred request size for workloads where sustained
+ throughput is desired. If no optimal I/O size is
+ reported this file contains 0.
diff --git a/Documentation/DocBook/kernel-hacking.tmpl b/Documentation/DocBook/kernel-hacking.tmpl
index a50d6cd5857..992e67e6be7 100644
--- a/Documentation/DocBook/kernel-hacking.tmpl
+++ b/Documentation/DocBook/kernel-hacking.tmpl
@@ -449,8 +449,8 @@ printk(KERN_INFO "i = %u\n", i);
</para>
<programlisting>
-__u32 ipaddress;
-printk(KERN_INFO "my ip: %d.%d.%d.%d\n", NIPQUAD(ipaddress));
+__be32 ipaddress;
+printk(KERN_INFO "my ip: %pI4\n", &amp;ipaddress);
</programlisting>
<para>
diff --git a/Documentation/RCU/rculist_nulls.txt b/Documentation/RCU/rculist_nulls.txt
index 93cb28d05dc..18f9651ff23 100644
--- a/Documentation/RCU/rculist_nulls.txt
+++ b/Documentation/RCU/rculist_nulls.txt
@@ -83,11 +83,12 @@ not detect it missed following items in original chain.
obj = kmem_cache_alloc(...);
lock_chain(); // typically a spin_lock()
obj->key = key;
-atomic_inc(&obj->refcnt);
/*
* we need to make sure obj->key is updated before obj->next
+ * or obj->refcnt
*/
smp_wmb();
+atomic_set(&obj->refcnt, 1);
hlist_add_head_rcu(&obj->obj_node, list);
unlock_chain(); // typically a spin_unlock()
@@ -159,6 +160,10 @@ out:
obj = kmem_cache_alloc(cachep);
lock_chain(); // typically a spin_lock()
obj->key = key;
+/*
+ * changes to obj->key must be visible before the refcnt update
+ */
+smp_wmb();
atomic_set(&obj->refcnt, 1);
/*
* insert obj in RCU way (readers might be traversing chain)
diff --git a/Documentation/arm/memory.txt b/Documentation/arm/memory.txt
index 43cb1004d35..9d58c7c5edd 100644
--- a/Documentation/arm/memory.txt
+++ b/Documentation/arm/memory.txt
@@ -21,6 +21,8 @@ ffff8000 ffffffff copy_user_page / clear_user_page use.
For SA11xx and Xscale, this is used to
setup a minicache mapping.
+ffff4000 ffffffff cache aliasing on ARMv6 and later CPUs.
+
ffff1000 ffff7fff Reserved.
Platforms must not use this address range.
diff --git a/Documentation/connector/cn_test.c b/Documentation/connector/cn_test.c
index f688eba8770..6a5be5d5c8e 100644
--- a/Documentation/connector/cn_test.c
+++ b/Documentation/connector/cn_test.c
@@ -1,7 +1,7 @@
/*
* cn_test.c
*
- * 2004-2005 Copyright (c) Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * 2004+ Copyright (c) Evgeniy Polyakov <zbr@ioremap.net>
* All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
@@ -194,5 +194,5 @@ module_init(cn_test_init);
module_exit(cn_test_fini);
MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Evgeniy Polyakov <johnpol@2ka.mipt.ru>");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
MODULE_DESCRIPTION("Connector's test module");
diff --git a/Documentation/connector/ucon.c b/Documentation/connector/ucon.c
index d738cde2a8d..c5092ad0ce4 100644
--- a/Documentation/connector/ucon.c
+++ b/Documentation/connector/ucon.c
@@ -1,7 +1,7 @@
/*
* ucon.c
*
- * Copyright (c) 2004+ Evgeniy Polyakov <johnpol@2ka.mipt.ru>
+ * Copyright (c) 2004+ Evgeniy Polyakov <zbr@ioremap.net>
*
*
* This program is free software; you can redistribute it and/or modify
diff --git a/Documentation/filesystems/sysfs.txt b/Documentation/filesystems/sysfs.txt
index 7e81e37c0b1..b245d524d56 100644
--- a/Documentation/filesystems/sysfs.txt
+++ b/Documentation/filesystems/sysfs.txt
@@ -23,7 +23,8 @@ interface.
Using sysfs
~~~~~~~~~~~
-sysfs is always compiled in. You can access it by doing:
+sysfs is always compiled in if CONFIG_SYSFS is defined. You can access
+it by doing:
mount -t sysfs sysfs /sys
diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 7bb0d934b6d..dbea4f95fc8 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -139,6 +139,7 @@ Code Seq# Include File Comments
'm' all linux/synclink.h conflict!
'm' 00-1F net/irda/irmod.h conflict!
'n' 00-7F linux/ncp_fs.h
+'n' 80-8F linux/nilfs2_fs.h NILFS2
'n' E0-FF video/matrox.h matroxfb
'o' 00-1F fs/ocfs2/ocfs2_fs.h OCFS2
'o' 00-03 include/mtd/ubi-user.h conflict! (OCFS2 and UBI overlaps)
diff --git a/Documentation/laptops/thinkpad-acpi.txt b/Documentation/laptops/thinkpad-acpi.txt
index f2296ecedb8..e2ddcdeb61b 100644
--- a/Documentation/laptops/thinkpad-acpi.txt
+++ b/Documentation/laptops/thinkpad-acpi.txt
@@ -36,8 +36,6 @@ detailed description):
- Bluetooth enable and disable
- video output switching, expansion control
- ThinkLight on and off
- - limited docking and undocking
- - UltraBay eject
- CMOS/UCMS control
- LED control
- ACPI sounds
@@ -729,131 +727,6 @@ cannot be read or if it is unknown, thinkpad-acpi will report it as "off".
It is impossible to know if the status returned through sysfs is valid.
-Docking / undocking -- /proc/acpi/ibm/dock
-------------------------------------------
-
-Docking and undocking (e.g. with the X4 UltraBase) requires some
-actions to be taken by the operating system to safely make or break
-the electrical connections with the dock.
-
-The docking feature of this driver generates the following ACPI events:
-
- ibm/dock GDCK 00000003 00000001 -- eject request
- ibm/dock GDCK 00000003 00000002 -- undocked
- ibm/dock GDCK 00000000 00000003 -- docked
-
-NOTE: These events will only be generated if the laptop was docked
-when originally booted. This is due to the current lack of support for
-hot plugging of devices in the Linux ACPI framework. If the laptop was
-booted while not in the dock, the following message is shown in the
-logs:
-
- Mar 17 01:42:34 aero kernel: thinkpad_acpi: dock device not present
-
-In this case, no dock-related events are generated but the dock and
-undock commands described below still work. They can be executed
-manually or triggered by Fn key combinations (see the example acpid
-configuration files included in the driver tarball package available
-on the web site).
-
-When the eject request button on the dock is pressed, the first event
-above is generated. The handler for this event should issue the
-following command:
-
- echo undock > /proc/acpi/ibm/dock
-
-After the LED on the dock goes off, it is safe to eject the laptop.
-Note: if you pressed this key by mistake, go ahead and eject the
-laptop, then dock it back in. Otherwise, the dock may not function as
-expected.
-
-When the laptop is docked, the third event above is generated. The
-handler for this event should issue the following command to fully
-enable the dock:
-
- echo dock > /proc/acpi/ibm/dock
-
-The contents of the /proc/acpi/ibm/dock file shows the current status
-of the dock, as provided by the ACPI framework.
-
-The docking support in this driver does not take care of enabling or
-disabling any other devices you may have attached to the dock. For
-example, a CD drive plugged into the UltraBase needs to be disabled or
-enabled separately. See the provided example acpid configuration files
-for how this can be accomplished.
-
-There is no support yet for PCI devices that may be attached to a
-docking station, e.g. in the ThinkPad Dock II. The driver currently
-does not recognize, enable or disable such devices. This means that
-the only docking stations currently supported are the X-series
-UltraBase docks and "dumb" port replicators like the Mini Dock (the
-latter don't need any ACPI support, actually).
-
-
-UltraBay eject -- /proc/acpi/ibm/bay
-------------------------------------
-
-Inserting or ejecting an UltraBay device requires some actions to be
-taken by the operating system to safely make or break the electrical
-connections with the device.
-
-This feature generates the following ACPI events:
-
- ibm/bay MSTR 00000003 00000000 -- eject request
- ibm/bay MSTR 00000001 00000000 -- eject lever inserted
-
-NOTE: These events will only be generated if the UltraBay was present
-when the laptop was originally booted (on the X series, the UltraBay
-is in the dock, so it may not be present if the laptop was undocked).
-This is due to the current lack of support for hot plugging of devices
-in the Linux ACPI framework. If the laptop was booted without the
-UltraBay, the following message is shown in the logs:
-
- Mar 17 01:42:34 aero kernel: thinkpad_acpi: bay device not present
-
-In this case, no bay-related events are generated but the eject
-command described below still works. It can be executed manually or
-triggered by a hot key combination.
-
-Sliding the eject lever generates the first event shown above. The
-handler for this event should take whatever actions are necessary to
-shut down the device in the UltraBay (e.g. call idectl), then issue
-the following command:
-
- echo eject > /proc/acpi/ibm/bay
-
-After the LED on the UltraBay goes off, it is safe to pull out the
-device.
-
-When the eject lever is inserted, the second event above is
-generated. The handler for this event should take whatever actions are
-necessary to enable the UltraBay device (e.g. call idectl).
-
-The contents of the /proc/acpi/ibm/bay file shows the current status
-of the UltraBay, as provided by the ACPI framework.
-
-EXPERIMENTAL warm eject support on the 600e/x, A22p and A3x (To use
-this feature, you need to supply the experimental=1 parameter when
-loading the module):
-
-These models do not have a button near the UltraBay device to request
-a hot eject but rather require the laptop to be put to sleep
-(suspend-to-ram) before the bay device is ejected or inserted).
-The sequence of steps to eject the device is as follows:
-
- echo eject > /proc/acpi/ibm/bay
- put the ThinkPad to sleep
- remove the drive
- resume from sleep
- cat /proc/acpi/ibm/bay should show that the drive was removed
-
-On the A3x, both the UltraBay 2000 and UltraBay Plus devices are
-supported. Use "eject2" instead of "eject" for the second bay.
-
-Note: the UltraBay eject support on the 600e/x, A22p and A3x is
-EXPERIMENTAL and may not work as expected. USE WITH CAUTION!
-
-
CMOS/UCMS control
-----------------
diff --git a/Documentation/lguest/lguest.c b/Documentation/lguest/lguest.c
index 9ebcd6ef361..950cde6d6e5 100644
--- a/Documentation/lguest/lguest.c
+++ b/Documentation/lguest/lguest.c
@@ -1,7 +1,9 @@
-/*P:100 This is the Launcher code, a simple program which lays out the
- * "physical" memory for the new Guest by mapping the kernel image and
- * the virtual devices, then opens /dev/lguest to tell the kernel
- * about the Guest and control it. :*/
+/*P:100
+ * This is the Launcher code, a simple program which lays out the "physical"
+ * memory for the new Guest by mapping the kernel image and the virtual
+ * devices, then opens /dev/lguest to tell the kernel about the Guest and
+ * control it.
+:*/
#define _LARGEFILE64_SOURCE
#define _GNU_SOURCE
#include <stdio.h>
@@ -46,13 +48,15 @@
#include "linux/virtio_rng.h"
#include "linux/virtio_ring.h"
#include "asm/bootparam.h"
-/*L:110 We can ignore the 39 include files we need for this program, but I do
- * want to draw attention to the use of kernel-style types.
+/*L:110
+ * We can ignore the 42 include files we need for this program, but I do want
+ * to draw attention to the use of kernel-style types.
*
* As Linus said, "C is a Spartan language, and so should your naming be." I
* like these abbreviations, so we define them here. Note that u64 is always
* unsigned long long, which works on all Linux systems: this means that we can
- * use %llu in printf for any u64. */
+ * use %llu in printf for any u64.
+ */
typedef unsigned long long u64;
typedef uint32_t u32;
typedef uint16_t u16;
@@ -69,8 +73,10 @@ typedef uint8_t u8;
/* This will occupy 3 pages: it must be a power of 2. */
#define VIRTQUEUE_NUM 256
-/*L:120 verbose is both a global flag and a macro. The C preprocessor allows
- * this, and although I wouldn't recommend it, it works quite nicely here. */
+/*L:120
+ * verbose is both a global flag and a macro. The C preprocessor allows
+ * this, and although I wouldn't recommend it, it works quite nicely here.
+ */
static bool verbose;
#define verbose(args...) \
do { if (verbose) printf(args); } while(0)
@@ -87,8 +93,7 @@ static int lguest_fd;
static unsigned int __thread cpu_id;
/* This is our list of devices. */
-struct device_list
-{
+struct device_list {
/* Counter to assign interrupt numbers. */
unsigned int next_irq;
@@ -100,8 +105,7 @@ struct device_list
/* A single linked list of devices. */
struct device *dev;
- /* And a pointer to the last device for easy append and also for
- * configuration appending. */
+ /* And a pointer to the last device for easy append. */
struct device *lastdev;
};
@@ -109,8 +113,7 @@ struct device_list
static struct device_list devices;
/* The device structure describes a single device. */
-struct device
-{
+struct device {
/* The linked-list pointer. */
struct device *next;
@@ -135,8 +138,7 @@ struct device
};
/* The virtqueue structure describes a queue attached to a device. */
-struct virtqueue
-{
+struct virtqueue {
struct virtqueue *next;
/* Which device owns me. */
@@ -168,20 +170,24 @@ static char **main_args;
/* The original tty settings to restore on exit. */
static struct termios orig_term;
-/* We have to be careful with barriers: our devices are all run in separate
+/*
+ * We have to be careful with barriers: our devices are all run in separate
* threads and so we need to make sure that changes visible to the Guest happen
- * in precise order. */
+ * in precise order.
+ */
#define wmb() __asm__ __volatile__("" : : : "memory")
#define mb() __asm__ __volatile__("" : : : "memory")
-/* Convert an iovec element to the given type.
+/*
+ * Convert an iovec element to the given type.
*
* This is a fairly ugly trick: we need to know the size of the type and
* alignment requirement to check the pointer is kosher. It's also nice to
* have the name of the type in case we report failure.
*
* Typing those three things all the time is cumbersome and error prone, so we
- * have a macro which sets them all up and passes to the real function. */
+ * have a macro which sets them all up and passes to the real function.
+ */
#define convert(iov, type) \
((type *)_convert((iov), sizeof(type), __alignof__(type), #type))
@@ -198,8 +204,10 @@ static void *_convert(struct iovec *iov, size_t size, size_t align,
/* Wrapper for the last available index. Makes it easier to change. */
#define lg_last_avail(vq) ((vq)->last_avail_idx)
-/* The virtio configuration space is defined to be little-endian. x86 is
- * little-endian too, but it's nice to be explicit so we have these helpers. */
+/*
+ * The virtio configuration space is defined to be little-endian. x86 is
+ * little-endian too, but it's nice to be explicit so we have these helpers.
+ */
#define cpu_to_le16(v16) (v16)
#define cpu_to_le32(v32) (v32)
#define cpu_to_le64(v64) (v64)
@@ -241,11 +249,12 @@ static u8 *get_feature_bits(struct device *dev)
+ dev->num_vq * sizeof(struct lguest_vqconfig);
}
-/*L:100 The Launcher code itself takes us out into userspace, that scary place
- * where pointers run wild and free! Unfortunately, like most userspace
- * programs, it's quite boring (which is why everyone likes to hack on the
- * kernel!). Perhaps if you make up an Lguest Drinking Game at this point, it
- * will get you through this section. Or, maybe not.
+/*L:100
+ * The Launcher code itself takes us out into userspace, that scary place where
+ * pointers run wild and free! Unfortunately, like most userspace programs,
+ * it's quite boring (which is why everyone likes to hack on the kernel!).
+ * Perhaps if you make up an Lguest Drinking Game at this point, it will get
+ * you through this section. Or, maybe not.
*
* The Launcher sets up a big chunk of memory to be the Guest's "physical"
* memory and stores it in "guest_base". In other words, Guest physical ==
@@ -253,7 +262,8 @@ static u8 *get_feature_bits(struct device *dev)
*
* This can be tough to get your head around, but usually it just means that we
* use these trivial conversion functions when the Guest gives us its
- * "physical" addresses: */
+ * "physical" addresses:
+ */
static void *from_guest_phys(unsigned long addr)
{
return guest_base + addr;
@@ -268,7 +278,8 @@ static unsigned long to_guest_phys(const void *addr)
* Loading the Kernel.
*
* We start with a couple of simple helper routines. open_or_die() avoids
- * error-checking code cluttering the callers: */
+ * error-checking code cluttering the callers:
+ */
static int open_or_die(const char *name, int flags)
{
int fd = open(name, flags);
@@ -283,12 +294,19 @@ static void *map_zeroed_pages(unsigned int num)
int fd = open_or_die("/dev/zero", O_RDONLY);
void *addr;
- /* We use a private mapping (ie. if we write to the page, it will be
- * copied). */
+ /*
+ * We use a private mapping (ie. if we write to the page, it will be
+ * copied).
+ */
addr = mmap(NULL, getpagesize() * num,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, fd, 0);
if (addr == MAP_FAILED)
err(1, "Mmaping %u pages of /dev/zero", num);
+
+ /*
+ * One neat mmap feature is that you can close the fd, and it
+ * stays mapped.
+ */
close(fd);
return addr;
@@ -305,20 +323,24 @@ static void *get_pages(unsigned int num)
return addr;
}
-/* This routine is used to load the kernel or initrd. It tries mmap, but if
+/*
+ * This routine is used to load the kernel or initrd. It tries mmap, but if
* that fails (Plan 9's kernel file isn't nicely aligned on page boundaries),
- * it falls back to reading the memory in. */
+ * it falls back to reading the memory in.
+ */
static void map_at(int fd, void *addr, unsigned long offset, unsigned long len)
{
ssize_t r;
- /* We map writable even though for some segments are marked read-only.
+ /*
+ * We map writable even though some segments are marked read-only.
* The kernel really wants to be writable: it patches its own
* instructions.
*
* MAP_PRIVATE means that the page won't be copied until a write is
* done to it. This allows us to share untouched memory between
- * Guests. */
+ * Guests.
+ */
if (mmap(addr, len, PROT_READ|PROT_WRITE|PROT_EXEC,
MAP_FIXED|MAP_PRIVATE, fd, offset) != MAP_FAILED)
return;
@@ -329,7 +351,8 @@ static void map_at(int fd, void *addr, unsigned long offset, unsigned long len)
err(1, "Reading offset %lu len %lu gave %zi", offset, len, r);
}
-/* This routine takes an open vmlinux image, which is in ELF, and maps it into
+/*
+ * This routine takes an open vmlinux image, which is in ELF, and maps it into
* the Guest memory. ELF = Executable and Linkable Format, which is the format used
* by all modern binaries on Linux including the kernel.
*
@@ -337,23 +360,28 @@ static void map_at(int fd, void *addr, unsigned long offset, unsigned long len)
* address. We use the physical address; the Guest will map itself to the
* virtual address.
*
- * We return the starting address. */
+ * We return the starting address.
+ */
static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr)
{
Elf32_Phdr phdr[ehdr->e_phnum];
unsigned int i;
- /* Sanity checks on the main ELF header: an x86 executable with a
- * reasonable number of correctly-sized program headers. */
+ /*
+ * Sanity checks on the main ELF header: an x86 executable with a
+ * reasonable number of correctly-sized program headers.
+ */
if (ehdr->e_type != ET_EXEC
|| ehdr->e_machine != EM_386
|| ehdr->e_phentsize != sizeof(Elf32_Phdr)
|| ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr))
errx(1, "Malformed elf header");
- /* An ELF executable contains an ELF header and a number of "program"
+ /*
+ * An ELF executable contains an ELF header and a number of "program"
* headers which indicate which parts ("segments") of the program to
- * load where. */
+ * load where.
+ */
/* We read in all the program headers at once: */
if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0)
@@ -361,8 +389,10 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr)
if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr))
err(1, "Reading program headers");
- /* Try all the headers: there are usually only three. A read-only one,
- * a read-write one, and a "note" section which we don't load. */
+ /*
+ * Try all the headers: there are usually only three. A read-only one,
+ * a read-write one, and a "note" section which we don't load.
+ */
for (i = 0; i < ehdr->e_phnum; i++) {
/* If this isn't a loadable segment, we ignore it */
if (phdr[i].p_type != PT_LOAD)
@@ -380,13 +410,15 @@ static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr)
return ehdr->e_entry;
}
-/*L:150 A bzImage, unlike an ELF file, is not meant to be loaded. You're
- * supposed to jump into it and it will unpack itself. We used to have to
- * perform some hairy magic because the unpacking code scared me.
+/*L:150
+ * A bzImage, unlike an ELF file, is not meant to be loaded. You're supposed
+ * to jump into it and it will unpack itself. We used to have to perform some
+ * hairy magic because the unpacking code scared me.
*
* Fortunately, Jeremy Fitzhardinge convinced me it wasn't that hard and wrote
* a small patch to jump over the tricky bits in the Guest, so now we just read
- * the funky header so we know where in the file to load, and away we go! */
+ * the funky header so we know where in the file to load, and away we go!
+ */
static unsigned long load_bzimage(int fd)
{
struct boot_params boot;
@@ -394,8 +426,10 @@ static unsigned long load_bzimage(int fd)
/* Modern bzImages get loaded at 1M. */
void *p = from_guest_phys(0x100000);
- /* Go back to the start of the file and read the header. It should be
- * a Linux boot header (see Documentation/x86/i386/boot.txt) */
+ /*
+ * Go back to the start of the file and read the header. It should be
+ * a Linux boot header (see Documentation/x86/i386/boot.txt)
+ */
lseek(fd, 0, SEEK_SET);
read(fd, &boot, sizeof(boot));
@@ -414,9 +448,11 @@ static unsigned long load_bzimage(int fd)
return boot.hdr.code32_start;
}
-/*L:140 Loading the kernel is easy when it's a "vmlinux", but most kernels
+/*L:140
+ * Loading the kernel is easy when it's a "vmlinux", but most kernels
* come wrapped up in the self-decompressing "bzImage" format. With a little
- * work, we can load those, too. */
+ * work, we can load those, too.
+ */
static unsigned long load_kernel(int fd)
{
Elf32_Ehdr hdr;
@@ -433,24 +469,28 @@ static unsigned long load_kernel(int fd)
return load_bzimage(fd);
}
-/* This is a trivial little helper to align pages. Andi Kleen hated it because
+/*
+ * This is a trivial little helper to align pages. Andi Kleen hated it because
* it calls getpagesize() twice: "it's dumb code."
*
* Kernel guys get really het up about optimization, even when it's not
- * necessary. I leave this code as a reaction against that. */
+ * necessary. I leave this code as a reaction against that.
+ */
static inline unsigned long page_align(unsigned long addr)
{
/* Add upwards and truncate downwards. */
return ((addr + getpagesize()-1) & ~(getpagesize()-1));
}
-/*L:180 An "initial ram disk" is a disk image loaded into memory along with
- * the kernel which the kernel can use to boot from without needing any
- * drivers. Most distributions now use this as standard: the initrd contains
- * the code to load the appropriate driver modules for the current machine.
+/*L:180
+ * An "initial ram disk" is a disk image loaded into memory along with the
+ * kernel which the kernel can use to boot from without needing any drivers.
+ * Most distributions now use this as standard: the initrd contains the code to
+ * load the appropriate driver modules for the current machine.
*
* Importantly, James Morris works for RedHat, and Fedora uses initrds for its
- * kernels. He sent me this (and tells me when I break it). */
+ * kernels. He sent me this (and tells me when I break it).
+ */
static unsigned long load_initrd(const char *name, unsigned long mem)
{
int ifd;
@@ -462,12 +502,16 @@ static unsigned long load_initrd(const char *name, unsigned long mem)
if (fstat(ifd, &st) < 0)
err(1, "fstat() on initrd '%s'", name);
- /* We map the initrd at the top of memory, but mmap wants it to be
- * page-aligned, so we round the size up for that. */
+ /*
+ * We map the initrd at the top of memory, but mmap wants it to be
+ * page-aligned, so we round the size up for that.
+ */
len = page_align(st.st_size);
map_at(ifd, from_guest_phys(mem - len), 0, st.st_size);
- /* Once a file is mapped, you can close the file descriptor. It's a
- * little odd, but quite useful. */
+ /*
+ * Once a file is mapped, you can close the file descriptor. It's a
+ * little odd, but quite useful.
+ */
close(ifd);
verbose("mapped initrd %s size=%lu @ %p\n", name, len, (void*)mem-len);
@@ -476,8 +520,10 @@ static unsigned long load_initrd(const char *name, unsigned long mem)
}
/*:*/
-/* Simple routine to roll all the commandline arguments together with spaces
- * between them. */
+/*
+ * Simple routine to roll all the commandline arguments together with spaces
+ * between them.
+ */
static void concat(char *dst, char *args[])
{
unsigned int i, len = 0;
@@ -494,10 +540,12 @@ static void concat(char *dst, char *args[])
dst[len] = '\0';
}
-/*L:185 This is where we actually tell the kernel to initialize the Guest. We
+/*L:185
+ * This is where we actually tell the kernel to initialize the Guest. We
* saw the arguments it expects when we looked at initialize() in lguest_user.c:
* the base of Guest "physical" memory, the top physical page to allow and the
- * entry point for the Guest. */
+ * entry point for the Guest.
+ */
static void tell_kernel(unsigned long start)
{
unsigned long args[] = { LHREQ_INITIALIZE,
@@ -511,7 +559,7 @@ static void tell_kernel(unsigned long start)
}
/*:*/
-/*
+/*L:200
* Device Handling.
*
* When the Guest gives us a buffer, it sends an array of addresses and sizes.
@@ -522,20 +570,26 @@ static void tell_kernel(unsigned long start)
static void *_check_pointer(unsigned long addr, unsigned int size,
unsigned int line)
{
- /* We have to separately check addr and addr+size, because size could
- * be huge and addr + size might wrap around. */
+ /*
+ * We have to separately check addr and addr+size, because size could
+ * be huge and addr + size might wrap around.
+ */
if (addr >= guest_limit || addr + size >= guest_limit)
errx(1, "%s:%i: Invalid address %#lx", __FILE__, line, addr);
- /* We return a pointer for the caller's convenience, now we know it's
- * safe to use. */
+ /*
+ * We return a pointer for the caller's convenience, now we know it's
+ * safe to use.
+ */
return from_guest_phys(addr);
}
/* A macro which transparently hands the line number to the real function. */
#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__)
-/* Each buffer in the virtqueues is actually a chain of descriptors. This
+/*
+ * Each buffer in the virtqueues is actually a chain of descriptors. This
* function returns the next descriptor in the chain, or vq->vring.num if we're
- * at the end. */
+ * at the end.
+ */
static unsigned next_desc(struct vring_desc *desc,
unsigned int i, unsigned int max)
{
@@ -556,7 +610,10 @@ static unsigned next_desc(struct vring_desc *desc,
return next;
}
-/* This actually sends the interrupt for this virtqueue */
+/*
+ * This actually sends the interrupt for this virtqueue, if we've used a
+ * buffer.
+ */
static void trigger_irq(struct virtqueue *vq)
{
unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
@@ -576,12 +633,14 @@ static void trigger_irq(struct virtqueue *vq)
err(1, "Triggering irq %i", vq->config.irq);
}
-/* This looks in the virtqueue and for the first available buffer, and converts
+/*
+ * This looks in the virtqueue for the first available buffer, and converts
* it to an iovec for convenient access. Since descriptors consist of some
* number of output then some number of input descriptors, it's actually two
* iovecs, but we pack them into one and note how many of each there were.
*
- * This function returns the descriptor number found. */
+ * This function waits if necessary, and returns the descriptor number found.
+ */
static unsigned wait_for_vq_desc(struct virtqueue *vq,
struct iovec iov[],
unsigned int *out_num, unsigned int *in_num)
@@ -590,17 +649,23 @@ static unsigned wait_for_vq_desc(struct virtqueue *vq,
struct vring_desc *desc;
u16 last_avail = lg_last_avail(vq);
+ /* There's nothing available? */
while (last_avail == vq->vring.avail->idx) {
u64 event;
- /* OK, tell Guest about progress up to now. */
+ /*
+ * Since we're about to sleep, now is a good time to tell the
+ * Guest about what we've used up to now.
+ */
trigger_irq(vq);
/* OK, now we need to know about added descriptors. */
vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
- /* They could have slipped one in as we were doing that: make
- * sure it's written, then check again. */
+ /*
+ * They could have slipped one in as we were doing that: make
+ * sure it's written, then c