aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/kernel-parameters.txt5
-rw-r--r--Documentation/virtual/lguest/Makefile2
-rw-r--r--Documentation/virtual/lguest/lguest.c22
-rw-r--r--arch/ia64/include/asm/unistd.h3
-rw-r--r--arch/ia64/kernel/entry.S1
-rw-r--r--arch/powerpc/platforms/powermac/pic.c3
-rw-r--r--arch/x86/kernel/Makefile2
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kernel/smpboot.c2
-rw-r--r--arch/x86/lguest/boot.c1
-rw-r--r--drivers/block/paride/pcd.c1
-rw-r--r--drivers/block/virtio_blk.c91
-rw-r--r--drivers/cdrom/viocd.c1
-rw-r--r--drivers/char/virtio_console.c5
-rw-r--r--drivers/ide/ide-cd.c1
-rw-r--r--drivers/misc/kgdbts.c5
-rw-r--r--drivers/net/virtio_net.c2
-rw-r--r--drivers/pci/dmar.c7
-rw-r--r--drivers/pci/intel-iommu.c240
-rw-r--r--drivers/pci/iova.c12
-rw-r--r--drivers/vhost/net.c12
-rw-r--r--drivers/vhost/test.c6
-rw-r--r--drivers/vhost/vhost.c138
-rw-r--r--drivers/vhost/vhost.h21
-rw-r--r--drivers/virtio/virtio_balloon.c21
-rw-r--r--drivers/virtio/virtio_ring.c53
-rw-r--r--fs/autofs4/root.c2
-rw-r--r--fs/namei.c3
-rw-r--r--include/linux/dma_remapping.h4
-rw-r--r--include/linux/mtd/physmap.h1
-rw-r--r--include/linux/virtio.h9
-rw-r--r--include/linux/virtio_9p.h25
-rw-r--r--include/linux/virtio_balloon.h25
-rw-r--r--include/linux/virtio_blk.h25
-rw-r--r--include/linux/virtio_config.h25
-rw-r--r--include/linux/virtio_console.h26
-rw-r--r--include/linux/virtio_ids.h24
-rw-r--r--include/linux/virtio_net.h25
-rw-r--r--include/linux/virtio_pci.h23
-rw-r--r--include/linux/virtio_ring.h52
-rw-r--r--kernel/events/core.c22
-rw-r--r--kernel/rcutree.c54
-rw-r--r--kernel/rcutree_plugin.h11
-rw-r--r--kernel/sysctl.c6
-rw-r--r--mm/page_alloc.c4
-rw-r--r--security/apparmor/lsm.c3
-rw-r--r--tools/virtio/virtio_test.c19
47 files changed, 844 insertions, 203 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 5438a2d7907..d9a203b058f 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -999,7 +999,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
With this option on every unmap_single operation will
result in a hardware IOTLB flush operation as opposed
to batching them for performance.
-
+ sp_off [Default Off]
+ By default, super page will be supported if Intel IOMMU
+ has the capability. With this option, super page will
+ not be supported.
intremap= [X86-64, Intel-IOMMU]
Format: { on (default) | off | nosid }
on enable Interrupt Remapping (default)
diff --git a/Documentation/virtual/lguest/Makefile b/Documentation/virtual/lguest/Makefile
index bebac6b4f33..0ac34206f7a 100644
--- a/Documentation/virtual/lguest/Makefile
+++ b/Documentation/virtual/lguest/Makefile
@@ -1,5 +1,5 @@
# This creates the demonstration utility "lguest" which runs a Linux guest.
-# Missing headers? Add "-I../../include -I../../arch/x86/include"
+# Missing headers? Add "-I../../../include -I../../../arch/x86/include"
CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE
all: lguest
diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c
index d9da7e14853..cd9d6af61d0 100644
--- a/Documentation/virtual/lguest/lguest.c
+++ b/Documentation/virtual/lguest/lguest.c
@@ -49,7 +49,7 @@
#include <linux/virtio_rng.h>
#include <linux/virtio_ring.h>
#include <asm/bootparam.h>
-#include "../../include/linux/lguest_launcher.h"
+#include "../../../include/linux/lguest_launcher.h"
/*L:110
* We can ignore the 42 include files we need for this program, but I do want
* to draw attention to the use of kernel-style types.
@@ -135,9 +135,6 @@ struct device {
/* Is it operational */
bool running;
- /* Does Guest want an intrrupt on empty? */
- bool irq_on_empty;
-
/* Device-specific data. */
void *priv;
};
@@ -637,10 +634,7 @@ static void trigger_irq(struct virtqueue *vq)
/* If they don't want an interrupt, don't send one... */
if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
- /* ... unless they've asked us to force one on empty. */
- if (!vq->dev->irq_on_empty
- || lg_last_avail(vq) != vq->vring.avail->idx)
- return;
+ return;
}
/* Send the Guest an interrupt tell them we used something up. */
@@ -1057,15 +1051,6 @@ static void create_thread(struct virtqueue *vq)
close(vq->eventfd);
}
-static bool accepted_feature(struct device *dev, unsigned int bit)
-{
- const u8 *features = get_feature_bits(dev) + dev->feature_len;
-
- if (dev->feature_len < bit / CHAR_BIT)
- return false;
- return features[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT));
-}
-
static void start_device(struct device *dev)
{
unsigned int i;
@@ -1079,8 +1064,6 @@ static void start_device(struct device *dev)
verbose(" %02x", get_feature_bits(dev)
[dev->feature_len+i]);
- dev->irq_on_empty = accepted_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
-
for (vq = dev->vq; vq; vq = vq->next) {
if (vq->service)
create_thread(vq);
@@ -1564,7 +1547,6 @@ static void setup_tun_net(char *arg)
/* Set up the tun device. */
configure_device(ipfd, tapif, ip);
- add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY);
/* Expect Guest to handle everything except UFO */
add_feature(dev, VIRTIO_NET_F_CSUM);
add_feature(dev, VIRTIO_NET_F_GUEST_CSUM);
diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h
index 1cf0f496f74..7c928da35b1 100644
--- a/arch/ia64/include/asm/unistd.h
+++ b/arch/ia64/include/asm/unistd.h
@@ -320,11 +320,12 @@
#define __NR_clock_adjtime 1328
#define __NR_syncfs 1329
#define __NR_setns 1330
+#define __NR_sendmmsg 1331
#ifdef __KERNEL__
-#define NR_syscalls 307 /* length of syscall table */
+#define NR_syscalls 308 /* length of syscall table */
/*
* The following defines stop scripts/checksyscalls.sh from complaining about
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 9ca80193cd4..97dd2abdeb1 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1776,6 +1776,7 @@ sys_call_table:
data8 sys_clock_adjtime
data8 sys_syncfs
data8 sys_setns // 1330
+ data8 sys_sendmmsg
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
#endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */
diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c
index 9089b042119..7667db448aa 100644
--- a/arch/powerpc/platforms/powermac/pic.c
+++ b/arch/powerpc/platforms/powermac/pic.c
@@ -715,7 +715,8 @@ static struct syscore_ops pmacpic_syscore_ops = {
static int __init init_pmacpic_syscore(void)
{
- register_syscore_ops(&pmacpic_syscore_ops);
+ if (pmac_irq_hw[0])
+ register_syscore_ops(&pmacpic_syscore_ops);
return 0;
}
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f5abe3a245b..90b06d4daee 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -8,6 +8,7 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
+CFLAGS_REMOVE_tsc.o = -pg
CFLAGS_REMOVE_rtc.o = -pg
CFLAGS_REMOVE_paravirt-spinlocks.o = -pg
CFLAGS_REMOVE_pvclock.o = -pg
@@ -28,6 +29,7 @@ CFLAGS_paravirt.o := $(nostackp)
GCOV_PROFILE_vsyscall_64.o := n
GCOV_PROFILE_hpet.o := n
GCOV_PROFILE_tsc.o := n
+GCOV_PROFILE_vread_tsc_64.o := n
GCOV_PROFILE_paravirt.o := n
# vread_tsc_64 is hot and should be fully optimized:
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 426a5b66f7e..2e4928d45a2 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -642,7 +642,7 @@ static int __init idle_setup(char *str)
boot_option_idle_override = IDLE_POLL;
} else if (!strcmp(str, "mwait")) {
boot_option_idle_override = IDLE_FORCE_MWAIT;
- WARN_ONCE(1, "\idle=mwait\" will be removed in 2012\"\n");
+ WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n");
} else if (!strcmp(str, "halt")) {
/*
* When the boot option of idle=halt is added, halt is
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index eefd96765e7..33a0c11797d 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1332,7 +1332,7 @@ static inline void mwait_play_dead(void)
void *mwait_ptr;
struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
- if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))
+ if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)))
return;
if (!this_cpu_has(X86_FEATURE_CLFLSH))
return;
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index e191c096ab9..db832fd65ec 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -993,6 +993,7 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc)
static void lguest_time_init(void)
{
/* Set up the timer interrupt (0) to go to our simple timer routine */
+ lguest_setup_irq(0);
irq_set_handler(0, lguest_time_irq);
clocksource_register_hz(&lguest_clock, NSEC_PER_SEC);
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index a0aabd904a5..46b8136c31b 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -321,7 +321,6 @@ static void pcd_init_units(void)
strcpy(disk->disk_name, cd->name); /* umm... */
disk->fops = &pcd_bdops;
disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
- disk->events = DISK_EVENT_MEDIA_CHANGE;
}
}
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 6ecf89cdf00..079c08808d8 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -6,10 +6,13 @@
#include <linux/virtio.h>
#include <linux/virtio_blk.h>
#include <linux/scatterlist.h>
+#include <linux/string_helpers.h>
+#include <scsi/scsi_cmnd.h>
#define PART_BITS 4
static int major, index;
+struct workqueue_struct *virtblk_wq;
struct virtio_blk
{
@@ -26,6 +29,9 @@ struct virtio_blk
mempool_t *pool;
+ /* Process context for config space updates */
+ struct work_struct config_work;
+
/* What host tells us, plus 2 for header & tailer. */
unsigned int sg_elems;
@@ -141,7 +147,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
- sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96);
+ sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
sizeof(vbr->in_hdr));
}
@@ -291,6 +297,46 @@ static ssize_t virtblk_serial_show(struct device *dev,
}
DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
+static void virtblk_config_changed_work(struct work_struct *work)
+{
+ struct virtio_blk *vblk =
+ container_of(work, struct virtio_blk, config_work);
+ struct virtio_device *vdev = vblk->vdev;
+ struct request_queue *q = vblk->disk->queue;
+ char cap_str_2[10], cap_str_10[10];
+ u64 capacity, size;
+
+ /* Host must always specify the capacity. */
+ vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
+ &capacity, sizeof(capacity));
+
+ /* If capacity is too big, truncate with warning. */
+ if ((sector_t)capacity != capacity) {
+ dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
+ (unsigned long long)capacity);
+ capacity = (sector_t)-1;
+ }
+
+ size = capacity * queue_logical_block_size(q);
+ string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
+ string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
+
+ dev_notice(&vdev->dev,
+ "new size: %llu %d-byte logical blocks (%s/%s)\n",
+ (unsigned long long)capacity,
+ queue_logical_block_size(q),
+ cap_str_10, cap_str_2);
+
+ set_capacity(vblk->disk, capacity);
+}
+
+static void virtblk_config_changed(struct virtio_device *vdev)
+{
+ struct virtio_blk *vblk = vdev->priv;
+
+ queue_work(virtblk_wq, &vblk->config_work);
+}
+
static int __devinit virtblk_probe(struct virtio_device *vdev)
{
struct virtio_blk *vblk;
@@ -327,6 +373,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev)
vblk->vdev = vdev;
vblk->sg_elems = sg_elems;
sg_init_table(vblk->sg, vblk->sg_elems);
+ INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
/* We expect one virtqueue, for output. */
vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests");
@@ -477,6 +524,8 @@ static void __devexit virtblk_remove(struct virtio_device *vdev)
{
struct virtio_blk *vblk = vdev->priv;
+ flush_work(&vblk->config_work);
+
/* Nothing should be pending. */
BUG_ON(!list_empty(&vblk->reqs));
@@ -508,27 +557,47 @@ static unsigned int features[] = {
* Use __refdata to avoid this warning.
*/
static struct virtio_driver __refdata virtio_blk = {
- .feature_table = features,
- .feature_table_size = ARRAY_SIZE(features),
- .driver.name = KBUILD_MODNAME,
- .driver.owner = THIS_MODULE,
- .id_table = id_table,
- .probe = virtblk_probe,
- .remove = __devexit_p(virtblk_remove),
+ .feature_table = features,
+ .feature_table_size = ARRAY_SIZE(features),
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = virtblk_probe,
+ .remove = __devexit_p(virtblk_remove),
+ .config_changed = virtblk_config_changed,
};
static int __init init(void)
{
+ int error;
+
+ virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
+ if (!virtblk_wq)
+ return -ENOMEM;
+
major = register_blkdev(0, "virtblk");
- if (major < 0)
- return major;
- return register_virtio_driver(&virtio_blk);
+ if (major < 0) {
+ error = major;
+ goto out_destroy_workqueue;
+ }
+
+ error = register_virtio_driver(&virtio_blk);
+ if (error)
+ goto out_unregister_blkdev;
+ return 0;
+
+out_unregister_blkdev:
+ unregister_blkdev(major, "virtblk");
+out_destroy_workqueue:
+ destroy_workqueue(virtblk_wq);
+ return error;
}
static void __exit fini(void)
{
unregister_blkdev(major, "virtblk");
unregister_virtio_driver(&virtio_blk);
+ destroy_workqueue(virtblk_wq);
}
module_init(init);
module_exit(fini);
diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c
index ae15a4ddaa9..7878da89d29 100644
--- a/drivers/cdrom/viocd.c
+++ b/drivers/cdrom/viocd.c
@@ -627,7 +627,6 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id)
gendisk->fops = &viocd_fops;
gendisk->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE |
GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
- gendisk->events = DISK_EVENT_MEDIA_CHANGE;
set_capacity(gendisk, 0);
gendisk->private_data = d;
d->viocd_disk = gendisk;
diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 838568a7dbf..fb68b129537 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -1677,17 +1677,12 @@ static int __devinit virtcons_probe(struct virtio_device *vdev)
portdev->config.max_nr_ports = 1;
if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT)) {
multiport = true;
- vdev->features[0] |= 1 << VIRTIO_CONSOLE_F_MULTIPORT;
-
vdev->config->get(vdev, offsetof(struct virtio_console_config,
max_nr_ports),
&portdev->config.max_nr_ports,
sizeof(portdev->config.max_nr_ports));
}
- /* Let the Host know we support multiple ports.*/
- vdev->config->finalize_features(vdev);
-
err = init_vqs(portdev);
if (err < 0) {
dev_err(&vdev->dev, "Error %d initializing vqs\n", err);
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 6e5123b1d34..144d27261e4 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -1782,7 +1782,6 @@ static int ide_cd_probe(ide_drive_t *drive)
ide_cd_read_toc(drive, &sense);
g->fops = &idecd_ops;
g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE;
- g->events = DISK_EVENT_MEDIA_CHANGE;
add_disk(g);
return 0;
diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
index b0c56313dbb..8cebec5e85e 100644
--- a/drivers/misc/kgdbts.c
+++ b/drivers/misc/kgdbts.c
@@ -304,7 +304,10 @@ static int check_and_rewind_pc(char *put_str, char *arg)
return 1;
}
/* Readjust the instruction pointer if needed */
- instruction_pointer_set(&kgdbts_regs, ip + offset);
+ ip += offset;
+#ifdef GDB_ADJUSTS_BREAK_OFFSET
+ instruction_pointer_set(&kgdbts_regs, ip);
+#endif
return 0;
}
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 0cb0b063267..f6853247a62 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -609,7 +609,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
* before it gets out of hand. Naturally, this wastes entries. */
if (capacity < 2+MAX_SKB_FRAGS) {
netif_stop_queue(dev);
- if (unlikely(!virtqueue_enable_cb(vi->svq))) {
+ if (unlikely(!virtqueue_enable_cb_delayed(vi->svq))) {
/* More just got used, free them then recheck. */
capacity += free_old_xmit_skbs(vi);
if (capacity >= 2+MAX_SKB_FRAGS) {
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 12e02bf92c4..3dc9befa5ae 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -698,12 +698,7 @@ int __init detect_intel_iommu(void)
{
#ifdef CONFIG_INTR_REMAP
struct acpi_table_dmar *dmar;
- /*
- * for now we will disable dma-remapping when interrupt
- * remapping is enabled.
- * When support for queued invalidation for IOTLB invalidation
- * is added, we will not need this any more.
- */
+
dmar = (struct acpi_table_dmar *) dmar_tbl;
if (ret && cpu_has_x2apic && dmar->flags & 0x1)
printk(KERN_INFO
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 6af6b628175..59f17acf7f6 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -47,6 +47,8 @@
#define ROOT_SIZE VTD_PAGE_SIZE
#define CONTEXT_SIZE VTD_PAGE_SIZE
+#define IS_BRIDGE_HOST_DEVICE(pdev) \
+ ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST)
#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
@@ -116,6 +118,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level)
return (pfn + level_size(level) - 1) & level_mask(level);
}
+static inline unsigned long lvl_to_nr_pages(unsigned int lvl)
+{
+ return 1 << ((lvl - 1) * LEVEL_STRIDE);
+}
+
/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
are never going to work. */
static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
@@ -143,6 +150,12 @@ static void __init check_tylersburg_isoch(void);
static int rwbf_quirk;
/*
+ * set to 1 to panic kernel if can't successfully enable VT-d
+ * (used when kernel is launched w/ TXT)
+ */
+static int force_on = 0;
+
+/*
* 0: Present
* 1-11: Reserved
* 12-63: Context Ptr (12 - (haw-1))
@@ -338,6 +351,9 @@ struct dmar_domain {
int iommu_coherency;/* indicate coherency of iommu access */
int iommu_snooping; /* indicate snooping control feature*/
int iommu_count; /* reference count of iommu */
+ int iommu_superpage;/* Level of superpages supported:
+ 0 == 4KiB (no superpages), 1 == 2MiB,
+ 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
spinlock_t iommu_lock; /* protect iommu set in domain */
u64 max_addr; /* maximum mapped address */
};
@@ -387,6 +403,7 @@ int dmar_disabled = 1;
static int dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;
+static int intel_iommu_superpage = 1;
#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
@@ -417,6 +434,10 @@ static int __init intel_iommu_setup(char *str)
printk(KERN_INFO
"Intel-IOMMU: disable batched IOTLB flush\n");
intel_iommu_strict = 1;
+ } else if (!strncmp(str, "sp_off", 6)) {
+ printk(KERN_INFO
+ "Intel-IOMMU: disable supported super page\n");
+ intel_iommu_superpage = 0;
}
str += strcspn(str, ",");
@@ -555,11 +576,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain)
}
}
+static void domain_update_iommu_superpage(struct dmar_domain *domain)
+{
+ int i, mask = 0xf;
+
+ if (!intel_iommu_superpage) {
+ domain->iommu_superpage = 0;
+ return;
+ }
+
+ domain->iommu_superpage = 4; /* 1TiB */
+
+ for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) {
+ mask |= cap_super_page_val(g_iommus[i]->cap);
+ if (!mask) {
+ break;
+ }
+ }
+ domain->iommu_superpage = fls(mask);
+}
+
/* Some capabilities may be different across iommus */
static void domain_update_iommu_cap(struct dmar_domain *domain)
{
domain_update_iommu_coherency(domain);
domain_update_iommu_snooping(domain);
+ domain_update_iommu_superpage(domain);
}
static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
@@ -689,23 +731,31 @@ out:
}
static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
- unsigned long pfn)
+ unsigned long pfn, int large_level)
{
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
struct dma_pte *parent, *pte = NULL;
int level = agaw_to_level(domain->agaw);
- int offset;
+ int offset, target_level;
BUG_ON(!domain->pgd);
BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
parent = domain->pgd;
+ /* Search pte */
+ if (!large_level)
+ target_level = 1;
+ else
+ target_level = large_level;
+
while (level > 0) {
void *tmp_page;
offset = pfn_level_offset(pfn, level);
pte = &parent[offset];
- if (level == 1)
+ if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE))
+ break;
+ if (level == target_level)
break;
if (!dma_pte_present(pte)) {
@@ -733,10 +783,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
return pte;
}
+
/* return address's pte at specific level */
static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
unsigned long pfn,
- int level)
+ int level, int *large_page)
{
struct dma_pte *parent, *pte = NULL;
int total = agaw_to_level(domain->agaw);
@@ -749,8 +800,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
if (level == total)
return pte;
- if (!dma_pte_present(pte))
+ if (!dma_pte_present(pte)) {
+ *large_page = total;
break;
+ }
+
+ if (pte->val & DMA_PTE_LARGE_PAGE) {
+ *large_page = total;
+ return pte;
+ }
+
parent = phys_to_virt(dma_pte_addr(pte));
total--;
}
@@ -763,6 +822,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
unsigned long last_pfn)
{
int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
+ unsigned int large_page = 1;
struct dma_pte *first_pte, *pte;
BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
@@ -771,14 +831,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain,
/* we don't need lock here; nobody else touches the iova range */
do {
- first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
+ large_page = 1;
+ first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page);
if (!pte) {
- start_pfn = align_to_level(start_pfn + 1, 2);
+ start_pfn = align_to_level(start_pfn + 1, large_page + 1);
continue;
}
- do {
+ do {
dma_clear_pte(pte);
- start_pfn++;
+ start_pfn += lvl_to_nr_pages(large_page);
pte++;
} while (start_pfn <= last_pfn && !first_pte_in_page(pte));
@@ -798,6 +859,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
int total = agaw_to_level(domain->agaw);
int level;
unsigned long tmp;
+ int large_page = 2;
BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
@@ -813,7 +875,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
return;
do {
- first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
+ large_page = level;
+ first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
+ if (large_page > level)
+ level = large_page + 1;
if (!pte) {
tmp = align_to_level(tmp + 1, level + 1);
continue;
@@ -1397,6 +1462,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
else
domain->iommu_snooping = 0;
+ domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
domain->iommu_count = 1;
domain->nid = iommu->node;
@@ -1417,6 +1483,10 @@ static void domain_exit(struct dmar_domain *domain)
if (!domain