aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gem_execbuffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_execbuffer.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_execbuffer.c391
1 files changed, 82 insertions, 309 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 8dd9a6f47db..3eea143749f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -33,180 +33,6 @@
#include "intel_drv.h"
#include <linux/dma_remapping.h>
-struct change_domains {
- uint32_t invalidate_domains;
- uint32_t flush_domains;
- uint32_t flush_rings;
- uint32_t flips;
-};
-
-/*
- * Set the next domain for the specified object. This
- * may not actually perform the necessary flushing/invaliding though,
- * as that may want to be batched with other set_domain operations
- *
- * This is (we hope) the only really tricky part of gem. The goal
- * is fairly simple -- track which caches hold bits of the object
- * and make sure they remain coherent. A few concrete examples may
- * help to explain how it works. For shorthand, we use the notation
- * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the
- * a pair of read and write domain masks.
- *
- * Case 1: the batch buffer
- *
- * 1. Allocated
- * 2. Written by CPU
- * 3. Mapped to GTT
- * 4. Read by GPU
- * 5. Unmapped from GTT
- * 6. Freed
- *
- * Let's take these a step at a time
- *
- * 1. Allocated
- * Pages allocated from the kernel may still have
- * cache contents, so we set them to (CPU, CPU) always.
- * 2. Written by CPU (using pwrite)
- * The pwrite function calls set_domain (CPU, CPU) and
- * this function does nothing (as nothing changes)
- * 3. Mapped by GTT
- * This function asserts that the object is not
- * currently in any GPU-based read or write domains
- * 4. Read by GPU
- * i915_gem_execbuffer calls set_domain (COMMAND, 0).
- * As write_domain is zero, this function adds in the
- * current read domains (CPU+COMMAND, 0).
- * flush_domains is set to CPU.
- * invalidate_domains is set to COMMAND
- * clflush is run to get data out of the CPU caches
- * then i915_dev_set_domain calls i915_gem_flush to
- * emit an MI_FLUSH and drm_agp_chipset_flush
- * 5. Unmapped from GTT
- * i915_gem_object_unbind calls set_domain (CPU, CPU)
- * flush_domains and invalidate_domains end up both zero
- * so no flushing/invalidating happens
- * 6. Freed
- * yay, done
- *
- * Case 2: The shared render buffer
- *
- * 1. Allocated
- * 2. Mapped to GTT
- * 3. Read/written by GPU
- * 4. set_domain to (CPU,CPU)
- * 5. Read/written by CPU
- * 6. Read/written by GPU
- *
- * 1. Allocated
- * Same as last example, (CPU, CPU)
- * 2. Mapped to GTT
- * Nothing changes (assertions find that it is not in the GPU)
- * 3. Read/written by GPU
- * execbuffer calls set_domain (RENDER, RENDER)
- * flush_domains gets CPU
- * invalidate_domains gets GPU
- * clflush (obj)
- * MI_FLUSH and drm_agp_chipset_flush
- * 4. set_domain (CPU, CPU)
- * flush_domains gets GPU
- * invalidate_domains gets CPU
- * wait_rendering (obj) to make sure all drawing is complete.
- * This will include an MI_FLUSH to get the data from GPU
- * to memory
- * clflush (obj) to invalidate the CPU cache
- * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
- * 5. Read/written by CPU
- * cache lines are loaded and dirtied
- * 6. Read written by GPU
- * Same as last GPU access
- *
- * Case 3: The constant buffer
- *
- * 1. Allocated
- * 2. Written by CPU
- * 3. Read by GPU
- * 4. Updated (written) by CPU again
- * 5. Read by GPU
- *
- * 1. Allocated
- * (CPU, CPU)
- * 2. Written by CPU
- * (CPU, CPU)
- * 3. Read by GPU
- * (CPU+RENDER, 0)
- * flush_domains = CPU
- * invalidate_domains = RENDER
- * clflush (obj)
- * MI_FLUSH
- * drm_agp_chipset_flush
- * 4. Updated (written) by CPU again
- * (CPU, CPU)
- * flush_domains = 0 (no previous write domain)
- * invalidate_domains = 0 (no new read domains)
- * 5. Read by GPU
- * (CPU+RENDER, 0)
- * flush_domains = CPU
- * invalidate_domains = RENDER
- * clflush (obj)
- * MI_FLUSH
- * drm_agp_chipset_flush
- */
-static void
-i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
- struct intel_ring_buffer *ring,
- struct change_domains *cd)
-{
- uint32_t invalidate_domains = 0, flush_domains = 0;
-
- /*
- * If the object isn't moving to a new write domain,
- * let the object stay in multiple read domains
- */
- if (obj->base.pending_write_domain == 0)
- obj->base.pending_read_domains |= obj->base.read_domains;
-
- /*
- * Flush the current write domain if
- * the new read domains don't match. Invalidate
- * any read domains which differ from the old
- * write domain
- */
- if (obj->base.write_domain &&
- (((obj->base.write_domain != obj->base.pending_read_domains ||
- obj->ring != ring)) ||
- (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
- flush_domains |= obj->base.write_domain;
- invalidate_domains |=
- obj->base.pending_read_domains & ~obj->base.write_domain;
- }
- /*
- * Invalidate any read caches which may have
- * stale data. That is, any new read domains.
- */
- invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
- if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
- i915_gem_clflush_object(obj);
-
- if (obj->base.pending_write_domain)
- cd->flips |= atomic_read(&obj->pending_flip);
-
- /* The actual obj->write_domain will be updated with
- * pending_write_domain after we emit the accumulated flush for all
- * of our domain changes in execbuffers (which clears objects'
- * write_domains). So if we have a current write domain that we
- * aren't changing, set pending_write_domain to that.
- */
- if (flush_domains == 0 && obj->base.pending_write_domain == 0)
- obj->base.pending_write_domain = obj->base.write_domain;
-
- cd->invalidate_domains |= invalidate_domains;
- cd->flush_domains |= flush_domains;
- if (flush_domains & I915_GEM_GPU_DOMAINS)
- cd->flush_rings |= intel_ring_flag(obj->ring);
- if (invalidate_domains & I915_GEM_GPU_DOMAINS)
- cd->flush_rings |= intel_ring_flag(ring);
-}
-
struct eb_objects {
int and;
struct hlist_head buckets[0];
@@ -217,6 +43,7 @@ eb_create(int size)
{
struct eb_objects *eb;
int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
+ BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
while (count > size)
count >>= 1;
eb = kzalloc(count*sizeof(struct hlist_head) +
@@ -268,6 +95,7 @@ eb_destroy(struct eb_objects *eb)
static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
return (obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
+ !obj->map_and_fenceable ||
obj->cache_level != I915_CACHE_NONE);
}
@@ -382,7 +210,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
if (ret)
return ret;
- vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
+ vaddr = kmap_atomic(i915_gem_object_get_page(obj,
+ reloc->offset >> PAGE_SHIFT));
*(uint32_t *)(vaddr + page_offset) = reloc->delta;
kunmap_atomic(vaddr);
} else {
@@ -503,7 +332,8 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
return ret;
}
-#define __EXEC_OBJECT_HAS_FENCE (1<<31)
+#define __EXEC_OBJECT_HAS_PIN (1<<31)
+#define __EXEC_OBJECT_HAS_FENCE (1<<30)
static int
need_reloc_mappable(struct drm_i915_gem_object *obj)
@@ -513,9 +343,10 @@ need_reloc_mappable(struct drm_i915_gem_object *obj)
}
static int
-pin_and_fence_object(struct drm_i915_gem_object *obj,
- struct intel_ring_buffer *ring)
+i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
+ struct intel_ring_buffer *ring)
{
+ struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
bool need_fence, need_mappable;
@@ -527,15 +358,17 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
obj->tiling_mode != I915_TILING_NONE;
need_mappable = need_fence || need_reloc_mappable(obj);
- ret = i915_gem_object_pin(obj, entry->alignment, need_mappable);
+ ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false);
if (ret)
return ret;
+ entry->flags |= __EXEC_OBJECT_HAS_PIN;
+
if (has_fenced_gpu_access) {
if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
ret = i915_gem_object_get_fence(obj);
if (ret)
- goto err_unpin;
+ return ret;
if (i915_gem_object_pin_fence(obj))
entry->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -544,12 +377,35 @@ pin_and_fence_object(struct drm_i915_gem_object *obj,
}
}
+ /* Ensure ppgtt mapping exists if needed */
+ if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
+ i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
+ obj, obj->cache_level);
+
+ obj->has_aliasing_ppgtt_mapping = 1;
+ }
+
entry->offset = obj->gtt_offset;
return 0;
+}
-err_unpin:
- i915_gem_object_unpin(obj);
- return ret;
+static void
+i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_gem_exec_object2 *entry;
+
+ if (!obj->gtt_space)
+ return;
+
+ entry = obj->exec_entry;
+
+ if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
+ i915_gem_object_unpin_fence(obj);
+
+ if (entry->flags & __EXEC_OBJECT_HAS_PIN)
+ i915_gem_object_unpin(obj);
+
+ entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
}
static int
@@ -557,11 +413,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
struct drm_file *file,
struct list_head *objects)
{
- drm_i915_private_t *dev_priv = ring->dev->dev_private;
struct drm_i915_gem_object *obj;
- int ret, retry;
- bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
struct list_head ordered_objects;
+ bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
+ int retry;
INIT_LIST_HEAD(&ordered_objects);
while (!list_empty(objects)) {
@@ -586,6 +441,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
obj->base.pending_read_domains = 0;
obj->base.pending_write_domain = 0;
+ obj->pending_fenced_gpu_access = false;
}
list_splice(&ordered_objects, objects);
@@ -598,12 +454,12 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
* 2. Bind new objects.
* 3. Decrement pin count.
*
- * This avoid unnecessary unbinding of later objects in order to makr
+ * This avoid unnecessary unbinding of later objects in order to make
* room for the earlier objects *unless* we need to defragment.
*/
retry = 0;
do {
- ret = 0;
+ int ret = 0;
/* Unbind any ill-fitting objects or pin. */
list_for_each_entry(obj, objects, exec_list) {
@@ -623,7 +479,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
(need_mappable && !obj->map_and_fenceable))
ret = i915_gem_object_unbind(obj);
else
- ret = pin_and_fence_object(obj, ring);
+ ret = i915_gem_execbuffer_reserve_object(obj, ring);
if (ret)
goto err;
}
@@ -633,77 +489,22 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
if (obj->gtt_space)
continue;
- ret = pin_and_fence_object(obj, ring);
- if (ret) {
- int ret_ignore;
-
- /* This can potentially raise a harmless
- * -EINVAL if we failed to bind in the above
- * call. It cannot raise -EINTR since we know
- * that the bo is freshly bound and so will
- * not need to be flushed or waited upon.
- */
- ret_ignore = i915_gem_object_unbind(obj);
- (void)ret_ignore;
- WARN_ON(obj->gtt_space);
- break;
- }
+ ret = i915_gem_execbuffer_reserve_object(obj, ring);
+ if (ret)
+ goto err;
}
- /* Decrement pin count for bound objects */
- list_for_each_entry(obj, objects, exec_list) {
- struct drm_i915_gem_exec_object2 *entry;
-
- if (!obj->gtt_space)
- continue;
-
- entry = obj->exec_entry;
- if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
- i915_gem_object_unpin_fence(obj);
- entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
- }
-
- i915_gem_object_unpin(obj);
-
- /* ... and ensure ppgtt mapping exist if needed. */
- if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
- i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
- obj, obj->cache_level);
+err: /* Decrement pin count for bound objects */
+ list_for_each_entry(obj, objects, exec_list)
+ i915_gem_execbuffer_unreserve_object(obj);
- obj->has_aliasing_ppgtt_mapping = 1;
- }
- }
-
- if (ret != -ENOSPC || retry > 1)
+ if (ret != -ENOSPC || retry++)
return ret;
- /* First attempt, just clear anything that is purgeable.
- * Second attempt, clear the entire GTT.
- */
- ret = i915_gem_evict_everything(ring->dev, retry == 0);
+ ret = i915_gem_evict_everything(ring->dev);
if (ret)
return ret;
-
- retry++;
} while (1);
-
-err:
- list_for_each_entry_continue_reverse(obj, objects, exec_list) {
- struct drm_i915_gem_exec_object2 *entry;
-
- if (!obj->gtt_space)
- continue;
-
- entry = obj->exec_entry;
- if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
- i915_gem_object_unpin_fence(obj);
- entry->flags &= ~__EXEC_OBJECT_HAS_FENCE;
- }
-
- i915_gem_object_unpin(obj);
- }
-
- return ret;
}
static int
@@ -809,18 +610,6 @@ err:
return ret;
}
-static void
-i915_gem_execbuffer_flush(struct drm_device *dev,
- uint32_t invalidate_domains,
- uint32_t flush_domains)
-{
- if (flush_domains & I915_GEM_DOMAIN_CPU)
- intel_gtt_chipset_flush();
-
- if (flush_domains & I915_GEM_DOMAIN_GTT)
- wmb();
-}
-
static int
i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
{
@@ -853,48 +642,45 @@ i915_gem_execbuffer_wait_for_flips(struct intel_ring_buffer *ring, u32 flips)
return 0;
}
-
static int
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
struct list_head *objects)
{
struct drm_i915_gem_object *obj;
- struct change_domains cd;
+ uint32_t flush_domains = 0;
+ uint32_t flips = 0;
int ret;
- memset(&cd, 0, sizeof(cd));
- list_for_each_entry(obj, objects, exec_list)
- i915_gem_object_set_to_gpu_domain(obj, ring, &cd);
-
- if (cd.invalidate_domains | cd.flush_domains) {
- i915_gem_execbuffer_flush(ring->dev,
- cd.invalidate_domains,
- cd.flush_domains);
- }
-
- if (cd.flips) {
- ret = i915_gem_execbuffer_wait_for_flips(ring, cd.flips);
+ list_for_each_entry(obj, objects, exec_list) {
+ ret = i915_gem_object_sync(obj, ring);
if (ret)
return ret;
+
+ if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
+ i915_gem_clflush_object(obj);
+
+ if (obj->base.pending_write_domain)
+ flips |= atomic_read(&obj->pending_flip);
+
+ flush_domains |= obj->base.write_domain;
}
- list_for_each_entry(obj, objects, exec_list) {
- ret = i915_gem_object_sync(obj, ring);
+ if (flips) {
+ ret = i915_gem_execbuffer_wait_for_flips(ring, flips);
if (ret)
return ret;
}
+ if (flush_domains & I915_GEM_DOMAIN_CPU)
+ intel_gtt_chipset_flush();
+
+ if (flush_domains & I915_GEM_DOMAIN_GTT)
+ wmb();
+
/* Unconditionally invalidate gpu caches and ensure that we do flush
* any residual writes from the previous batch.
*/
- ret = i915_gem_flush_ring(ring,
- I915_GEM_GPU_DOMAINS,
- ring->gpu_caches_dirty ? I915_GEM_GPU_DOMAINS : 0);
- if (ret)
- return ret;
-
- ring->gpu_caches_dirty = false;
- return 0;
+ return intel_ring_invalidate_all_caches(ring);
}
static bool
@@ -942,9 +728,8 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
struct drm_i915_gem_object *obj;
list_for_each_entry(obj, objects, exec_list) {
- u32 old_read = obj->base.read_domains;
- u32 old_write = obj->base.write_domain;
-
+ u32 old_read = obj->base.read_domains;
+ u32 old_write = obj->base.write_domain;
obj->base.read_domains = obj->base.pending_read_domains;
obj->base.write_domain = obj->base.pending_write_domain;
@@ -953,17 +738,13 @@ i915_gem_execbuffer_move_to_active(struct list_head *objects,
i915_gem_object_move_to_active(obj, ring, seqno);
if (obj->base.write_domain) {
obj->dirty = 1;
- obj->pending_gpu_write = true;
- list_move_tail(&obj->gpu_write_list,
- &ring->gpu_write_list);
+ obj->last_write_seqno = seqno;
if (obj->pin_count) /* check for potential scanout */
- intel_mark_busy(ring->dev, obj);
+ intel_mark_fb_busy(obj);
}
trace_i915_gem_object_change_domain(obj, old_read, old_write);
}
-
- intel_mark_busy(ring->dev, NULL);
}
static void
@@ -971,16 +752,11 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
struct drm_file *file,
struct intel_ring_buffer *ring)
{
- struct drm_i915_gem_request *request;
-
/* Unconditionally force add_request to emit a full flush. */
ring->gpu_caches_dirty = true;
/* Add a breadcrumb for the completion of the batch buffer */
- request = kzalloc(sizeof(*request), GFP_KERNEL);
- if (request == NULL || i915_add_request(ring, file, request)) {
- kfree(request);
- }
+ (void)i915_add_request(ring, file, NULL);
}
static int
@@ -1326,8 +1102,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
return -ENOMEM;
}
ret = copy_from_user(exec_list,
- (struct drm_i915_relocation_entry __user *)
- (uintptr_t) args->buffers_ptr,
+ (void __user *)(uintptr_t)args->buffers_ptr,
sizeof(*exec_list) * args->buffer_count);
if (ret != 0) {
DRM_DEBUG("copy %d exec entries failed %d\n",
@@ -1366,8 +1141,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
for (i = 0; i < args->buffer_count; i++)
exec_list[i].offset = exec2_list[i].offset;
/* ... and back out to userspace */
- ret = copy_to_user((struct drm_i915_relocation_entry __user *)
- (uintptr_t) args->buffers_ptr,
+ ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
exec_list,
sizeof(*exec_list) * args->buffer_count);
if (ret) {
@@ -1421,8 +1195,7 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
if (!ret) {
/* Copy the new buffer offsets back to the user's exec list. */
- ret = copy_to_user((struct drm_i915_relocation_entry __user *)
- (uintptr_t) args->buffers_ptr,
+ ret = copy_to_user((void __user *)(uintptr_t)args->buffers_ptr,
exec2_list,
sizeof(*exec2_list) * args->buffer_count);
if (ret) {