aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu
diff options
context:
space:
mode:
authorMarek Olšák <maraeo@gmail.com>2011-08-13 20:32:11 +0000
committerDave Airlie <airlied@redhat.com>2011-08-31 19:25:35 +0100
commitdfadbbdb57b3f2bb33e14f129a43047c6f0caefa (patch)
tree0a6e00b5fa56d9f5a5b4dc833e3057e4660efd1c /drivers/gpu
parentb464e9a25c27884eb8ee2c2bb904ec50bd3990ea (diff)
drm/ttm: add a way to bo_wait for either the last read or last write
Sometimes we want to know whether a buffer is busy and wait for it (bo_wait). However, sometimes it would be more useful to be able to query whether a buffer is busy and being either read or written, and wait until it's stopped being either read or written. The point of this is to be able to avoid unnecessary waiting, e.g. if a GPU has written something to a buffer and is now reading that buffer, and a CPU wants to map that buffer for read, it needs to only wait for the last write. If there were no write, there wouldn't be any waiting needed. This, or course, requires user space drivers to send read/write flags with each relocation (like we have read/write domains in radeon, so we can actually use those for something useful now). Now how this patch works: The read/write flags should passed to ttm_validate_buffer. TTM maintains separate sync objects of the last read and write for each buffer, in addition to the sync object of the last use of a buffer. ttm_bo_wait then operates with one the sync objects. Signed-off-by: Marek Olšák <maraeo@gmail.com> Reviewed-by: Jerome Glisse <jglisse@redhat.com> Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu')
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_bo.c3
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_gem.c5
-rw-r--r--drivers/gpu/drm/radeon/radeon_cs.c1
-rw-r--r--drivers/gpu/drm/radeon/radeon_object.h2
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c97
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_util.c26
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_vm.c2
-rw-r--r--drivers/gpu/drm/ttm/ttm_execbuf_util.c19
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c1
9 files changed, 119 insertions, 37 deletions
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 890d50e4d68..e87e24b9c0a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1104,7 +1104,8 @@ nouveau_bo_vma_del(struct nouveau_bo *nvbo, struct nouveau_vma *vma)
if (vma->node) {
if (nvbo->bo.mem.mem_type != TTM_PL_SYSTEM) {
spin_lock(&nvbo->bo.bdev->fence_lock);
- ttm_bo_wait(&nvbo->bo, false, false, false);
+ ttm_bo_wait(&nvbo->bo, false, false, false,
+ TTM_USAGE_READWRITE);
spin_unlock(&nvbo->bo.bdev->fence_lock);
nouveau_vm_unmap(vma);
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 5f0bc57fdaa..322bf62a064 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -589,7 +589,8 @@ nouveau_gem_pushbuf_reloc_apply(struct drm_device *dev,
}
spin_lock(&nvbo->bo.bdev->fence_lock);
- ret = ttm_bo_wait(&nvbo->bo, false, false, false);
+ ret = ttm_bo_wait(&nvbo->bo, false, false, false,
+ TTM_USAGE_READWRITE);
spin_unlock(&nvbo->bo.bdev->fence_lock);
if (ret) {
NV_ERROR(dev, "reloc wait_idle failed: %d\n", ret);
@@ -825,7 +826,7 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
nvbo = nouveau_gem_object(gem);
spin_lock(&nvbo->bo.bdev->fence_lock);
- ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait);
+ ret = ttm_bo_wait(&nvbo->bo, true, true, no_wait, TTM_USAGE_READWRITE);
spin_unlock(&nvbo->bo.bdev->fence_lock);
drm_gem_object_unreference_unlocked(gem);
return ret;
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index fae00c0d75a..14e85315110 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -80,6 +80,7 @@ int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
p->relocs[i].lobj.wdomain = r->write_domain;
p->relocs[i].lobj.rdomain = r->read_domains;
p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
+ p->relocs[i].lobj.tv.usage = TTM_USAGE_READWRITE;
p->relocs[i].handle = r->handle;
p->relocs[i].flags = r->flags;
radeon_bo_list_add_object(&p->relocs[i].lobj,
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index ede6c13628f..e9dc8b249c5 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -130,7 +130,7 @@ static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
if (mem_type)
*mem_type = bo->tbo.mem.mem_type;
if (bo->tbo.sync_obj)
- r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
+ r = ttm_bo_wait(&bo->tbo, true, true, no_wait, TTM_USAGE_READWRITE);
spin_unlock(&bo->tbo.bdev->fence_lock);
ttm_bo_unreserve(&bo->tbo);
return r;
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index a4d38d85909..b824d9bdd87 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -498,7 +498,7 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
int ret;
spin_lock(&bdev->fence_lock);
- (void) ttm_bo_wait(bo, false, false, true);
+ (void) ttm_bo_wait(bo, false, false, true, TTM_USAGE_READWRITE);
if (!bo->sync_obj) {
spin_lock(&glob->lru_lock);
@@ -566,7 +566,8 @@ static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo,
retry:
spin_lock(&bdev->fence_lock);
- ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
+ ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu,
+ TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);
if (unlikely(ret != 0))
@@ -725,7 +726,8 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
int ret = 0;
spin_lock(&bdev->fence_lock);
- ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
+ ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu,
+ TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);
if (unlikely(ret != 0)) {
@@ -1072,7 +1074,8 @@ int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
* instead of doing it here.
*/
spin_lock(&bdev->fence_lock);
- ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu);
+ ret = ttm_bo_wait(bo, false, interruptible, no_wait_gpu,
+ TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);
if (ret)
return ret;
@@ -1692,34 +1695,83 @@ out_unlock:
return ret;
}
+static void ttm_bo_unref_sync_obj_locked(struct ttm_buffer_object *bo,
+ void *sync_obj,
+ void **extra_sync_obj)
+{
+ struct ttm_bo_device *bdev = bo->bdev;
+ struct ttm_bo_driver *driver = bdev->driver;
+ void *tmp_obj = NULL, *tmp_obj_read = NULL, *tmp_obj_write = NULL;
+
+ /* We must unref the sync obj wherever it's ref'd.
+ * Note that if we unref bo->sync_obj, we can unref both the read
+ * and write sync objs too, because they can't be newer than
+ * bo->sync_obj, so they are no longer relevant. */
+ if (sync_obj == bo->sync_obj ||
+ sync_obj == bo->sync_obj_read) {
+ tmp_obj_read = bo->sync_obj_read;
+ bo->sync_obj_read = NULL;
+ }
+ if (sync_obj == bo->sync_obj ||
+ sync_obj == bo->sync_obj_write) {
+ tmp_obj_write = bo->sync_obj_write;
+ bo->sync_obj_write = NULL;
+ }
+ if (sync_obj == bo->sync_obj) {
+ tmp_obj = bo->sync_obj;
+ bo->sync_obj = NULL;
+ }
+
+ clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
+ spin_unlock(&bdev->fence_lock);
+ if (tmp_obj)
+ driver->sync_obj_unref(&tmp_obj);
+ if (tmp_obj_read)
+ driver->sync_obj_unref(&tmp_obj_read);
+ if (tmp_obj_write)
+ driver->sync_obj_unref(&tmp_obj_write);
+ if (extra_sync_obj)
+ driver->sync_obj_unref(extra_sync_obj);
+ spin_lock(&bdev->fence_lock);
+}
+
int ttm_bo_wait(struct ttm_buffer_object *bo,
- bool lazy, bool interruptible, bool no_wait)
+ bool lazy, bool interruptible, bool no_wait,
+ enum ttm_buffer_usage usage)
{
struct ttm_bo_driver *driver = bo->bdev->driver;
struct ttm_bo_device *bdev = bo->bdev;
void *sync_obj;
void *sync_obj_arg;
int ret = 0;
+ void **bo_sync_obj;
- if (likely(bo->sync_obj == NULL))
+ switch (usage) {
+ case TTM_USAGE_READ:
+ bo_sync_obj = &bo->sync_obj_read;
+ break;
+ case TTM_USAGE_WRITE:
+ bo_sync_obj = &bo->sync_obj_write;
+ break;
+ case TTM_USAGE_READWRITE:
+ default:
+ bo_sync_obj = &bo->sync_obj;
+ }
+
+ if (likely(*bo_sync_obj == NULL))
return 0;
- while (bo->sync_obj) {
+ while (*bo_sync_obj) {
- if (driver->sync_obj_signaled(bo->sync_obj, bo->sync_obj_arg)) {
- void *tmp_obj = bo->sync_obj;
- bo->sync_obj = NULL;
- clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
- spin_unlock(&bdev->fence_lock);
- driver->sync_obj_unref(&tmp_obj);
- spin_lock(&bdev->fence_lock);
+ if (driver->sync_obj_signaled(*bo_sync_obj, bo->sync_obj_arg)) {
+ ttm_bo_unref_sync_obj_locked(bo, *bo_sync_obj, NULL);
continue;
}
if (no_wait)
return -EBUSY;
- sync_obj = driver->sync_obj_ref(bo->sync_obj);
+ sync_obj = driver->sync_obj_ref(*bo_sync_obj);
sync_obj_arg = bo->sync_obj_arg;
spin_unlock(&bdev->fence_lock);
ret = driver->sync_obj_wait(sync_obj, sync_obj_arg,
@@ -1730,16 +1782,9 @@ int ttm_bo_wait(struct ttm_buffer_object *bo,
return ret;
}
spin_lock(&bdev->fence_lock);
- if (likely(bo->sync_obj == sync_obj &&
+ if (likely(*bo_sync_obj == sync_obj &&
bo->sync_obj_arg == sync_obj_arg)) {
- void *tmp_obj = bo->sync_obj;
- bo->sync_obj = NULL;
- clear_bit(TTM_BO_PRIV_FLAG_MOVING,
- &bo->priv_flags);
- spin_unlock(&bdev->fence_lock);
- driver->sync_obj_unref(&sync_obj);
- driver->sync_obj_unref(&tmp_obj);
- spin_lock(&bdev->fence_lock);
+ ttm_bo_unref_sync_obj_locked(bo, *bo_sync_obj, &sync_obj);
} else {
spin_unlock(&bdev->fence_lock);
driver->sync_obj_unref(&sync_obj);
@@ -1763,7 +1808,7 @@ int ttm_bo_synccpu_write_grab(struct ttm_buffer_object *bo, bool no_wait)
if (unlikely(ret != 0))
return ret;
spin_lock(&bdev->fence_lock);
- ret = ttm_bo_wait(bo, false, true, no_wait);
+ ret = ttm_bo_wait(bo, false, true, no_wait, TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);
if (likely(ret == 0))
atomic_inc(&bo->cpu_writers);
@@ -1837,7 +1882,7 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
*/
spin_lock(&bo->bdev->fence_lock);
- ret = ttm_bo_wait(bo, false, false, false);
+ ret = ttm_bo_wait(bo, false, false, false, TTM_USAGE_READWRITE);
spin_unlock(&bo->bdev->fence_lock);
if (unlikely(ret != 0))
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index ae3c6f5dd2b..6135f58169c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -436,6 +436,8 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
atomic_set(&fbo->cpu_writers, 0);
fbo->sync_obj = driver->sync_obj_ref(bo->sync_obj);
+ fbo->sync_obj_read = driver->sync_obj_ref(bo->sync_obj_read);
+ fbo->sync_obj_write = driver->sync_obj_ref(bo->sync_obj_write);
kref_init(&fbo->list_kref);
kref_init(&fbo->kref);
fbo->destroy = &ttm_transfered_destroy;
@@ -618,20 +620,30 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
struct ttm_mem_reg *old_mem = &bo->mem;
int ret;
struct ttm_buffer_object *ghost_obj;
- void *tmp_obj = NULL;
+ void *tmp_obj = NULL, *tmp_obj_read = NULL, *tmp_obj_write = NULL;
spin_lock(&bdev->fence_lock);
- if (bo->sync_obj) {
+ if (bo->sync_obj)
tmp_obj = bo->sync_obj;
- bo->sync_obj = NULL;
- }
+ if (bo->sync_obj_read)
+ tmp_obj_read = bo->sync_obj_read;
+ if (bo->sync_obj_write)
+ tmp_obj_write = bo->sync_obj_write;
+
bo->sync_obj = driver->sync_obj_ref(sync_obj);
+ bo->sync_obj_read = driver->sync_obj_ref(sync_obj);
+ bo->sync_obj_write = driver->sync_obj_ref(sync_obj);
bo->sync_obj_arg = sync_obj_arg;
if (evict) {
- ret = ttm_bo_wait(bo, false, false, false);
+ ret = ttm_bo_wait(bo, false, false, false,
+ TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);
if (tmp_obj)
driver->sync_obj_unref(&tmp_obj);
+ if (tmp_obj_read)
+ driver->sync_obj_unref(&tmp_obj_read);
+ if (tmp_obj_write)
+ driver->sync_obj_unref(&tmp_obj_write);
if (ret)
return ret;
@@ -655,6 +667,10 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
spin_unlock(&bdev->fence_lock);
if (tmp_obj)
driver->sync_obj_unref(&tmp_obj);
+ if (tmp_obj_read)
+ driver->sync_obj_unref(&tmp_obj_read);
+ if (tmp_obj_write)
+ driver->sync_obj_unref(&tmp_obj_write);
ret = ttm_buffer_object_transfer(bo, &ghost_obj);
if (ret)
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 221b924aceb..ff1e26f4b09 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -122,7 +122,7 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
spin_lock(&bdev->fence_lock);
if (test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)) {
- ret = ttm_bo_wait(bo, false, true, false);
+ ret = ttm_bo_wait(bo, false, true, false, TTM_USAGE_READWRITE);
spin_unlock(&bdev->fence_lock);
if (unlikely(ret != 0)) {
retval = (ret != -ERESTARTSYS) ?
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index 3832fe10b4d..36d111a8823 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -221,8 +221,18 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
list_for_each_entry(entry, list, head) {
bo = entry->bo;
+ entry->old_sync_obj_read = NULL;
+ entry->old_sync_obj_write = NULL;
entry->old_sync_obj = bo->sync_obj;
bo->sync_obj = driver->sync_obj_ref(sync_obj);
+ if (entry->usage & TTM_USAGE_READ) {
+ entry->old_sync_obj_read = bo->sync_obj_read;
+ bo->sync_obj_read = driver->sync_obj_ref(sync_obj);
+ }
+ if (entry->usage & TTM_USAGE_WRITE) {
+ entry->old_sync_obj_write = bo->sync_obj_write;
+ bo->sync_obj_write = driver->sync_obj_ref(sync_obj);
+ }
bo->sync_obj_arg = entry->new_sync_obj_arg;
ttm_bo_unreserve_locked(bo);
entry->reserved = false;
@@ -231,8 +241,15 @@ void ttm_eu_fence_buffer_objects(struct list_head *list, void *sync_obj)
spin_unlock(&bdev->fence_lock);
list_for_each_entry(entry, list, head) {
- if (entry->old_sync_obj)
+ if (entry->old_sync_obj) {
driver->sync_obj_unref(&entry->old_sync_obj);
+ }
+ if (entry->old_sync_obj_read) {
+ driver->sync_obj_unref(&entry->old_sync_obj_read);
+ }
+ if (entry->old_sync_obj_write) {
+ driver->sync_obj_unref(&entry->old_sync_obj_write);
+ }
}
}
EXPORT_SYMBOL(ttm_eu_fence_buffer_objects);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 41b95ed6dbc..8ca3ddb2ebc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -224,6 +224,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
if (unlikely(cur_validate_node == sw_context->cur_val_buf)) {
val_buf = &sw_context->val_bufs[cur_validate_node];
val_buf->bo = ttm_bo_reference(bo);
+ val_buf->usage = TTM_USAGE_READWRITE;
val_buf->new_sync_obj_arg = (void *) dev_priv;
list_add_tail(&val_buf->head, &sw_context->validate_nodes);
++sw_context->cur_val_buf;