Diffstat (limited to 'drivers/gpu/drm/radeon/radeon_cs.c')
-rw-r--r--	drivers/gpu/drm/radeon/radeon_cs.c	508
1 file changed, 255 insertions, 253 deletions
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 66c22283663..ae763f60c8a 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -24,16 +24,59 @@
  * Authors:
  *    Jerome Glisse <glisse@freedesktop.org>
  */
+#include <linux/list_sort.h>
 #include <drm/drmP.h>
 #include <drm/radeon_drm.h>
 #include "radeon_reg.h"
 #include "radeon.h"
 #include "radeon_trace.h"
 
+#define RADEON_CS_MAX_PRIORITY		32u
+#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)
+
+/* This is based on the bucket sort with O(n) time complexity.
+ * An item with priority "i" is added to bucket[i]. The lists are then
+ * concatenated in descending order.
+ */
+struct radeon_cs_buckets {
+	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
+};
+
+static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
+{
+	unsigned i;
+
+	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
+		INIT_LIST_HEAD(&b->bucket[i]);
+}
+
+static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
+				  struct list_head *item, unsigned priority)
+{
+	/* Since buffers which appear sooner in the relocation list are
+	 * likely to be used more often than buffers which appear later
+	 * in the list, the sort mustn't change the ordering of buffers
+	 * with the same priority, i.e. it must be stable.
+	 */
+	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
+}
+
+static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
+				       struct list_head *out_list)
+{
+	unsigned i;
+
+	/* Connect the sorted buckets in the output list. */
+	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
+		list_splice(&b->bucket[i], out_list);
+	}
+}
+
 static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 {
 	struct drm_device *ddev = p->rdev->ddev;
 	struct radeon_cs_chunk *chunk;
+	struct radeon_cs_buckets buckets;
 	unsigned i, j;
 	bool duplicate;
@@ -52,8 +95,12 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 	if (p->relocs == NULL) {
 		return -ENOMEM;
 	}
+
+	radeon_cs_buckets_init(&buckets);
+
 	for (i = 0; i < p->nrelocs; i++) {
 		struct drm_radeon_cs_reloc *r;
+		unsigned priority;
 
 		duplicate = false;
 		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
@@ -78,39 +125,59 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		}
 		p->relocs_ptr[i] = &p->relocs[i];
 		p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
-		p->relocs[i].lobj.bo = p->relocs[i].robj;
-		p->relocs[i].lobj.written = !!r->write_domain;
+
+		/* The userspace buffer priorities are from 0 to 15. A higher
+		 * number means the buffer is more important.
+		 * Also, the buffers used for write have a higher priority than
+		 * the buffers used for read only, which doubles the range
+		 * to 0 to 31. 32 is reserved for the kernel driver.
+		 */
+		priority = (r->flags & 0xf) * 2 + !!r->write_domain;
 
 		/* the first reloc of an UVD job is the msg and that must be in
 		   VRAM, also but everything into VRAM on AGP cards to avoid
 		   image corruptions */
 		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
-		    p->rdev->family < CHIP_PALM &&
 		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) {
-
-			p->relocs[i].lobj.domain =
+			/* TODO: is this still needed for NI+ ? */
+			p->relocs[i].prefered_domains =
 				RADEON_GEM_DOMAIN_VRAM;
 
-			p->relocs[i].lobj.alt_domain =
+			p->relocs[i].allowed_domains =
 				RADEON_GEM_DOMAIN_VRAM;
 
+			/* prioritize this over any other relocation */
+			priority = RADEON_CS_MAX_PRIORITY;
 		} else {
 			uint32_t domain = r->write_domain ?
 				r->write_domain : r->read_domains;
 
-			p->relocs[i].lobj.domain = domain;
+			if (domain & RADEON_GEM_DOMAIN_CPU) {
+				DRM_ERROR("RADEON_GEM_DOMAIN_CPU is not valid "
					  "for command submission\n");
+				return -EINVAL;
+			}
+
+			p->relocs[i].prefered_domains = domain;
 			if (domain == RADEON_GEM_DOMAIN_VRAM)
 				domain |= RADEON_GEM_DOMAIN_GTT;
-			p->relocs[i].lobj.alt_domain = domain;
+			p->relocs[i].allowed_domains = domain;
 		}
 
-		p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
+		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
 		p->relocs[i].handle = r->handle;
 
-		radeon_bo_list_add_object(&p->relocs[i].lobj,
-					  &p->validated);
+		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
+				      priority);
 	}
-	return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring);
+
+	radeon_cs_buckets_get_list(&buckets, &p->validated);
+
+	if (p->cs_flags & RADEON_CS_USE_VM)
+		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
+					      &p->validated);
+
+	return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
 }
 
 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
@@ -139,7 +206,7 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
 				p->ring = R600_RING_TYPE_DMA_INDEX;
 			else
 				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
-		} else if (p->rdev->family >= CHIP_R600) {
+		} else if (p->rdev->family >= CHIP_RV770) {
 			p->ring = R600_RING_TYPE_DMA_INDEX;
 		} else {
 			return -EINVAL;
@@ -148,6 +215,10 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
 	case RADEON_CS_RING_UVD:
 		p->ring = R600_RING_TYPE_UVD_INDEX;
 		break;
+	case RADEON_CS_RING_VCE:
+		/* TODO: only use the low priority ring for now */
+		p->ring = TN_RING_TYPE_VCE1_INDEX;
+		break;
 	}
 	return 0;
 }
@@ -160,7 +231,8 @@ static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
 		if (!p->relocs[i].robj)
 			continue;
 
-		radeon_ib_sync_to(&p->ib, p->relocs[i].robj->tbo.sync_obj);
+		radeon_semaphore_sync_to(p->ib.semaphore,
+					 p->relocs[i].robj->tbo.sync_obj);
 	}
 }
@@ -192,7 +264,7 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 		return -ENOMEM;
 	}
 	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
-	if (DRM_COPY_FROM_USER(p->chunks_array, chunk_array_ptr,
+	if (copy_from_user(p->chunks_array, chunk_array_ptr,
 			       sizeof(uint64_t)*cs->num_chunks)) {
 		return -EFAULT;
 	}
@@ -208,14 +280,12 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 		uint32_t __user *cdata;
 
 		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
-		if (DRM_COPY_FROM_USER(&user_chunk, chunk_ptr,
+		if (copy_from_user(&user_chunk, chunk_ptr,
 				       sizeof(struct drm_radeon_cs_chunk))) {
 			return -EFAULT;
 		}
 		p->chunks[i].length_dw = user_chunk.length_dw;
-		p->chunks[i].kdata = NULL;
 		p->chunks[i].chunk_id = user_chunk.chunk_id;
-		p->chunks[i].user_ptr = (void __user *)(unsigned long)user_chunk.chunk_data;
 		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
 			p->chunk_relocs_idx = i;
 		}
@@ -238,25 +308,31 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 				return -EINVAL;
 		}
 
-		cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data;
-		if ((p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) ||
-		    (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS)) {
-			size = p->chunks[i].length_dw * sizeof(uint32_t);
-			p->chunks[i].kdata = kmalloc(size, GFP_KERNEL);
-			if (p->chunks[i].kdata == NULL) {
-				return -ENOMEM;
-			}
-			if (DRM_COPY_FROM_USER(p->chunks[i].kdata,
-					       p->chunks[i].user_ptr, size)) {
-				return -EFAULT;
-			}
-			if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
-				p->cs_flags = p->chunks[i].kdata[0];
-				if (p->chunks[i].length_dw > 1)
-					ring = p->chunks[i].kdata[1];
-				if (p->chunks[i].length_dw > 2)
-					priority = (s32)p->chunks[i].kdata[2];
-			}
+		size = p->chunks[i].length_dw;
+		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
+		p->chunks[i].user_ptr = cdata;
+		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB)
+			continue;
+
+		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
+			if (!p->rdev || !(p->rdev->flags & RADEON_IS_AGP))
+				continue;
+		}
+
+		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
+		size *= sizeof(uint32_t);
+		if (p->chunks[i].kdata == NULL) {
+			return -ENOMEM;
+		}
+		if (copy_from_user(p->chunks[i].kdata, cdata, size)) {
+			return -EFAULT;
+		}
+		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
+			p->cs_flags = p->chunks[i].kdata[0];
+			if (p->chunks[i].length_dw > 1)
+				ring = p->chunks[i].kdata[1];
+			if (p->chunks[i].length_dw > 2)
+				priority = (s32)p->chunks[i].kdata[2];
 		}
 	}
@@ -272,44 +348,33 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 			return -EINVAL;
 
 		/* we only support VM on some SI+ rings */
-		if ((p->rdev->asic->ring[p->ring]->cs_parse == NULL) &&
-		   ((p->cs_flags & RADEON_CS_USE_VM) == 0)) {
-			DRM_ERROR("Ring %d requires VM!\n", p->ring);
-			return -EINVAL;
-		}
-	}
-
-	/* deal with non-vm */
-	if ((p->chunk_ib_idx != -1) &&
-	    ((p->cs_flags & RADEON_CS_USE_VM) == 0) &&
-	    (p->chunks[p->chunk_ib_idx].chunk_id == RADEON_CHUNK_ID_IB)) {
-		if (p->chunks[p->chunk_ib_idx].length_dw > (16 * 1024)) {
-			DRM_ERROR("cs IB too big: %d\n",
-				  p->chunks[p->chunk_ib_idx].length_dw);
-			return -EINVAL;
-		}
-		if (p->rdev && (p->rdev->flags & RADEON_IS_AGP)) {
-			p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
-			p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
-			if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
-			    p->chunks[p->chunk_ib_idx].kpage[1] == NULL) {
-				kfree(p->chunks[p->chunk_ib_idx].kpage[0]);
-				kfree(p->chunks[p->chunk_ib_idx].kpage[1]);
-				p->chunks[p->chunk_ib_idx].kpage[0] = NULL;
-				p->chunks[p->chunk_ib_idx].kpage[1] = NULL;
-				return -ENOMEM;
+		if ((p->cs_flags & RADEON_CS_USE_VM) == 0) {
+			if (p->rdev->asic->ring[p->ring]->cs_parse == NULL) {
+				DRM_ERROR("Ring %d requires VM!\n", p->ring);
+				return -EINVAL;
+			}
+		} else {
+			if (p->rdev->asic->ring[p->ring]->ib_parse == NULL) {
+				DRM_ERROR("VM not supported on ring %d!\n",
+					  p->ring);
+				return -EINVAL;
 			}
 		}
-		p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1;
-		p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1;
-		p->chunks[p->chunk_ib_idx].last_copied_page = -1;
-		p->chunks[p->chunk_ib_idx].last_page_index =
-			((p->chunks[p->chunk_ib_idx].length_dw * 4) - 1) / PAGE_SIZE;
 	}
 
 	return 0;
 }
 
+static int cmp_size_smaller_first(void *priv, struct list_head *a,
+				  struct list_head *b)
+{
+	struct radeon_cs_reloc *la = list_entry(a, struct radeon_cs_reloc, tv.head);
+	struct radeon_cs_reloc *lb = list_entry(b, struct radeon_cs_reloc, tv.head);
+
+	/* Sort A before B if A is smaller. */
+	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
+}
+
 /**
  * cs_parser_fini() - clean parser states
  * @parser:	parser structure holding parsing context.
@@ -323,6 +388,18 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
 	unsigned i;
 
 	if (!error) {
+		/* Sort the buffer list from the smallest to largest buffer,
+		 * which affects the order of buffers in the LRU list.
+		 * This assures that the smallest buffers are added first
+		 * to the LRU list, so they are likely to be later evicted
+		 * first, instead of large buffers whose eviction is more
+		 * expensive.
+		 *
+		 * This slightly lowers the number of bytes moved by TTM
+		 * per frame under memory pressure.
+		 */
+		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
+
 		ttm_eu_fence_buffer_objects(&parser->ticket,
 					    &parser->validated,
 					    parser->ib.fence);
@@ -340,13 +417,9 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
 	kfree(parser->track);
 	kfree(parser->relocs);
 	kfree(parser->relocs_ptr);
-	for (i = 0; i < parser->nchunks; i++) {
-		kfree(parser->chunks[i].kdata);
-		if ((parser->rdev->flags & RADEON_IS_AGP)) {
-			kfree(parser->chunks[i].kpage[0]);
-			kfree(parser->chunks[i].kpage[1]);
-		}
-	}
+	kfree(parser->vm_bos);
+	for (i = 0; i < parser->nchunks; i++)
+		drm_free_large(parser->chunks[i].kdata);
 	kfree(parser->chunks);
 	kfree(parser->chunks_array);
 	radeon_ib_free(parser->rdev, &parser->ib);
@@ -356,7 +429,6 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
 static int radeon_cs_ib_chunk(struct radeon_device *rdev,
 			      struct radeon_cs_parser *parser)
 {
-	struct radeon_cs_chunk *ib_chunk;
 	int r;
 
 	if (parser->chunk_ib_idx == -1)
@@ -365,31 +437,17 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
 	if (parser->cs_flags & RADEON_CS_USE_VM)
 		return 0;
 
-	ib_chunk = &parser->chunks[parser->chunk_ib_idx];
-	/* Copy the packet into the IB, the parser will read from the
-	 * input memory (cached) and write to the IB (which can be
-	 * uncached).
-	 */
-	r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
-			   NULL, ib_chunk->length_dw * 4);
-	if (r) {
-		DRM_ERROR("Failed to get ib !\n");
-		return r;
-	}
-	parser->ib.length_dw = ib_chunk->length_dw;
 	r = radeon_cs_parse(rdev, parser->ring, parser);
 	if (r || parser->parser_error) {
 		DRM_ERROR("Invalid command stream !\n");
 		return r;
 	}
-	r = radeon_cs_finish_pages(parser);
-	if (r) {
-		DRM_ERROR("Invalid command stream !\n");
-		return r;
-	}
 
 	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
 		radeon_uvd_note_usage(rdev);
+	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
+		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
+		radeon_vce_note_usage(rdev);
 
 	radeon_cs_sync_rings(parser);
 	r = radeon_ib_schedule(rdev, &parser->ib, NULL);
@@ -399,24 +457,48 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
 	return r;
 }
 
-static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser,
+static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
 				   struct radeon_vm *vm)
 {
-	struct radeon_device *rdev = parser->rdev;
-	struct radeon_bo_list *lobj;
-	struct radeon_bo *bo;
-	int r;
+	struct radeon_device *rdev = p->rdev;
+	struct radeon_bo_va *bo_va;
+	int i, r;
 
-	r = radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem);
-	if (r) {
+	r = radeon_vm_update_page_directory(rdev, vm);
+	if (r)
+		return r;
+
+	r = radeon_vm_clear_freed(rdev, vm);
+	if (r)
 		return r;
+
+	if (vm->ib_bo_va == NULL) {
+		DRM_ERROR("Tmp BO not in VM!\n");
+		return -EINVAL;
 	}
-	list_for_each_entry(lobj, &parser->validated, tv.head) {
-		bo = lobj->bo;
-		r = radeon_vm_bo_update_pte(parser->rdev, vm, bo, &bo->tbo.mem);
-		if (r) {
-			return r;
+
+	r = radeon_vm_bo_update(rdev, vm->ib_bo_va,
+				&rdev->ring_tmp_bo.bo->tbo.mem);
+	if (r)
+		return r;
+
+	for (i = 0; i < p->nrelocs; i++) {
+		struct radeon_bo *bo;
+
+		/* ignore duplicates */
+		if (p->relocs_ptr[i] != &p->relocs[i])
+			continue;
+
+		bo = p->relocs[i].robj;
+		bo_va = radeon_vm_bo_find(vm, bo);
+		if (bo_va == NULL) {
+			dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
+			return -EINVAL;
 		}
+
+		r = radeon_vm_bo_update(rdev, bo_va, &bo->tbo.mem);
+		if (r)
+			return r;
 	}
 	return 0;
 }
@@ -424,7 +506,6 @@ static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser,
 static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 				 struct radeon_cs_parser *parser)
 {
-	struct radeon_cs_chunk *ib_chunk;
 	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
 	struct radeon_vm *vm = &fpriv->vm;
 	int r;
@@ -434,49 +515,13 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
 		return 0;
 
-	if ((rdev->family >= CHIP_TAHITI) &&
-	    (parser->chunk_const_ib_idx != -1)) {
-		ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
-		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
-			DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
-			return -EINVAL;
-		}
-		r =  radeon_ib_get(rdev, parser->ring, &parser->const_ib,
-				   vm, ib_chunk->length_dw * 4);
-		if (r) {
-			DRM_ERROR("Failed to get const ib !\n");
-			return r;
-		}
-		parser->const_ib.is_const_ib = true;
-		parser->const_ib.length_dw = ib_chunk->length_dw;
-		/* Copy the packet into the IB */
-		if (DRM_COPY_FROM_USER(parser->const_ib.ptr, ib_chunk->user_ptr,
-				       ib_chunk->length_dw * 4)) {
-			return -EFAULT;
-		}
+	if (parser->const_ib.length_dw) {
 		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
 		if (r) {
 			return r;
 		}
 	}
 
-	ib_chunk = &parser->chunks[parser->chunk_ib_idx];
-	if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
-		DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
-		return -EINVAL;
-	}
-	r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
-			   vm, ib_chunk->length_dw * 4);
-	if (r) {
-		DRM_ERROR("Failed to get ib !\n");
-		return r;
-	}
-	parser->ib.length_dw = ib_chunk->length_dw;
-	/* Copy the packet into the IB */
-	if (DRM_COPY_FROM_USER(parser->ib.ptr, ib_chunk->user_ptr,
-			       ib_chunk->length_dw * 4)) {
-		return -EFAULT;
-	}
 	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
 	if (r) {
 		return r;
@@ -485,20 +530,13 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
 		radeon_uvd_note_usage(rdev);
 
-	mutex_lock(&rdev->vm_manager.lock);
 	mutex_lock(&vm->mutex);
-	r = radeon_vm_alloc_pt(rdev, vm);
-	if (r) {
-		goto out;
-	}
 	r = radeon_bo_vm_update_pte(parser, vm);
 	if (r) {
 		goto out;
 	}
 	radeon_cs_sync_rings(parser);
-	radeon_ib_sync_to(&parser->ib, vm->fence);
-	radeon_ib_sync_to(&parser->ib, radeon_vm_grab_id(
-		rdev, vm, parser->ring));
+	radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);
 
 	if ((rdev->family >= CHIP_TAHITI) &&
 	    (parser->chunk_const_ib_idx != -1)) {
@@ -507,14 +545,8 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 		r = radeon_ib_schedule(rdev, &parser->ib, NULL);
 	}
 
-	if (!r) {
-		radeon_vm_fence(rdev, vm, parser->ib.fence);
-	}
-
 out:
-	radeon_vm_add_to_lru(rdev, vm);
 	mutex_unlock(&vm->mutex);
-	mutex_unlock(&rdev->vm_manager.lock);
 	return r;
 }
@@ -528,6 +560,62 @@ static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
 	return r;
 }
 
+static int radeon_cs_ib_fill(struct radeon_device *rdev, struct radeon_cs_parser *parser)
+{
+	struct radeon_cs_chunk *ib_chunk;
+	struct radeon_vm *vm = NULL;
+	int r;
+
+	if (parser->chunk_ib_idx == -1)
+		return 0;
+
+	if (parser->cs_flags & RADEON_CS_USE_VM) {
+		struct radeon_fpriv *fpriv = parser->filp->driver_priv;
+		vm = &fpriv->vm;
+
+		if ((rdev->family >= CHIP_TAHITI) &&
+		    (parser->chunk_const_ib_idx != -1)) {
+			ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
+			if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
+				DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
+				return -EINVAL;
+			}
+			r =  radeon_ib_get(rdev, parser->ring, &parser->const_ib,
+					   vm, ib_chunk->length_dw * 4);
+			if (r) {
+				DRM_ERROR("Failed to get const ib !\n");
+				return r;
+			}
+			parser->const_ib.is_const_ib = true;
+			parser->const_ib.length_dw = ib_chunk->length_dw;
+			if (copy_from_user(parser->const_ib.ptr,
+					       ib_chunk->user_ptr,
+					       ib_chunk->length_dw * 4))
+				return -EFAULT;
+		}
+
+		ib_chunk = &parser->chunks[parser->chunk_ib_idx];
+		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
+			DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
+			return -EINVAL;
+		}
+	}
+	ib_chunk = &parser->chunks[parser->chunk_ib_idx];
+
+	r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
+			   vm, ib_chunk->length_dw * 4);
+	if (r) {
+		DRM_ERROR("Failed to get ib !\n");
+		return r;
+	}
+	parser->ib.length_dw = ib_chunk->length_dw;
+	if (ib_chunk->kdata)
+		memcpy(parser->ib.ptr, ib_chunk->kdata, ib_chunk->length_dw * 4);
+	else if (copy_from_user(parser->ib.ptr, ib_chunk->user_ptr, ib_chunk->length_dw * 4))
+		return -EFAULT;
+	return 0;
+}
+
 int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 {
 	struct radeon_device *rdev = dev->dev_private;
@@ -553,10 +641,15 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		r = radeon_cs_handle_lockup(rdev, r);
 		return r;
 	}
-	r = radeon_cs_parser_relocs(&parser);
-	if (r) {
-		if (r != -ERESTARTSYS)
+
+	r = radeon_cs_ib_fill(rdev, &parser);
+	if (!r) {
+		r = radeon_cs_parser_relocs(&parser);
+		if (r && r != -ERESTARTSYS)
 			DRM_ERROR("Failed to parse relocation %d!\n", r);
+	}
+
+	if (r) {
 		radeon_cs_parser_fini(&parser, r, false);
 		up_read(&rdev->exclusive_lock);
 		r = radeon_cs_handle_lockup(rdev, r);
@@ -580,97 +673,6 @@ out:
 	return r;
 }
 
-int radeon_cs_finish_pages(struct radeon_cs_parser *p)
-{
-	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
-	int i;
-	int size = PAGE_SIZE;
-
-	for (i = ibc->last_copied_page + 1; i <= ibc->last_page_index; i++) {
-		if (i == ibc->last_page_index) {
-			size = (ibc->length_dw * 4) % PAGE_SIZE;
-			if (size == 0)
-				size = PAGE_SIZE;
-		}
-
-		if (DRM_COPY_FROM_USER(p->ib.ptr + (i * (PAGE_SIZE/4)),
-				       ibc->user_ptr + (i * PAGE_SIZE),
-				       size))
-			return -EFAULT;
-	}
-	return 0;
-}
-
-static int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
-{
-	int new_page;
-	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
-	int i;
-	int size = PAGE_SIZE;
-	bool copy1 = (p->rdev && (p->rdev->flags & RADEON_IS_AGP)) ?
-		false : true;
-
-	for (i = ibc->last_copied_page + 1; i < pg_idx; i++) {
-		if (DRM_COPY_FROM_USER(p->ib.ptr + (i * (PAGE_SIZE/4)),
-				       ibc->user_ptr + (i * PAGE_SIZE),
-				       PAGE_SIZE)) {
-			p->parser_error = -EFAULT;
-			return 0;
-		}
-	}
-
-	if (pg_idx == ibc->last_page_index) {
-		size = (ibc->length_dw * 4) % PAGE_SIZE;
-		if (size == 0)
-			size = PAGE_SIZE;
-	}
-
-	new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
-	if (copy1)
-		ibc->kpage[new_page] = p->ib.ptr + (pg_idx * (PAGE_SIZE / 4));
-
-	if (DRM_COPY_FROM_USER(ibc->kpage[new_page],
-			       ibc->user_ptr + (pg_idx * PAGE_SIZE),
-			       size)) {
-		p->parser_error = -EFAULT;
-		return 0;
-	}
-
-	/* copy to IB for non single case */
-	if (!copy1)
-		memcpy((void *)(p->ib.ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size);
-
-	ibc->last_copied_page = pg_idx;
-	ibc->kpage_idx[new_page] = pg_idx;
-
-	return new_page;
-}
-
-u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
-{
-	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
-	u32 pg_idx, pg_offset;
-	u32 idx_value = 0;
-	int new_page;
-
-	pg_idx = (idx * 4) / PAGE_SIZE;
-	pg_offset = (idx * 4) % PAGE_SIZE;
-
-	if (ibc->kpage_idx[0] == pg_idx)
-		return ibc->kpage[0][pg_offset/4];
-	if (ibc->kpage_idx[1] == pg_idx)
-		return ibc->kpage[1][pg_offset/4];
-
-	new_page = radeon_cs_update_pages(p, pg_idx);
-	if (new_page < 0) {
-		p->parser_error = new_page;
-		return 0;
-	}
-
-	idx_value = ibc->kpage[new_page][pg_offset/4];
-	return idx_value;
-}
-
 /**
  * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
  * @parser:	parser structure holding parsing context.
@@ -812,9 +814,9 @@ int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
 	/* FIXME: we assume reloc size is 4 dwords */
 	if (nomm) {
 		*cs_reloc = p->relocs;
-		(*cs_reloc)->lobj.gpu_offset =
+		(*cs_reloc)->gpu_offset =
 			(u64)relocs_chunk->kdata[idx + 3] << 32;
-		(*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0];
+		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
 	} else
 		*cs_reloc = p->relocs_ptr[(idx / 4)];
 	return 0;
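
Note on the bucket sort introduced in radeon_cs_parser_relocs(): below is a minimal, self-contained userspace C sketch of the same idea. The types and names here (struct item, buckets_add, and so on) are hypothetical stand-ins; the kernel version chains struct list_head buckets with list_splice(), which prepends each bucket, so walking the buckets in ascending index order yields a descending-priority result. The sketch instead walks the buckets from highest to lowest and appends, which produces the same ordering. The priority encoding matches the patch: the 4-bit userspace priority is doubled, written buffers get +1, and 32 is reserved for the kernel (the UVD msg case).

/* Sketch: stable bucket sort over priorities 0..32, as in the patch.
 * Hypothetical stand-in types; the kernel uses struct list_head. */
#include <stdio.h>

#define MAX_PRIORITY	32u
#define NUM_BUCKETS	(MAX_PRIORITY + 1)

struct item {
	const char *name;
	unsigned flags;		/* userspace priority, 0..15 */
	unsigned write_domain;	/* nonzero if the GPU writes the buffer */
	unsigned priority;
	struct item *next;
};

struct buckets {
	struct item *head[NUM_BUCKETS];
	struct item *tail[NUM_BUCKETS];
};

static void buckets_add(struct buckets *b, struct item *it, unsigned priority)
{
	unsigned i = priority < MAX_PRIORITY ? priority : MAX_PRIORITY;

	/* Appending keeps items of equal priority in submission order,
	 * i.e. the sort is stable. */
	it->next = NULL;
	if (b->tail[i])
		b->tail[i]->next = it;
	else
		b->head[i] = it;
	b->tail[i] = it;
}

/* Concatenate the buckets from highest to lowest priority. */
static struct item *buckets_get_list(struct buckets *b)
{
	struct item *out = NULL, **pos = &out;
	int i;

	for (i = NUM_BUCKETS - 1; i >= 0; i--) {
		if (!b->head[i])
			continue;
		*pos = b->head[i];
		pos = &b->tail[i]->next;
	}
	return out;
}

int main(void)
{
	struct buckets b = { { NULL }, { NULL } };
	struct item items[] = {
		{ "vertex buffer",  1, 0 },	/* priority 2 */
		{ "texture",        1, 0 },	/* priority 2, stays behind */
		{ "color buffer",  15, 1 },	/* priority 31 */
		{ "depth buffer",  15, 1 },	/* priority 31, stays behind */
	};
	struct item *it;
	unsigned i;

	for (i = 0; i < sizeof(items) / sizeof(items[0]); i++) {
		/* same encoding as the patch: 0..15 doubled, +1 for writes */
		items[i].priority = (items[i].flags & 0xf) * 2 +
				    !!items[i].write_domain;
		buckets_add(&b, &items[i], items[i].priority);
	}

	for (it = buckets_get_list(&b); it; it = it->next)
		printf("%2u %s\n", it->priority, it->name);
	return 0;
}

Running this prints the color and depth buffers (priority 31) before the priority-2 buffers, with each pair still in submission order, which is exactly the property the stability comment in the patch is about.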
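Note on the list_sort() call added to radeon_cs_parser_fini(): the kernel's list_sort() is a stable merge sort driven only by the comparator's sign, so cmp_size_smaller_first() orders the validated list smallest-first while equal-sized buffers keep their relative order. Since TTM evicts from the LRU tail first, queueing small buffers onto the LRU first makes them the cheaper eviction candidates, which is the effect the patch comment describes. A minimal userspace sketch of the same ordering follows; struct bo and the array-based stable insertion sort are hypothetical stand-ins for the kernel's radeon_cs_reloc list.

/* Sketch: smallest-first ordering with a stable sort, mirroring
 * cmp_size_smaller_first() + list_sort() in the patch. struct bo is
 * a hypothetical stand-in for the kernel's radeon_cs_reloc. */
#include <stdio.h>

struct bo {
	const char *name;
	unsigned num_pages;
};

/* Same convention as a list_sort() comparator:
 * a negative result means "a sorts before b". */
static int cmp_size_smaller_first(const struct bo *a, const struct bo *b)
{
	return (int)a->num_pages - (int)b->num_pages;
}

int main(void)
{
	struct bo validated[] = {
		{ "color buffer",  2048 },
		{ "texture",        512 },
		{ "vertex buffer",   16 },
		{ "depth buffer",  2048 },	/* stays behind color buffer */
	};
	unsigned n = sizeof(validated) / sizeof(validated[0]);
	unsigned i, j;

	/* Insertion sort is stable: equal-sized buffers keep their
	 * relative order, like the kernel's merge-sort-based list_sort. */
	for (i = 1; i < n; i++) {
		struct bo key = validated[i];

		for (j = i; j > 0 &&
		     cmp_size_smaller_first(&validated[j - 1], &key) > 0; j--)
			validated[j] = validated[j - 1];
		validated[j] = key;
	}

	for (i = 0; i < n; i++)
		printf("%4u pages  %s\n", validated[i].num_pages,
		       validated[i].name);
	return 0;
}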
