Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
| -rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.c | 2833 |
1 files changed, 2105 insertions, 728 deletions
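
As a reading aid before the patch text below, here is a minimal standalone sketch of the free-space arithmetic behind the new __ring_space()/ring_space() helpers the patch introduces. RING_SIZE, RING_FREE_SPACE and ring_space_example() are illustrative stand-ins for the patch's 32-page ring, I915_RING_FREE_SPACE and __ring_space(); the head/tail values are made up.

	#include <stdio.h>

	#define RING_SIZE       (32 * 4096)	/* the 32-page ring set up in intel_init_ring_buffer() */
	#define RING_FREE_SPACE 64		/* stand-in for I915_RING_FREE_SPACE: a small reserve so
						 * head and tail never become equal on a full ring */

	/* Same arithmetic as the patch's __ring_space(head, tail, size). */
	static int ring_space_example(int head, int tail, int size)
	{
		int space = head - (tail + RING_FREE_SPACE);
		if (space < 0)
			space += size;	/* the free region wraps around the end of the buffer */
		return space;
	}

	int main(void)
	{
		/* head behind tail: usable space wraps past the end of the ring */
		printf("%d\n", ring_space_example(4096, 8192, RING_SIZE));	/* 126912 with the sizes above */
		/* head ahead of tail: usable space is the gap minus the reserve */
		printf("%d\n", ring_space_example(8192, 4096, RING_SIZE));	/* 4032 */
		return 0;
	}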
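
The most heavily commented addition is the Sandy Bridge PIPE_CONTROL workaround around gen6_render_ring_flush(). The snippet below only summarises, as printable text, the packet ordering those comments require; gen6_flush_sequence and main() are made up for illustration, while the flag names are the ones used in the patch and nothing here touches hardware.

	#include <stdio.h>

	static const struct {
		const char *purpose;
		const char *dw1_flags;
	} gen6_flush_sequence[] = {
		{ "W/A: make a post-sync op legal in the next packet",
		  "PIPE_CONTROL_CS_STALL | PIPE_CONTROL_STALL_AT_SCOREBOARD" },
		{ "W/A: non-zero post-sync op (qword write to the scratch page)",
		  "PIPE_CONTROL_QW_WRITE" },
		{ "the flush/invalidate actually requested by the caller",
		  "render-target/depth flushes and/or TLB+cache invalidates "
		  "(plus QW_WRITE and CS_STALL when invalidating)" },
	};

	int main(void)
	{
		/* Print the three PIPE_CONTROL packets in the order the W/As require. */
		for (unsigned int i = 0; i < sizeof(gen6_flush_sequence) / sizeof(gen6_flush_sequence[0]); i++)
			printf("%u. %s: %s\n", i + 1,
			       gen6_flush_sequence[i].purpose,
			       gen6_flush_sequence[i].dw1_flags);
		return 0;
	}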
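
The patch also adds intel_ring_cacheline_align(), which reserves enough dwords to pad the ring tail up to the next CACHELINE_BYTES (64-byte) boundary. A standalone sketch of just that arithmetic follows; cacheline_pad_dwords() is an illustrative name and the tail values are made up, while CACHELINE_BYTES matches the macro defined at the top of the patch.

	#include <stdio.h>
	#include <stdint.h>

	#define CACHELINE_BYTES 64

	/* How many padding dwords intel_ring_cacheline_align() would need
	 * to reserve for a given ring tail offset. */
	static int cacheline_pad_dwords(uint32_t tail)
	{
		int num_dwords = (tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);

		if (num_dwords == 0)
			return 0;	/* already cacheline aligned, nothing to emit */

		return CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
	}

	int main(void)
	{
		printf("%d\n", cacheline_pad_dwords(0x1234));	/* 52 bytes into the cacheline -> 3 padding dwords */
		printf("%d\n", cacheline_pad_dwords(0x1240));	/* already aligned -> 0 */
		return 0;
	}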
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index b83306f9244..279488addf3 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -27,1002 +27,2379 @@   *   */ -#include "drmP.h" -#include "drm.h" +#include <drm/drmP.h>  #include "i915_drv.h" -#include "i915_drm.h" +#include <drm/i915_drm.h>  #include "i915_trace.h"  #include "intel_drv.h" -static u32 i915_gem_get_seqno(struct drm_device *dev) +/* Early gen2 devices have a cacheline of just 32 bytes, using 64 is overkill, + * but keeps the logic simple. Indeed, the whole purpose of this macro is just + * to give some inclination as to some of the magic values used in the various + * workarounds! + */ +#define CACHELINE_BYTES 64 + +static inline int __ring_space(int head, int tail, int size)  { -	drm_i915_private_t *dev_priv = dev->dev_private; -	u32 seqno; +	int space = head - (tail + I915_RING_FREE_SPACE); +	if (space < 0) +		space += size; +	return space; +} -	seqno = dev_priv->next_seqno; +static inline int ring_space(struct intel_engine_cs *ring) +{ +	struct intel_ringbuffer *ringbuf = ring->buffer; +	return __ring_space(ringbuf->head & HEAD_ADDR, ringbuf->tail, ringbuf->size); +} -	/* reserve 0 for non-seqno */ -	if (++dev_priv->next_seqno == 0) -		dev_priv->next_seqno = 1; +static bool intel_ring_stopped(struct intel_engine_cs *ring) +{ +	struct drm_i915_private *dev_priv = ring->dev->dev_private; +	return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring); +} -	return seqno; +void __intel_ring_advance(struct intel_engine_cs *ring) +{ +	struct intel_ringbuffer *ringbuf = ring->buffer; +	ringbuf->tail &= ringbuf->size - 1; +	if (intel_ring_stopped(ring)) +		return; +	ring->write_tail(ring, ringbuf->tail);  } -static void -render_ring_flush(struct drm_device *dev, -		  struct intel_ring_buffer *ring, -		  u32	invalidate_domains, -		  u32	flush_domains) +static int +gen2_render_ring_flush(struct intel_engine_cs *ring, +		       u32	invalidate_domains, +		       u32	flush_domains) +{ +	u32 cmd; +	int ret; + +	cmd = MI_FLUSH; +	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0) +		cmd |= MI_NO_WRITE_FLUSH; + +	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) +		cmd |= MI_READ_FLUSH; + +	ret = intel_ring_begin(ring, 2); +	if (ret) +		return ret; + +	intel_ring_emit(ring, cmd); +	intel_ring_emit(ring, MI_NOOP); +	intel_ring_advance(ring); + +	return 0; +} + +static int +gen4_render_ring_flush(struct intel_engine_cs *ring, +		       u32	invalidate_domains, +		       u32	flush_domains)  { -	drm_i915_private_t *dev_priv = dev->dev_private; +	struct drm_device *dev = ring->dev;  	u32 cmd; +	int ret; + +	/* +	 * read/write caches: +	 * +	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is +	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is +	 * also flushed at 2d versus 3d pipeline switches. +	 * +	 * read-only caches: +	 * +	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if +	 * MI_READ_FLUSH is set, and is always flushed on 965. +	 * +	 * I915_GEM_DOMAIN_COMMAND may not exist? +	 * +	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is +	 * invalidated when MI_EXE_FLUSH is set. +	 * +	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is +	 * invalidated with every MI_FLUSH. +	 * +	 * TLBs: +	 * +	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND +	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and +	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER +	 * are flushed at any MI_FLUSH. 
+	 */ + +	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; +	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) +		cmd &= ~MI_NO_WRITE_FLUSH; +	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) +		cmd |= MI_EXE_FLUSH; + +	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND && +	    (IS_G4X(dev) || IS_GEN5(dev))) +		cmd |= MI_INVALIDATE_ISP; + +	ret = intel_ring_begin(ring, 2); +	if (ret) +		return ret; -#if WATCH_EXEC -	DRM_INFO("%s: invalidate %08x flush %08x\n", __func__, -		  invalidate_domains, flush_domains); -#endif +	intel_ring_emit(ring, cmd); +	intel_ring_emit(ring, MI_NOOP); +	intel_ring_advance(ring); + +	return 0; +} + +/** + * Emits a PIPE_CONTROL with a non-zero post-sync operation, for + * implementing two workarounds on gen6.  From section 1.4.7.1 + * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1: + * + * [DevSNB-C+{W/A}] Before any depth stall flush (including those + * produced by non-pipelined state commands), software needs to first + * send a PIPE_CONTROL with no bits set except Post-Sync Operation != + * 0. + * + * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable + * =1, a PIPE_CONTROL with any non-zero post-sync-op is required. + * + * And the workaround for these two requires this workaround first: + * + * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent + * BEFORE the pipe-control with a post-sync op and no write-cache + * flushes. + * + * And this last workaround is tricky because of the requirements on + * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM + * volume 2 part 1: + * + *     "1 of the following must also be set: + *      - Render Target Cache Flush Enable ([12] of DW1) + *      - Depth Cache Flush Enable ([0] of DW1) + *      - Stall at Pixel Scoreboard ([1] of DW1) + *      - Depth Stall ([13] of DW1) + *      - Post-Sync Operation ([13] of DW1) + *      - Notify Enable ([8] of DW1)" + * + * The cache flushes require the workaround flush that triggered this + * one, so we can't use it.  Depth stall would trigger the same. + * Post-sync nonzero is what triggered this second workaround, so we + * can't use that one either.  Notify enable is IRQs, which aren't + * really our business.  That leaves only stall at scoreboard. 
+ */ +static int +intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring) +{ +	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; +	int ret; -	trace_i915_gem_request_flush(dev, dev_priv->next_seqno, -				     invalidate_domains, flush_domains); -	if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) { +	ret = intel_ring_begin(ring, 6); +	if (ret) +		return ret; + +	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); +	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | +			PIPE_CONTROL_STALL_AT_SCOREBOARD); +	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */ +	intel_ring_emit(ring, 0); /* low dword */ +	intel_ring_emit(ring, 0); /* high dword */ +	intel_ring_emit(ring, MI_NOOP); +	intel_ring_advance(ring); + +	ret = intel_ring_begin(ring, 6); +	if (ret) +		return ret; + +	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5)); +	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE); +	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */ +	intel_ring_emit(ring, 0); +	intel_ring_emit(ring, 0); +	intel_ring_emit(ring, MI_NOOP); +	intel_ring_advance(ring); + +	return 0; +} + +static int +gen6_render_ring_flush(struct intel_engine_cs *ring, +                         u32 invalidate_domains, u32 flush_domains) +{ +	u32 flags = 0; +	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; +	int ret; + +	/* Force SNB workarounds for PIPE_CONTROL flushes */ +	ret = intel_emit_post_sync_nonzero_flush(ring); +	if (ret) +		return ret; + +	/* Just flush everything.  Experiments have shown that reducing the +	 * number of bits based on the write domains has little performance +	 * impact. +	 */ +	if (flush_domains) { +		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; +		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; +		/* +		 * Ensure that any following seqno writes only happen +		 * when the render cache is indeed flushed. +		 */ +		flags |= PIPE_CONTROL_CS_STALL; +	} +	if (invalidate_domains) { +		flags |= PIPE_CONTROL_TLB_INVALIDATE; +		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; +		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; +		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; +		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; +		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;  		/* -		 * read/write caches: -		 * -		 * I915_GEM_DOMAIN_RENDER is always invalidated, but is -		 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is -		 * also flushed at 2d versus 3d pipeline switches. -		 * -		 * read-only caches: -		 * -		 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if -		 * MI_READ_FLUSH is set, and is always flushed on 965. -		 * -		 * I915_GEM_DOMAIN_COMMAND may not exist? -		 * -		 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is -		 * invalidated when MI_EXE_FLUSH is set. -		 * -		 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is -		 * invalidated with every MI_FLUSH. -		 * -		 * TLBs: -		 * -		 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND -		 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and -		 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER -		 * are flushed at any MI_FLUSH. +		 * TLB invalidate requires a post-sync write.  		 */ +		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL; +	} -		cmd = MI_FLUSH | MI_NO_WRITE_FLUSH; -		if ((invalidate_domains|flush_domains) & -		    I915_GEM_DOMAIN_RENDER) -			cmd &= ~MI_NO_WRITE_FLUSH; -		if (INTEL_INFO(dev)->gen < 4) { -			/* -			 * On the 965, the sampler cache always gets flushed -			 * and this bit is reserved. 
-			 */ -			if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER) -				cmd |= MI_READ_FLUSH; -		} -		if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION) -			cmd |= MI_EXE_FLUSH; - -#if WATCH_EXEC -		DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd); -#endif -		intel_ring_begin(dev, ring, 2); -		intel_ring_emit(dev, ring, cmd); -		intel_ring_emit(dev, ring, MI_NOOP); -		intel_ring_advance(dev, ring); +	ret = intel_ring_begin(ring, 4); +	if (ret) +		return ret; + +	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); +	intel_ring_emit(ring, flags); +	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); +	intel_ring_emit(ring, 0); +	intel_ring_advance(ring); + +	return 0; +} + +static int +gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring) +{ +	int ret; + +	ret = intel_ring_begin(ring, 4); +	if (ret) +		return ret; + +	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); +	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL | +			      PIPE_CONTROL_STALL_AT_SCOREBOARD); +	intel_ring_emit(ring, 0); +	intel_ring_emit(ring, 0); +	intel_ring_advance(ring); + +	return 0; +} + +static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value) +{ +	int ret; + +	if (!ring->fbc_dirty) +		return 0; + +	ret = intel_ring_begin(ring, 6); +	if (ret) +		return ret; +	/* WaFbcNukeOn3DBlt:ivb/hsw */ +	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1)); +	intel_ring_emit(ring, MSG_FBC_REND_STATE); +	intel_ring_emit(ring, value); +	intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT); +	intel_ring_emit(ring, MSG_FBC_REND_STATE); +	intel_ring_emit(ring, ring->scratch.gtt_offset + 256); +	intel_ring_advance(ring); + +	ring->fbc_dirty = false; +	return 0; +} + +static int +gen7_render_ring_flush(struct intel_engine_cs *ring, +		       u32 invalidate_domains, u32 flush_domains) +{ +	u32 flags = 0; +	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; +	int ret; + +	/* +	 * Ensure that any following seqno writes only happen when the render +	 * cache is indeed flushed. +	 * +	 * Workaround: 4th PIPE_CONTROL command (except the ones with only +	 * read-cache invalidate bits set) must have the CS_STALL bit set. We +	 * don't try to be clever and just set it unconditionally. +	 */ +	flags |= PIPE_CONTROL_CS_STALL; + +	/* Just flush everything.  Experiments have shown that reducing the +	 * number of bits based on the write domains has little performance +	 * impact. +	 */ +	if (flush_domains) { +		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; +		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; +	} +	if (invalidate_domains) { +		flags |= PIPE_CONTROL_TLB_INVALIDATE; +		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; +		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; +		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; +		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; +		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; +		/* +		 * TLB invalidate requires a post-sync write. +		 */ +		flags |= PIPE_CONTROL_QW_WRITE; +		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; + +		/* Workaround: we must issue a pipe_control with CS-stall bit +		 * set before a pipe_control command that has the state cache +		 * invalidate bit set. 
*/ +		gen7_render_ring_cs_stall_wa(ring);  	} + +	ret = intel_ring_begin(ring, 4); +	if (ret) +		return ret; + +	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4)); +	intel_ring_emit(ring, flags); +	intel_ring_emit(ring, scratch_addr); +	intel_ring_emit(ring, 0); +	intel_ring_advance(ring); + +	if (!invalidate_domains && flush_domains) +		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE); + +	return 0;  } -static void ring_write_tail(struct drm_device *dev, -			    struct intel_ring_buffer *ring, +static int +gen8_render_ring_flush(struct intel_engine_cs *ring, +		       u32 invalidate_domains, u32 flush_domains) +{ +	u32 flags = 0; +	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; +	int ret; + +	flags |= PIPE_CONTROL_CS_STALL; + +	if (flush_domains) { +		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; +		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; +	} +	if (invalidate_domains) { +		flags |= PIPE_CONTROL_TLB_INVALIDATE; +		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; +		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; +		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; +		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; +		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; +		flags |= PIPE_CONTROL_QW_WRITE; +		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB; +	} + +	ret = intel_ring_begin(ring, 6); +	if (ret) +		return ret; + +	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6)); +	intel_ring_emit(ring, flags); +	intel_ring_emit(ring, scratch_addr); +	intel_ring_emit(ring, 0); +	intel_ring_emit(ring, 0); +	intel_ring_emit(ring, 0); +	intel_ring_advance(ring); + +	return 0; + +} + +static void ring_write_tail(struct intel_engine_cs *ring,  			    u32 value)  { -	drm_i915_private_t *dev_priv = dev->dev_private; +	struct drm_i915_private *dev_priv = ring->dev->dev_private;  	I915_WRITE_TAIL(ring, value);  } -u32 intel_ring_get_active_head(struct drm_device *dev, -			       struct intel_ring_buffer *ring) +u64 intel_ring_get_active_head(struct intel_engine_cs *ring)  { -	drm_i915_private_t *dev_priv = dev->dev_private; -	u32 acthd_reg = INTEL_INFO(dev)->gen >= 4 ? -			RING_ACTHD(ring->mmio_base) : ACTHD; +	struct drm_i915_private *dev_priv = ring->dev->dev_private; +	u64 acthd; + +	if (INTEL_INFO(ring->dev)->gen >= 8) +		acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base), +					 RING_ACTHD_UDW(ring->mmio_base)); +	else if (INTEL_INFO(ring->dev)->gen >= 4) +		acthd = I915_READ(RING_ACTHD(ring->mmio_base)); +	else +		acthd = I915_READ(ACTHD); -	return I915_READ(acthd_reg); +	return acthd;  } -static int init_ring_common(struct drm_device *dev, -			    struct intel_ring_buffer *ring) +static void ring_setup_phys_status_page(struct intel_engine_cs *ring)  { -	u32 head; -	drm_i915_private_t *dev_priv = dev->dev_private; -	struct drm_i915_gem_object *obj_priv; -	obj_priv = to_intel_bo(ring->gem_object); +	struct drm_i915_private *dev_priv = ring->dev->dev_private; +	u32 addr; + +	addr = dev_priv->status_page_dmah->busaddr; +	if (INTEL_INFO(ring->dev)->gen >= 4) +		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0; +	I915_WRITE(HWS_PGA, addr); +} + +static bool stop_ring(struct intel_engine_cs *ring) +{ +	struct drm_i915_private *dev_priv = to_i915(ring->dev); + +	if (!IS_GEN2(ring->dev)) { +		I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING)); +		if (wait_for_atomic((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) { +			DRM_ERROR("%s :timed out trying to stop ring\n", ring->name); +			return false; +		} +	} -	/* Stop the ring if it's running. 
*/  	I915_WRITE_CTL(ring, 0);  	I915_WRITE_HEAD(ring, 0); -	ring->write_tail(dev, ring, 0); - -	/* Initialize the ring. */ -	I915_WRITE_START(ring, obj_priv->gtt_offset); -	head = I915_READ_HEAD(ring) & HEAD_ADDR; - -	/* G45 ring initialization fails to reset head to zero */ -	if (head != 0) { -		DRM_ERROR("%s head not reset to zero " -				"ctl %08x head %08x tail %08x start %08x\n", -				ring->name, -				I915_READ_CTL(ring), -				I915_READ_HEAD(ring), -				I915_READ_TAIL(ring), -				I915_READ_START(ring)); - -		I915_WRITE_HEAD(ring, 0); - -		DRM_ERROR("%s head forced to zero " -				"ctl %08x head %08x tail %08x start %08x\n", -				ring->name, -				I915_READ_CTL(ring), -				I915_READ_HEAD(ring), -				I915_READ_TAIL(ring), -				I915_READ_START(ring)); +	ring->write_tail(ring, 0); + +	if (!IS_GEN2(ring->dev)) { +		(void)I915_READ_CTL(ring); +		I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING)); +	} + +	return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0; +} + +static int init_ring_common(struct intel_engine_cs *ring) +{ +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = dev->dev_private; +	struct intel_ringbuffer *ringbuf = ring->buffer; +	struct drm_i915_gem_object *obj = ringbuf->obj; +	int ret = 0; + +	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL); + +	if (!stop_ring(ring)) { +		/* G45 ring initialization often fails to reset head to zero */ +		DRM_DEBUG_KMS("%s head not reset to zero " +			      "ctl %08x head %08x tail %08x start %08x\n", +			      ring->name, +			      I915_READ_CTL(ring), +			      I915_READ_HEAD(ring), +			      I915_READ_TAIL(ring), +			      I915_READ_START(ring)); + +		if (!stop_ring(ring)) { +			DRM_ERROR("failed to set %s head to zero " +				  "ctl %08x head %08x tail %08x start %08x\n", +				  ring->name, +				  I915_READ_CTL(ring), +				  I915_READ_HEAD(ring), +				  I915_READ_TAIL(ring), +				  I915_READ_START(ring)); +			ret = -EIO; +			goto out; +		}  	} +	if (I915_NEED_GFX_HWS(dev)) +		intel_ring_setup_status_page(ring); +	else +		ring_setup_phys_status_page(ring); + +	/* Initialize the ring. This must happen _after_ we've cleared the ring +	 * registers with the above sequence (the readback of the HEAD registers +	 * also enforces ordering), otherwise the hw might lose the new ring +	 * register values. */ +	I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));  	I915_WRITE_CTL(ring, -			((ring->gem_object->size - PAGE_SIZE) & RING_NR_PAGES) -			| RING_REPORT_64K | RING_VALID); +			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES) +			| RING_VALID); -	head = I915_READ_HEAD(ring) & HEAD_ADDR;  	/* If the head is still not zero, the ring is dead */ -	if (head != 0) { +	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 && +		     I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) && +		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {  		DRM_ERROR("%s initialization failed " -				"ctl %08x head %08x tail %08x start %08x\n", -				ring->name, -				I915_READ_CTL(ring), -				I915_READ_HEAD(ring), -				I915_READ_TAIL(ring), -				I915_READ_START(ring)); -		return -EIO; +			  "ctl %08x (valid? 
%d) head %08x tail %08x start %08x [expected %08lx]\n", +			  ring->name, +			  I915_READ_CTL(ring), I915_READ_CTL(ring) & RING_VALID, +			  I915_READ_HEAD(ring), I915_READ_TAIL(ring), +			  I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj)); +		ret = -EIO; +		goto out;  	} -	if (!drm_core_check_feature(dev, DRIVER_MODESET)) -		i915_kernel_lost_context(dev); +	if (!drm_core_check_feature(ring->dev, DRIVER_MODESET)) +		i915_kernel_lost_context(ring->dev);  	else { -		ring->head = I915_READ_HEAD(ring) & HEAD_ADDR; -		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR; -		ring->space = ring->head - (ring->tail + 8); -		if (ring->space < 0) -			ring->space += ring->size; +		ringbuf->head = I915_READ_HEAD(ring); +		ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR; +		ringbuf->space = ring_space(ring); +		ringbuf->last_retired_head = -1; +	} + +	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck)); + +out: +	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL); + +	return ret; +} + +static int +init_pipe_control(struct intel_engine_cs *ring) +{ +	int ret; + +	if (ring->scratch.obj) +		return 0; + +	ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096); +	if (ring->scratch.obj == NULL) { +		DRM_ERROR("Failed to allocate seqno page\n"); +		ret = -ENOMEM; +		goto err;  	} + +	ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC); +	if (ret) +		goto err_unref; + +	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0); +	if (ret) +		goto err_unref; + +	ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj); +	ring->scratch.cpu_page = kmap(sg_page(ring->scratch.obj->pages->sgl)); +	if (ring->scratch.cpu_page == NULL) { +		ret = -ENOMEM; +		goto err_unpin; +	} + +	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n", +			 ring->name, ring->scratch.gtt_offset);  	return 0; + +err_unpin: +	i915_gem_object_ggtt_unpin(ring->scratch.obj); +err_unref: +	drm_gem_object_unreference(&ring->scratch.obj->base); +err: +	return ret;  } -static int init_render_ring(struct drm_device *dev, -			    struct intel_ring_buffer *ring) +static int init_render_ring(struct intel_engine_cs *ring)  { -	drm_i915_private_t *dev_priv = dev->dev_private; -	int ret = init_ring_common(dev, ring); -	int mode; +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = dev->dev_private; +	int ret = init_ring_common(ring); + +	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */ +	if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7) +		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH)); + +	/* We need to disable the AsyncFlip performance optimisations in order +	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be +	 * programmed to '1' on all products. 
+	 * +	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv +	 */ +	if (INTEL_INFO(dev)->gen >= 6) +		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE)); + +	/* Required for the hardware to program scanline values for waiting */ +	/* WaEnableFlushTlbInvalidationMode:snb */ +	if (INTEL_INFO(dev)->gen == 6) +		I915_WRITE(GFX_MODE, +			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT)); + +	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ +	if (IS_GEN7(dev)) +		I915_WRITE(GFX_MODE_GEN7, +			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) | +			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE)); + +	if (INTEL_INFO(dev)->gen >= 5) { +		ret = init_pipe_control(ring); +		if (ret) +			return ret; +	} -	if (INTEL_INFO(dev)->gen > 3) { -		mode = VS_TIMER_DISPATCH << 16 | VS_TIMER_DISPATCH; -		if (IS_GEN6(dev)) -			mode |= MI_FLUSH_ENABLE << 16 | MI_FLUSH_ENABLE; -		I915_WRITE(MI_MODE, mode); +	if (IS_GEN6(dev)) { +		/* From the Sandybridge PRM, volume 1 part 3, page 24: +		 * "If this bit is set, STCunit will have LRA as replacement +		 *  policy. [...] This bit must be reset.  LRA replacement +		 *  policy is not supported." +		 */ +		I915_WRITE(CACHE_MODE_0, +			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));  	} + +	if (INTEL_INFO(dev)->gen >= 6) +		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING)); + +	if (HAS_L3_DPF(dev)) +		I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev)); +  	return ret;  } -#define PIPE_CONTROL_FLUSH(addr)					\ -do {									\ -	OUT_RING(GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE |		\ -		 PIPE_CONTROL_DEPTH_STALL | 2);				\ -	OUT_RING(addr | PIPE_CONTROL_GLOBAL_GTT);			\ -	OUT_RING(0);							\ -	OUT_RING(0);							\ -} while (0) +static void render_ring_cleanup(struct intel_engine_cs *ring) +{ +	struct drm_device *dev = ring->dev; + +	if (ring->scratch.obj == NULL) +		return; + +	if (INTEL_INFO(dev)->gen >= 5) { +		kunmap(sg_page(ring->scratch.obj->pages->sgl)); +		i915_gem_object_ggtt_unpin(ring->scratch.obj); +	} + +	drm_gem_object_unreference(&ring->scratch.obj->base); +	ring->scratch.obj = NULL; +} + +static int gen6_signal(struct intel_engine_cs *signaller, +		       unsigned int num_dwords) +{ +	struct drm_device *dev = signaller->dev; +	struct drm_i915_private *dev_priv = dev->dev_private; +	struct intel_engine_cs *useless; +	int i, ret; + +	/* NB: In order to be able to do semaphore MBOX updates for varying +	 * number of rings, it's easiest if we round up each individual update +	 * to a multiple of 2 (since ring updates must always be a multiple of +	 * 2) even though the actual update only requires 3 dwords. 
+	 */ +#define MBOX_UPDATE_DWORDS 4 +	if (i915_semaphore_is_enabled(dev)) +		num_dwords += ((I915_NUM_RINGS-1) * MBOX_UPDATE_DWORDS); +	else +		return intel_ring_begin(signaller, num_dwords); + +	ret = intel_ring_begin(signaller, num_dwords); +	if (ret) +		return ret; +#undef MBOX_UPDATE_DWORDS + +	for_each_ring(useless, dev_priv, i) { +		u32 mbox_reg = signaller->semaphore.mbox.signal[i]; +		if (mbox_reg != GEN6_NOSYNC) { +			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1)); +			intel_ring_emit(signaller, mbox_reg); +			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno); +			intel_ring_emit(signaller, MI_NOOP); +		} else { +			intel_ring_emit(signaller, MI_NOOP); +			intel_ring_emit(signaller, MI_NOOP); +			intel_ring_emit(signaller, MI_NOOP); +			intel_ring_emit(signaller, MI_NOOP); +		} +	} + +	return 0; +}  /** - * Creates a new sequence number, emitting a write of it to the status page - * plus an interrupt, which will trigger i915_user_interrupt_handler. + * gen6_add_request - Update the semaphore mailbox registers + *  + * @ring - ring that is adding a request + * @seqno - return seqno stuck into the ring   * - * Must be called with struct_lock held. + * Update the mailbox registers in the *other* rings with the current seqno. + * This acts like a signal in the canonical semaphore. + */ +static int +gen6_add_request(struct intel_engine_cs *ring) +{ +	int ret; + +	ret = ring->semaphore.signal(ring, 4); +	if (ret) +		return ret; + +	intel_ring_emit(ring, MI_STORE_DWORD_INDEX); +	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); +	intel_ring_emit(ring, ring->outstanding_lazy_seqno); +	intel_ring_emit(ring, MI_USER_INTERRUPT); +	__intel_ring_advance(ring); + +	return 0; +} + +static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev, +					      u32 seqno) +{ +	struct drm_i915_private *dev_priv = dev->dev_private; +	return dev_priv->last_seqno < seqno; +} + +/** + * intel_ring_sync - sync the waiter to the signaller on seqno   * - * Returned sequence numbers are nonzero on success. + * @waiter - ring that is waiting + * @signaller - ring which has, or will signal + * @seqno - seqno which the waiter will block on   */ -static u32 -render_ring_add_request(struct drm_device *dev, -			struct intel_ring_buffer *ring, -			u32 flush_domains) +static int +gen6_ring_sync(struct intel_engine_cs *waiter, +	       struct intel_engine_cs *signaller, +	       u32 seqno)  { -	drm_i915_private_t *dev_priv = dev->dev_private; -	u32 seqno; +	u32 dw1 = MI_SEMAPHORE_MBOX | +		  MI_SEMAPHORE_COMPARE | +		  MI_SEMAPHORE_REGISTER; +	u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id]; +	int ret; -	seqno = i915_gem_get_seqno(dev); +	/* Throughout all of the GEM code, seqno passed implies our current +	 * seqno is >= the last seqno executed. However for hardware the +	 * comparison is strictly greater than. +	 */ +	seqno -= 1; -	if (IS_GEN6(dev)) { -		BEGIN_LP_RING(6); -		OUT_RING(GFX_OP_PIPE_CONTROL | 3); -		OUT_RING(PIPE_CONTROL_QW_WRITE | -			 PIPE_CONTROL_WC_FLUSH | PIPE_CONTROL_IS_FLUSH | -			 PIPE_CONTROL_NOTIFY); -		OUT_RING(dev_priv->seqno_gfx_addr | PIPE_CONTROL_GLOBAL_GTT); -		OUT_RING(seqno); -		OUT_RING(0); -		OUT_RING(0); -		ADVANCE_LP_RING(); -	} else if (HAS_PIPE_CONTROL(dev)) { -		u32 scratch_addr = dev_priv->seqno_gfx_addr + 128; +	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); -		/* -		 * Workaround qword write incoherence by flushing the -		 * PIPE_NOTIFY buffers out to memory before requesting -		 * an interrupt. 
-		 */ -		BEGIN_LP_RING(32); -		OUT_RING(GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE | -			 PIPE_CONTROL_WC_FLUSH | PIPE_CONTROL_TC_FLUSH); -		OUT_RING(dev_priv->seqno_gfx_addr | PIPE_CONTROL_GLOBAL_GTT); -		OUT_RING(seqno); -		OUT_RING(0); -		PIPE_CONTROL_FLUSH(scratch_addr); -		scratch_addr += 128; /* write to separate cachelines */ -		PIPE_CONTROL_FLUSH(scratch_addr); -		scratch_addr += 128; -		PIPE_CONTROL_FLUSH(scratch_addr); -		scratch_addr += 128; -		PIPE_CONTROL_FLUSH(scratch_addr); -		scratch_addr += 128; -		PIPE_CONTROL_FLUSH(scratch_addr); -		scratch_addr += 128; -		PIPE_CONTROL_FLUSH(scratch_addr); -		OUT_RING(GFX_OP_PIPE_CONTROL | PIPE_CONTROL_QW_WRITE | -			 PIPE_CONTROL_WC_FLUSH | PIPE_CONTROL_TC_FLUSH | -			 PIPE_CONTROL_NOTIFY); -		OUT_RING(dev_priv->seqno_gfx_addr | PIPE_CONTROL_GLOBAL_GTT); -		OUT_RING(seqno); -		OUT_RING(0); -		ADVANCE_LP_RING(); +	ret = intel_ring_begin(waiter, 4); +	if (ret) +		return ret; + +	/* If seqno wrap happened, omit the wait with no-ops */ +	if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) { +		intel_ring_emit(waiter, dw1 | wait_mbox); +		intel_ring_emit(waiter, seqno); +		intel_ring_emit(waiter, 0); +		intel_ring_emit(waiter, MI_NOOP);  	} else { -		BEGIN_LP_RING(4); -		OUT_RING(MI_STORE_DWORD_INDEX); -		OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); -		OUT_RING(seqno); +		intel_ring_emit(waiter, MI_NOOP); +		intel_ring_emit(waiter, MI_NOOP); +		intel_ring_emit(waiter, MI_NOOP); +		intel_ring_emit(waiter, MI_NOOP); +	} +	intel_ring_advance(waiter); + +	return 0; +} + +#define PIPE_CONTROL_FLUSH(ring__, addr__)					\ +do {									\ +	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |		\ +		 PIPE_CONTROL_DEPTH_STALL);				\ +	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);			\ +	intel_ring_emit(ring__, 0);							\ +	intel_ring_emit(ring__, 0);							\ +} while (0) + +static int +pc_render_add_request(struct intel_engine_cs *ring) +{ +	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; +	int ret; + +	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently +	 * incoherent with writes to memory, i.e. completely fubar, +	 * so we need to use PIPE_NOTIFY instead. +	 * +	 * However, we also need to workaround the qword write +	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to +	 * memory before requesting an interrupt. 
+	 */ +	ret = intel_ring_begin(ring, 32); +	if (ret) +		return ret; + +	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | +			PIPE_CONTROL_WRITE_FLUSH | +			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE); +	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); +	intel_ring_emit(ring, ring->outstanding_lazy_seqno); +	intel_ring_emit(ring, 0); +	PIPE_CONTROL_FLUSH(ring, scratch_addr); +	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */ +	PIPE_CONTROL_FLUSH(ring, scratch_addr); +	scratch_addr += 2 * CACHELINE_BYTES; +	PIPE_CONTROL_FLUSH(ring, scratch_addr); +	scratch_addr += 2 * CACHELINE_BYTES; +	PIPE_CONTROL_FLUSH(ring, scratch_addr); +	scratch_addr += 2 * CACHELINE_BYTES; +	PIPE_CONTROL_FLUSH(ring, scratch_addr); +	scratch_addr += 2 * CACHELINE_BYTES; +	PIPE_CONTROL_FLUSH(ring, scratch_addr); + +	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE | +			PIPE_CONTROL_WRITE_FLUSH | +			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | +			PIPE_CONTROL_NOTIFY); +	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); +	intel_ring_emit(ring, ring->outstanding_lazy_seqno); +	intel_ring_emit(ring, 0); +	__intel_ring_advance(ring); + +	return 0; +} -		OUT_RING(MI_USER_INTERRUPT); -		ADVANCE_LP_RING(); +static u32 +gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency) +{ +	/* Workaround to force correct ordering between irq and seqno writes on +	 * ivb (and maybe also on snb) by reading from a CS register (like +	 * ACTHD) before reading the status page. */ +	if (!lazy_coherency) { +		struct drm_i915_private *dev_priv = ring->dev->dev_private; +		POSTING_READ(RING_ACTHD(ring->mmio_base));  	} -	return seqno; + +	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);  }  static u32 -render_ring_get_seqno(struct drm_device *dev, -		      struct intel_ring_buffer *ring) +ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)  { -	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; -	if (HAS_PIPE_CONTROL(dev)) -		return ((volatile u32 *)(dev_priv->seqno_page))[0]; -	else -		return intel_read_status_page(ring, I915_GEM_HWS_INDEX); +	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);  }  static void -render_ring_get_user_irq(struct drm_device *dev, -			 struct intel_ring_buffer *ring) +ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)  { -	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; -	unsigned long irqflags; +	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno); +} -	spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags); -	if (dev->irq_enabled && (++ring->user_irq_refcount == 1)) { -		if (HAS_PCH_SPLIT(dev)) -			ironlake_enable_graphics_irq(dev_priv, GT_PIPE_NOTIFY); -		else -			i915_enable_irq(dev_priv, I915_USER_INTERRUPT); +static u32 +pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency) +{ +	return ring->scratch.cpu_page[0]; +} + +static void +pc_render_set_seqno(struct intel_engine_cs *ring, u32 seqno) +{ +	ring->scratch.cpu_page[0] = seqno; +} + +static bool +gen5_ring_get_irq(struct intel_engine_cs *ring) +{ +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = dev->dev_private; +	unsigned long flags; + +	if (!dev->irq_enabled) +		return false; + +	spin_lock_irqsave(&dev_priv->irq_lock, flags); +	if (ring->irq_refcount++ == 0) +		ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask); +	spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + +	return true; +} + +static void 
+gen5_ring_put_irq(struct intel_engine_cs *ring) +{ +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = dev->dev_private; +	unsigned long flags; + +	spin_lock_irqsave(&dev_priv->irq_lock, flags); +	if (--ring->irq_refcount == 0) +		ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask); +	spin_unlock_irqrestore(&dev_priv->irq_lock, flags); +} + +static bool +i9xx_ring_get_irq(struct intel_engine_cs *ring) +{ +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = dev->dev_private; +	unsigned long flags; + +	if (!dev->irq_enabled) +		return false; + +	spin_lock_irqsave(&dev_priv->irq_lock, flags); +	if (ring->irq_refcount++ == 0) { +		dev_priv->irq_mask &= ~ring->irq_enable_mask; +		I915_WRITE(IMR, dev_priv->irq_mask); +		POSTING_READ(IMR);  	} -	spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags); +	spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + +	return true;  }  static void -render_ring_put_user_irq(struct drm_device *dev, -			 struct intel_ring_buffer *ring) +i9xx_ring_put_irq(struct intel_engine_cs *ring)  { -	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; -	unsigned long irqflags; +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = dev->dev_private; +	unsigned long flags; -	spin_lock_irqsave(&dev_priv->user_irq_lock, irqflags); -	BUG_ON(dev->irq_enabled && ring->user_irq_refcount <= 0); -	if (dev->irq_enabled && (--ring->user_irq_refcount == 0)) { -		if (HAS_PCH_SPLIT(dev)) -			ironlake_disable_graphics_irq(dev_priv, GT_PIPE_NOTIFY); -		else -			i915_disable_irq(dev_priv, I915_USER_INTERRUPT); +	spin_lock_irqsave(&dev_priv->irq_lock, flags); +	if (--ring->irq_refcount == 0) { +		dev_priv->irq_mask |= ring->irq_enable_mask; +		I915_WRITE(IMR, dev_priv->irq_mask); +		POSTING_READ(IMR);  	} -	spin_unlock_irqrestore(&dev_priv->user_irq_lock, irqflags); +	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);  } -void intel_ring_setup_status_page(struct drm_device *dev, -				  struct intel_ring_buffer *ring) +static bool +i8xx_ring_get_irq(struct intel_engine_cs *ring)  { -	drm_i915_private_t *dev_priv = dev->dev_private; -	if (IS_GEN6(dev)) { -		I915_WRITE(RING_HWS_PGA_GEN6(ring->mmio_base), -			   ring->status_page.gfx_addr); -		I915_READ(RING_HWS_PGA_GEN6(ring->mmio_base)); /* posting read */ -	} else { -		I915_WRITE(RING_HWS_PGA(ring->mmio_base), -			   ring->status_page.gfx_addr); -		I915_READ(RING_HWS_PGA(ring->mmio_base)); /* posting read */ +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = dev->dev_private; +	unsigned long flags; + +	if (!dev->irq_enabled) +		return false; + +	spin_lock_irqsave(&dev_priv->irq_lock, flags); +	if (ring->irq_refcount++ == 0) { +		dev_priv->irq_mask &= ~ring->irq_enable_mask; +		I915_WRITE16(IMR, dev_priv->irq_mask); +		POSTING_READ16(IMR);  	} +	spin_unlock_irqrestore(&dev_priv->irq_lock, flags); +	return true;  }  static void -bsd_ring_flush(struct drm_device *dev, -		struct intel_ring_buffer *ring, -		u32     invalidate_domains, -		u32     flush_domains) +i8xx_ring_put_irq(struct intel_engine_cs *ring)  { -	intel_ring_begin(dev, ring, 2); -	intel_ring_emit(dev, ring, MI_FLUSH); -	intel_ring_emit(dev, ring, MI_NOOP); -	intel_ring_advance(dev, ring); +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = dev->dev_private; +	unsigned long flags; + +	spin_lock_irqsave(&dev_priv->irq_lock, flags); +	if (--ring->irq_refcount == 0) { +		dev_priv->irq_mask |= ring->irq_enable_mask; +		I915_WRITE16(IMR, 
dev_priv->irq_mask); +		POSTING_READ16(IMR); +	} +	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);  } -static int init_bsd_ring(struct drm_device *dev, -			 struct intel_ring_buffer *ring) +void intel_ring_setup_status_page(struct intel_engine_cs *ring)  { -	return init_ring_common(dev, ring); +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = ring->dev->dev_private; +	u32 mmio = 0; + +	/* The ring status page addresses are no longer next to the rest of +	 * the ring registers as of gen7. +	 */ +	if (IS_GEN7(dev)) { +		switch (ring->id) { +		case RCS: +			mmio = RENDER_HWS_PGA_GEN7; +			break; +		case BCS: +			mmio = BLT_HWS_PGA_GEN7; +			break; +		/* +		 * VCS2 actually doesn't exist on Gen7. Only shut up +		 * gcc switch check warning +		 */ +		case VCS2: +		case VCS: +			mmio = BSD_HWS_PGA_GEN7; +			break; +		case VECS: +			mmio = VEBOX_HWS_PGA_GEN7; +			break; +		} +	} else if (IS_GEN6(ring->dev)) { +		mmio = RING_HWS_PGA_GEN6(ring->mmio_base); +	} else { +		/* XXX: gen8 returns to sanity */ +		mmio = RING_HWS_PGA(ring->mmio_base); +	} + +	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr); +	POSTING_READ(mmio); + +	/* +	 * Flush the TLB for this page +	 * +	 * FIXME: These two bits have disappeared on gen8, so a question +	 * arises: do we still need this and if so how should we go about +	 * invalidating the TLB? +	 */ +	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) { +		u32 reg = RING_INSTPM(ring->mmio_base); + +		/* ring should be idle before issuing a sync flush*/ +		WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0); + +		I915_WRITE(reg, +			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE | +					      INSTPM_SYNC_FLUSH)); +		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0, +			     1000)) +			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n", +				  ring->name); +	}  } -static u32 -ring_add_request(struct drm_device *dev, -		 struct intel_ring_buffer *ring, -		 u32 flush_domains) +static int +bsd_ring_flush(struct intel_engine_cs *ring, +	       u32     invalidate_domains, +	       u32     flush_domains)  { -	u32 seqno; +	int ret; -	seqno = i915_gem_get_seqno(dev); +	ret = intel_ring_begin(ring, 2); +	if (ret) +		return ret; -	intel_ring_begin(dev, ring, 4); -	intel_ring_emit(dev, ring, MI_STORE_DWORD_INDEX); -	intel_ring_emit(dev, ring, -			I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); -	intel_ring_emit(dev, ring, seqno); -	intel_ring_emit(dev, ring, MI_USER_INTERRUPT); -	intel_ring_advance(dev, ring); +	intel_ring_emit(ring, MI_FLUSH); +	intel_ring_emit(ring, MI_NOOP); +	intel_ring_advance(ring); +	return 0; +} -	DRM_DEBUG_DRIVER("%s %d\n", ring->name, seqno); +static int +i9xx_add_request(struct intel_engine_cs *ring) +{ +	int ret; + +	ret = intel_ring_begin(ring, 4); +	if (ret) +		return ret; + +	intel_ring_emit(ring, MI_STORE_DWORD_INDEX); +	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); +	intel_ring_emit(ring, ring->outstanding_lazy_seqno); +	intel_ring_emit(ring, MI_USER_INTERRUPT); +	__intel_ring_advance(ring); -	return seqno; +	return 0;  } -static void -bsd_ring_get_user_irq(struct drm_device *dev, -		      struct intel_ring_buffer *ring) +static bool +gen6_ring_get_irq(struct intel_engine_cs *ring)  { -	/* do nothing */ +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = dev->dev_private; +	unsigned long flags; + +	if (!dev->irq_enabled) +	       return false; + +	spin_lock_irqsave(&dev_priv->irq_lock, flags); +	if (ring->irq_refcount++ == 0) 
{ +		if (HAS_L3_DPF(dev) && ring->id == RCS) +			I915_WRITE_IMR(ring, +				       ~(ring->irq_enable_mask | +					 GT_PARITY_ERROR(dev))); +		else +			I915_WRITE_IMR(ring, ~ring->irq_enable_mask); +		ilk_enable_gt_irq(dev_priv, ring->irq_enable_mask); +	} +	spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + +	return true;  } +  static void -bsd_ring_put_user_irq(struct drm_device *dev, -		      struct intel_ring_buffer *ring) +gen6_ring_put_irq(struct intel_engine_cs *ring)  { -	/* do nothing */ +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = dev->dev_private; +	unsigned long flags; + +	spin_lock_irqsave(&dev_priv->irq_lock, flags); +	if (--ring->irq_refcount == 0) { +		if (HAS_L3_DPF(dev) && ring->id == RCS) +			I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev)); +		else +			I915_WRITE_IMR(ring, ~0); +		ilk_disable_gt_irq(dev_priv, ring->irq_enable_mask); +	} +	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);  } -static u32 -ring_status_page_get_seqno(struct drm_device *dev, -			   struct intel_ring_buffer *ring) +static bool +hsw_vebox_get_irq(struct intel_engine_cs *ring)  { -	return intel_read_status_page(ring, I915_GEM_HWS_INDEX); +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = dev->dev_private; +	unsigned long flags; + +	if (!dev->irq_enabled) +		return false; + +	spin_lock_irqsave(&dev_priv->irq_lock, flags); +	if (ring->irq_refcount++ == 0) { +		I915_WRITE_IMR(ring, ~ring->irq_enable_mask); +		snb_enable_pm_irq(dev_priv, ring->irq_enable_mask); +	} +	spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + +	return true;  } -static int -ring_dispatch_gem_execbuffer(struct drm_device *dev, -			     struct intel_ring_buffer *ring, -			     struct drm_i915_gem_execbuffer2 *exec, -			     struct drm_clip_rect *cliprects, -			     uint64_t exec_offset) -{ -	uint32_t exec_start; -	exec_start = (uint32_t) exec_offset + exec->batch_start_offset; -	intel_ring_begin(dev, ring, 2); -	intel_ring_emit(dev, ring, MI_BATCH_BUFFER_START | -			(2 << 6) | MI_BATCH_NON_SECURE_I965); -	intel_ring_emit(dev, ring, exec_start); -	intel_ring_advance(dev, ring); -	return 0; +static void +hsw_vebox_put_irq(struct intel_engine_cs *ring) +{ +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = dev->dev_private; +	unsigned long flags; + +	if (!dev->irq_enabled) +		return; + +	spin_lock_irqsave(&dev_priv->irq_lock, flags); +	if (--ring->irq_refcount == 0) { +		I915_WRITE_IMR(ring, ~0); +		snb_disable_pm_irq(dev_priv, ring->irq_enable_mask); +	} +	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);  } -static int -render_ring_dispatch_gem_execbuffer(struct drm_device *dev, -				    struct intel_ring_buffer *ring, -				    struct drm_i915_gem_execbuffer2 *exec, -				    struct drm_clip_rect *cliprects, -				    uint64_t exec_offset) -{ -	drm_i915_private_t *dev_priv = dev->dev_private; -	int nbox = exec->num_cliprects; -	int i = 0, count; -	uint32_t exec_start, exec_len; -	exec_start = (uint32_t) exec_offset + exec->batch_start_offset; -	exec_len = (uint32_t) exec->batch_len; - -	trace_i915_gem_request_submit(dev, dev_priv->next_seqno + 1); - -	count = nbox ? 
nbox : 1; - -	for (i = 0; i < count; i++) { -		if (i < nbox) { -			int ret = i915_emit_box(dev, cliprects, i, -						exec->DR1, exec->DR4); -			if (ret) -				return ret; +static bool +gen8_ring_get_irq(struct intel_engine_cs *ring) +{ +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = dev->dev_private; +	unsigned long flags; + +	if (!dev->irq_enabled) +		return false; + +	spin_lock_irqsave(&dev_priv->irq_lock, flags); +	if (ring->irq_refcount++ == 0) { +		if (HAS_L3_DPF(dev) && ring->id == RCS) { +			I915_WRITE_IMR(ring, +				       ~(ring->irq_enable_mask | +					 GT_RENDER_L3_PARITY_ERROR_INTERRUPT)); +		} else { +			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);  		} +		POSTING_READ(RING_IMR(ring->mmio_base)); +	} +	spin_unlock_irqrestore(&dev_priv->irq_lock, flags); + +	return true; +} -		if (IS_I830(dev) || IS_845G(dev)) { -			intel_ring_begin(dev, ring, 4); -			intel_ring_emit(dev, ring, MI_BATCH_BUFFER); -			intel_ring_emit(dev, ring, -					exec_start | MI_BATCH_NON_SECURE); -			intel_ring_emit(dev, ring, exec_start + exec_len - 4); -			intel_ring_emit(dev, ring, 0); +static void +gen8_ring_put_irq(struct intel_engine_cs *ring) +{ +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = dev->dev_private; +	unsigned long flags; + +	spin_lock_irqsave(&dev_priv->irq_lock, flags); +	if (--ring->irq_refcount == 0) { +		if (HAS_L3_DPF(dev) && ring->id == RCS) { +			I915_WRITE_IMR(ring, +				       ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);  		} else { -			intel_ring_begin(dev, ring, 2); -			if (INTEL_INFO(dev)->gen >= 4) { -				intel_ring_emit(dev, ring, -						MI_BATCH_BUFFER_START | (2 << 6) -						| MI_BATCH_NON_SECURE_I965); -				intel_ring_emit(dev, ring, exec_start); -			} else { -				intel_ring_emit(dev, ring, MI_BATCH_BUFFER_START -						| (2 << 6)); -				intel_ring_emit(dev, ring, exec_start | -						MI_BATCH_NON_SECURE); -			} +			I915_WRITE_IMR(ring, ~0);  		} -		intel_ring_advance(dev, ring); +		POSTING_READ(RING_IMR(ring->mmio_base));  	} +	spin_unlock_irqrestore(&dev_priv->irq_lock, flags); +} -	if (IS_G4X(dev) || IS_GEN5(dev)) { -		intel_ring_begin(dev, ring, 2); -		intel_ring_emit(dev, ring, MI_FLUSH | -				MI_NO_WRITE_FLUSH | -				MI_INVALIDATE_ISP ); -		intel_ring_emit(dev, ring, MI_NOOP); -		intel_ring_advance(dev, ring); +static int +i965_dispatch_execbuffer(struct intel_engine_cs *ring, +			 u64 offset, u32 length, +			 unsigned flags) +{ +	int ret; + +	ret = intel_ring_begin(ring, 2); +	if (ret) +		return ret; + +	intel_ring_emit(ring, +			MI_BATCH_BUFFER_START | +			MI_BATCH_GTT | +			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965)); +	intel_ring_emit(ring, offset); +	intel_ring_advance(ring); + +	return 0; +} + +/* Just userspace ABI convention to limit the wa batch bo to a resonable size */ +#define I830_BATCH_LIMIT (256*1024) +static int +i830_dispatch_execbuffer(struct intel_engine_cs *ring, +				u64 offset, u32 len, +				unsigned flags) +{ +	int ret; + +	if (flags & I915_DISPATCH_PINNED) { +		ret = intel_ring_begin(ring, 4); +		if (ret) +			return ret; + +		intel_ring_emit(ring, MI_BATCH_BUFFER); +		intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 
0 : MI_BATCH_NON_SECURE)); +		intel_ring_emit(ring, offset + len - 8); +		intel_ring_emit(ring, MI_NOOP); +		intel_ring_advance(ring); +	} else { +		u32 cs_offset = ring->scratch.gtt_offset; + +		if (len > I830_BATCH_LIMIT) +			return -ENOSPC; + +		ret = intel_ring_begin(ring, 9+3); +		if (ret) +			return ret; +		/* Blit the batch (which has now all relocs applied) to the stable batch +		 * scratch bo area (so that the CS never stumbles over its tlb +		 * invalidation bug) ... */ +		intel_ring_emit(ring, XY_SRC_COPY_BLT_CMD | +				XY_SRC_COPY_BLT_WRITE_ALPHA | +				XY_SRC_COPY_BLT_WRITE_RGB); +		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_GXCOPY | 4096); +		intel_ring_emit(ring, 0); +		intel_ring_emit(ring, (DIV_ROUND_UP(len, 4096) << 16) | 1024); +		intel_ring_emit(ring, cs_offset); +		intel_ring_emit(ring, 0); +		intel_ring_emit(ring, 4096); +		intel_ring_emit(ring, offset); +		intel_ring_emit(ring, MI_FLUSH); + +		/* ... and execute it. */ +		intel_ring_emit(ring, MI_BATCH_BUFFER); +		intel_ring_emit(ring, cs_offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); +		intel_ring_emit(ring, cs_offset + len - 8); +		intel_ring_advance(ring);  	} -	/* XXX breadcrumb */  	return 0;  } -static void cleanup_status_page(struct drm_device *dev, -				struct intel_ring_buffer *ring) +static int +i915_dispatch_execbuffer(struct intel_engine_cs *ring, +			 u64 offset, u32 len, +			 unsigned flags)  { -	drm_i915_private_t *dev_priv = dev->dev_private; -	struct drm_gem_object *obj; -	struct drm_i915_gem_object *obj_priv; +	int ret; + +	ret = intel_ring_begin(ring, 2); +	if (ret) +		return ret; + +	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT); +	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE)); +	intel_ring_advance(ring); + +	return 0; +} + +static void cleanup_status_page(struct intel_engine_cs *ring) +{ +	struct drm_i915_gem_object *obj;  	obj = ring->status_page.obj;  	if (obj == NULL)  		return; -	obj_priv = to_intel_bo(obj); -	kunmap(obj_priv->pages[0]); -	i915_gem_object_unpin(obj); -	drm_gem_object_unreference(obj); +	kunmap(sg_page(obj->pages->sgl)); +	i915_gem_object_ggtt_unpin(obj); +	drm_gem_object_unreference(&obj->base);  	ring->status_page.obj = NULL; - -	memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));  } -static int init_status_page(struct drm_device *dev, -			    struct intel_ring_buffer *ring) +static int init_status_page(struct intel_engine_cs *ring)  { -	drm_i915_private_t *dev_priv = dev->dev_private; -	struct drm_gem_object *obj; -	struct drm_i915_gem_object *obj_priv; -	int ret; +	struct drm_i915_gem_object *obj; -	obj = i915_gem_alloc_object(dev, 4096); -	if (obj == NULL) { -		DRM_ERROR("Failed to allocate status page\n"); -		ret = -ENOMEM; -		goto err; -	} -	obj_priv = to_intel_bo(obj); -	obj_priv->agp_type = AGP_USER_CACHED_MEMORY; +	if ((obj = ring->status_page.obj) == NULL) { +		int ret; -	ret = i915_gem_object_pin(obj, 4096); -	if (ret != 0) { -		goto err_unref; -	} +		obj = i915_gem_alloc_object(ring->dev, 4096); +		if (obj == NULL) { +			DRM_ERROR("Failed to allocate status page\n"); +			return -ENOMEM; +		} -	ring->status_page.gfx_addr = obj_priv->gtt_offset; -	ring->status_page.page_addr = kmap(obj_priv->pages[0]); -	if (ring->status_page.page_addr == NULL) { -		memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map)); -		goto err_unpin; +		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC); +		if (ret) +			goto err_unref; + +		ret = i915_gem_obj_ggtt_pin(obj, 4096, 0); +		if (ret) { 
+err_unref: +			drm_gem_object_unreference(&obj->base); +			return ret; +		} + +		ring->status_page.obj = obj;  	} -	ring->status_page.obj = obj; + +	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj); +	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));  	memset(ring->status_page.page_addr, 0, PAGE_SIZE); -	intel_ring_setup_status_page(dev, ring);  	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",  			ring->name, ring->status_page.gfx_addr);  	return 0; +} + +static int init_phys_status_page(struct intel_engine_cs *ring) +{ +	struct drm_i915_private *dev_priv = ring->dev->dev_private; + +	if (!dev_priv->status_page_dmah) { +		dev_priv->status_page_dmah = +			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE); +		if (!dev_priv->status_page_dmah) +			return -ENOMEM; +	} + +	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr; +	memset(ring->status_page.page_addr, 0, PAGE_SIZE); + +	return 0; +} + +static int allocate_ring_buffer(struct intel_engine_cs *ring) +{ +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = to_i915(dev); +	struct intel_ringbuffer *ringbuf = ring->buffer; +	struct drm_i915_gem_object *obj; +	int ret; + +	if (intel_ring_initialized(ring)) +		return 0; + +	obj = NULL; +	if (!HAS_LLC(dev)) +		obj = i915_gem_object_create_stolen(dev, ringbuf->size); +	if (obj == NULL) +		obj = i915_gem_alloc_object(dev, ringbuf->size); +	if (obj == NULL) +		return -ENOMEM; + +	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE); +	if (ret) +		goto err_unref; + +	ret = i915_gem_object_set_to_gtt_domain(obj, true); +	if (ret) +		goto err_unpin; + +	ringbuf->virtual_start = +		ioremap_wc(dev_priv->gtt.mappable_base + i915_gem_obj_ggtt_offset(obj), +				ringbuf->size); +	if (ringbuf->virtual_start == NULL) { +		ret = -EINVAL; +		goto err_unpin; +	} + +	ringbuf->obj = obj; +	return 0;  err_unpin: -	i915_gem_object_unpin(obj); +	i915_gem_object_ggtt_unpin(obj);  err_unref: -	drm_gem_object_unreference(obj); -err: +	drm_gem_object_unreference(&obj->base);  	return ret;  } -int intel_init_ring_buffer(struct drm_device *dev, -			   struct intel_ring_buffer *ring) +static int intel_init_ring_buffer(struct drm_device *dev, +				  struct intel_engine_cs *ring)  { -	struct drm_i915_private *dev_priv = dev->dev_private; -	struct drm_i915_gem_object *obj_priv; -	struct drm_gem_object *obj; +	struct intel_ringbuffer *ringbuf = ring->buffer;  	int ret; +	if (ringbuf == NULL) { +		ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL); +		if (!ringbuf) +			return -ENOMEM; +		ring->buffer = ringbuf; +	} +  	ring->dev = dev;  	INIT_LIST_HEAD(&ring->active_list);  	INIT_LIST_HEAD(&ring->request_list); -	INIT_LIST_HEAD(&ring->gpu_write_list); +	ringbuf->size = 32 * PAGE_SIZE; +	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno)); + +	init_waitqueue_head(&ring->irq_queue);  	if (I915_NEED_GFX_HWS(dev)) { -		ret = init_status_page(dev, ring); +		ret = init_status_page(ring);  		if (ret) -			return ret; +			goto error; +	} else { +		BUG_ON(ring->id != RCS); +		ret = init_phys_status_page(ring); +		if (ret) +			goto error;  	} -	obj = i915_gem_alloc_object(dev, ring->size); -	if (obj == NULL) { -		DRM_ERROR("Failed to allocate ringbuffer\n"); -		ret = -ENOMEM; -		goto err_hws; +	ret = allocate_ring_buffer(ring); +	if (ret) { +		DRM_ERROR("Failed to allocate ringbuffer %s: %d\n", ring->name, ret); +		goto error;  	} -	ring->gem_object = obj; +	/* Workaround an erratum on the i830 which causes a hang if +	 * the TAIL pointer points to within the last 2 
cachelines +	 * of the buffer. +	 */ +	ringbuf->effective_size = ringbuf->size; +	if (IS_I830(dev) || IS_845G(dev)) +		ringbuf->effective_size -= 2 * CACHELINE_BYTES; -	ret = i915_gem_object_pin(obj, PAGE_SIZE); +	ret = i915_cmd_parser_init_ring(ring);  	if (ret) -		goto err_unref; - -	obj_priv = to_intel_bo(obj); -	ring->map.size = ring->size; -	ring->map.offset = dev->agp->base + obj_priv->gtt_offset; -	ring->map.type = 0; -	ring->map.flags = 0; -	ring->map.mtrr = 0; +		goto error; -	drm_core_ioremap_wc(&ring->map, dev); -	if (ring->map.handle == NULL) { -		DRM_ERROR("Failed to map ringbuffer.\n"); -		ret = -EINVAL; -		goto err_unpin; -	} - -	ring->virtual_start = ring->map.handle; -	ret = ring->init(dev, ring); +	ret = ring->init(ring);  	if (ret) -		goto err_unmap; +		goto error; -	if (!drm_core_check_feature(dev, DRIVER_MODESET)) -		i915_kernel_lost_context(dev); -	else { -		ring->head = I915_READ_HEAD(ring) & HEAD_ADDR; -		ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR; -		ring->space = ring->head - (ring->tail + 8); -		if (ring->space < 0) -			ring->space += ring->size; -	} -	return ret; +	return 0; -err_unmap: -	drm_core_ioremapfree(&ring->map, dev); -err_unpin: -	i915_gem_object_unpin(obj); -err_unref: -	drm_gem_object_unreference(obj); -	ring->gem_object = NULL; -err_hws: -	cleanup_status_page(dev, ring); +error: +	kfree(ringbuf); +	ring->buffer = NULL;  	return ret;  } -void intel_cleanup_ring_buffer(struct drm_device *dev, -			       struct intel_ring_buffer *ring) +void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)  { -	if (ring->gem_object == NULL) +	struct drm_i915_private *dev_priv = to_i915(ring->dev); +	struct intel_ringbuffer *ringbuf = ring->buffer; + +	if (!intel_ring_initialized(ring))  		return; -	drm_core_ioremapfree(&ring->map, dev); +	intel_stop_ring_buffer(ring); +	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0); -	i915_gem_object_unpin(ring->gem_object); -	drm_gem_object_unreference(ring->gem_object); -	ring->gem_object = NULL; +	iounmap(ringbuf->virtual_start); + +	i915_gem_object_ggtt_unpin(ringbuf->obj); +	drm_gem_object_unreference(&ringbuf->obj->base); +	ringbuf->obj = NULL; +	ring->preallocated_lazy_request = NULL; +	ring->outstanding_lazy_seqno = 0;  	if (ring->cleanup)  		ring->cleanup(ring); -	cleanup_status_page(dev, ring); +	cleanup_status_page(ring); + +	i915_cmd_parser_fini_ring(ring); + +	kfree(ringbuf); +	ring->buffer = NULL;  } -static int intel_wrap_ring_buffer(struct drm_device *dev, -				  struct intel_ring_buffer *ring) +static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)  { -	unsigned int *virt; -	int rem; -	rem = ring->size - ring->tail; +	struct intel_ringbuffer *ringbuf = ring->buffer; +	struct drm_i915_gem_request *request; +	u32 seqno = 0; +	int ret; -	if (ring->space < rem) { -		int ret = intel_wait_ring_buffer(dev, ring, rem); -		if (ret) -			return ret; +	if (ringbuf->last_retired_head != -1) { +		ringbuf->head = ringbuf->last_retired_head; +		ringbuf->last_retired_head = -1; + +		ringbuf->space = ring_space(ring); +		if (ringbuf->space >= n) +			return 0;  	} -	virt = (unsigned int *)(ring->virtual_start + ring->tail); -	rem /= 8; -	while (rem--) { -		*virt++ = MI_NOOP; -		*virt++ = MI_NOOP; +	list_for_each_entry(request, &ring->request_list, list) { +		if (__ring_space(request->tail, ringbuf->tail, ringbuf->size) >= n) { +			seqno = request->seqno; +			break; +		}  	} -	ring->tail = 0; -	ring->space = ring->head - 8; +	if (seqno == 0) +		return -ENOSPC; + +	ret = 
i915_wait_seqno(ring, seqno); +	if (ret) +		return ret; + +	i915_gem_retire_requests_ring(ring); +	ringbuf->head = ringbuf->last_retired_head; +	ringbuf->last_retired_head = -1; +	ringbuf->space = ring_space(ring);  	return 0;  } -int intel_wait_ring_buffer(struct drm_device *dev, -			   struct intel_ring_buffer *ring, int n) +static int ring_wait_for_space(struct intel_engine_cs *ring, int n)  { +	struct drm_device *dev = ring->dev; +	struct drm_i915_private *dev_priv = dev->dev_private; +	struct intel_ringbuffer *ringbuf = ring->buffer;  	unsigned long end; -	drm_i915_private_t *dev_priv = dev->dev_private; -	u32 head; - -	head = intel_read_status_page(ring, 4); -	if (head) { -		ring->head = head & HEAD_ADDR; -		ring->space = ring->head - (ring->tail + 8); -		if (ring->space < 0) -			ring->space += ring->size; -		if (ring->space >= n) -			return 0; -	} +	int ret; + +	ret = intel_ring_wait_request(ring, n); +	if (ret != -ENOSPC) +		return ret; + +	/* force the tail write in case we have been skipping them */ +	__intel_ring_advance(ring); + +	/* With GEM the hangcheck timer should kick us out of the loop, +	 * leaving it early runs the risk of corrupting GEM state (due +	 * to running on almost untested codepaths). But on resume +	 * timers don't work yet, so prevent a complete hang in that +	 * case by choosing an insanely large timeout. */ +	end = jiffies + 60 * HZ; -	trace_i915_ring_wait_begin (dev); -	end = jiffies + 3 * HZ; +	trace_i915_ring_wait_begin(ring);  	do { -		ring->head = I915_READ_HEAD(ring) & HEAD_ADDR; -		ring->space = ring->head - (ring->tail + 8); -		if (ring->space < 0) -			ring->space += ring->size; -		if (ring->space >= n) { -			trace_i915_ring_wait_end (dev); -			return 0; +		ringbuf->head = I915_READ_HEAD(ring); +		ringbuf->space = ring_space(ring); +		if (ringbuf->space >= n) { +			ret = 0; +			break;  		} -		if (dev->primary->master) { +		if (!drm_core_check_feature(dev, DRIVER_MODESET) && +		    dev->primary->master) {  			struct drm_i915_master_private *master_priv = dev->primary->master->driver_priv;  			if (master_priv->sarea_priv)  				master_priv->sarea_priv->perf_boxes |= I915_BOX_WAIT;  		}  		msleep(1); -	} while (!time_after(jiffies, end)); -	trace_i915_ring_wait_end (dev); -	return -EBUSY; -} - -void intel_ring_begin(struct drm_device *dev, -		      struct intel_ring_buffer *ring, -		      int num_dwords) -{ -	int n = 4*num_dwords; -	if (unlikely(ring->tail + n > ring->size)) -		intel_wrap_ring_buffer(dev, ring); -	if (unlikely(ring->space < n)) -		intel_wait_ring_buffer(dev, ring, n); - -	ring->space -= n; -} - -void intel_ring_advance(struct drm_device *dev, -			struct intel_ring_buffer *ring) -{ -	ring->tail &= ring->size - 1; -	ring->write_tail(dev, ring, ring->tail); -} - -static const struct intel_ring_buffer render_ring = { -	.name			= "render ring", -	.id			= RING_RENDER, -	.mmio_base		= RENDER_RING_BASE, -	.size			= 32 * PAGE_SIZE, -	.init			= init_render_ring, -	.write_tail		= ring_write_tail, -	.flush			= render_ring_flush, -	.add_request		= render_ring_add_request, -	.get_seqno		= render_ring_get_seqno, -	.user_irq_get		= render_ring_get_user_irq, -	.user_irq_put		= render_ring_put_user_irq, -	.dispatch_gem_execbuffer = render_ring_dispatch_gem_execbuffer, -}; - -/* ring buffer for bit-stream decoder */ - -static const struct intel_ring_buffer bsd_ring = { -	.name                   = "bsd ring", -	.id			= RING_BSD, -	.mmio_base		= BSD_RING_BASE, -	.size			= 32 * PAGE_SIZE, -	.init			= init_bsd_ring, -	.write_tail		= ring_write_tail, -	
.flush			= bsd_ring_flush, -	.add_request		= ring_add_request, -	.get_seqno		= ring_status_page_get_seqno, -	.user_irq_get		= bsd_ring_get_user_irq, -	.user_irq_put		= bsd_ring_put_user_irq, -	.dispatch_gem_execbuffer = ring_dispatch_gem_execbuffer, -}; - - -static void gen6_bsd_ring_write_tail(struct drm_device *dev, -				     struct intel_ring_buffer *ring, + +		if (dev_priv->mm.interruptible && signal_pending(current)) { +			ret = -ERESTARTSYS; +			break; +		} + +		ret = i915_gem_check_wedge(&dev_priv->gpu_error, +					   dev_priv->mm.interruptible); +		if (ret) +			break; + +		if (time_after(jiffies, end)) { +			ret = -EBUSY; +			break; +		} +	} while (1); +	trace_i915_ring_wait_end(ring); +	return ret; +} + +static int intel_wrap_ring_buffer(struct intel_engine_cs *ring) +{ +	uint32_t __iomem *virt; +	struct intel_ringbuffer *ringbuf = ring->buffer; +	int rem = ringbuf->size - ringbuf->tail; + +	if (ringbuf->space < rem) { +		int ret = ring_wait_for_space(ring, rem); +		if (ret) +			return ret; +	} + +	virt = ringbuf->virtual_start + ringbuf->tail; +	rem /= 4; +	while (rem--) +		iowrite32(MI_NOOP, virt++); + +	ringbuf->tail = 0; +	ringbuf->space = ring_space(ring); + +	return 0; +} + +int intel_ring_idle(struct intel_engine_cs *ring) +{ +	u32 seqno; +	int ret; + +	/* We need to add any requests required to flush the objects and ring */ +	if (ring->outstanding_lazy_seqno) { +		ret = i915_add_request(ring, NULL); +		if (ret) +			return ret; +	} + +	/* Wait upon the last request to be completed */ +	if (list_empty(&ring->request_list)) +		return 0; + +	seqno = list_entry(ring->request_list.prev, +			   struct drm_i915_gem_request, +			   list)->seqno; + +	return i915_wait_seqno(ring, seqno); +} + +static int +intel_ring_alloc_seqno(struct intel_engine_cs *ring) +{ +	if (ring->outstanding_lazy_seqno) +		return 0; + +	if (ring->preallocated_lazy_request == NULL) { +		struct drm_i915_gem_request *request; + +		request = kmalloc(sizeof(*request), GFP_KERNEL); +		if (request == NULL) +			return -ENOMEM; + +		ring->preallocated_lazy_request = request; +	} + +	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno); +} + +static int __intel_ring_prepare(struct intel_engine_cs *ring, +				int bytes) +{ +	struct intel_ringbuffer *ringbuf = ring->buffer; +	int ret; + +	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) { +		ret = intel_wrap_ring_buffer(ring); +		if (unlikely(ret)) +			return ret; +	} + +	if (unlikely(ringbuf->space < bytes)) { +		ret = ring_wait_for_space(ring, bytes); +		if (unlikely(ret)) +			return ret; +	} + +	return 0; +} + +int intel_ring_begin(struct intel_engine_cs *ring, +		     int num_dwords) +{ +	struct drm_i915_private *dev_priv = ring->dev->dev_private; +	int ret; + +	ret = i915_gem_check_wedge(&dev_priv->gpu_error, +				   dev_priv->mm.interruptible); +	if (ret) +		return ret; + +	ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t)); +	if (ret) +		return ret; + +	/* Preallocate the olr before touching the ring */ +	ret = intel_ring_alloc_seqno(ring); +	if (ret) +		return ret; + +	ring->buffer->space -= num_dwords * sizeof(uint32_t); +	return 0; +} + +/* Align the ring tail to a cacheline boundary */ +int intel_ring_cacheline_align(struct intel_engine_cs *ring) +{ +	int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t); +	int ret; + +	if (num_dwords == 0) +		return 0; + +	num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; +	ret = intel_ring_begin(ring, num_dwords); +	if (ret) +		return ret; + +	
while (num_dwords--) +		intel_ring_emit(ring, MI_NOOP); + +	intel_ring_advance(ring); + +	return 0; +} + +void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno) +{ +	struct drm_i915_private *dev_priv = ring->dev->dev_private; + +	BUG_ON(ring->outstanding_lazy_seqno); + +	if (INTEL_INFO(ring->dev)->gen >= 6) { +		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0); +		I915_WRITE(RING_SYNC_1(ring->mmio_base), 0); +		if (HAS_VEBOX(ring->dev)) +			I915_WRITE(RING_SYNC_2(ring->mmio_base), 0); +	} + +	ring->set_seqno(ring, seqno); +	ring->hangcheck.seqno = seqno; +} + +static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,  				     u32 value)  { -       drm_i915_private_t *dev_priv = dev->dev_private; +	struct drm_i915_private *dev_priv = ring->dev->dev_private;         /* Every tail move must follow the sequence below */ -       I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL, -	       GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK | -	       GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_DISABLE); -       I915_WRITE(GEN6_BSD_RNCID, 0x0); -       if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) & -                               GEN6_BSD_SLEEP_PSMI_CONTROL_IDLE_INDICATOR) == 0, -                       50)) -               DRM_ERROR("timed out waiting for IDLE Indicator\n"); +	/* Disable notification that the ring is IDLE. The GT +	 * will then assume that it is busy and bring it out of rc6. +	 */ +	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL, +		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE)); + +	/* Clear the context id. Here be magic! */ +	I915_WRITE64(GEN6_BSD_RNCID, 0x0); -       I915_WRITE_TAIL(ring, value); -       I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL, -	       GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_MODIFY_MASK | -	       GEN6_BSD_SLEEP_PSMI_CONTROL_RC_ILDL_MESSAGE_ENABLE); +	/* Wait for the ring not to be idle, i.e. for it to wake up. */ +	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) & +		      GEN6_BSD_SLEEP_INDICATOR) == 0, +		     50)) +		DRM_ERROR("timed out waiting for the BSD ring to wake up\n"); + +	/* Now that the ring is fully powered up, update the tail */ +	I915_WRITE_TAIL(ring, value); +	POSTING_READ(RING_TAIL(ring->mmio_base)); + +	/* Let the ring send IDLE messages to the GT again, +	 * and so let it sleep to conserve power when idle. +	 */ +	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL, +		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));  } -static void gen6_ring_flush(struct drm_device *dev, -			    struct intel_ring_buffer *ring, -			    u32 invalidate_domains, -			    u32 flush_domains) +static int gen6_bsd_ring_flush(struct intel_engine_cs *ring, +			       u32 invalidate, u32 flush)  { -       intel_ring_begin(dev, ring, 4); -       intel_ring_emit(dev, ring, MI_FLUSH_DW); -       intel_ring_emit(dev, ring, 0); -       intel_ring_emit(dev, ring, 0); -       intel_ring_emit(dev, ring, 0); -       intel_ring_advance(dev, ring); +	uint32_t cmd; +	int ret; + +	ret = intel_ring_begin(ring, 4); +	if (ret) +		return ret; + +	cmd = MI_FLUSH_DW; +	if (INTEL_INFO(ring->dev)->gen >= 8) +		cmd += 1; +	/* +	 * Bspec vol 1c.5 - video engine command streamer: +	 * "If ENABLED, all TLBs will be invalidated once the flush +	 * operation is complete. This bit is only valid when the +	 * Post-Sync Operation field is a value of 1h or 3h." 
+	 */ +	if (invalidate & I915_GEM_GPU_DOMAINS) +		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD | +			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; +	intel_ring_emit(ring, cmd); +	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); +	if (INTEL_INFO(ring->dev)->gen >= 8) { +		intel_ring_emit(ring, 0); /* upper addr */ +		intel_ring_emit(ring, 0); /* value */ +	} else  { +		intel_ring_emit(ring, 0); +		intel_ring_emit(ring, MI_NOOP); +	} +	intel_ring_advance(ring); +	return 0;  }  static int -gen6_ring_dispatch_gem_execbuffer(struct drm_device *dev, -				  struct intel_ring_buffer *ring, -				  struct drm_i915_gem_execbuffer2 *exec, -				  struct drm_clip_rect *cliprects, -				  uint64_t exec_offset) -{ -       uint32_t exec_start; - -       exec_start = (uint32_t) exec_offset + exec->batch_start_offset; - -       intel_ring_begin(dev, ring, 2); -       intel_ring_emit(dev, ring, -		       MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965); -       /* bit0-7 is the length on GEN6+ */ -       intel_ring_emit(dev, ring, exec_start); -       intel_ring_advance(dev, ring); - -       return 0; -} - -/* ring buffer for Video Codec for Gen6+ */ -static const struct intel_ring_buffer gen6_bsd_ring = { -       .name			= "gen6 bsd ring", -       .id			= RING_BSD, -       .mmio_base		= GEN6_BSD_RING_BASE, -       .size			= 32 * PAGE_SIZE, -       .init			= init_bsd_ring, -       .write_tail		= gen6_bsd_ring_write_tail, -       .flush			= gen6_ring_flush, -       .add_request		= ring_add_request, -       .get_seqno		= ring_status_page_get_seqno, -       .user_irq_get		= bsd_ring_get_user_irq, -       .user_irq_put		= bsd_ring_put_user_irq, -       .dispatch_gem_execbuffer	= gen6_ring_dispatch_gem_execbuffer, -}; +gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, +			      u64 offset, u32 len, +			      unsigned flags) +{ +	struct drm_i915_private *dev_priv = ring->dev->dev_private; +	bool ppgtt = dev_priv->mm.aliasing_ppgtt != NULL && +		!(flags & I915_DISPATCH_SECURE); +	int ret; -/* Blitter support (SandyBridge+) */ +	ret = intel_ring_begin(ring, 4); +	if (ret) +		return ret; -static void -blt_ring_get_user_irq(struct drm_device *dev, -		      struct intel_ring_buffer *ring) -{ -	/* do nothing */ +	/* FIXME(BDW): Address space and security selectors. */ +	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); +	intel_ring_emit(ring, lower_32_bits(offset)); +	intel_ring_emit(ring, upper_32_bits(offset)); +	intel_ring_emit(ring, MI_NOOP); +	intel_ring_advance(ring); + +	return 0;  } -static void -blt_ring_put_user_irq(struct drm_device *dev, -		      struct intel_ring_buffer *ring) + +static int +hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, +			      u64 offset, u32 len, +			      unsigned flags)  { -	/* do nothing */ +	int ret; + +	ret = intel_ring_begin(ring, 2); +	if (ret) +		return ret; + +	intel_ring_emit(ring, +			MI_BATCH_BUFFER_START | MI_BATCH_PPGTT_HSW | +			(flags & I915_DISPATCH_SECURE ? 
0 : MI_BATCH_NON_SECURE_HSW)); +	/* bit0-7 is the length on GEN6+ */ +	intel_ring_emit(ring, offset); +	intel_ring_advance(ring); + +	return 0;  } +static int +gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring, +			      u64 offset, u32 len, +			      unsigned flags) +{ +	int ret; -/* Workaround for some stepping of SNB, - * each time when BLT engine ring tail moved, - * the first command in the ring to be parsed - * should be MI_BATCH_BUFFER_START - */ -#define NEED_BLT_WORKAROUND(dev) \ -	(IS_GEN6(dev) && (dev->pdev->revision < 8)) +	ret = intel_ring_begin(ring, 2); +	if (ret) +		return ret; -static inline struct drm_i915_gem_object * -to_blt_workaround(struct intel_ring_buffer *ring) +	intel_ring_emit(ring, +			MI_BATCH_BUFFER_START | +			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965)); +	/* bit0-7 is the length on GEN6+ */ +	intel_ring_emit(ring, offset); +	intel_ring_advance(ring); + +	return 0; +} + +/* Blitter support (SandyBridge+) */ + +static int gen6_ring_flush(struct intel_engine_cs *ring, +			   u32 invalidate, u32 flush)  { -	return ring->private; +	struct drm_device *dev = ring->dev; +	uint32_t cmd; +	int ret; + +	ret = intel_ring_begin(ring, 4); +	if (ret) +		return ret; + +	cmd = MI_FLUSH_DW; +	if (INTEL_INFO(ring->dev)->gen >= 8) +		cmd += 1; +	/* +	 * Bspec vol 1c.3 - blitter engine command streamer: +	 * "If ENABLED, all TLBs will be invalidated once the flush +	 * operation is complete. This bit is only valid when the +	 * Post-Sync Operation field is a value of 1h or 3h." +	 */ +	if (invalidate & I915_GEM_DOMAIN_RENDER) +		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX | +			MI_FLUSH_DW_OP_STOREDW; +	intel_ring_emit(ring, cmd); +	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT); +	if (INTEL_INFO(ring->dev)->gen >= 8) { +		intel_ring_emit(ring, 0); /* upper addr */ +		intel_ring_emit(ring, 0); /* value */ +	} else  { +		intel_ring_emit(ring, 0); +		intel_ring_emit(ring, MI_NOOP); +	} +	intel_ring_advance(ring); + +	if (IS_GEN7(dev) && !invalidate && flush) +		return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN); + +	return 0;  } -static int blt_ring_init(struct drm_device *dev, -			 struct intel_ring_buffer *ring) +int intel_init_render_ring_buffer(struct drm_device *dev)  { -	if (NEED_BLT_WORKAROUND(dev)) { +	struct drm_i915_private *dev_priv = dev->dev_private; +	struct intel_engine_cs *ring = &dev_priv->ring[RCS]; + +	ring->name = "render ring"; +	ring->id = RCS; +	ring->mmio_base = RENDER_RING_BASE; + +	if (INTEL_INFO(dev)->gen >= 6) { +		ring->add_request = gen6_add_request; +		ring->flush = gen7_render_ring_flush; +		if (INTEL_INFO(dev)->gen == 6) +			ring->flush = gen6_render_ring_flush; +		if (INTEL_INFO(dev)->gen >= 8) { +			ring->flush = gen8_render_ring_flush; +			ring->irq_get = gen8_ring_get_irq; +			ring->irq_put = gen8_ring_put_irq; +		} else { +			ring->irq_get = gen6_ring_get_irq; +			ring->irq_put = gen6_ring_put_irq; +		} +		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT; +		ring->get_seqno = gen6_ring_get_seqno; +		ring->set_seqno = ring_set_seqno; +		ring->semaphore.sync_to = gen6_ring_sync; +		ring->semaphore.signal = gen6_signal; +		/* +		 * The current semaphore is only applied on pre-gen8 platform. +		 * And there is no VCS2 ring on the pre-gen8 platform. So the +		 * semaphore between RCS and VCS2 is initialized as INVALID. +		 * Gen8 will initialize the sema between VCS2 and RCS later. 
+		 */ +		ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID; +		ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV; +		ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB; +		ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE; +		ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; +		ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC; +		ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC; +		ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC; +		ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC; +		ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; +	} else if (IS_GEN5(dev)) { +		ring->add_request = pc_render_add_request; +		ring->flush = gen4_render_ring_flush; +		ring->get_seqno = pc_render_get_seqno; +		ring->set_seqno = pc_render_set_seqno; +		ring->irq_get = gen5_ring_get_irq; +		ring->irq_put = gen5_ring_put_irq; +		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT | +					GT_RENDER_PIPECTL_NOTIFY_INTERRUPT; +	} else { +		ring->add_request = i9xx_add_request; +		if (INTEL_INFO(dev)->gen < 4) +			ring->flush = gen2_render_ring_flush; +		else +			ring->flush = gen4_render_ring_flush; +		ring->get_seqno = ring_get_seqno; +		ring->set_seqno = ring_set_seqno; +		if (IS_GEN2(dev)) { +			ring->irq_get = i8xx_ring_get_irq; +			ring->irq_put = i8xx_ring_put_irq; +		} else { +			ring->irq_get = i9xx_ring_get_irq; +			ring->irq_put = i9xx_ring_put_irq; +		} +		ring->irq_enable_mask = I915_USER_INTERRUPT; +	} +	ring->write_tail = ring_write_tail; +	if (IS_HASWELL(dev)) +		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer; +	else if (IS_GEN8(dev)) +		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; +	else if (INTEL_INFO(dev)->gen >= 6) +		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; +	else if (INTEL_INFO(dev)->gen >= 4) +		ring->dispatch_execbuffer = i965_dispatch_execbuffer; +	else if (IS_I830(dev) || IS_845G(dev)) +		ring->dispatch_execbuffer = i830_dispatch_execbuffer; +	else +		ring->dispatch_execbuffer = i915_dispatch_execbuffer; +	ring->init = init_render_ring; +	ring->cleanup = render_ring_cleanup; + +	/* Workaround batchbuffer to combat CS tlb bug. 
*/ +	if (HAS_BROKEN_CS_TLB(dev)) {  		struct drm_i915_gem_object *obj; -		u32 __iomem *ptr;  		int ret; -		obj = to_intel_bo(i915_gem_alloc_object(dev, 4096)); -		if (obj == NULL) +		obj = i915_gem_alloc_object(dev, I830_BATCH_LIMIT); +		if (obj == NULL) { +			DRM_ERROR("Failed to allocate batch bo\n");  			return -ENOMEM; - -		ret = i915_gem_object_pin(&obj->base, 4096); -		if (ret) { -			drm_gem_object_unreference(&obj->base); -			return ret;  		} -		ptr = kmap(obj->pages[0]); -		iowrite32(MI_BATCH_BUFFER_END, ptr); -		iowrite32(MI_NOOP, ptr+1); -		kunmap(obj->pages[0]); - -		ret = i915_gem_object_set_to_gtt_domain(&obj->base, false); -		if (ret) { -			i915_gem_object_unpin(&obj->base); +		ret = i915_gem_obj_ggtt_pin(obj, 0, 0); +		if (ret != 0) {  			drm_gem_object_unreference(&obj->base); +			DRM_ERROR("Failed to ping batch bo\n");  			return ret;  		} -		ring->private = obj; +		ring->scratch.obj = obj; +		ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);  	} -	return init_ring_common(dev, ring); +	return intel_init_ring_buffer(dev, ring);  } -static void blt_ring_begin(struct drm_device *dev, -			   struct intel_ring_buffer *ring, -			  int num_dwords) +int intel_render_ring_init_dri(struct drm_device *dev, u64 start, u32 size)  { -	if (ring->private) { -		intel_ring_begin(dev, ring, num_dwords+2); -		intel_ring_emit(dev, ring, MI_BATCH_BUFFER_START); -		intel_ring_emit(dev, ring, to_blt_workaround(ring)->gtt_offset); -	} else -		intel_ring_begin(dev, ring, 4); +	struct drm_i915_private *dev_priv = dev->dev_private; +	struct intel_engine_cs *ring = &dev_priv->ring[RCS]; +	struct intel_ringbuffer *ringbuf = ring->buffer; +	int ret; + +	if (ringbuf == NULL) { +		ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL); +		if (!ringbuf) +			return -ENOMEM; +		ring->buffer = ringbuf; +	} + +	ring->name = "render ring"; +	ring->id = RCS; +	ring->mmio_base = RENDER_RING_BASE; + +	if (INTEL_INFO(dev)->gen >= 6) { +		/* non-kms not supported on gen6+ */ +		ret = -ENODEV; +		goto err_ringbuf; +	} + +	/* Note: gem is not supported on gen5/ilk without kms (the corresponding +	 * gem_init ioctl returns with -ENODEV). Hence we do not need to set up +	 * the special gen5 functions. 
*/ +	ring->add_request = i9xx_add_request; +	if (INTEL_INFO(dev)->gen < 4) +		ring->flush = gen2_render_ring_flush; +	else +		ring->flush = gen4_render_ring_flush; +	ring->get_seqno = ring_get_seqno; +	ring->set_seqno = ring_set_seqno; +	if (IS_GEN2(dev)) { +		ring->irq_get = i8xx_ring_get_irq; +		ring->irq_put = i8xx_ring_put_irq; +	} else { +		ring->irq_get = i9xx_ring_get_irq; +		ring->irq_put = i9xx_ring_put_irq; +	} +	ring->irq_enable_mask = I915_USER_INTERRUPT; +	ring->write_tail = ring_write_tail; +	if (INTEL_INFO(dev)->gen >= 4) +		ring->dispatch_execbuffer = i965_dispatch_execbuffer; +	else if (IS_I830(dev) || IS_845G(dev)) +		ring->dispatch_execbuffer = i830_dispatch_execbuffer; +	else +		ring->dispatch_execbuffer = i915_dispatch_execbuffer; +	ring->init = init_render_ring; +	ring->cleanup = render_ring_cleanup; + +	ring->dev = dev; +	INIT_LIST_HEAD(&ring->active_list); +	INIT_LIST_HEAD(&ring->request_list); + +	ringbuf->size = size; +	ringbuf->effective_size = ringbuf->size; +	if (IS_I830(ring->dev) || IS_845G(ring->dev)) +		ringbuf->effective_size -= 2 * CACHELINE_BYTES; + +	ringbuf->virtual_start = ioremap_wc(start, size); +	if (ringbuf->virtual_start == NULL) { +		DRM_ERROR("can not ioremap virtual address for" +			  " ring buffer\n"); +		ret = -ENOMEM; +		goto err_ringbuf; +	} + +	if (!I915_NEED_GFX_HWS(dev)) { +		ret = init_phys_status_page(ring); +		if (ret) +			goto err_vstart; +	} + +	return 0; + +err_vstart: +	iounmap(ringbuf->virtual_start); +err_ringbuf: +	kfree(ringbuf); +	ring->buffer = NULL; +	return ret;  } -static void blt_ring_flush(struct drm_device *dev, -			   struct intel_ring_buffer *ring, -			   u32 invalidate_domains, -			   u32 flush_domains) +int intel_init_bsd_ring_buffer(struct drm_device *dev)  { -	blt_ring_begin(dev, ring, 4); -	intel_ring_emit(dev, ring, MI_FLUSH_DW); -	intel_ring_emit(dev, ring, 0); -	intel_ring_emit(dev, ring, 0); -	intel_ring_emit(dev, ring, 0); -	intel_ring_advance(dev, ring); +	struct drm_i915_private *dev_priv = dev->dev_private; +	struct intel_engine_cs *ring = &dev_priv->ring[VCS]; + +	ring->name = "bsd ring"; +	ring->id = VCS; + +	ring->write_tail = ring_write_tail; +	if (INTEL_INFO(dev)->gen >= 6) { +		ring->mmio_base = GEN6_BSD_RING_BASE; +		/* gen6 bsd needs a special wa for tail updates */ +		if (IS_GEN6(dev)) +			ring->write_tail = gen6_bsd_ring_write_tail; +		ring->flush = gen6_bsd_ring_flush; +		ring->add_request = gen6_add_request; +		ring->get_seqno = gen6_ring_get_seqno; +		ring->set_seqno = ring_set_seqno; +		if (INTEL_INFO(dev)->gen >= 8) { +			ring->irq_enable_mask = +				GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT; +			ring->irq_get = gen8_ring_get_irq; +			ring->irq_put = gen8_ring_put_irq; +			ring->dispatch_execbuffer = +				gen8_ring_dispatch_execbuffer; +		} else { +			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT; +			ring->irq_get = gen6_ring_get_irq; +			ring->irq_put = gen6_ring_put_irq; +			ring->dispatch_execbuffer = +				gen6_ring_dispatch_execbuffer; +		} +		ring->semaphore.sync_to = gen6_ring_sync; +		ring->semaphore.signal = gen6_signal; +		/* +		 * The current semaphore is only applied on pre-gen8 platform. +		 * And there is no VCS2 ring on the pre-gen8 platform. So the +		 * semaphore between VCS and VCS2 is initialized as INVALID. +		 * Gen8 will initialize the sema between VCS2 and VCS later. 
+		 */ +		ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR; +		ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID; +		ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB; +		ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE; +		ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; +		ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC; +		ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC; +		ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC; +		ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC; +		ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; +	} else { +		ring->mmio_base = BSD_RING_BASE; +		ring->flush = bsd_ring_flush; +		ring->add_request = i9xx_add_request; +		ring->get_seqno = ring_get_seqno; +		ring->set_seqno = ring_set_seqno; +		if (IS_GEN5(dev)) { +			ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT; +			ring->irq_get = gen5_ring_get_irq; +			ring->irq_put = gen5_ring_put_irq; +		} else { +			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT; +			ring->irq_get = i9xx_ring_get_irq; +			ring->irq_put = i9xx_ring_put_irq; +		} +		ring->dispatch_execbuffer = i965_dispatch_execbuffer; +	} +	ring->init = init_ring_common; + +	return intel_init_ring_buffer(dev, ring);  } -static u32 -blt_ring_add_request(struct drm_device *dev, -		     struct intel_ring_buffer *ring, -		     u32 flush_domains) +/** + * Initialize the second BSD ring for Broadwell GT3. + * It is noted that this only exists on Broadwell GT3. + */ +int intel_init_bsd2_ring_buffer(struct drm_device *dev)  { -	u32 seqno = i915_gem_get_seqno(dev); +	struct drm_i915_private *dev_priv = dev->dev_private; +	struct intel_engine_cs *ring = &dev_priv->ring[VCS2]; -	blt_ring_begin(dev, ring, 4); -	intel_ring_emit(dev, ring, MI_STORE_DWORD_INDEX); -	intel_ring_emit(dev, ring, -			I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); -	intel_ring_emit(dev, ring, seqno); -	intel_ring_emit(dev, ring, MI_USER_INTERRUPT); -	intel_ring_advance(dev, ring); +	if ((INTEL_INFO(dev)->gen != 8)) { +		DRM_ERROR("No dual-BSD ring on non-BDW machine\n"); +		return -EINVAL; +	} -	DRM_DEBUG_DRIVER("%s %d\n", ring->name, seqno); -	return seqno; +	ring->name = "bds2_ring"; +	ring->id = VCS2; + +	ring->write_tail = ring_write_tail; +	ring->mmio_base = GEN8_BSD2_RING_BASE; +	ring->flush = gen6_bsd_ring_flush; +	ring->add_request = gen6_add_request; +	ring->get_seqno = gen6_ring_get_seqno; +	ring->set_seqno = ring_set_seqno; +	ring->irq_enable_mask = +			GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT; +	ring->irq_get = gen8_ring_get_irq; +	ring->irq_put = gen8_ring_put_irq; +	ring->dispatch_execbuffer = +			gen8_ring_dispatch_execbuffer; +	ring->semaphore.sync_to = gen6_ring_sync; +	ring->semaphore.signal = gen6_signal; +	/* +	 * The current semaphore is only applied on the pre-gen8. And there +	 * is no bsd2 ring on the pre-gen8. So now the semaphore_register +	 * between VCS2 and other ring is initialized as invalid. +	 * Gen8 will initialize the sema between VCS2 and other ring later. 
+	 */ +	ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID; +	ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID; +	ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID; +	ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID; +	ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; +	ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC; +	ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC; +	ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC; +	ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC; +	ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; + +	ring->init = init_ring_common; + +	return intel_init_ring_buffer(dev, ring);  } -static void blt_ring_cleanup(struct intel_ring_buffer *ring) +int intel_init_blt_ring_buffer(struct drm_device *dev)  { -	if (!ring->private) -		return; +	struct drm_i915_private *dev_priv = dev->dev_private; +	struct intel_engine_cs *ring = &dev_priv->ring[BCS]; + +	ring->name = "blitter ring"; +	ring->id = BCS; + +	ring->mmio_base = BLT_RING_BASE; +	ring->write_tail = ring_write_tail; +	ring->flush = gen6_ring_flush; +	ring->add_request = gen6_add_request; +	ring->get_seqno = gen6_ring_get_seqno; +	ring->set_seqno = ring_set_seqno; +	if (INTEL_INFO(dev)->gen >= 8) { +		ring->irq_enable_mask = +			GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT; +		ring->irq_get = gen8_ring_get_irq; +		ring->irq_put = gen8_ring_put_irq; +		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; +	} else { +		ring->irq_enable_mask = GT_BLT_USER_INTERRUPT; +		ring->irq_get = gen6_ring_get_irq; +		ring->irq_put = gen6_ring_put_irq; +		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; +	} +	ring->semaphore.sync_to = gen6_ring_sync; +	ring->semaphore.signal = gen6_signal; +	/* +	 * The current semaphore is only applied on pre-gen8 platform. And +	 * there is no VCS2 ring on the pre-gen8 platform. So the semaphore +	 * between BCS and VCS2 is initialized as INVALID. +	 * Gen8 will initialize the sema between BCS and VCS2 later. 
+	 */ +	ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR; +	ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV; +	ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID; +	ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE; +	ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; +	ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC; +	ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC; +	ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC; +	ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC; +	ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; +	ring->init = init_ring_common; + +	return intel_init_ring_buffer(dev, ring); +} -	i915_gem_object_unpin(ring->private); -	drm_gem_object_unreference(ring->private); -	ring->private = NULL; -} - -static const struct intel_ring_buffer gen6_blt_ring = { -       .name			= "blt ring", -       .id			= RING_BLT, -       .mmio_base		= BLT_RING_BASE, -       .size			= 32 * PAGE_SIZE, -       .init			= blt_ring_init, -       .write_tail		= ring_write_tail, -       .flush			= blt_ring_flush, -       .add_request		= blt_ring_add_request, -       .get_seqno		= ring_status_page_get_seqno, -       .user_irq_get		= blt_ring_get_user_irq, -       .user_irq_put		= blt_ring_put_user_irq, -       .dispatch_gem_execbuffer	= gen6_ring_dispatch_gem_execbuffer, -       .cleanup			= blt_ring_cleanup, -}; +int intel_init_vebox_ring_buffer(struct drm_device *dev) +{ +	struct drm_i915_private *dev_priv = dev->dev_private; +	struct intel_engine_cs *ring = &dev_priv->ring[VECS]; + +	ring->name = "video enhancement ring"; +	ring->id = VECS; + +	ring->mmio_base = VEBOX_RING_BASE; +	ring->write_tail = ring_write_tail; +	ring->flush = gen6_ring_flush; +	ring->add_request = gen6_add_request; +	ring->get_seqno = gen6_ring_get_seqno; +	ring->set_seqno = ring_set_seqno; + +	if (INTEL_INFO(dev)->gen >= 8) { +		ring->irq_enable_mask = +			GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT; +		ring->irq_get = gen8_ring_get_irq; +		ring->irq_put = gen8_ring_put_irq; +		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer; +	} else { +		ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT; +		ring->irq_get = hsw_vebox_get_irq; +		ring->irq_put = hsw_vebox_put_irq; +		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer; +	} +	ring->semaphore.sync_to = gen6_ring_sync; +	ring->semaphore.signal = gen6_signal; +	ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER; +	ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV; +	ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB; +	ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID; +	ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID; +	ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC; +	ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC; +	ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC; +	ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC; +	ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC; +	ring->init = init_ring_common; + +	return intel_init_ring_buffer(dev, ring); +} -int intel_init_render_ring_buffer(struct drm_device *dev) +int +intel_ring_flush_all_caches(struct intel_engine_cs *ring)  { -	drm_i915_private_t *dev_priv = dev->dev_private; +	int ret; -	dev_priv->render_ring = render_ring; +	if (!ring->gpu_caches_dirty) +		return 0; -	if (!I915_NEED_GFX_HWS(dev)) { -		dev_priv->render_ring.status_page.page_addr -			= dev_priv->status_page_dmah->vaddr; -		memset(dev_priv->render_ring.status_page.page_addr, -				0, PAGE_SIZE); -	} +	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS); +	if (ret) +		return ret; + +	
trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS); -	return intel_init_ring_buffer(dev, &dev_priv->render_ring); +	ring->gpu_caches_dirty = false; +	return 0;  } -int intel_init_bsd_ring_buffer(struct drm_device *dev) +int +intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)  { -	drm_i915_private_t *dev_priv = dev->dev_private; +	uint32_t flush_domains; +	int ret; -	if (IS_GEN6(dev)) -		dev_priv->bsd_ring = gen6_bsd_ring; -	else -		dev_priv->bsd_ring = bsd_ring; +	flush_domains = 0; +	if (ring->gpu_caches_dirty) +		flush_domains = I915_GEM_GPU_DOMAINS; + +	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); +	if (ret) +		return ret; + +	trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); -	return intel_init_ring_buffer(dev, &dev_priv->bsd_ring); +	ring->gpu_caches_dirty = false; +	return 0;  } -int intel_init_blt_ring_buffer(struct drm_device *dev) +void +intel_stop_ring_buffer(struct intel_engine_cs *ring)  { -	drm_i915_private_t *dev_priv = dev->dev_private; +	int ret; + +	if (!intel_ring_initialized(ring)) +		return; -	dev_priv->blt_ring = gen6_blt_ring; +	ret = intel_ring_idle(ring); +	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error)) +		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n", +			  ring->name, ret); -	return intel_init_ring_buffer(dev, &dev_priv->blt_ring); +	stop_ring(ring);  }  | 
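
The intel_ring_cacheline_align() helper added above pads the ring tail out to a cacheline boundary by emitting MI_NOOP dwords. A minimal user-space sketch of the same arithmetic, assuming a 64-byte cacheline and not using any driver headers, might look like this (the helper name and the example tail value are purely illustrative):

#include <stdint.h>
#include <stdio.h>

#define CACHELINE_BYTES 64	/* assumed alignment granule, as used for the padding */

/*
 * Number of filler dwords needed to pad a ring tail (a byte offset) up to
 * the next cacheline boundary; returns 0 when the tail is already aligned.
 */
static int cacheline_pad_dwords(uint32_t tail)
{
	int rem = (tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);

	return rem ? (int)(CACHELINE_BYTES / sizeof(uint32_t)) - rem : 0;
}

int main(void)
{
	/* e.g. a tail of 0x68 needs 6 filler dwords to land on 0x80 */
	printf("pad = %d dwords\n", cacheline_pad_dwords(0x68));
	return 0;
}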
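Similarly, intel_wrap_ring_buffer() handles the case where a request would run past the end of the backing store: the remaining space is filled with NOOPs and the tail wraps back to offset 0. A simplified sketch of that wrap step, assuming a 32-page ring and a placeholder NOOP value, and deliberately omitting the wait-for-space call and the i830 effective_size trim:

#include <stdint.h>

#define RING_SIZE	(32 * 4096)	/* 32 pages, matching ringbuf->size above */
#define MI_NOOP		0		/* placeholder for the real NOOP encoding */

struct toy_ring {
	uint32_t dwords[RING_SIZE / sizeof(uint32_t)];
	uint32_t tail;			/* byte offset of the next write */
};

/*
 * If fewer than `bytes` remain before the end of the backing store, fill the
 * remainder with NOOPs and wrap the tail back to offset 0, mirroring the
 * iowrite32() loop in the patch (minus the space wait and effective_size).
 */
static void maybe_wrap(struct toy_ring *ring, uint32_t bytes)
{
	uint32_t rem = RING_SIZE - ring->tail;

	if (rem >= bytes)
		return;

	for (uint32_t i = 0; i < rem / sizeof(uint32_t); i++)
		ring->dwords[ring->tail / sizeof(uint32_t) + i] = MI_NOOP;

	ring->tail = 0;
}

int main(void)
{
	static struct toy_ring ring = { .tail = RING_SIZE - 8 };

	maybe_wrap(&ring, 16);	/* only 8 bytes left, so the ring wraps */
	return ring.tail;	/* 0 after the wrap */
}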
