From b1ee26bab14886350ba12a5c10cbc0696ac679bf Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 15 Oct 2008 22:03:46 -0700 Subject: radeonfb: accelerate imageblit and other improvements Implement support for HW color expansion of 1bpp images, along with some improvements to the FIFO handling and other accel operations. The offset fixup code is now unnecessary as the fbcon core will call our set_par upon switch back from KD_GRAPHICS before anything else happens. I removed it as it would slow down accel operations. The fifo wait has been improved to avoid hitting the HW register as often, and the various accel ops are now performing better caching of register values. Overall, this improves accel performance. The imageblit acceleration does result in a small overall regression in performance on some machines (on the order of 5% on some x86), probably because the SW path provides a better bus utilisation, but I decided to ignore that as the performance is still very good, and on the other hand, some machines such as some sparc64 get a 3-fold performance improvement. Signed-off-by: Benjamin Herrenschmidt Acked-by: David S. Miller Cc: Krzysztof Halasa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/video/aty/radeon_accel.c | 291 +++++++++++++++++++++++++++------------ 1 file changed, 205 insertions(+), 86 deletions(-) (limited to 'drivers/video/aty/radeon_accel.c') diff --git a/drivers/video/aty/radeon_accel.c b/drivers/video/aty/radeon_accel.c index a469a3d6edc..8718f7349d6 100644 --- a/drivers/video/aty/radeon_accel.c +++ b/drivers/video/aty/radeon_accel.c @@ -5,61 +5,61 @@ * --dte */ -static void radeon_fixup_offset(struct radeonfb_info *rinfo) +#define FLUSH_CACHE_WORKAROUND 1 + +void radeon_fifo_update_and_wait(struct radeonfb_info *rinfo, int entries) { - u32 local_base; - - /* *** Ugly workaround *** */ - /* - * On some platforms, the video memory is mapped at 0 in radeon chip space - * (like PPCs) by the firmware. 
X will always move it up so that it's seen - * by the chip to be at the same address as the PCI BAR. - * That means that when switching back from X, there is a mismatch between - * the offsets programmed into the engine. This means that potentially, - * accel operations done before radeonfb has a chance to re-init the engine - * will have incorrect offsets, and potentially trash system memory ! - * - * The correct fix is for fbcon to never call any accel op before the engine - * has properly been re-initialized (by a call to set_var), but this is a - * complex fix. This workaround in the meantime, called before every accel - * operation, makes sure the offsets are in sync. - */ + int i; - radeon_fifo_wait (1); - local_base = INREG(MC_FB_LOCATION) << 16; - if (local_base == rinfo->fb_local_base) - return; + for (i=0; i<2000000; i++) { + rinfo->fifo_free = INREG(RBBM_STATUS) & 0x7f; + if (rinfo->fifo_free >= entries) + return; + udelay(10); + } + printk(KERN_ERR "radeonfb: FIFO Timeout !\n"); + /* XXX Todo: attempt to reset the engine */ +} - rinfo->fb_local_base = local_base; +static inline void radeon_fifo_wait(struct radeonfb_info *rinfo, int entries) +{ + if (entries <= rinfo->fifo_free) + rinfo->fifo_free -= entries; + else + radeon_fifo_update_and_wait(rinfo, entries); +} - radeon_fifo_wait (3); - OUTREG(DEFAULT_PITCH_OFFSET, (rinfo->pitch << 0x16) | - (rinfo->fb_local_base >> 10)); - OUTREG(DST_PITCH_OFFSET, (rinfo->pitch << 0x16) | (rinfo->fb_local_base >> 10)); - OUTREG(SRC_PITCH_OFFSET, (rinfo->pitch << 0x16) | (rinfo->fb_local_base >> 10)); +static inline void radeonfb_set_creg(struct radeonfb_info *rinfo, u32 reg, + u32 *cache, u32 new_val) +{ + if (new_val == *cache) + return; + *cache = new_val; + radeon_fifo_wait(rinfo, 1); + OUTREG(reg, new_val); } static void radeonfb_prim_fillrect(struct radeonfb_info *rinfo, const struct fb_fillrect *region) { - radeon_fifo_wait(4); - - OUTREG(DP_GUI_MASTER_CNTL, - rinfo->dp_gui_master_cntl /* contains, like 
GMC_DST_32BPP */ - | GMC_BRUSH_SOLID_COLOR - | ROP3_P); - if (radeon_get_dstbpp(rinfo->depth) != DST_8BPP) - OUTREG(DP_BRUSH_FRGD_CLR, rinfo->pseudo_palette[region->color]); - else - OUTREG(DP_BRUSH_FRGD_CLR, region->color); - OUTREG(DP_WRITE_MSK, 0xffffffff); - OUTREG(DP_CNTL, (DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM)); - - radeon_fifo_wait(2); + radeonfb_set_creg(rinfo, DP_GUI_MASTER_CNTL, &rinfo->dp_gui_mc_cache, + rinfo->dp_gui_mc_base | GMC_BRUSH_SOLID_COLOR | ROP3_P); + radeonfb_set_creg(rinfo, DP_CNTL, &rinfo->dp_cntl_cache, + DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM); + radeonfb_set_creg(rinfo, DP_BRUSH_FRGD_CLR, &rinfo->dp_brush_fg_cache, + region->color); + + /* Ensure the dst cache is flushed and the engine idle before + * issuing the operation. + * + * This works around engine lockups on some cards + */ +#if FLUSH_CACHE_WORKAROUND + radeon_fifo_wait(rinfo, 2); OUTREG(DSTCACHE_CTLSTAT, RB2D_DC_FLUSH_ALL); OUTREG(WAIT_UNTIL, (WAIT_2D_IDLECLEAN | WAIT_DMA_GUI_IDLE)); - - radeon_fifo_wait(2); +#endif + radeon_fifo_wait(rinfo, 2); OUTREG(DST_Y_X, (region->dy << 16) | region->dx); OUTREG(DST_WIDTH_HEIGHT, (region->width << 16) | region->height); } @@ -70,15 +70,14 @@ void radeonfb_fillrect(struct fb_info *info, const struct fb_fillrect *region) struct fb_fillrect modded; int vxres, vyres; - if (info->state != FBINFO_STATE_RUNNING) + WARN_ON(rinfo->gfx_mode); + if (info->state != FBINFO_STATE_RUNNING || rinfo->gfx_mode) return; if (info->flags & FBINFO_HWACCEL_DISABLED) { cfb_fillrect(info, region); return; } - radeon_fixup_offset(rinfo); - vxres = info->var.xres_virtual; vyres = info->var.yres_virtual; @@ -91,6 +90,10 @@ void radeonfb_fillrect(struct fb_info *info, const struct fb_fillrect *region) if(modded.dx + modded.width > vxres) modded.width = vxres - modded.dx; if(modded.dy + modded.height > vyres) modded.height = vyres - modded.dy; + if (info->fix.visual == FB_VISUAL_TRUECOLOR || + info->fix.visual == FB_VISUAL_DIRECTCOLOR ) + modded.color = ((u32 
*) (info->pseudo_palette))[region->color]; + radeonfb_prim_fillrect(rinfo, &modded); } @@ -109,22 +112,22 @@ static void radeonfb_prim_copyarea(struct radeonfb_info *rinfo, if ( xdir < 0 ) { sx += w-1; dx += w-1; } if ( ydir < 0 ) { sy += h-1; dy += h-1; } - radeon_fifo_wait(3); - OUTREG(DP_GUI_MASTER_CNTL, - rinfo->dp_gui_master_cntl /* i.e. GMC_DST_32BPP */ - | GMC_BRUSH_NONE - | GMC_SRC_DSTCOLOR - | ROP3_S - | DP_SRC_SOURCE_MEMORY ); - OUTREG(DP_WRITE_MSK, 0xffffffff); - OUTREG(DP_CNTL, (xdir>=0 ? DST_X_LEFT_TO_RIGHT : 0) - | (ydir>=0 ? DST_Y_TOP_TO_BOTTOM : 0)); - - radeon_fifo_wait(2); + radeonfb_set_creg(rinfo, DP_GUI_MASTER_CNTL, &rinfo->dp_gui_mc_cache, + rinfo->dp_gui_mc_base | + GMC_BRUSH_NONE | + GMC_SRC_DATATYPE_COLOR | + ROP3_S | + DP_SRC_SOURCE_MEMORY); + radeonfb_set_creg(rinfo, DP_CNTL, &rinfo->dp_cntl_cache, + (xdir>=0 ? DST_X_LEFT_TO_RIGHT : 0) | + (ydir>=0 ? DST_Y_TOP_TO_BOTTOM : 0)); + +#if FLUSH_CACHE_WORKAROUND + radeon_fifo_wait(rinfo, 2); OUTREG(DSTCACHE_CTLSTAT, RB2D_DC_FLUSH_ALL); OUTREG(WAIT_UNTIL, (WAIT_2D_IDLECLEAN | WAIT_DMA_GUI_IDLE)); - - radeon_fifo_wait(3); +#endif + radeon_fifo_wait(rinfo, 3); OUTREG(SRC_Y_X, (sy << 16) | sx); OUTREG(DST_Y_X, (dy << 16) | dx); OUTREG(DST_HEIGHT_WIDTH, (h << 16) | w); @@ -143,15 +146,14 @@ void radeonfb_copyarea(struct fb_info *info, const struct fb_copyarea *area) modded.width = area->width; modded.height = area->height; - if (info->state != FBINFO_STATE_RUNNING) + WARN_ON(rinfo->gfx_mode); + if (info->state != FBINFO_STATE_RUNNING || rinfo->gfx_mode) return; if (info->flags & FBINFO_HWACCEL_DISABLED) { cfb_copyarea(info, area); return; } - radeon_fixup_offset(rinfo); - vxres = info->var.xres_virtual; vyres = info->var.yres_virtual; @@ -168,13 +170,112 @@ void radeonfb_copyarea(struct fb_info *info, const struct fb_copyarea *area) radeonfb_prim_copyarea(rinfo, &modded); } +static void radeonfb_prim_imageblit(struct radeonfb_info *rinfo, + const struct fb_image *image, + u32 fg, u32 bg) +{ + 
unsigned int src_bytes, dwords; + u32 *bits; + + radeonfb_set_creg(rinfo, DP_GUI_MASTER_CNTL, &rinfo->dp_gui_mc_cache, + rinfo->dp_gui_mc_base | + GMC_BRUSH_NONE | + GMC_SRC_DATATYPE_MONO_FG_BG | + ROP3_S | + GMC_BYTE_ORDER_MSB_TO_LSB | + DP_SRC_SOURCE_HOST_DATA); + radeonfb_set_creg(rinfo, DP_CNTL, &rinfo->dp_cntl_cache, + DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM); + radeonfb_set_creg(rinfo, DP_SRC_FRGD_CLR, &rinfo->dp_src_fg_cache, fg); + radeonfb_set_creg(rinfo, DP_SRC_BKGD_CLR, &rinfo->dp_src_bg_cache, bg); + + radeon_fifo_wait(rinfo, 1); + OUTREG(DST_Y_X, (image->dy << 16) | image->dx); + + /* Ensure the dst cache is flushed and the engine idle before + * issuing the operation. + * + * This works around engine lockups on some cards + */ +#if FLUSH_CACHE_WORKAROUND + radeon_fifo_wait(rinfo, 2); + OUTREG(DSTCACHE_CTLSTAT, RB2D_DC_FLUSH_ALL); + OUTREG(WAIT_UNTIL, (WAIT_2D_IDLECLEAN | WAIT_DMA_GUI_IDLE)); +#endif + + /* X here pads width to a multiple of 32 and uses the clipper to + * adjust the result. Is that really necessary ? Things seem to + * work ok for me without that and the doco doesn't seem to imply + * there is such a restriction. 
+ */ + OUTREG(DST_WIDTH_HEIGHT, (image->width << 16) | image->height); + + src_bytes = (((image->width * image->depth) + 7) / 8) * image->height; + dwords = (src_bytes + 3) / 4; + bits = (u32*)(image->data); + + while(dwords >= 8) { + radeon_fifo_wait(rinfo, 8); +#if BITS_PER_LONG == 64 + __raw_writeq(*((u64 *)(bits)), rinfo->mmio_base + HOST_DATA0); + __raw_writeq(*((u64 *)(bits+2)), rinfo->mmio_base + HOST_DATA2); + __raw_writeq(*((u64 *)(bits+4)), rinfo->mmio_base + HOST_DATA4); + __raw_writeq(*((u64 *)(bits+6)), rinfo->mmio_base + HOST_DATA6); + bits += 8; +#else + __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA0); + __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA1); + __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA2); + __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA3); + __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA4); + __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA5); + __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA6); + __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA7); +#endif + dwords -= 8; + } + while(dwords--) { + radeon_fifo_wait(rinfo, 1); + __raw_writel(*(bits++), rinfo->mmio_base + HOST_DATA0); + } +} + void radeonfb_imageblit(struct fb_info *info, const struct fb_image *image) { struct radeonfb_info *rinfo = info->par; + u32 fg, bg; - if (info->state != FBINFO_STATE_RUNNING) + WARN_ON(rinfo->gfx_mode); + if (info->state != FBINFO_STATE_RUNNING || rinfo->gfx_mode) + return; + + if (!image->width || !image->height) return; - radeon_engine_idle(); + + /* We only do 1 bpp color expansion for now */ + if (info->flags & FBINFO_HWACCEL_DISABLED || image->depth != 1) + goto fallback; + + /* Fallback if running out of the screen. 
We may do clipping + * in the future */ + if ((image->dx + image->width) > info->var.xres_virtual || + (image->dy + image->height) > info->var.yres_virtual) + goto fallback; + + if (info->fix.visual == FB_VISUAL_TRUECOLOR || + info->fix.visual == FB_VISUAL_DIRECTCOLOR) { + fg = ((u32*)(info->pseudo_palette))[image->fg_color]; + bg = ((u32*)(info->pseudo_palette))[image->bg_color]; + } else { + fg = image->fg_color; + bg = image->bg_color; + } + + radeonfb_prim_imageblit(rinfo, image, fg, bg); + return; + + fallback: + radeon_engine_idle(rinfo); cfb_imageblit(info, image); } @@ -185,7 +286,8 @@ int radeonfb_sync(struct fb_info *info) if (info->state != FBINFO_STATE_RUNNING) return 0; - radeon_engine_idle(); + + radeon_engine_idle(rinfo); return 0; } @@ -261,9 +363,10 @@ void radeonfb_engine_init (struct radeonfb_info *rinfo) /* disable 3D engine */ OUTREG(RB3D_CNTL, 0); + rinfo->fifo_free = 0; radeonfb_engine_reset(rinfo); - radeon_fifo_wait (1); + radeon_fifo_wait(rinfo, 1); if (IS_R300_VARIANT(rinfo)) { OUTREG(RB2D_DSTCACHE_MODE, INREG(RB2D_DSTCACHE_MODE) | RB2D_DC_AUTOFLUSH_ENABLE | @@ -277,7 +380,7 @@ void radeonfb_engine_init (struct radeonfb_info *rinfo) OUTREG(RB2D_DSTCACHE_MODE, 0); } - radeon_fifo_wait (3); + radeon_fifo_wait(rinfo, 3); /* We re-read MC_FB_LOCATION from card as it can have been * modified by XFree drivers (ouch !) 
*/ @@ -288,41 +391,57 @@ void radeonfb_engine_init (struct radeonfb_info *rinfo) OUTREG(DST_PITCH_OFFSET, (rinfo->pitch << 0x16) | (rinfo->fb_local_base >> 10)); OUTREG(SRC_PITCH_OFFSET, (rinfo->pitch << 0x16) | (rinfo->fb_local_base >> 10)); - radeon_fifo_wait (1); -#if defined(__BIG_ENDIAN) + radeon_fifo_wait(rinfo, 1); +#ifdef __BIG_ENDIAN OUTREGP(DP_DATATYPE, HOST_BIG_ENDIAN_EN, ~HOST_BIG_ENDIAN_EN); #else OUTREGP(DP_DATATYPE, 0, ~HOST_BIG_ENDIAN_EN); #endif - radeon_fifo_wait (2); + radeon_fifo_wait(rinfo, 2); OUTREG(DEFAULT_SC_TOP_LEFT, 0); OUTREG(DEFAULT_SC_BOTTOM_RIGHT, (DEFAULT_SC_RIGHT_MAX | DEFAULT_SC_BOTTOM_MAX)); + /* set default DP_GUI_MASTER_CNTL */ temp = radeon_get_dstbpp(rinfo->depth); - rinfo->dp_gui_master_cntl = ((temp << 8) | GMC_CLR_CMP_CNTL_DIS); + rinfo->dp_gui_mc_base = ((temp << 8) | GMC_CLR_CMP_CNTL_DIS); - radeon_fifo_wait (1); - OUTREG(DP_GUI_MASTER_CNTL, (rinfo->dp_gui_master_cntl | - GMC_BRUSH_SOLID_COLOR | - GMC_SRC_DATATYPE_COLOR)); + rinfo->dp_gui_mc_cache = rinfo->dp_gui_mc_base | + GMC_BRUSH_SOLID_COLOR | + GMC_SRC_DATATYPE_COLOR; + radeon_fifo_wait(rinfo, 1); + OUTREG(DP_GUI_MASTER_CNTL, rinfo->dp_gui_mc_cache); - radeon_fifo_wait (7); /* clear line drawing regs */ + radeon_fifo_wait(rinfo, 2); OUTREG(DST_LINE_START, 0); OUTREG(DST_LINE_END, 0); - /* set brush color regs */ - OUTREG(DP_BRUSH_FRGD_CLR, 0xffffffff); - OUTREG(DP_BRUSH_BKGD_CLR, 0x00000000); - - /* set source color regs */ - OUTREG(DP_SRC_FRGD_CLR, 0xffffffff); - OUTREG(DP_SRC_BKGD_CLR, 0x00000000); + /* set brush and source color regs */ + rinfo->dp_brush_fg_cache = 0xffffffff; + rinfo->dp_brush_bg_cache = 0x00000000; + rinfo->dp_src_fg_cache = 0xffffffff; + rinfo->dp_src_bg_cache = 0x00000000; + radeon_fifo_wait(rinfo, 4); + OUTREG(DP_BRUSH_FRGD_CLR, rinfo->dp_brush_fg_cache); + OUTREG(DP_BRUSH_BKGD_CLR, rinfo->dp_brush_bg_cache); + OUTREG(DP_SRC_FRGD_CLR, rinfo->dp_src_fg_cache); + OUTREG(DP_SRC_BKGD_CLR, rinfo->dp_src_bg_cache); + + /* Default direction */ + 
rinfo->dp_cntl_cache = DST_X_LEFT_TO_RIGHT | DST_Y_TOP_TO_BOTTOM; + radeon_fifo_wait(rinfo, 1); + OUTREG(DP_CNTL, rinfo->dp_cntl_cache); /* default write mask */ + radeon_fifo_wait(rinfo, 1); OUTREG(DP_WRITE_MSK, 0xffffffff); - radeon_engine_idle (); + /* Default to no swapping of host data */ + radeon_fifo_wait(rinfo, 1); + OUTREG(RBBM_GUICNTL, RBBM_GUICNTL_HOST_DATA_SWAP_NONE); + + /* Make sure it's settled */ + radeon_engine_idle(rinfo); } -- cgit v1.2.3-18-g5258