diff -Nru intel-gpu-tools-1.0.2+git20110324+d641f0f/debian/changelog intel-gpu-tools-1.0.2+git20110408+1c9ede3/debian/changelog --- intel-gpu-tools-1.0.2+git20110324+d641f0f/debian/changelog 2011-04-08 20:12:43.000000000 +0000 +++ intel-gpu-tools-1.0.2+git20110408+1c9ede3/debian/changelog 2011-04-08 20:12:43.000000000 +0000 @@ -1,7 +1,6 @@ -intel-gpu-tools (1.0.2+git20110324+d641f0f-0ubuntu1) natty; urgency=low +intel-gpu-tools (1.0.2+git20110408+1c9ede3-0ubuntu1) natty; urgency=low - * New git checkout (master branch) up to commit - d641f0f + * New git checkout (master branch) up to commit 1c9ede3 -- Robert Hooker Mon, 24 Jan 2011 15:31:39 -0500 diff -Nru intel-gpu-tools-1.0.2+git20110324+d641f0f/lib/i830_reg.h intel-gpu-tools-1.0.2+git20110408+1c9ede3/lib/i830_reg.h --- intel-gpu-tools-1.0.2+git20110324+d641f0f/lib/i830_reg.h 1970-01-01 00:00:00.000000000 +0000 +++ intel-gpu-tools-1.0.2+git20110408+1c9ede3/lib/i830_reg.h 2011-04-08 19:58:59.000000000 +0000 @@ -0,0 +1,805 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _I830_REG_H_ +#define _I830_REG_H_ + +#define I830_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value) + +/* Flush */ +#define MI_FLUSH (0x04<<23) +#define MI_FLUSH_DW (0x26<<23) + +#define MI_WRITE_DIRTY_STATE (1<<4) +#define MI_END_SCENE (1<<3) +#define MI_GLOBAL_SNAPSHOT_COUNT_RESET (1<<3) +#define MI_INHIBIT_RENDER_CACHE_FLUSH (1<<2) +#define MI_STATE_INSTRUCTION_CACHE_FLUSH (1<<1) +#define MI_INVALIDATE_MAP_CACHE (1<<0) +/* broadwater flush bits */ +#define BRW_MI_GLOBAL_SNAPSHOT_RESET (1 << 3) + +#define MI_BATCH_BUFFER_END (0xA << 23) + +/* Noop */ +#define MI_NOOP 0x00 +#define MI_NOOP_WRITE_ID (1<<22) +#define MI_NOOP_ID_MASK (1<<22 - 1) + +/* Wait for Events */ +#define MI_WAIT_FOR_EVENT (0x03<<23) +#define MI_WAIT_FOR_PIPEB_SVBLANK (1<<18) +#define MI_WAIT_FOR_PIPEA_SVBLANK (1<<17) +#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16) +#define MI_WAIT_FOR_PIPEB_VBLANK (1<<7) +#define MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW (1<<5) +#define MI_WAIT_FOR_PIPEA_VBLANK (1<<3) +#define MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW (1<<1) + +/* Set the scan line for MI_WAIT_FOR_PIPE?_SCAN_LINE_WINDOW */ +#define MI_LOAD_SCAN_LINES_INCL (0x12<<23) +#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEA (0) +#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEB (0x1<<20) + +/* BLT commands */ +#define COLOR_BLT_CMD ((2<<29)|(0x40<<22)|(0x3)) +#define COLOR_BLT_WRITE_ALPHA (1<<21) +#define COLOR_BLT_WRITE_RGB (1<<20) + +#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|(0x4)) +#define XY_COLOR_BLT_WRITE_ALPHA (1<<21) +#define XY_COLOR_BLT_WRITE_RGB (1<<20) +#define XY_COLOR_BLT_TILED (1<<11) + +#define XY_SETUP_CLIP_BLT_CMD ((2<<29)|(3<<22)|1) + +#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) +#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) +#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) +#define XY_SRC_COPY_BLT_SRC_TILED (1<<15) +#define XY_SRC_COPY_BLT_DST_TILED (1<<11) + +#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|0x4) +#define SRC_COPY_BLT_WRITE_ALPHA (1<<21) +#define SRC_COPY_BLT_WRITE_RGB (1<<20) + +#define XY_PAT_BLT_IMMEDIATE ((2<<29)|(0x72<<22)) + +#define XY_MONO_PAT_BLT_CMD ((0x2<<29)|(0x52<<22)|0x7) +#define XY_MONO_PAT_VERT_SEED ((1<<10)|(1<<9)|(1<<8)) +#define XY_MONO_PAT_HORT_SEED ((1<<14)|(1<<13)|(1<<12)) +#define XY_MONO_PAT_BLT_WRITE_ALPHA (1<<21) +#define XY_MONO_PAT_BLT_WRITE_RGB (1<<20) + +#define XY_MONO_SRC_BLT_CMD ((0x2<<29)|(0x54<<22)|(0x6)) +#define XY_MONO_SRC_BLT_WRITE_ALPHA (1<<21) +#define XY_MONO_SRC_BLT_WRITE_RGB (1<<20) + +#define CMD_3D (0x3<<29) + +#define PRIM3D_INLINE (CMD_3D | (0x1f<<24)) +#define PRIM3D_TRILIST (0x0<<18) +#define PRIM3D_TRISTRIP (0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) +#define PRIM3D_TRIFAN (0x3<<18) +#define PRIM3D_POLY (0x4<<18) +#define PRIM3D_LINELIST (0x5<<18) +#define PRIM3D_LINESTRIP (0x6<<18) +#define PRIM3D_RECTLIST (0x7<<18) +#define PRIM3D_POINTLIST (0x8<<18) +#define PRIM3D_DIB (0x9<<18) +#define PRIM3D_CLEAR_RECT (0xa<<18) +#define PRIM3D_ZONE_INIT (0xd<<18) +#define PRIM3D_MASK (0x1f<<18) + +#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24)) +#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16) +#define AA_LINE_ECAAR_WIDTH_0_5 0 +#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14) +#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14) +#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14) +#define AA_LINE_REGION_WIDTH_ENABLE (1<<8) +#define AA_LINE_REGION_WIDTH_0_5 0 +#define AA_LINE_REGION_WIDTH_1_0 (1<<6) +#define AA_LINE_REGION_WIDTH_2_0 (2<<6) +#define AA_LINE_REGION_WIDTH_4_0 (3<<6) +#define AA_LINE_ENABLE ((1<<1) | 1) +#define AA_LINE_DISABLE (1<<1) + +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +#define BUF_3D_PITCH(x) (((x)/4)<<2) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + +#define _3DSTATE_COLOR_FACTOR_CMD (CMD_3D | (0x1d<<24) | (0x1<<16)) + +#define _3DSTATE_COLOR_FACTOR_N_CMD(stage) (CMD_3D | (0x1d<<24) | \ + ((0x90+(stage))<<16)) + +#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16)) + +#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16)) + +#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16)) + +#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16)) + +#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) +/* Dword 1 */ +#define DSTORG_HORT_BIAS(x) ((x)<<20) +#define DSTORG_VERT_BIAS(x) ((x)<<16) +#define COLOR_4_2_2_CHNL_WRT_ALL 0 +#define COLOR_4_2_2_CHNL_WRT_Y (1<<12) +#define COLOR_4_2_2_CHNL_WRT_CR (2<<12) +#define COLOR_4_2_2_CHNL_WRT_CB (3<<12) +#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12) +#define COLR_BUF_8BIT 0 +#define COLR_BUF_RGB555 (1<<8) +#define COLR_BUF_RGB565 (2<<8) +#define COLR_BUF_ARGB8888 (3<<8) +#define COLR_BUF_ARGB4444 (8<<8) +#define COLR_BUF_ARGB1555 (9<<8) +#define DEPTH_IS_Z 0 +#define DEPTH_IS_W (1<<6) +#define DEPTH_FRMT_16_FIXED 0 +#define DEPTH_FRMT_16_FLOAT (1<<2) +#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2) +#define DEPTH_FRMT_24_FLOAT_8_OTHER (3<<2) +#define VERT_LINE_STRIDE_1 (1<<1) +#define VERT_LINE_STRIDE_0 0 +#define VERT_LINE_STRIDE_OFS_1 1 +#define VERT_LINE_STRIDE_OFS_0 0 + +#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3) +/* Dword 1 */ +#define DRAW_RECT_DIS_DEPTH_OFS (1<<30) +#define DRAW_DITHER_OFS_X(x) ((x)<<26) +#define DRAW_DITHER_OFS_Y(x) ((x)<<24) +/* Dword 2 */ +#define DRAW_YMIN(x) ((x)<<16) +#define DRAW_XMIN(x) (x) +/* Dword 3 */ +#define DRAW_YMAX(x) ((x)<<16) +#define DRAW_XMAX(x) (x) +/* Dword 4 */ +#define DRAW_YORG(x) ((x)<<16) +#define DRAW_XORG(x) (x) + +#define _3DSTATE_ENABLES_1_CMD (CMD_3D|(0x3<<24)) +#define ENABLE_LOGIC_OP_MASK ((1<<23)|(1<<22)) +#define ENABLE_LOGIC_OP ((1<<23)|(1<<22)) +#define DISABLE_LOGIC_OP (1<<23) +#define ENABLE_STENCIL_TEST ((1<<21)|(1<<20)) +#define DISABLE_STENCIL_TEST (1<<21) +#define ENABLE_DEPTH_BIAS ((1<<11)|(1<<10)) +#define DISABLE_DEPTH_BIAS (1<<11) +#define ENABLE_SPEC_ADD_MASK ((1<<9)|(1<<8)) +#define ENABLE_SPEC_ADD ((1<<9)|(1<<8)) +#define DISABLE_SPEC_ADD (1<<9) +#define ENABLE_DIS_FOG_MASK ((1<<7)|(1<<6)) +#define ENABLE_FOG ((1<<7)|(1<<6)) +#define DISABLE_FOG (1<<7) +#define ENABLE_DIS_ALPHA_TEST_MASK ((1<<5)|(1<<4)) +#define ENABLE_ALPHA_TEST ((1<<5)|(1<<4)) +#define DISABLE_ALPHA_TEST (1<<5) +#define ENABLE_DIS_CBLEND_MASK ((1<<3)|(1<<2)) +#define ENABLE_COLOR_BLEND ((1<<3)|(1<<2)) +#define DISABLE_COLOR_BLEND (1<<3) +#define ENABLE_DIS_DEPTH_TEST_MASK ((1<<1)|1) +#define ENABLE_DEPTH_TEST ((1<<1)|1) +#define DISABLE_DEPTH_TEST (1<<1) + +/* _3DSTATE_ENABLES_2, p138 */ +#define _3DSTATE_ENABLES_2_CMD (CMD_3D|(0x4<<24)) +#define ENABLE_STENCIL_WRITE ((1<<21)|(1<<20)) +#define DISABLE_STENCIL_WRITE (1<<21) +#define ENABLE_TEX_CACHE ((1<<17)|(1<<16)) +#define DISABLE_TEX_CACHE (1<<17) +#define ENABLE_DITHER ((1<<9)|(1<<8)) +#define DISABLE_DITHER (1<<9) +#define ENABLE_COLOR_MASK (1<<10) +#define WRITEMASK_ALPHA (1<<7) +#define WRITEMASK_ALPHA_SHIFT 7 +#define WRITEMASK_RED (1<<6) +#define WRITEMASK_RED_SHIFT 6 +#define WRITEMASK_GREEN (1<<5) +#define WRITEMASK_GREEN_SHIFT 5 +#define WRITEMASK_BLUE (1<<4) +#define WRITEMASK_BLUE_SHIFT 4 +#define WRITEMASK_MASK ((1<<4)|(1<<5)|(1<<6)|(1<<7)) +#define ENABLE_COLOR_WRITE ((1<<3)|(1<<2)) +#define DISABLE_COLOR_WRITE (1<<3) +#define ENABLE_DIS_DEPTH_WRITE_MASK 0x3 +#define ENABLE_DEPTH_WRITE ((1<<1)|1) +#define DISABLE_DEPTH_WRITE (1<<1) + +/* _3DSTATE_FOG_COLOR, p139 */ +#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24)) +#define FOG_COLOR_RED(x) ((x)<<16) +#define FOG_COLOR_GREEN(x) ((x)<<8) +#define FOG_COLOR_BLUE(x) (x) + +/* _3DSTATE_FOG_MODE, p140 */ +#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2) +/* Dword 1 */ +#define FOGFUNC_ENABLE (1<<31) +#define FOGFUNC_VERTEX 0 +#define FOGFUNC_PIXEL_EXP (1<<28) +#define FOGFUNC_PIXEL_EXP2 (2<<28) +#define FOGFUNC_PIXEL_LINEAR (3<<28) +#define FOGSRC_INDEX_Z (1<<27) +#define FOGSRC_INDEX_W ((1<<27)|(1<<25)) +#define FOG_LINEAR_CONST (1<<24) +#define FOG_CONST_1(x) ((x)<<4) +#define ENABLE_FOG_DENSITY (1<<23) +/* Dword 2 */ +#define FOG_CONST_2(x) (x) +/* Dword 3 */ +#define FOG_DENSITY(x) (x) + +/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p142 */ +#define _3DSTATE_INDPT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24)) +#define ENABLE_INDPT_ALPHA_BLEND ((1<<23)|(1<<22)) +#define DISABLE_INDPT_ALPHA_BLEND (1<<23) +#define ALPHA_BLENDFUNC_MASK 0x3f0000 +#define ENABLE_ALPHA_BLENDFUNC (1<<21) +#define ABLENDFUNC_ADD 0 +#define ABLENDFUNC_SUB (1<<16) +#define ABLENDFUNC_RVSE_SUB (2<<16) +#define ABLENDFUNC_MIN (3<<16) +#define ABLENDFUNC_MAX (4<<16) +#define SRC_DST_ABLEND_MASK 0xfff +#define ENABLE_SRC_ABLEND_FACTOR (1<<11) +#define SRC_ABLEND_FACT(x) ((x)<<6) +#define ENABLE_DST_ABLEND_FACTOR (1<<5) +#define DST_ABLEND_FACT(x) (x) + +#define BLENDFACTOR_ZERO 0x01 +#define BLENDFACTOR_ONE 0x02 +#define BLENDFACTOR_SRC_COLR 0x03 +#define BLENDFACTOR_INV_SRC_COLR 0x04 +#define BLENDFACTOR_SRC_ALPHA 0x05 +#define BLENDFACTOR_INV_SRC_ALPHA 0x06 +#define BLENDFACTOR_DST_ALPHA 0x07 +#define BLENDFACTOR_INV_DST_ALPHA 0x08 +#define BLENDFACTOR_DST_COLR 0x09 +#define BLENDFACTOR_INV_DST_COLR 0x0a +#define BLENDFACTOR_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACTOR_CONST_COLOR 0x0c +#define BLENDFACTOR_INV_CONST_COLOR 0x0d +#define BLENDFACTOR_CONST_ALPHA 0x0e +#define BLENDFACTOR_INV_CONST_ALPHA 0x0f +#define BLENDFACTOR_MASK 0x0f + +/* _3DSTATE_MAP_BLEND_ARG, p152 */ +#define _3DSTATE_MAP_BLEND_ARG_CMD(stage) (CMD_3D|(0x0e<<24)|((stage)<<20)) + +#define TEXPIPE_COLOR 0 +#define TEXPIPE_ALPHA (1<<18) +#define TEXPIPE_KILL (2<<18) +#define TEXBLEND_ARG0 0 +#define TEXBLEND_ARG1 (1<<15) +#define TEXBLEND_ARG2 (2<<15) +#define TEXBLEND_ARG3 (3<<15) +#define TEXBLENDARG_MODIFY_PARMS (1<<6) +#define TEXBLENDARG_REPLICATE_ALPHA (1<<5) +#define TEXBLENDARG_INV_ARG (1<<4) +#define TEXBLENDARG_ONE 0 +#define TEXBLENDARG_FACTOR 0x01 +#define TEXBLENDARG_ACCUM 0x02 +#define TEXBLENDARG_DIFFUSE 0x03 +#define TEXBLENDARG_SPEC 0x04 +#define TEXBLENDARG_CURRENT 0x05 +#define TEXBLENDARG_TEXEL0 0x06 +#define TEXBLENDARG_TEXEL1 0x07 +#define TEXBLENDARG_TEXEL2 0x08 +#define TEXBLENDARG_TEXEL3 0x09 +#define TEXBLENDARG_FACTOR_N 0x0e + +/* _3DSTATE_MAP_BLEND_OP, p155 */ +#define _3DSTATE_MAP_BLEND_OP_CMD(stage) (CMD_3D|(0x0d<<24)|((stage)<<20)) +#if 0 +# define TEXPIPE_COLOR 0 +# define TEXPIPE_ALPHA (1<<18) +# define TEXPIPE_KILL (2<<18) +#endif +#define ENABLE_TEXOUTPUT_WRT_SEL (1<<17) +#define TEXOP_OUTPUT_CURRENT 0 +#define TEXOP_OUTPUT_ACCUM (1<<15) +#define ENABLE_TEX_CNTRL_STAGE ((1<<12)|(1<<11)) +#define DISABLE_TEX_CNTRL_STAGE (1<<12) +#define TEXOP_SCALE_SHIFT 9 +#define TEXOP_SCALE_1X (0 << TEXOP_SCALE_SHIFT) +#define TEXOP_SCALE_2X (1 << TEXOP_SCALE_SHIFT) +#define TEXOP_SCALE_4X (2 << TEXOP_SCALE_SHIFT) +#define TEXOP_MODIFY_PARMS (1<<8) +#define TEXOP_LAST_STAGE (1<<7) +#define TEXBLENDOP_KILLPIXEL 0x02 +#define TEXBLENDOP_ARG1 0x01 +#define TEXBLENDOP_ARG2 0x02 +#define TEXBLENDOP_MODULATE 0x03 +#define TEXBLENDOP_ADD 0x06 +#define TEXBLENDOP_ADDSIGNED 0x07 +#define TEXBLENDOP_BLEND 0x08 +#define TEXBLENDOP_BLEND_AND_ADD 0x09 +#define TEXBLENDOP_SUBTRACT 0x0a +#define TEXBLENDOP_DOT3 0x0b +#define TEXBLENDOP_DOT4 0x0c +#define TEXBLENDOP_MODULATE_AND_ADD 0x0d +#define TEXBLENDOP_MODULATE_2X_AND_ADD 0x0e +#define TEXBLENDOP_MODULATE_4X_AND_ADD 0x0f + +/* _3DSTATE_MAP_BUMP_TABLE, p160 TODO */ +/* _3DSTATE_MAP_COLOR_CHROMA_KEY, p161 TODO */ + +#define _3DSTATE_MAP_COORD_TRANSFORM ((3<<29)|(0x1d<<24)|(0x8c<<16)) +#define DISABLE_TEX_TRANSFORM (1<<28) +#define TEXTURE_SET(x) (x<<29) + +#define _3DSTATE_VERTEX_TRANSFORM ((3<<29)|(0x1d<<24)|(0x8b<<16)) +#define DISABLE_VIEWPORT_TRANSFORM (1<<31) +#define DISABLE_PERSPECTIVE_DIVIDE (1<<29) + +/* _3DSTATE_MAP_COORD_SET_BINDINGS, p162 */ +#define _3DSTATE_MAP_COORD_SETBIND_CMD (CMD_3D|(0x1d<<24)|(0x02<<16)) +#define TEXBIND_MASK3 ((1<<15)|(1<<14)|(1<<13)|(1<<12)) +#define TEXBIND_MASK2 ((1<<11)|(1<<10)|(1<<9)|(1<<8)) +#define TEXBIND_MASK1 ((1<<7)|(1<<6)|(1<<5)|(1<<4)) +#define TEXBIND_MASK0 ((1<<3)|(1<<2)|(1<<1)|1) + +#define TEXBIND_SET3(x) ((x)<<12) +#define TEXBIND_SET2(x) ((x)<<8) +#define TEXBIND_SET1(x) ((x)<<4) +#define TEXBIND_SET0(x) (x) + +#define TEXCOORDSRC_KEEP 0 +#define TEXCOORDSRC_DEFAULT 0x01 +#define TEXCOORDSRC_VTXSET_0 0x08 +#define TEXCOORDSRC_VTXSET_1 0x09 +#define TEXCOORDSRC_VTXSET_2 0x0a +#define TEXCOORDSRC_VTXSET_3 0x0b +#define TEXCOORDSRC_VTXSET_4 0x0c +#define TEXCOORDSRC_VTXSET_5 0x0d +#define TEXCOORDSRC_VTXSET_6 0x0e +#define TEXCOORDSRC_VTXSET_7 0x0f + +#define MAP_UNIT(unit) ((unit)<<16) +#define MAP_UNIT_MASK (0x7<<16) + +/* _3DSTATE_MAP_COORD_SETS, p164 */ +#define _3DSTATE_MAP_COORD_SET_CMD (CMD_3D|(0x1c<<24)|(0x01<<19)) +#define TEXCOORD_SET(n) ((n)<<16) +#define ENABLE_TEXCOORD_PARAMS (1<<15) +#define TEXCOORDS_ARE_NORMAL (1<<14) +#define TEXCOORDS_ARE_IN_TEXELUNITS 0 +#define TEXCOORDTYPE_CARTESIAN 0 +#define TEXCOORDTYPE_HOMOGENEOUS (1<<11) +#define TEXCOORDTYPE_VECTOR (2<<11) +#define TEXCOORDTYPE_MASK (0x7<<11) +#define ENABLE_ADDR_V_CNTL (1<<7) +#define ENABLE_ADDR_U_CNTL (1<<3) +#define TEXCOORD_ADDR_V_MODE(x) ((x)<<4) +#define TEXCOORD_ADDR_U_MODE(x) (x) +#define TEXCOORDMODE_WRAP 0 +#define TEXCOORDMODE_MIRROR 1 +#define TEXCOORDMODE_CLAMP 2 +#define TEXCOORDMODE_WRAP_SHORTEST 3 +#define TEXCOORDMODE_CLAMP_BORDER 4 +#define TEXCOORD_ADDR_V_MASK 0x70 +#define TEXCOORD_ADDR_U_MASK 0x7 + +/* _3DSTATE_MAP_CUBE, p168 TODO */ +#define _3DSTATE_MAP_CUBE (CMD_3D|(0x1c<<24)|(0x0a<<19)) +#define CUBE_NEGX_ENABLE (1<<5) +#define CUBE_POSX_ENABLE (1<<4) +#define CUBE_NEGY_ENABLE (1<<3) +#define CUBE_POSY_ENABLE (1<<2) +#define CUBE_NEGZ_ENABLE (1<<1) +#define CUBE_POSZ_ENABLE (1<<0) + +#define _3DSTATE_MAP_INFO_CMD (CMD_3D|(0x1d<<24)|(0x0<<16)|3) +#define TEXMAP_INDEX(x) ((x)<<28) +#define MAP_SURFACE_8BIT (1<<24) +#define MAP_SURFACE_16BIT (2<<24) +#define MAP_SURFACE_32BIT (3<<24) +#define MAP_FORMAT_2D (0) +#define MAP_FORMAT_3D_CUBE (1<<11) + +/* _3DSTATE_MODES_1, p190 */ +#define _3DSTATE_MODES_1_CMD (CMD_3D|(0x08<<24)) +#define BLENDFUNC_MASK 0x3f0000 +#define ENABLE_COLR_BLND_FUNC (1<<21) +#define BLENDFUNC_ADD 0 +#define BLENDFUNC_SUB (1<<16) +#define BLENDFUNC_RVRSE_SUB (2<<16) +#define BLENDFUNC_MIN (3<<16) +#define BLENDFUNC_MAX (4<<16) +#define SRC_DST_BLND_MASK 0xfff +#define ENABLE_SRC_BLND_FACTOR (1<<11) +#define ENABLE_DST_BLND_FACTOR (1<<5) +#define SRC_BLND_FACT(x) ((x)<<6) +#define DST_BLND_FACT(x) (x) + +/* _3DSTATE_MODES_2, p192 */ +#define _3DSTATE_MODES_2_CMD (CMD_3D|(0x0f<<24)) +#define ENABLE_GLOBAL_DEPTH_BIAS (1<<22) +#define GLOBAL_DEPTH_BIAS(x) ((x)<<14) +#define ENABLE_ALPHA_TEST_FUNC (1<<13) +#define ENABLE_ALPHA_REF_VALUE (1<<8) +#define ALPHA_TEST_FUNC(x) ((x)<<9) +#define ALPHA_REF_VALUE(x) (x) + +#define ALPHA_TEST_REF_MASK 0x3fff + +/* _3DSTATE_MODES_3, p193 */ +#define _3DSTATE_MODES_3_CMD (CMD_3D|(0x02<<24)) +#define DEPTH_TEST_FUNC_MASK 0x1f0000 +#define ENABLE_DEPTH_TEST_FUNC (1<<20) +/* Uses COMPAREFUNC */ +#define DEPTH_TEST_FUNC(x) ((x)<<16) +#define ENABLE_ALPHA_SHADE_MODE (1<<11) +#define ENABLE_FOG_SHADE_MODE (1<<9) +#define ENABLE_SPEC_SHADE_MODE (1<<7) +#define ENABLE_COLOR_SHADE_MODE (1<<5) +#define ALPHA_SHADE_MODE(x) ((x)<<10) +#define FOG_SHADE_MODE(x) ((x)<<8) +#define SPEC_SHADE_MODE(x) ((x)<<6) +#define COLOR_SHADE_MODE(x) ((x)<<4) +#define CULLMODE_MASK 0xf +#define ENABLE_CULL_MODE (1<<3) +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_CW 2 +#define CULLMODE_CCW 3 + +#define SHADE_MODE_LINEAR 0 +#define SHADE_MODE_FLAT 0x1 + +/* _3DSTATE_MODES_4, p195 */ +#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x16<<24)) +#define ENABLE_LOGIC_OP_FUNC (1<<23) +#define LOGIC_OP_FUNC(x) ((x)<<18) +#define LOGICOP_MASK ((1<<18)|(1<<19)|(1<<20)|(1<<21)) +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 0x1 +#define LOGICOP_AND_INV 0x2 +#define LOGICOP_COPY_INV 0x3 +#define LOGICOP_AND_RVRSE 0x4 +#define LOGICOP_INV 0x5 +#define LOGICOP_XOR 0x6 +#define LOGICOP_NAND 0x7 +#define LOGICOP_AND 0x8 +#define LOGICOP_EQUIV 0x9 +#define LOGICOP_NOOP 0xa +#define LOGICOP_OR_INV 0xb +#define LOGICOP_COPY 0xc +#define LOGICOP_OR_RVRSE 0xd +#define LOGICOP_OR 0xe +#define LOGICOP_SET 0xf +#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00)) +#define ENABLE_STENCIL_TEST_MASK (1<<17) +#define STENCIL_TEST_MASK(x) ((x)<<8) +#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff)) +#define ENABLE_STENCIL_WRITE_MASK (1<<16) +#define STENCIL_WRITE_MASK(x) ((x)&0xff) + +/* _3DSTATE_MODES_5, p196 */ +#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24)) +#define ENABLE_SPRITE_POINT_TEX (1<<23) +#define SPRITE_POINT_TEX_ON (1<<22) +#define SPRITE_POINT_TEX_OFF 0 +#define FLUSH_RENDER_CACHE (1<<18) +#define FLUSH_TEXTURE_CACHE (1<<16) +#define FIXED_LINE_WIDTH_MASK 0xfc00 +#define ENABLE_FIXED_LINE_WIDTH (1<<15) +#define FIXED_LINE_WIDTH(x) ((x)<<10) +#define FIXED_POINT_WIDTH_MASK 0x3ff +#define ENABLE_FIXED_POINT_WIDTH (1<<9) +#define FIXED_POINT_WIDTH(x) (x) + +/* _3DSTATE_RASTERIZATION_RULES, p198 */ +#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24)) +#define ENABLE_POINT_RASTER_RULE (1<<15) +#define OGL_POINT_RASTER_RULE (1<<13) +#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) +#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) +#define ENABLE_TRI_STRIP_PROVOKE_VRTX (1<<2) +#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) +#define TRI_STRIP_PROVOKE_VRTX(x) (x) + +/* _3DSTATE_SCISSOR_ENABLE, p200 */ +#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19)) +#define ENABLE_SCISSOR_RECT ((1<<1) | 1) +#define DISABLE_SCISSOR_RECT (1<<1) + +/* _3DSTATE_SCISSOR_RECTANGLE_0, p201 */ +#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1) +/* Dword 1 */ +#define SCISSOR_RECT_0_YMIN(x) ((x)<<16) +#define SCISSOR_RECT_0_XMIN(x) (x) +/* Dword 2 */ +#define SCISSOR_RECT_0_YMAX(x) ((x)<<16) +#define SCISSOR_RECT_0_XMAX(x) (x) + +/* _3DSTATE_STENCIL_TEST, p202 */ +#define _3DSTATE_STENCIL_TEST_CMD (CMD_3D|(0x09<<24)) +#define ENABLE_STENCIL_PARMS (1<<23) +#define STENCIL_OPS_MASK (0xffc000) +#define STENCIL_FAIL_OP(x) ((x)<<20) +#define STENCIL_PASS_DEPTH_FAIL_OP(x) ((x)<<17) +#define STENCIL_PASS_DEPTH_PASS_OP(x) ((x)<<14) + +#define ENABLE_STENCIL_TEST_FUNC_MASK ((1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)) +#define ENABLE_STENCIL_TEST_FUNC (1<<13) +/* Uses COMPAREFUNC */ +#define STENCIL_TEST_FUNC(x) ((x)<<9) +#define STENCIL_REF_VALUE_MASK ((1<<8)|0xff) +#define ENABLE_STENCIL_REF_VALUE (1<<8) +#define STENCIL_REF_VALUE(x) (x) + +/* _3DSTATE_VERTEX_FORMAT, p204 */ +#define _3DSTATE_VFT0_CMD (CMD_3D|(0x05<<24)) +#define VFT0_POINT_WIDTH (1<<12) +#define VFT0_TEX_COUNT_MASK (7<<8) +#define VFT0_TEX_COUNT_SHIFT 8 +#define VFT0_TEX_COUNT(x) ((x)<<8) +#define VFT0_SPEC (1<<7) +#define VFT0_DIFFUSE (1<<6) +#define VFT0_DEPTH_OFFSET (1<<5) +#define VFT0_XYZ (1<<1) +#define VFT0_XYZW (2<<1) +#define VFT0_XY (3<<1) +#define VFT0_XYW (4<<1) +#define VFT0_XYZW_MASK (7<<1) + +/* _3DSTATE_VERTEX_FORMAT_2, p206 */ +#define _3DSTATE_VERTEX_FORMAT_2_CMD (CMD_3D|(0x0a<<24)) +#define VFT1_TEX7_FMT(x) ((x)<<14) +#define VFT1_TEX6_FMT(x) ((x)<<12) +#define VFT1_TEX5_FMT(x) ((x)<<10) +#define VFT1_TEX4_FMT(x) ((x)<<8) +#define VFT1_TEX3_FMT(x) ((x)<<6) +#define VFT1_TEX2_FMT(x) ((x)<<4) +#define VFT1_TEX1_FMT(x) ((x)<<2) +#define VFT1_TEX0_FMT(x) (x) +#define VFT1_TEX0_MASK 3 +#define VFT1_TEX1_SHIFT 2 +#define TEXCOORDFMT_2D 0 +#define TEXCOORDFMT_3D 1 +#define TEXCOORDFMT_4D 2 +#define TEXCOORDFMT_1D 3 + +/*New stuff picked up along the way */ + +#define MLC_LOD_BIAS_MASK ((1<<7)-1) + +/* _3DSTATE_VERTEX_TRANSFORM, p207 */ +#define _3DSTATE_VERTEX_TRANS_CMD (CMD_3D|(0x1d<<24)|(0x8b<<16)|0) +#define _3DSTATE_VERTEX_TRANS_MTX_CMD (CMD_3D|(0x1d<<24)|(0x8b<<16)|6) +/* Dword 1 */ +#define ENABLE_VIEWPORT_TRANSFORM ((1<<31)|(1<<30)) +#define DISABLE_VIEWPORT_TRANSFORM (1<<31) +#define ENABLE_PERSP_DIVIDE ((1<<29)|(1<<28)) +#define DISABLE_PERSP_DIVIDE (1<<29) +#define VRTX_TRANS_LOAD_MATRICES 0x7421 +#define VRTX_TRANS_NO_LOAD_MATRICES 0x0000 +/* Dword 2 -> 7 are matrix elements */ + +/* _3DSTATE_W_STATE, p209 */ +#define _3DSTATE_W_STATE_CMD (CMD_3D|(0x1d<<24)|(0x8d<<16)|1) +/* Dword 1 */ +#define MAGIC_W_STATE_DWORD1 0x00000008 +/* Dword 2 */ +#define WFAR_VALUE(x) (x) + +/* Stipple command, carried over from the i810, apparently: + */ +#define _3DSTATE_STIPPLE (CMD_3D|(0x1d<<24)|(0x83<<16)) +#define ST1_ENABLE (1<<16) +#define ST1_MASK (0xffff) + +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D|(0x1d<<24)|(0x04<<16)) +#define I1_LOAD_S(n) (1<<((n)+4)) +#define S3_POINT_WIDTH_SHIFT 23 +#define S3_LINE_WIDTH_SHIFT 19 +#define S3_ALPHA_SHADE_MODE_SHIFT 18 +#define S3_FOG_SHADE_MODE_SHIFT 17 +#define S3_SPEC_SHADE_MODE_SHIFT 16 +#define S3_COLOR_SHADE_MODE_SHIFT 15 +#define S3_CULL_MODE_SHIFT 13 +#define S3_CULLMODE_BOTH (0) +#define S3_CULLMODE_NONE (1<<13) +#define S3_CULLMODE_CW (2<<13) +#define S3_CULLMODE_CCW (3<<13) +#define S3_POINT_WIDTH_PRESENT (1<<12) +#define S3_SPEC_FOG_PRESENT (1<<11) +#define S3_DIFFUSE_PRESENT (1<<10) +#define S3_DEPTH_OFFSET_PRESENT (1<<9) +#define S3_POSITION_SHIFT 6 +#define S3_VERTEXHAS_XYZ (1<<6) +#define S3_VERTEXHAS_XYZW (2<<6) +#define S3_VERTEXHAS_XY (3<<6) +#define S3_VERTEXHAS_XYW (4<<6) +#define S3_ENABLE_SPEC_ADD (1<<5) +#define S3_ENABLE_FOG (1<<4) +#define S3_ENABLE_LOCAL_DEPTH_BIAS (1<<3) +#define S3_ENABLE_SPRITE_POINT (1<<1) +#define S3_ENABLE_ANTIALIASING 1 +#define S8_ENABLE_ALPHA_TEST (1<<31) +#define S8_ALPHA_TEST_FUNC_SHIFT 28 +#define S8_ALPHA_REFVALUE_SHIFT 20 +#define S8_ENABLE_DEPTH_TEST (1<<19) +#define S8_DEPTH_TEST_FUNC_SHIFT 16 +#define S8_ENABLE_COLOR_BLEND (1<<15) +#define S8_COLOR_BLEND_FUNC_SHIFT 12 +#define S8_BLENDFUNC_ADD (0) +#define S8_BLENDFUNC_SUB (1<<12) +#define S8_BLENDFUNC_RVRSE_SUB (2<<12) +#define S8_BLENDFUNC_MIN (3<<12) +#define S8_BLENDFUNC_MAX (4<<12) +#define S8_SRC_BLEND_FACTOR_SHIFT 8 +#define S8_DST_BLEND_FACTOR_SHIFT 4 +#define S8_ENABLE_DEPTH_BUFFER_WRITE (1<<3) +#define S8_ENABLE_COLOR_BUFFER_WRITE (1<<2) + +#define _3DSTATE_LOAD_STATE_IMMEDIATE_2 (CMD_3D|(0x1d<<24)|(0x03<<16)) +#define LOAD_TEXTURE_MAP(x) (1<<((x)+11)) +#define LOAD_TEXTURE_BLEND_STAGE(x) (1<<((x)+7)) +#define LOAD_GLOBAL_COLOR_FACTOR (1<<6) + +#define TM0S0_ADDRESS_MASK 0xfffffffc +#define TM0S0_USE_FENCE (1<<1) + +#define TM0S1_HEIGHT_SHIFT 21 +#define TM0S1_WIDTH_SHIFT 10 +#define TM0S1_PALETTE_SELECT (1<<9) +#define TM0S1_MAPSURF_FORMAT_MASK (0x7 << 6) +#define TM0S1_MAPSURF_FORMAT_SHIFT 6 +#define MAPSURF_8BIT_INDEXED (0<<6) +#define MAPSURF_8BIT (1<<6) +#define MAPSURF_16BIT (2<<6) +#define MAPSURF_32BIT (3<<6) +#define MAPSURF_411 (4<<6) +#define MAPSURF_422 (5<<6) +#define MAPSURF_COMPRESSED (6<<6) +#define MAPSURF_4BIT_INDEXED (7<<6) +#define TM0S1_MT_FORMAT_MASK (0x7 << 3) +#define TM0S1_MT_FORMAT_SHIFT 3 +#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */ +#define MT_8BIT_IDX_RGB565 (0<<3) /* SURFACE_8BIT_INDEXED */ +#define MT_8BIT_IDX_ARGB1555 (1<<3) +#define MT_8BIT_IDX_ARGB4444 (2<<3) +#define MT_8BIT_IDX_AY88 (3<<3) +#define MT_8BIT_IDX_ABGR8888 (4<<3) +#define MT_8BIT_IDX_BUMP_88DVDU (5<<3) +#define MT_8BIT_IDX_BUMP_655LDVDU (6<<3) +#define MT_8BIT_IDX_ARGB8888 (7<<3) +#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */ +#define MT_8BIT_L8 (1<<3) +#define MT_8BIT_A8 (4<<3) +#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */ +#define MT_16BIT_ARGB1555 (1<<3) +#define MT_16BIT_ARGB4444 (2<<3) +#define MT_16BIT_AY88 (3<<3) +#define MT_16BIT_DIB_ARGB1555_8888 (4<<3) +#define MT_16BIT_BUMP_88DVDU (5<<3) +#define MT_16BIT_BUMP_655LDVDU (6<<3) +#define MT_16BIT_DIB_RGB565_8888 (7<<3) +#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */ +#define MT_32BIT_ABGR8888 (1<<3) +#define MT_32BIT_XRGB8888 (2<<3) +#define MT_32BIT_XBGR8888 (3<<3) +#define MT_32BIT_BUMP_XLDVDU_8888 (6<<3) +#define MT_32BIT_DIB_8888 (7<<3) +#define MT_411_YUV411 (0<<3) /* SURFACE_411 */ +#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ +#define MT_422_YCRCB_NORMAL (1<<3) +#define MT_422_YCRCB_SWAPUV (2<<3) +#define MT_422_YCRCB_SWAPUVY (3<<3) +#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */ +#define MT_COMPRESS_DXT2_3 (1<<3) +#define MT_COMPRESS_DXT4_5 (2<<3) +#define MT_COMPRESS_FXT1 (3<<3) +#define TM0S1_COLORSPACE_CONVERSION (1 << 2) +#define TM0S1_TILED_SURFACE (1 << 1) +#define TM0S1_TILE_WALK (1 << 0) + +#define TM0S2_PITCH_SHIFT 21 +#define TM0S2_CUBE_FACE_ENA_SHIFT 15 +#define TM0S2_CUBE_FACE_ENA_MASK (1<<15) +#define TM0S2_MAP_FORMAT (1<<14) +#define TM0S2_MAP_2D (0<<14) +#define TM0S2_MAP_3D_CUBE (1<<14) +#define TM0S2_VERTICAL_LINE_STRIDE (1<<13) +#define TM0S2_VERITCAL_LINE_STRIDE_OFF (1<<12) +#define TM0S2_OUTPUT_CHAN_SHIFT 10 +#define TM0S2_OUTPUT_CHAN_MASK (3<<10) + +#define TM0S3_MIP_FILTER_MASK (0x3<<30) +#define TM0S3_MIP_FILTER_SHIFT 30 +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 +#define TM0S3_MAG_FILTER_MASK (0x3<<28) +#define TM0S3_MAG_FILTER_SHIFT 28 +#define TM0S3_MIN_FILTER_MASK (0x3<<26) +#define TM0S3_MIN_FILTER_SHIFT 26 +#define FILTER_NEAREST 0 +#define FILTER_LINEAR 1 +#define FILTER_ANISOTROPIC 2 + +#define TM0S3_LOD_BIAS_SHIFT 17 +#define TM0S3_LOD_BIAS_MASK (0x1ff<<17) +#define TM0S3_MAX_MIP_SHIFT 9 +#define TM0S3_MAX_MIP_MASK (0xff<<9) +#define TM0S3_MIN_MIP_SHIFT 3 +#define TM0S3_MIN_MIP_MASK (0x3f<<3) +#define TM0S3_KILL_PIXEL (1<<2) +#define TM0S3_KEYED_FILTER (1<<1) +#define TM0S3_CHROMA_KEY (1<<0) + +/* _3DSTATE_MAP_TEXEL_STREAM, p188 */ +#define _3DSTATE_MAP_TEX_STREAM_CMD (CMD_3D|(0x1c<<24)|(0x05<<19)) +#define DISABLE_TEX_STREAM_BUMP (1<<12) +#define ENABLE_TEX_STREAM_BUMP ((1<<12)|(1<<11)) +#define TEX_MODIFY_UNIT_0 0 +#define TEX_MODIFY_UNIT_1 (1<<8) +#define ENABLE_TEX_STREAM_COORD_SET (1<<7) +#define TEX_STREAM_COORD_SET(x) ((x)<<4) +#define ENABLE_TEX_STREAM_MAP_IDX (1<<3) +#define TEX_STREAM_MAP_IDX(x) (x) + +#define FLUSH_MAP_CACHE (1<<0) + +#define _3DSTATE_MAP_FILTER_CMD (CMD_3D|(0x1c<<24)|(0x02<<19)) +#define FILTER_TEXMAP_INDEX(x) ((x) << 16) +#define MAG_MODE_FILTER_ENABLE (1 << 5) +#define MIN_MODE_FILTER_ENABLE (1 << 2) +#define MAG_MAPFILTER_NEAREST (0 << 3) +#define MAG_MAPFILTER_LINEAR (1 << 3) +#define MAG_MAPFILTER_ANISOTROPIC (2 << 3) +#define MIN_MAPFILTER_NEAREST (0) +#define MIN_MAPFILTER_LINEAR (1) +#define MIN_MAPFILTER_ANISOTROPIC (2) +#define ENABLE_KEYS (1<<15) +#define DISABLE_COLOR_KEY 0 +#define DISABLE_CHROMA_KEY 0 +#define DISABLE_KILL_PIXEL 0 +#define ENABLE_MIP_MODE_FILTER (1 << 9) +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 + +#endif diff -Nru intel-gpu-tools-1.0.2+git20110324+d641f0f/lib/i915_3d.h intel-gpu-tools-1.0.2+git20110408+1c9ede3/lib/i915_3d.h --- intel-gpu-tools-1.0.2+git20110324+d641f0f/lib/i915_3d.h 1970-01-01 00:00:00.000000000 +0000 +++ intel-gpu-tools-1.0.2+git20110408+1c9ede3/lib/i915_3d.h 2011-04-08 19:58:59.000000000 +0000 @@ -0,0 +1,619 @@ +/* -*- c-basic-offset: 4 -*- */ +/* + * Copyright © 2006,2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt + * Chris Wilson + * + */ + +/* Each instruction is 3 dwords long, though most don't require all + * this space. Maximum of 123 instructions. Smaller maxes per insn + * type. + */ +#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16)) + +#define REG_TYPE_R 0 /* temporary regs, no need to + * dcl, must be written before + * read -- Preserved between + * phases. + */ +#define REG_TYPE_T 1 /* Interpolated values, must be + * dcl'ed before use. + * + * 0..7: texture coord, + * 8: diffuse spec, + * 9: specular color, + * 10: fog parameter in w. + */ +#define REG_TYPE_CONST 2 /* Restriction: only one const + * can be referenced per + * instruction, though it may be + * selected for multiple inputs. + * Constants not initialized + * default to zero. + */ +#define REG_TYPE_S 3 /* sampler */ +#define REG_TYPE_OC 4 /* output color (rgba) */ +#define REG_TYPE_OD 5 /* output depth (w), xyz are + * temporaries. If not written, + * interpolated depth is used? + */ +#define REG_TYPE_U 6 /* unpreserved temporaries */ +#define REG_TYPE_MASK 0x7 +#define REG_TYPE_SHIFT 4 +#define REG_NR_MASK 0xf + +/* REG_TYPE_T: +*/ +#define T_TEX0 0 +#define T_TEX1 1 +#define T_TEX2 2 +#define T_TEX3 3 +#define T_TEX4 4 +#define T_TEX5 5 +#define T_TEX6 6 +#define T_TEX7 7 +#define T_DIFFUSE 8 +#define T_SPECULAR 9 +#define T_FOG_W 10 /* interpolated fog is in W coord */ + +/* Arithmetic instructions */ + +/* .replicate_swizzle == selection and replication of a particular + * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww + */ +#define A0_NOP (0x0<<24) /* no operation */ +#define A0_ADD (0x1<<24) /* dst = src0 + src1 */ +#define A0_MOV (0x2<<24) /* dst = src0 */ +#define A0_MUL (0x3<<24) /* dst = src0 * src1 */ +#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */ +#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */ +#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */ +#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */ +#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */ +#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */ +#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */ +#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */ +#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */ +#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */ +#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR (0x10<<24) /* dst = floor(src0) */ +#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ +#define A0_TRC (0x12<<24) /* dst = int(src0) */ +#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */ +#define A0_DEST_SATURATE (1<<22) +#define A0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +#define A0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define A0_DEST_CHANNEL_X (1<<10) +#define A0_DEST_CHANNEL_Y (2<<10) +#define A0_DEST_CHANNEL_Z (4<<10) +#define A0_DEST_CHANNEL_W (8<<10) +#define A0_DEST_CHANNEL_ALL (0xf<<10) +#define A0_DEST_CHANNEL_SHIFT 10 +#define A0_SRC0_TYPE_SHIFT 7 +#define A0_SRC0_NR_SHIFT 2 + +#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y) +#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z) + +#define SRC_X 0 +#define SRC_Y 1 +#define SRC_Z 2 +#define SRC_W 3 +#define SRC_ZERO 4 +#define SRC_ONE 5 + +#define A1_SRC0_CHANNEL_X_NEGATE (1<<31) +#define A1_SRC0_CHANNEL_X_SHIFT 28 +#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27) +#define A1_SRC0_CHANNEL_Y_SHIFT 24 +#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23) +#define A1_SRC0_CHANNEL_Z_SHIFT 20 +#define A1_SRC0_CHANNEL_W_NEGATE (1<<19) +#define A1_SRC0_CHANNEL_W_SHIFT 16 +#define A1_SRC1_TYPE_SHIFT 13 +#define A1_SRC1_NR_SHIFT 8 +#define A1_SRC1_CHANNEL_X_NEGATE (1<<7) +#define A1_SRC1_CHANNEL_X_SHIFT 4 +#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3) +#define A1_SRC1_CHANNEL_Y_SHIFT 0 + +#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31) +#define A2_SRC1_CHANNEL_Z_SHIFT 28 +#define A2_SRC1_CHANNEL_W_NEGATE (1<<27) +#define A2_SRC1_CHANNEL_W_SHIFT 24 +#define A2_SRC2_TYPE_SHIFT 21 +#define A2_SRC2_NR_SHIFT 16 +#define A2_SRC2_CHANNEL_X_NEGATE (1<<15) +#define A2_SRC2_CHANNEL_X_SHIFT 12 +#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11) +#define A2_SRC2_CHANNEL_Y_SHIFT 8 +#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7) +#define A2_SRC2_CHANNEL_Z_SHIFT 4 +#define A2_SRC2_CHANNEL_W_NEGATE (1<<3) +#define A2_SRC2_CHANNEL_W_SHIFT 0 + +/* Texture instructions */ +#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared + * sampler and address, and output + * filtered texel data to destination + * register */ +#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a + * perspective divide of the texture + * coordinate .xyz values by .w before + * sampling. */ +#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the + * computed LOD by w. Only S4.6 two's + * comp is used. This implies that a + * float to fixed conversion is + * done. */ +#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling + * operation. Simply kills the pixel + * if any channel of the address + * register is < 0.0. */ +#define T0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +/* Note: U (unpreserved) regs do not retain their values between + * phases (cannot be used for feedback) + * + * Note: oC and OD registers can only be used as the destination of a + * texture instruction once per phase (this is an implementation + * restriction). + */ +#define T0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ +#define T0_SAMPLER_NR_MASK (0xf<<0) + +#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ +/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ +#define T1_ADDRESS_REG_NR_SHIFT 17 +#define T2_MBZ 0 + +/* Declaration instructions */ +#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) + * register or an s (sampler) + * register. */ +#define D0_SAMPLE_TYPE_SHIFT 22 +#define D0_SAMPLE_TYPE_2D (0x0<<22) +#define D0_SAMPLE_TYPE_CUBE (0x1<<22) +#define D0_SAMPLE_TYPE_VOLUME (0x2<<22) +#define D0_SAMPLE_TYPE_MASK (0x3<<22) + +#define D0_TYPE_SHIFT 19 +/* Allow: T, S */ +#define D0_NR_SHIFT 14 +/* Allow T: 0..10, S: 0..15 */ +#define D0_CHANNEL_X (1<<10) +#define D0_CHANNEL_Y (2<<10) +#define D0_CHANNEL_Z (4<<10) +#define D0_CHANNEL_W (8<<10) +#define D0_CHANNEL_ALL (0xf<<10) +#define D0_CHANNEL_NONE (0<<10) + +#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y) +#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z) + +/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse + * or specular declarations. + * + * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) + * + * Must be zero for S (sampler) dcls + */ +#define D1_MBZ 0 +#define D2_MBZ 0 + + +/* MASK_* are the unshifted bitmasks of the destination mask in arithmetic + * operations + */ +#define MASK_X 0x1 +#define MASK_Y 0x2 +#define MASK_Z 0x4 +#define MASK_W 0x8 +#define MASK_XYZ (MASK_X | MASK_Y | MASK_Z) +#define MASK_XYZW (MASK_XYZ | MASK_W) +#define MASK_SATURATE 0x10 + +/* Temporary, undeclared regs. Preserved between phases */ +#define FS_R0 ((REG_TYPE_R << REG_TYPE_SHIFT) | 0) +#define FS_R1 ((REG_TYPE_R << REG_TYPE_SHIFT) | 1) +#define FS_R2 ((REG_TYPE_R << REG_TYPE_SHIFT) | 2) +#define FS_R3 ((REG_TYPE_R << REG_TYPE_SHIFT) | 3) + +/* Texture coordinate regs. Must be declared. */ +#define FS_T0 ((REG_TYPE_T << REG_TYPE_SHIFT) | 0) +#define FS_T1 ((REG_TYPE_T << REG_TYPE_SHIFT) | 1) +#define FS_T2 ((REG_TYPE_T << REG_TYPE_SHIFT) | 2) +#define FS_T3 ((REG_TYPE_T << REG_TYPE_SHIFT) | 3) +#define FS_T4 ((REG_TYPE_T << REG_TYPE_SHIFT) | 4) +#define FS_T5 ((REG_TYPE_T << REG_TYPE_SHIFT) | 5) +#define FS_T6 ((REG_TYPE_T << REG_TYPE_SHIFT) | 6) +#define FS_T7 ((REG_TYPE_T << REG_TYPE_SHIFT) | 7) +#define FS_T8 ((REG_TYPE_T << REG_TYPE_SHIFT) | 8) +#define FS_T9 ((REG_TYPE_T << REG_TYPE_SHIFT) | 9) +#define FS_T10 ((REG_TYPE_T << REG_TYPE_SHIFT) | 10) + +/* Constant values */ +#define FS_C0 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 0) +#define FS_C1 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 1) +#define FS_C2 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 2) +#define FS_C3 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 3) +#define FS_C4 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 4) +#define FS_C5 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 5) +#define FS_C6 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 6) +#define FS_C7 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 7) + +/* Sampler regs */ +#define FS_S0 ((REG_TYPE_S << REG_TYPE_SHIFT) | 0) +#define FS_S1 ((REG_TYPE_S << REG_TYPE_SHIFT) | 1) +#define FS_S2 ((REG_TYPE_S << REG_TYPE_SHIFT) | 2) +#define FS_S3 ((REG_TYPE_S << REG_TYPE_SHIFT) | 3) + +/* Output color */ +#define FS_OC ((REG_TYPE_OC << REG_TYPE_SHIFT) | 0) + +/* Output depth */ +#define FS_OD ((REG_TYPE_OD << REG_TYPE_SHIFT) | 0) + +/* Unpreserved temporary regs */ +#define FS_U0 ((REG_TYPE_U << REG_TYPE_SHIFT) | 0) +#define FS_U1 ((REG_TYPE_U << REG_TYPE_SHIFT) | 1) +#define FS_U2 ((REG_TYPE_U << REG_TYPE_SHIFT) | 2) +#define FS_U3 ((REG_TYPE_U << REG_TYPE_SHIFT) | 3) + +#define X_CHANNEL_SHIFT (REG_TYPE_SHIFT + 3) +#define Y_CHANNEL_SHIFT (X_CHANNEL_SHIFT + 4) +#define Z_CHANNEL_SHIFT (Y_CHANNEL_SHIFT + 4) +#define W_CHANNEL_SHIFT (Z_CHANNEL_SHIFT + 4) + +#define REG_CHANNEL_MASK 0xf + +#define REG_NR(reg) ((reg) & REG_NR_MASK) +#define REG_TYPE(reg) (((reg) >> REG_TYPE_SHIFT) & REG_TYPE_MASK) +#define REG_X(reg) (((reg) >> X_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_Y(reg) (((reg) >> Y_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_Z(reg) (((reg) >> Z_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_W(reg) (((reg) >> W_CHANNEL_SHIFT) & REG_CHANNEL_MASK) + +enum i915_fs_channel { + X_CHANNEL_VAL = 0, + Y_CHANNEL_VAL, + Z_CHANNEL_VAL, + W_CHANNEL_VAL, + ZERO_CHANNEL_VAL, + ONE_CHANNEL_VAL, + + NEG_X_CHANNEL_VAL = X_CHANNEL_VAL | 0x8, + NEG_Y_CHANNEL_VAL = Y_CHANNEL_VAL | 0x8, + NEG_Z_CHANNEL_VAL = Z_CHANNEL_VAL | 0x8, + NEG_W_CHANNEL_VAL = W_CHANNEL_VAL | 0x8, + NEG_ONE_CHANNEL_VAL = ONE_CHANNEL_VAL | 0x8 +}; + +#define i915_fs_operand(reg, x, y, z, w) \ + (reg) | \ +(x##_CHANNEL_VAL << X_CHANNEL_SHIFT) | \ +(y##_CHANNEL_VAL << Y_CHANNEL_SHIFT) | \ +(z##_CHANNEL_VAL << Z_CHANNEL_SHIFT) | \ +(w##_CHANNEL_VAL << W_CHANNEL_SHIFT) + +/** + * Construct an operand description for using a register with no swizzling + */ +#define i915_fs_operand_reg(reg) \ + i915_fs_operand(reg, X, Y, Z, W) + +#define i915_fs_operand_reg_negate(reg) \ + i915_fs_operand(reg, NEG_X, NEG_Y, NEG_Z, NEG_W) + +/** + * Returns an operand containing (0.0, 0.0, 0.0, 0.0). + */ +#define i915_fs_operand_zero() i915_fs_operand(FS_R0, ZERO, ZERO, ZERO, ZERO) + +/** + * Returns an unused operand + */ +#define i915_fs_operand_none() i915_fs_operand_zero() + +/** + * Returns an operand containing (1.0, 1.0, 1.0, 1.0). + */ +#define i915_fs_operand_one() i915_fs_operand(FS_R0, ONE, ONE, ONE, ONE) + +#define i915_get_hardware_channel_val(val, shift, negate) \ + (((val & 0x7) << shift) | ((val & 0x8) ? negate : 0)) + +/** + * Outputs a fragment shader command to declare a sampler or texture register. + */ +#define i915_fs_dcl(reg) \ + do { \ + OUT_BATCH(D0_DCL | \ + (REG_TYPE(reg) << D0_TYPE_SHIFT) | \ + (REG_NR(reg) << D0_NR_SHIFT) | \ + ((REG_TYPE(reg) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); \ + OUT_BATCH(0); \ + OUT_BATCH(0); \ + } while (0) + +#define i915_fs_texld(dest_reg, sampler_reg, address_reg) \ + do { \ + OUT_BATCH(T0_TEXLD | \ + (REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \ + (REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \ + OUT_BATCH((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \ + (REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \ + OUT_BATCH(0); \ + } while (0) + +#define i915_fs_texldp(dest_reg, sampler_reg, address_reg) \ + do { \ + OUT_BATCH(T0_TEXLDP | \ + (REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \ + (REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \ + OUT_BATCH((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \ + (REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \ + OUT_BATCH(0); \ + } while (0) + +#define i915_fs_arith_masked(op, dest_reg, dest_mask, operand0, operand1, operand2) \ + _i915_fs_arith_masked(A0_##op, dest_reg, dest_mask, operand0, operand1, operand2) + +#define i915_fs_arith(op, dest_reg, operand0, operand1, operand2) \ + _i915_fs_arith(A0_##op, dest_reg, operand0, operand1, operand2) + +#define _i915_fs_arith_masked(cmd, dest_reg, dest_mask, operand0, operand1, operand2) \ + do { \ + /* Set up destination register and write mask */ \ + OUT_BATCH(cmd | \ + (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \ + (((dest_mask) & ~MASK_SATURATE) << A0_DEST_CHANNEL_SHIFT) | \ + (((dest_mask) & MASK_SATURATE) ? A0_DEST_SATURATE : 0) | \ + /* Set up operand 0 */ \ + (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \ + (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \ + OUT_BATCH(i915_get_hardware_channel_val(REG_X(operand0), \ + A1_SRC0_CHANNEL_X_SHIFT, \ + A1_SRC0_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand0), \ + A1_SRC0_CHANNEL_Y_SHIFT, \ + A1_SRC0_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand0), \ + A1_SRC0_CHANNEL_Z_SHIFT, \ + A1_SRC0_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand0), \ + A1_SRC0_CHANNEL_W_SHIFT, \ + A1_SRC0_CHANNEL_W_NEGATE) | \ + /* Set up operand 1 */ \ + (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \ + (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand1), \ + A1_SRC1_CHANNEL_X_SHIFT, \ + A1_SRC1_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand1), \ + A1_SRC1_CHANNEL_Y_SHIFT, \ + A1_SRC1_CHANNEL_Y_NEGATE)); \ + OUT_BATCH(i915_get_hardware_channel_val(REG_Z(operand1), \ + A2_SRC1_CHANNEL_Z_SHIFT, \ + A2_SRC1_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand1), \ + A2_SRC1_CHANNEL_W_SHIFT, \ + A2_SRC1_CHANNEL_W_NEGATE) | \ + /* Set up operand 2 */ \ + (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \ + (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand2), \ + A2_SRC2_CHANNEL_X_SHIFT, \ + A2_SRC2_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand2), \ + A2_SRC2_CHANNEL_Y_SHIFT, \ + A2_SRC2_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand2), \ + A2_SRC2_CHANNEL_Z_SHIFT, \ + A2_SRC2_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand2), \ + A2_SRC2_CHANNEL_W_SHIFT, \ + A2_SRC2_CHANNEL_W_NEGATE)); \ + } while (0) + +#define _i915_fs_arith(cmd, dest_reg, operand0, operand1, operand2) do {\ + /* Set up destination register and write mask */ \ + OUT_BATCH(cmd | \ + (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \ + (A0_DEST_CHANNEL_ALL) | \ + /* Set up operand 0 */ \ + (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \ + (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \ + OUT_BATCH(i915_get_hardware_channel_val(REG_X(operand0), \ + A1_SRC0_CHANNEL_X_SHIFT, \ + A1_SRC0_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand0), \ + A1_SRC0_CHANNEL_Y_SHIFT, \ + A1_SRC0_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand0), \ + A1_SRC0_CHANNEL_Z_SHIFT, \ + A1_SRC0_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand0), \ + A1_SRC0_CHANNEL_W_SHIFT, \ + A1_SRC0_CHANNEL_W_NEGATE) | \ + /* Set up operand 1 */ \ + (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \ + (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand1), \ + A1_SRC1_CHANNEL_X_SHIFT, \ + A1_SRC1_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand1), \ + A1_SRC1_CHANNEL_Y_SHIFT, \ + A1_SRC1_CHANNEL_Y_NEGATE)); \ + OUT_BATCH(i915_get_hardware_channel_val(REG_Z(operand1), \ + A2_SRC1_CHANNEL_Z_SHIFT, \ + A2_SRC1_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand1), \ + A2_SRC1_CHANNEL_W_SHIFT, \ + A2_SRC1_CHANNEL_W_NEGATE) | \ + /* Set up operand 2 */ \ + (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \ + (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \ + i915_get_hardware_channel_val(REG_X(operand2), \ + A2_SRC2_CHANNEL_X_SHIFT, \ + A2_SRC2_CHANNEL_X_NEGATE) | \ + i915_get_hardware_channel_val(REG_Y(operand2), \ + A2_SRC2_CHANNEL_Y_SHIFT, \ + A2_SRC2_CHANNEL_Y_NEGATE) | \ + i915_get_hardware_channel_val(REG_Z(operand2), \ + A2_SRC2_CHANNEL_Z_SHIFT, \ + A2_SRC2_CHANNEL_Z_NEGATE) | \ + i915_get_hardware_channel_val(REG_W(operand2), \ + A2_SRC2_CHANNEL_W_SHIFT, \ + A2_SRC2_CHANNEL_W_NEGATE)); \ +} while (0) + +#define i915_fs_mov(dest_reg, operand0) \ + i915_fs_arith(MOV, dest_reg, \ + operand0, \ + i915_fs_operand_none(), \ + i915_fs_operand_none()) + +#define i915_fs_mov_masked(dest_reg, dest_mask, operand0) \ + i915_fs_arith_masked (MOV, dest_reg, dest_mask, \ + operand0, \ + i915_fs_operand_none(), \ + i915_fs_operand_none()) + + +#define i915_fs_frc(dest_reg, operand0) \ + i915_fs_arith (FRC, dest_reg, \ + operand0, \ + i915_fs_operand_none(), \ + i915_fs_operand_none()) + +/** Add operand0 and operand1 and put the result in dest_reg */ +#define i915_fs_add(dest_reg, operand0, operand1) \ + i915_fs_arith (ADD, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +/** Multiply operand0 and operand1 and put the result in dest_reg */ +#define i915_fs_mul(dest_reg, operand0, operand1) \ + i915_fs_arith (MUL, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +/** Computes 1/sqrt(operand0.replicate_swizzle) puts the result in dest_reg */ +#define i915_fs_rsq(dest_reg, dest_mask, operand0) \ + do { \ + if (dest_mask) { \ + i915_fs_arith_masked (RSQ, dest_reg, dest_mask, \ + operand0, \ + i915_fs_operand_none (), \ + i915_fs_operand_none ()); \ + } else { \ + i915_fs_arith (RSQ, dest_reg, \ + operand0, \ + i915_fs_operand_none (), \ + i915_fs_operand_none ()); \ + } \ + } while (0) + +/** Puts the minimum of operand0 and operand1 in dest_reg */ +#define i915_fs_min(dest_reg, operand0, operand1) \ + i915_fs_arith (MIN, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +/** Puts the maximum of operand0 and operand1 in dest_reg */ +#define i915_fs_max(dest_reg, operand0, operand1) \ + i915_fs_arith (MAX, dest_reg, \ + operand0, operand1, \ + i915_fs_operand_none()) + +#define i915_fs_cmp(dest_reg, operand0, operand1, operand2) \ + i915_fs_arith (CMP, dest_reg, operand0, operand1, operand2) + +/** Perform operand0 * operand1 + operand2 and put the result in dest_reg */ +#define i915_fs_mad(dest_reg, dest_mask, op0, op1, op2) \ + do { \ + if (dest_mask) { \ + i915_fs_arith_masked (MAD, dest_reg, dest_mask, op0, op1, op2); \ + } else { \ + i915_fs_arith (MAD, dest_reg, op0, op1, op2); \ + } \ + } while (0) + +#define i915_fs_dp2add(dest_reg, dest_mask, op0, op1, op2) \ + do { \ + if (dest_mask) { \ + i915_fs_arith_masked (DP2ADD, dest_reg, dest_mask, op0, op1, op2); \ + } else { \ + i915_fs_arith (DP2ADD, dest_reg, op0, op1, op2); \ + } \ + } while (0) + +/** + * Perform a 3-component dot-product of operand0 and operand1 and put the + * resulting scalar in the channels of dest_reg specified by the dest_mask. + */ +#define i915_fs_dp3(dest_reg, dest_mask, op0, op1) \ + do { \ + if (dest_mask) { \ + i915_fs_arith_masked (DP3, dest_reg, dest_mask, \ + op0, op1,\ + i915_fs_operand_none()); \ + } else { \ + i915_fs_arith (DP3, dest_reg, op0, op1,\ + i915_fs_operand_none()); \ + } \ + } while (0) + +/** + * Sets up local state for accumulating a fragment shader buffer. + * + * \param x maximum number of shader commands that may be used between + * a FS_START and FS_END + */ +#define FS_LOCALS() \ + uint32_t _shader_offset + +#define FS_BEGIN() \ + do { \ + _shader_offset = intel->batch_used++; \ + } while (0) + +#define FS_END() \ + do { \ + intel->batch_ptr[_shader_offset] = \ + _3DSTATE_PIXEL_SHADER_PROGRAM | \ + (intel->batch_used - _shader_offset - 2); \ + } while (0); diff -Nru intel-gpu-tools-1.0.2+git20110324+d641f0f/lib/i915_reg.h intel-gpu-tools-1.0.2+git20110408+1c9ede3/lib/i915_reg.h --- intel-gpu-tools-1.0.2+git20110324+d641f0f/lib/i915_reg.h 1970-01-01 00:00:00.000000000 +0000 +++ intel-gpu-tools-1.0.2+git20110408+1c9ede3/lib/i915_reg.h 2011-04-08 19:58:59.000000000 +0000 @@ -0,0 +1,844 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _I915_REG_H_ +#define _I915_REG_H_ + +#define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value) + +#define CMD_3D (0x3<<29) + +#define PRIM3D (CMD_3D | (0x1f<<24)) +#define PRIM3D_INDIRECT_SEQUENTIAL ((1<<23) | (0<<17)) +#define PRIM3D_TRILIST (PRIM3D | (0x0<<18)) +#define PRIM3D_TRISTRIP (PRIM3D | (0x1<<18)) +#define PRIM3D_TRISTRIP_RVRSE (PRIM3D | (0x2<<18)) +#define PRIM3D_TRIFAN (PRIM3D | (0x3<<18)) +#define PRIM3D_POLY (PRIM3D | (0x4<<18)) +#define PRIM3D_LINELIST (PRIM3D | (0x5<<18)) +#define PRIM3D_LINESTRIP (PRIM3D | (0x6<<18)) +#define PRIM3D_RECTLIST (PRIM3D | (0x7<<18)) +#define PRIM3D_POINTLIST (PRIM3D | (0x8<<18)) +#define PRIM3D_DIB (PRIM3D | (0x9<<18)) +#define PRIM3D_CLEAR_RECT (PRIM3D | (0xa<<18)) +#define PRIM3D_ZONE_INIT (PRIM3D | (0xd<<18)) +#define PRIM3D_MASK (0x1f<<18) + +/* p137 */ +#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24)) +#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16) +#define AA_LINE_ECAAR_WIDTH_0_5 0 +#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14) +#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14) +#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14) +#define AA_LINE_REGION_WIDTH_ENABLE (1<<8) +#define AA_LINE_REGION_WIDTH_0_5 0 +#define AA_LINE_REGION_WIDTH_1_0 (1<<6) +#define AA_LINE_REGION_WIDTH_2_0 (2<<6) +#define AA_LINE_REGION_WIDTH_4_0 (3<<6) + +/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/ +#define _3DSTATE_BACKFACE_STENCIL_OPS (CMD_3D | (0x8<<24)) +#define BFO_ENABLE_STENCIL_REF (1<<23) +#define BFO_STENCIL_REF_SHIFT 15 +#define BFO_STENCIL_REF_MASK (0xff<<15) +#define BFO_ENABLE_STENCIL_FUNCS (1<<14) +#define BFO_STENCIL_TEST_SHIFT 11 +#define BFO_STENCIL_TEST_MASK (0x7<<11) +#define BFO_STENCIL_FAIL_SHIFT 8 +#define BFO_STENCIL_FAIL_MASK (0x7<<8) +#define BFO_STENCIL_PASS_Z_FAIL_SHIFT 5 +#define BFO_STENCIL_PASS_Z_FAIL_MASK (0x7<<5) +#define BFO_STENCIL_PASS_Z_PASS_SHIFT 2 +#define BFO_STENCIL_PASS_Z_PASS_MASK (0x7<<2) +#define BFO_ENABLE_STENCIL_TWO_SIDE (1<<1) +#define BFO_STENCIL_TWO_SIDE (1<<0) + +/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */ +#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24)) +#define BFM_ENABLE_STENCIL_TEST_MASK (1<<17) +#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16) +#define BFM_STENCIL_TEST_MASK_SHIFT 8 +#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8) +#define BFM_STENCIL_WRITE_MASK_SHIFT 0 +#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0) + +/* 3DSTATE_BIN_CONTROL p141 */ + +/* p143 */ +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +#define BUF_3D_PITCH(x) (((x)/4)<<2) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + +/* 3DSTATE_CHROMA_KEY */ + +/* 3DSTATE_CLEAR_PARAMETERS, p150 */ +#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5) +/* Dword 1 */ +#define CLEARPARAM_CLEAR_RECT (1 << 16) +#define CLEARPARAM_ZONE_INIT (0 << 16) +#define CLEARPARAM_WRITE_COLOR (1 << 2) +#define CLEARPARAM_WRITE_DEPTH (1 << 1) +#define CLEARPARAM_WRITE_STENCIL (1 << 0) + +/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */ +#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16)) + +/* 3DSTATE_COORD_SET_BINDINGS, p154 */ +#define _3DSTATE_COORD_SET_BINDINGS (CMD_3D | (0x16<<24)) +#define CSB_TCB(iunit, eunit) ((eunit)<<(iunit*3)) + +/* p156 */ +#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16)) + +/* p157 */ +#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16)) + +/* p158 */ +#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16)) + +/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */ +#define _3DSTATE_DEPTH_OFFSET_SCALE (CMD_3D | (0x1d<<24) | (0x97<<16)) +/* scale in dword 1 */ + +/* The depth subrectangle is not supported, but must be disabled. */ +/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */ +#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | (1 << 1) | (0 << 0)) + +/* p161 */ +#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) +/* Dword 1 */ +#define TEX_DEFAULT_COLOR_OGL (0<<30) +#define TEX_DEFAULT_COLOR_D3D (1<<30) +#define ZR_EARLY_DEPTH (1<<29) +#define LOD_PRECLAMP_OGL (1<<28) +#define LOD_PRECLAMP_D3D (0<<28) +#define DITHER_FULL_ALWAYS (0<<26) +#define DITHER_FULL_ON_FB_BLEND (1<<26) +#define DITHER_CLAMPED_ALWAYS (2<<26) +#define LINEAR_GAMMA_BLEND_32BPP (1<<25) +#define DEBUG_DISABLE_ENH_DITHER (1<<24) +#define DSTORG_HORT_BIAS(x) ((x)<<20) +#define DSTORG_VERT_BIAS(x) ((x)<<16) +#define COLOR_4_2_2_CHNL_WRT_ALL 0 +#define COLOR_4_2_2_CHNL_WRT_Y (1<<12) +#define COLOR_4_2_2_CHNL_WRT_CR (2<<12) +#define COLOR_4_2_2_CHNL_WRT_CB (3<<12) +#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12) +#define COLR_BUF_8BIT 0 +#define COLR_BUF_RGB555 (1<<8) +#define COLR_BUF_RGB565 (2<<8) +#define COLR_BUF_ARGB8888 (3<<8) +#define COLR_BUF_ARGB4444 (8<<8) +#define COLR_BUF_ARGB1555 (9<<8) +#define COLR_BUF_ARGB2AAA (0xa<<8) +#define DEPTH_FRMT_16_FIXED 0 +#define DEPTH_FRMT_16_FLOAT (1<<2) +#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2) +#define VERT_LINE_STRIDE_1 (1<<1) +#define VERT_LINE_STRIDE_0 (0<<1) +#define VERT_LINE_STRIDE_OFS_1 1 +#define VERT_LINE_STRIDE_OFS_0 0 + +/* p166 */ +#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3) +/* Dword 1 */ +#define DRAW_RECT_DIS_DEPTH_OFS (1<<30) +#define DRAW_DITHER_OFS_X(x) ((x)<<26) +#define DRAW_DITHER_OFS_Y(x) ((x)<<24) +/* Dword 2 */ +#define DRAW_YMIN(x) ((x)<<16) +#define DRAW_XMIN(x) (x) +/* Dword 3 */ +#define DRAW_YMAX(x) ((x)<<16) +#define DRAW_XMAX(x) (x) +/* Dword 4 */ +#define DRAW_YORG(x) ((x)<<16) +#define DRAW_XORG(x) (x) + +/* 3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */ + +/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */ + +/* _3DSTATE_FOG_COLOR, p173 */ +#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24)) +#define FOG_COLOR_RED(x) ((x)<<16) +#define FOG_COLOR_GREEN(x) ((x)<<8) +#define FOG_COLOR_BLUE(x) (x) + +/* _3DSTATE_FOG_MODE, p174 */ +#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2) +/* Dword 1 */ +#define FMC1_FOGFUNC_MODIFY_ENABLE (1<<31) +#define FMC1_FOGFUNC_VERTEX (0<<28) +#define FMC1_FOGFUNC_PIXEL_EXP (1<<28) +#define FMC1_FOGFUNC_PIXEL_EXP2 (2<<28) +#define FMC1_FOGFUNC_PIXEL_LINEAR (3<<28) +#define FMC1_FOGFUNC_MASK (3<<28) +#define FMC1_FOGINDEX_MODIFY_ENABLE (1<<27) +#define FMC1_FOGINDEX_Z (0<<25) +#define FMC1_FOGINDEX_W (1<<25) +#define FMC1_C1_C2_MODIFY_ENABLE (1<<24) +#define FMC1_DENSITY_MODIFY_ENABLE (1<<23) +#define FMC1_C1_ONE (1<<13) +#define FMC1_C1_MASK (0xffff<<4) +/* Dword 2 */ +#define FMC2_C2_ONE (1<<16) +/* Dword 3 */ +#define FMC3_D_ONE (1<<16) + +/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */ +#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24)) +#define IAB_MODIFY_ENABLE (1<<23) +#define IAB_ENABLE (1<<22) +#define IAB_MODIFY_FUNC (1<<21) +#define IAB_FUNC_SHIFT 16 +#define IAB_MODIFY_SRC_FACTOR (1<<11) +#define IAB_SRC_FACTOR_SHIFT 6 +#define IAB_SRC_FACTOR_MASK (BLENDFACT_MASK<<6) +#define IAB_MODIFY_DST_FACTOR (1<<5) +#define IAB_DST_FACTOR_SHIFT 0 +#define IAB_DST_FACTOR_MASK (BLENDFACT_MASK<<0) + +#define BLENDFACT_ZERO 0x01 +#define BLENDFACT_ONE 0x02 +#define BLENDFACT_SRC_COLR 0x03 +#define BLENDFACT_INV_SRC_COLR 0x04 +#define BLENDFACT_SRC_ALPHA 0x05 +#define BLENDFACT_INV_SRC_ALPHA 0x06 +#define BLENDFACT_DST_ALPHA 0x07 +#define BLENDFACT_INV_DST_ALPHA 0x08 +#define BLENDFACT_DST_COLR 0x09 +#define BLENDFACT_INV_DST_COLR 0x0a +#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACT_CONST_COLOR 0x0c +#define BLENDFACT_INV_CONST_COLOR 0x0d +#define BLENDFACT_CONST_ALPHA 0x0e +#define BLENDFACT_INV_CONST_ALPHA 0x0f +#define BLENDFACT_MASK 0x0f + +#define BLENDFUNC_ADD 0x0 +#define BLENDFUNC_SUBTRACT 0x1 +#define BLENDFUNC_REVERSE_SUBTRACT 0x2 +#define BLENDFUNC_MIN 0x3 +#define BLENDFUNC_MAX 0x4 +#define BLENDFUNC_MASK 0x7 + +/* 3DSTATE_LOAD_INDIRECT, p180 */ + +#define _3DSTATE_LOAD_INDIRECT (CMD_3D|(0x1d<<24)|(0x7<<16)) +#define LI0_STATE_STATIC_INDIRECT (0x01<<8) +#define LI0_STATE_DYNAMIC_INDIRECT (0x02<<8) +#define LI0_STATE_SAMPLER (0x04<<8) +#define LI0_STATE_MAP (0x08<<8) +#define LI0_STATE_PROGRAM (0x10<<8) +#define LI0_STATE_CONSTANTS (0x20<<8) + +#define SIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SIS0_FORCE_LOAD (1<<1) +#define SIS0_BUFFER_VALID (1<<0) +#define SIS1_BUFFER_LENGTH(x) ((x)&0xff) + +#define DIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define DIS0_BUFFER_RESET (1<<1) +#define DIS0_BUFFER_VALID (1<<0) + +#define SSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SSB0_FORCE_LOAD (1<<1) +#define SSB0_BUFFER_VALID (1<<0) +#define SSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define MSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define MSB0_FORCE_LOAD (1<<1) +#define MSB0_BUFFER_VALID (1<<0) +#define MSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSP0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSP0_FORCE_LOAD (1<<1) +#define PSP0_BUFFER_VALID (1<<0) +#define PSP1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSC0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSC0_FORCE_LOAD (1<<1) +#define PSC0_BUFFER_VALID (1<<0) +#define PSC1_BUFFER_LENGTH(x) ((x)&0xff) + +/* _3DSTATE_RASTERIZATION_RULES */ +#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24)) +#define ENABLE_POINT_RASTER_RULE (1<<15) +#define OGL_POINT_RASTER_RULE (1<<13) +#define ENABLE_TEXKILL_3D_4D (1<<10) +#define TEXKILL_3D (0<<9) +#define TEXKILL_4D (1<<9) +#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) +#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) +#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) + +/* _3DSTATE_SCISSOR_ENABLE, p256 */ +#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19)) +#define ENABLE_SCISSOR_RECT ((1<<1) | 1) +#define DISABLE_SCISSOR_RECT (1<<1) + +/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */ +#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1) +/* Dword 1 */ +#define SCISSOR_RECT_0_YMIN(x) ((x)<<16) +#define SCISSOR_RECT_0_XMIN(x) (x) +/* Dword 2 */ +#define SCISSOR_RECT_0_YMAX(x) ((x)<<16) +#define SCISSOR_RECT_0_XMAX(x) (x) + +/* p189 */ +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16)) +#define I1_LOAD_S(n) (1<<(4+n)) + +#define S0_VB_OFFSET_MASK 0xffffffc +#define S0_AUTO_CACHE_INV_DISABLE (1<<0) + +#define S1_VERTEX_WIDTH_SHIFT 24 +#define S1_VERTEX_WIDTH_MASK (0x3f<<24) +#define S1_VERTEX_PITCH_SHIFT 16 +#define S1_VERTEX_PITCH_MASK (0x3f<<16) + +#define TEXCOORDFMT_2D 0x0 +#define TEXCOORDFMT_3D 0x1 +#define TEXCOORDFMT_4D 0x2 +#define TEXCOORDFMT_1D 0x3 +#define TEXCOORDFMT_2D_16 0x4 +#define TEXCOORDFMT_4D_16 0x5 +#define TEXCOORDFMT_NOT_PRESENT 0xf +#define S2_TEXCOORD_FMT0_MASK 0xf +#define S2_TEXCOORD_FMT1_SHIFT 4 +#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) +#define S2_TEXCOORD_NONE (~0) + +#define TEXCOORD_WRAP_SHORTEST_TCX 8 +#define TEXCOORD_WRAP_SHORTEST_TCY 4 +#define TEXCOORD_WRAP_SHORTEST_TCZ 2 +#define TEXCOORD_PERSPECTIVE_DISABLE 1 + +#define S3_WRAP_SHORTEST_TCX(unit) (TEXCOORD_WRAP_SHORTEST_TCX << ((unit) * 4)) +#define S3_WRAP_SHORTEST_TCY(unit) (TEXCOORD_WRAP_SHORTEST_TCY << ((unit) * 4)) +#define S3_WRAP_SHORTEST_TCZ(unit) (TEXCOORD_WRAP_SHORTEST_TCZ << ((unit) * 4)) +#define S3_PERSPECTIVE_DISABLE(unit) (TEXCOORD_PERSPECTIVE_DISABLE << ((unit) * 4)) + +/* S3 not interesting */ + +#define S4_POINT_WIDTH_SHIFT 23 +#define S4_POINT_WIDTH_MASK (0x1ff<<23) +#define S4_LINE_WIDTH_SHIFT 19 +#define S4_LINE_WIDTH_ONE (0x2<<19) +#define S4_LINE_WIDTH_MASK (0xf<<19) +#define S4_FLATSHADE_ALPHA (1<<18) +#define S4_FLATSHADE_FOG (1<<17) +#define S4_FLATSHADE_SPECULAR (1<<16) +#define S4_FLATSHADE_COLOR (1<<15) +#define S4_CULLMODE_BOTH (0<<13) +#define S4_CULLMODE_NONE (1<<13) +#define S4_CULLMODE_CW (2<<13) +#define S4_CULLMODE_CCW (3<<13) +#define S4_CULLMODE_MASK (3<<13) +#define S4_VFMT_POINT_WIDTH (1<<12) +#define S4_VFMT_SPEC_FOG (1<<11) +#define S4_VFMT_COLOR (1<<10) +#define S4_VFMT_DEPTH_OFFSET (1<<9) +#define S4_VFMT_XYZ (1<<6) +#define S4_VFMT_XYZW (2<<6) +#define S4_VFMT_XY (3<<6) +#define S4_VFMT_XYW (4<<6) +#define S4_VFMT_XYZW_MASK (7<<6) +#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) +#define S4_FORCE_DEFAULT_SPECULAR (1<<4) +#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) +#define S4_VFMT_FOG_PARAM (1<<2) +#define S4_SPRITE_POINT_ENABLE (1<<1) +#define S4_LINE_ANTIALIAS_ENABLE (1<<0) + +#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ + S4_VFMT_SPEC_FOG | \ + S4_VFMT_COLOR | \ + S4_VFMT_DEPTH_OFFSET | \ + S4_VFMT_XYZW_MASK | \ + S4_VFMT_FOG_PARAM) + +#define S5_WRITEDISABLE_ALPHA (1<<31) +#define S5_WRITEDISABLE_RED (1<<30) +#define S5_WRITEDISABLE_GREEN (1<<29) +#define S5_WRITEDISABLE_BLUE (1<<28) +#define S5_WRITEDISABLE_MASK (0xf<<28) +#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) +#define S5_LAST_PIXEL_ENABLE (1<<26) +#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) +#define S5_FOG_ENABLE (1<<24) +#define S5_STENCIL_REF_SHIFT 16 +#define S5_STENCIL_REF_MASK (0xff<<16) +#define S5_STENCIL_TEST_FUNC_SHIFT 13 +#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) +#define S5_STENCIL_FAIL_SHIFT 10 +#define S5_STENCIL_FAIL_MASK (0x7<<10) +#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 +#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) +#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 +#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) +#define S5_STENCIL_WRITE_ENABLE (1<<3) +#define S5_STENCIL_TEST_ENABLE (1<<2) +#define S5_COLOR_DITHER_ENABLE (1<<1) +#define S5_LOGICOP_ENABLE (1<<0) + +#define S6_ALPHA_TEST_ENABLE (1<<31) +#define S6_ALPHA_TEST_FUNC_SHIFT 28 +#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) +#define S6_ALPHA_REF_SHIFT 20 +#define S6_ALPHA_REF_MASK (0xff<<20) +#define S6_DEPTH_TEST_ENABLE (1<<19) +#define S6_DEPTH_TEST_FUNC_SHIFT 16 +#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) +#define S6_CBUF_BLEND_ENABLE (1<<15) +#define S6_CBUF_BLEND_FUNC_SHIFT 12 +#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) +#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 +#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) +#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 +#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) +#define S6_DEPTH_WRITE_ENABLE (1<<3) +#define S6_COLOR_WRITE_ENABLE (1<<2) +#define S6_TRISTRIP_PV_SHIFT 0 +#define S6_TRISTRIP_PV_MASK (0x3<<0) + +#define S7_DEPTH_OFFSET_CONST_MASK ~0 + +/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */ +/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */ + +/* _3DSTATE_MODES_4, p218 */ +#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x0d<<24)) +#define ENABLE_LOGIC_OP_FUNC (1<<23) +#define LOGIC_OP_FUNC(x) ((x)<<18) +#define LOGICOP_MASK (0xf<<18) +#define LOGICOP_COPY 0xc +#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00)) +#define ENABLE_STENCIL_TEST_MASK (1<<17) +#define STENCIL_TEST_MASK(x) ((x)<<8) +#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff)) +#define ENABLE_STENCIL_WRITE_MASK (1<<16) +#define STENCIL_WRITE_MASK(x) ((x)&0xff) + +/* _3DSTATE_MODES_5, p220 */ +#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24)) +#define PIPELINE_FLUSH_RENDER_CACHE (1<<18) +#define PIPELINE_FLUSH_TEXTURE_CACHE (1<<16) + +/* p221 */ +#define _3DSTATE_PIXEL_SHADER_CONSTANTS (CMD_3D|(0x1d<<24)|(0x6<<16)) +#define PS1_REG(n) (1<<(n)) +#define PS2_CONST_X(n) (n) +#define PS3_CONST_Y(n) (n) +#define PS4_CONST_Z(n) (n) +#define PS5_CONST_W(n) (n) + +/* p222 */ + +#define I915_MAX_TEX_INDIRECT 4 +#define I915_MAX_TEX_INSN 32 +#define I915_MAX_ALU_INSN 64 +#define I915_MAX_DECL_INSN 27 +#define I915_MAX_TEMPORARY 16 + +/* Each instruction is 3 dwords long, though most don't require all + * this space. Maximum of 123 instructions. Smaller maxes per insn + * type. + */ +#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16)) + +#define REG_TYPE_R 0 /* temporary regs, no need to + * dcl, must be written before + * read -- Preserved between + * phases. + */ +#define REG_TYPE_T 1 /* Interpolated values, must be + * dcl'ed before use. + * + * 0..7: texture coord, + * 8: diffuse spec, + * 9: specular color, + * 10: fog parameter in w. + */ +#define REG_TYPE_CONST 2 /* Restriction: only one const + * can be referenced per + * instruction, though it may be + * selected for multiple inputs. + * Constants not initialized + * default to zero. + */ +#define REG_TYPE_S 3 /* sampler */ +#define REG_TYPE_OC 4 /* output color (rgba) */ +#define REG_TYPE_OD 5 /* output depth (w), xyz are + * temporaries. If not written, + * interpolated depth is used? + */ +#define REG_TYPE_U 6 /* unpreserved temporaries */ +#define REG_TYPE_MASK 0x7 +#define REG_NR_MASK 0xf + +/* REG_TYPE_T: + */ +#define T_TEX0 0 +#define T_TEX1 1 +#define T_TEX2 2 +#define T_TEX3 3 +#define T_TEX4 4 +#define T_TEX5 5 +#define T_TEX6 6 +#define T_TEX7 7 +#define T_DIFFUSE 8 +#define T_SPECULAR 9 +#define T_FOG_W 10 /* interpolated fog is in W coord */ + +/* Arithmetic instructions */ + +/* .replicate_swizzle == selection and replication of a particular + * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww + */ +#define A0_NOP (0x0<<24) /* no operation */ +#define A0_ADD (0x1<<24) /* dst = src0 + src1 */ +#define A0_MOV (0x2<<24) /* dst = src0 */ +#define A0_MUL (0x3<<24) /* dst = src0 * src1 */ +#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */ +#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */ +#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */ +#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */ +#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */ +#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */ +#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */ +#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */ +#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */ +#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */ +#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR (0x10<<24) /* dst = floor(src0) */ +#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ +#define A0_TRC (0x12<<24) /* dst = int(src0) */ +#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */ +#define A0_DEST_SATURATE (1<<22) +#define A0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +#define A0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define A0_DEST_CHANNEL_X (1<<10) +#define A0_DEST_CHANNEL_Y (2<<10) +#define A0_DEST_CHANNEL_Z (4<<10) +#define A0_DEST_CHANNEL_W (8<<10) +#define A0_DEST_CHANNEL_ALL (0xf<<10) +#define A0_DEST_CHANNEL_SHIFT 10 +#define A0_SRC0_TYPE_SHIFT 7 +#define A0_SRC0_NR_SHIFT 2 + +#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y) +#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z) + +#define SRC_X 0 +#define SRC_Y 1 +#define SRC_Z 2 +#define SRC_W 3 +#define SRC_ZERO 4 +#define SRC_ONE 5 + +#define A1_SRC0_CHANNEL_X_NEGATE (1<<31) +#define A1_SRC0_CHANNEL_X_SHIFT 28 +#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27) +#define A1_SRC0_CHANNEL_Y_SHIFT 24 +#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23) +#define A1_SRC0_CHANNEL_Z_SHIFT 20 +#define A1_SRC0_CHANNEL_W_NEGATE (1<<19) +#define A1_SRC0_CHANNEL_W_SHIFT 16 +#define A1_SRC1_TYPE_SHIFT 13 +#define A1_SRC1_NR_SHIFT 8 +#define A1_SRC1_CHANNEL_X_NEGATE (1<<7) +#define A1_SRC1_CHANNEL_X_SHIFT 4 +#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3) +#define A1_SRC1_CHANNEL_Y_SHIFT 0 + +#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31) +#define A2_SRC1_CHANNEL_Z_SHIFT 28 +#define A2_SRC1_CHANNEL_W_NEGATE (1<<27) +#define A2_SRC1_CHANNEL_W_SHIFT 24 +#define A2_SRC2_TYPE_SHIFT 21 +#define A2_SRC2_NR_SHIFT 16 +#define A2_SRC2_CHANNEL_X_NEGATE (1<<15) +#define A2_SRC2_CHANNEL_X_SHIFT 12 +#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11) +#define A2_SRC2_CHANNEL_Y_SHIFT 8 +#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7) +#define A2_SRC2_CHANNEL_Z_SHIFT 4 +#define A2_SRC2_CHANNEL_W_NEGATE (1<<3) +#define A2_SRC2_CHANNEL_W_SHIFT 0 + +/* Texture instructions */ +#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared + * sampler and address, and output + * filtered texel data to destination + * register */ +#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a + * perspective divide of the texture + * coordinate .xyz values by .w before + * sampling. */ +#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the + * computed LOD by w. Only S4.6 two's + * comp is used. This implies that a + * float to fixed conversion is + * done. */ +#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling + * operation. Simply kills the pixel + * if any channel of the address + * register is < 0.0. */ +#define T0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +/* Note: U (unpreserved) regs do not retain their values between + * phases (cannot be used for feedback) + * + * Note: oC and OD registers can only be used as the destination of a + * texture instruction once per phase (this is an implementation + * restriction). + */ +#define T0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ +#define T0_SAMPLER_NR_MASK (0xf<<0) + +#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ +/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ +#define T1_ADDRESS_REG_NR_SHIFT 17 +#define T2_MBZ 0 + +/* Declaration instructions */ +#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) + * register or an s (sampler) + * register. */ +#define D0_SAMPLE_TYPE_SHIFT 22 +#define D0_SAMPLE_TYPE_2D (0x0<<22) +#define D0_SAMPLE_TYPE_CUBE (0x1<<22) +#define D0_SAMPLE_TYPE_VOLUME (0x2<<22) +#define D0_SAMPLE_TYPE_MASK (0x3<<22) + +#define D0_TYPE_SHIFT 19 +/* Allow: T, S */ +#define D0_NR_SHIFT 14 +/* Allow T: 0..10, S: 0..15 */ +#define D0_CHANNEL_X (1<<10) +#define D0_CHANNEL_Y (2<<10) +#define D0_CHANNEL_Z (4<<10) +#define D0_CHANNEL_W (8<<10) +#define D0_CHANNEL_ALL (0xf<<10) +#define D0_CHANNEL_NONE (0<<10) + +#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y) +#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z) + +/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse + * or specular declarations. + * + * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) + * + * Must be zero for S (sampler) dcls + */ +#define D1_MBZ 0 +#define D2_MBZ 0 + +/* p207. + * The DWORD count is 3 times the number of bits set in MS1_MAPMASK_MASK + */ +#define _3DSTATE_MAP_STATE (CMD_3D|(0x1d<<24)|(0x0<<16)) + +#define MS1_MAPMASK_SHIFT 0 +#define MS1_MAPMASK_MASK (0x8fff<<0) + +#define MS2_UNTRUSTED_SURFACE (1<<31) +#define MS2_ADDRESS_MASK 0xfffffffc +#define MS2_VERTICAL_LINE_STRIDE (1<<1) +#define MS2_VERTICAL_OFFSET (1<<1) + +#define MS3_HEIGHT_SHIFT 21 +#define MS3_WIDTH_SHIFT 10 +#define MS3_PALETTE_SELECT (1<<9) +#define MS3_MAPSURF_FORMAT_SHIFT 7 +#define MS3_MAPSURF_FORMAT_MASK (0x7<<7) +#define MAPSURF_8BIT (1<<7) +#define MAPSURF_16BIT (2<<7) +#define MAPSURF_32BIT (3<<7) +#define MAPSURF_422 (5<<7) +#define MAPSURF_COMPRESSED (6<<7) +#define MAPSURF_4BIT_INDEXED (7<<7) +#define MS3_MT_FORMAT_MASK (0x7 << 3) +#define MS3_MT_FORMAT_SHIFT 3 +#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */ +#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */ +#define MT_8BIT_L8 (1<<3) +#define MT_8BIT_A8 (4<<3) +#define MT_8BIT_MONO8 (5<<3) +#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */ +#define MT_16BIT_ARGB1555 (1<<3) +#define MT_16BIT_ARGB4444 (2<<3) +#define MT_16BIT_AY88 (3<<3) +#define MT_16BIT_88DVDU (5<<3) +#define MT_16BIT_BUMP_655LDVDU (6<<3) +#define MT_16BIT_I16 (7<<3) +#define MT_16BIT_L16 (8<<3) +#define MT_16BIT_A16 (9<<3) +#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */ +#define MT_32BIT_ABGR8888 (1<<3) +#define MT_32BIT_XRGB8888 (2<<3) +#define MT_32BIT_XBGR8888 (3<<3) +#define MT_32BIT_QWVU8888 (4<<3) +#define MT_32BIT_AXVU8888 (5<<3) +#define MT_32BIT_LXVU8888 (6<<3) +#define MT_32BIT_XLVU8888 (7<<3) +#define MT_32BIT_ARGB2101010 (8<<3) +#define MT_32BIT_ABGR2101010 (9<<3) +#define MT_32BIT_AWVU2101010 (0xA<<3) +#define MT_32BIT_GR1616 (0xB<<3) +#define MT_32BIT_VU1616 (0xC<<3) +#define MT_32BIT_xI824 (0xD<<3) +#define MT_32BIT_xA824 (0xE<<3) +#define MT_32BIT_xL824 (0xF<<3) +#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ +#define MT_422_YCRCB_NORMAL (1<<3) +#define MT_422_YCRCB_SWAPUV (2<<3) +#define MT_422_YCRCB_SWAPUVY (3<<3) +#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */ +#define MT_COMPRESS_DXT2_3 (1<<3) +#define MT_COMPRESS_DXT4_5 (2<<3) +#define MT_COMPRESS_FXT1 (3<<3) +#define MT_COMPRESS_DXT1_RGB (4<<3) +#define MS3_USE_FENCE_REGS (1<<2) +#define MS3_TILED_SURFACE (1<<1) +#define MS3_TILE_WALK (1<<0) + +/* The pitch is the pitch measured in DWORDS, minus 1 */ +#define MS4_PITCH_SHIFT 21 +#define MS4_CUBE_FACE_ENA_NEGX (1<<20) +#define MS4_CUBE_FACE_ENA_POSX (1<<19) +#define MS4_CUBE_FACE_ENA_NEGY (1<<18) +#define MS4_CUBE_FACE_ENA_POSY (1<<17) +#define MS4_CUBE_FACE_ENA_NEGZ (1<<16) +#define MS4_CUBE_FACE_ENA_POSZ (1<<15) +#define MS4_CUBE_FACE_ENA_MASK (0x3f<<15) +#define MS4_MAX_LOD_SHIFT 9 +#define MS4_MAX_LOD_MASK (0x3f<<9) +#define MS4_MIP_LAYOUT_LEGACY (0<<8) +#define MS4_MIP_LAYOUT_BELOW_LPT (0<<8) +#define MS4_MIP_LAYOUT_RIGHT_LPT (1<<8) +#define MS4_VOLUME_DEPTH_SHIFT 0 +#define MS4_VOLUME_DEPTH_MASK (0xff<<0) + +/* p244. + * The DWORD count is 3 times the number of bits set in SS1_MAPMASK_MASK. + */ +#define _3DSTATE_SAMPLER_STATE (CMD_3D|(0x1d<<24)|(0x1<<16)) + +#define SS1_MAPMASK_SHIFT 0 +#define SS1_MAPMASK_MASK (0x8fff<<0) + +#define SS2_REVERSE_GAMMA_ENABLE (1<<31) +#define SS2_PACKED_TO_PLANAR_ENABLE (1<<30) +#define SS2_COLORSPACE_CONVERSION (1<<29) +#define SS2_CHROMAKEY_SHIFT 27 +#define SS2_BASE_MIP_LEVEL_SHIFT 22 +#define SS2_BASE_MIP_LEVEL_MASK (0x1f<<22) +#define SS2_MIP_FILTER_SHIFT 20 +#define SS2_MIP_FILTER_MASK (0x3<<20) +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 +#define SS2_MAG_FILTER_SHIFT 17 +#define SS2_MAG_FILTER_MASK (0x7<<17) +#define FILTER_NEAREST 0 +#define FILTER_LINEAR 1 +#define FILTER_ANISOTROPIC 2 +#define FILTER_4X4_1 3 +#define FILTER_4X4_2 4 +#define FILTER_4X4_FLAT 5 +#define FILTER_6X5_MONO 6 /* XXX - check */ +#define SS2_MIN_FILTER_SHIFT 14 +#define SS2_MIN_FILTER_MASK (0x7<<14) +#define SS2_LOD_BIAS_SHIFT 5 +#define SS2_LOD_BIAS_ONE (0x10<<5) +#define SS2_LOD_BIAS_MASK (0x1ff<<5) +/* Shadow requires: + * MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format + * FILTER_4X4_x MIN and MAG filters + */ +#define SS2_SHADOW_ENABLE (1<<4) +#define SS2_MAX_ANISO_MASK (1<<3) +#define SS2_MAX_ANISO_2 (0<<3) +#define SS2_MAX_ANISO_4 (1<<3) +#define SS2_SHADOW_FUNC_SHIFT 0 +#define SS2_SHADOW_FUNC_MASK (0x7<<0) +/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */ + +#define SS3_MIN_LOD_SHIFT 24 +#define SS3_MIN_LOD_ONE (0x10<<24) +#define SS3_MIN_LOD_MASK (0xff<<24) +#define SS3_KILL_PIXEL_ENABLE (1<<17) +#define SS3_TCX_ADDR_MODE_SHIFT 12 +#define SS3_TCX_ADDR_MODE_MASK (0x7<<12) +#define TEXCOORDMODE_WRAP 0 +#define TEXCOORDMODE_MIRROR 1 +#define TEXCOORDMODE_CLAMP_EDGE 2 +#define TEXCOORDMODE_CUBE 3 +#define TEXCOORDMODE_CLAMP_BORDER 4 +#define TEXCOORDMODE_MIRROR_ONCE 5 +#define SS3_TCY_ADDR_MODE_SHIFT 9 +#define SS3_TCY_ADDR_MODE_MASK (0x7<<9) +#define SS3_TCZ_ADDR_MODE_SHIFT 6 +#define SS3_TCZ_ADDR_MODE_MASK (0x7<<6) +#define SS3_NORMALIZED_COORDS (1<<5) +#define SS3_TEXTUREMAP_INDEX_SHIFT 1 +#define SS3_TEXTUREMAP_INDEX_MASK (0xf<<1) +#define SS3_DEINTERLACER_ENABLE (1<<0) + +#define SS4_BORDER_COLOR_MASK (~0) + +/* 3DSTATE_SPAN_STIPPLE, p258 + */ +#define _3DSTATE_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) +#define ST1_ENABLE (1<<16) +#define ST1_MASK (0xffff) + +#define FLUSH_MAP_CACHE (1<<0) +#define FLUSH_RENDER_CACHE (1<<1) + +#endif diff -Nru intel-gpu-tools-1.0.2+git20110324+d641f0f/lib/intel_batchbuffer.c intel-gpu-tools-1.0.2+git20110408+1c9ede3/lib/intel_batchbuffer.c --- intel-gpu-tools-1.0.2+git20110324+d641f0f/lib/intel_batchbuffer.c 2011-03-24 19:00:04.000000000 +0000 +++ intel-gpu-tools-1.0.2+git20110408+1c9ede3/lib/intel_batchbuffer.c 2011-04-08 19:58:59.000000000 +0000 @@ -72,6 +72,8 @@ free(batch); } +#define CMD_POLY_STIPPLE_OFFSET 0x7906 + void intel_batchbuffer_flush(struct intel_batchbuffer *batch) { @@ -82,6 +84,13 @@ if (used == 0) return; + if (IS_GEN5(batch->devid)) { + BEGIN_BATCH(2); + OUT_BATCH(CMD_POLY_STIPPLE_OFFSET << 16); + OUT_BATCH(0); + ADVANCE_BATCH(); + } + /* Round batchbuffer usage to 2 DWORDs. */ if ((used & 4) == 0) { *(uint32_t *) (batch->ptr) = 0; /* noop */ @@ -113,7 +122,8 @@ void intel_batchbuffer_emit_reloc(struct intel_batchbuffer *batch, drm_intel_bo *buffer, uint32_t delta, - uint32_t read_domains, uint32_t write_domain) + uint32_t read_domains, uint32_t write_domain, + int fenced) { int ret; @@ -123,9 +133,14 @@ (int)(batch->ptr - batch->buffer), BATCH_SZ); - ret = drm_intel_bo_emit_reloc(batch->bo, batch->ptr - batch->buffer, - buffer, delta, - read_domains, write_domain); + if (fenced) + ret = drm_intel_bo_emit_reloc_fence(batch->bo, batch->ptr - batch->buffer, + buffer, delta, + read_domains, write_domain); + else + ret = drm_intel_bo_emit_reloc(batch->bo, batch->ptr - batch->buffer, + buffer, delta, + read_domains, write_domain); intel_batchbuffer_emit_dword(batch, buffer->offset + delta); assert(ret == 0); } diff -Nru intel-gpu-tools-1.0.2+git20110324+d641f0f/lib/intel_batchbuffer.h intel-gpu-tools-1.0.2+git20110408+1c9ede3/lib/intel_batchbuffer.h --- intel-gpu-tools-1.0.2+git20110324+d641f0f/lib/intel_batchbuffer.h 2011-03-24 19:00:04.000000000 +0000 +++ intel-gpu-tools-1.0.2+git20110408+1c9ede3/lib/intel_batchbuffer.h 2011-04-08 19:58:59.000000000 +0000 @@ -36,7 +36,8 @@ drm_intel_bo *buffer, uint32_t delta, uint32_t read_domains, - uint32_t write_domain); + uint32_t write_domain, + int fenced); /* Inline functions - might actually be better off with these * non-inlined. Certainly better off switching all command packets to @@ -77,10 +78,16 @@ #define OUT_BATCH(d) intel_batchbuffer_emit_dword(batch, d) +#define OUT_RELOC_FENCED(buf, read_domains, write_domain, delta) do { \ + assert((delta) >= 0); \ + intel_batchbuffer_emit_reloc(batch, buf, delta, \ + read_domains, write_domain, 1); \ +} while (0) + #define OUT_RELOC(buf, read_domains, write_domain, delta) do { \ assert((delta) >= 0); \ intel_batchbuffer_emit_reloc(batch, buf, delta, \ - read_domains, write_domain); \ + read_domains, write_domain, 0); \ } while (0) #define ADVANCE_BATCH() do { \ diff -Nru intel-gpu-tools-1.0.2+git20110324+d641f0f/tests/gem_stress.c intel-gpu-tools-1.0.2+git20110408+1c9ede3/tests/gem_stress.c --- intel-gpu-tools-1.0.2+git20110324+d641f0f/tests/gem_stress.c 2011-03-24 19:00:04.000000000 +0000 +++ intel-gpu-tools-1.0.2+git20110408+1c9ede3/tests/gem_stress.c 2011-04-08 19:58:59.000000000 +0000 @@ -49,22 +49,9 @@ * In short: designed for maximum evilness. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "drm.h" -#include "i915_drm.h" -#include "drmtest.h" -#include "intel_bufmgr.h" -#include "intel_batchbuffer.h" -#include "intel_gpu_tools.h" +#include "gem_stress.h" + +#define CMD_POLY_STIPPLE_OFFSET 0x7906 /** TODO: * - beat on relaxed fencing (i.e. mappable/fenceable tracking in the kernel) @@ -87,61 +74,49 @@ return aperture.aper_size; } -struct scratch_buf { - drm_intel_bo *bo; - uint32_t stride; - uint32_t tiling; - uint32_t *data; -}; - -#define NO_HW 0 - -static drm_intel_bufmgr *bufmgr; +drm_intel_bufmgr *bufmgr; struct intel_batchbuffer *batch; -static int drm_fd; -static int devid; -static int num_fences; +int drm_fd; +int devid; +int num_fences; drm_intel_bo *busy_bo; -#define MAX_BUFS 4096 -#define SCRATCH_BUF_SIZE 1024*1024 -#define BUSY_BUF_SIZE (256*4096) -#define TILE_SIZE 16 -#define TILES_PER_BUF (SCRATCH_BUF_SIZE / (TILE_SIZE*TILE_SIZE*sizeof(uint32_t))) +struct option_struct options; static struct scratch_buf buffers[2][MAX_BUFS]; /* tile i is at logical position tile_permutation[i] */ -static unsigned tile_permutation[MAX_BUFS*TILES_PER_BUF]; +static unsigned *tile_permutation; static unsigned num_buffers = 0; static unsigned current_set = 0; static unsigned target_set = 0; +static unsigned num_total_tiles = 0; -static int fence_storm = 0; +#define TILES_PER_BUF (num_total_tiles / num_buffers) + +int fence_storm = 0; +static int gpu_busy_load = 10; static void tile2xy(struct scratch_buf *buf, unsigned tile, unsigned *x, unsigned *y) { - assert(tile < TILES_PER_BUF); + assert(tile < buf->num_tiles); *x = (tile*TILE_SIZE) % (buf->stride/sizeof(uint32_t)); *y = ((tile*TILE_SIZE) / (buf->stride/sizeof(uint32_t))) * TILE_SIZE; } -/* All this gem trashing wastes too much cpu time, so give the gpu something to - * do to increase changes for races. TODO: Should be autotuned. */ -static void keep_gpu_busy(void) +static void emit_blt(drm_intel_bo *src_bo, uint32_t src_tiling, unsigned src_pitch, + unsigned src_x, unsigned src_y, unsigned w, unsigned h, + drm_intel_bo *dst_bo, uint32_t dst_tiling, unsigned dst_pitch, + unsigned dst_x, unsigned dst_y) { - uint32_t src_pitch, dst_pitch, cmd_bits; - - src_pitch = 4096; - dst_pitch = 4096; - cmd_bits = 0; + uint32_t cmd_bits = 0; - if (IS_965(devid)) { + if (IS_965(devid) && src_tiling) { src_pitch /= 4; cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED; } - if (IS_965(devid)) { + if (IS_965(devid) && dst_tiling) { dst_pitch /= 4; cmd_bits |= XY_SRC_COPY_BLT_DST_TILED; } @@ -155,15 +130,28 @@ OUT_BATCH((3 << 24) | /* 32 bits */ (0xcc << 16) | /* copy ROP */ dst_pitch); - OUT_BATCH(128 << 16 | 0); - OUT_BATCH(256 << 16 | 1024); - OUT_RELOC(busy_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); - OUT_BATCH(0 << 16 | 0); + OUT_BATCH(dst_y << 16 | dst_x); + OUT_BATCH((dst_y+h) << 16 | (dst_x+w)); + OUT_RELOC_FENCED(dst_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + OUT_BATCH(src_y << 16 | src_x); OUT_BATCH(src_pitch); - OUT_RELOC(busy_bo, I915_GEM_DOMAIN_RENDER, 0, 0); + OUT_RELOC_FENCED(src_bo, I915_GEM_DOMAIN_RENDER, 0, 0); ADVANCE_BATCH(); } +/* All this gem trashing wastes too much cpu time, so give the gpu something to + * do to increase changes for races. */ +void keep_gpu_busy(void) +{ + int tmp; + + tmp = 1 << gpu_busy_load; + assert(tmp <= 1024); + + emit_blt(busy_bo, 0, 4096, 0, 0, tmp, 128, + busy_bo, 0, 4096, 0, 128); +} + static unsigned int copyfunc_seq = 0; static void (*copyfunc)(struct scratch_buf *src, unsigned src_x, unsigned src_y, struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, @@ -175,6 +163,7 @@ unsigned logical_tile_no) { int i, j; + int failed = 0; for (i = 0; i < TILE_SIZE; i++) { for (j = 0; j < TILE_SIZE; j++) { @@ -182,19 +171,21 @@ unsigned src_ofs = src_x + j + src_stride * (src_y + i); unsigned expect = logical_tile_no*TILE_SIZE*TILE_SIZE + i*TILE_SIZE + j; - uint32_t tmp = src[src_ofs]; + uint32_t tmp = src[src_ofs]; if (tmp != expect) { - printf("mismatch at tile %i pos %i, read %u, expected %u\n", - logical_tile_no, j*TILE_SIZE, tmp, expect); - exit(1); + printf("mismatch at tile %i pos %i, read %i, expected %i, diff %i\n", + logical_tile_no, i*TILE_SIZE + j, tmp, expect, (int) tmp - expect); + if (options.trace_tile >= 0) + exit(1); + failed = 1; } dst[dst_ofs] = tmp; } } + if (failed) + exit(1); } -static void next_copyfunc(void); - static void cpu_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y, struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, unsigned logical_tile_no) @@ -241,41 +232,15 @@ struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, unsigned logical_tile_no) { - uint32_t src_pitch, dst_pitch, cmd_bits; - src_pitch = src->stride; - dst_pitch = dst->stride; - cmd_bits = 0; static unsigned keep_gpu_busy_counter = 0; /* check both edges of the fence usage */ if (keep_gpu_busy_counter & 1 && !fence_storm) keep_gpu_busy(); - if (IS_965(devid)) { - src_pitch /= 4; - cmd_bits |= XY_SRC_COPY_BLT_SRC_TILED; - } - - if (IS_965(devid)) { - dst_pitch /= 4; - cmd_bits |= XY_SRC_COPY_BLT_DST_TILED; - } - - BEGIN_BATCH(8); - OUT_BATCH(XY_SRC_COPY_BLT_CMD | - XY_SRC_COPY_BLT_WRITE_ALPHA | - XY_SRC_COPY_BLT_WRITE_RGB | - cmd_bits); - OUT_BATCH((3 << 24) | /* 32 bits */ - (0xcc << 16) | /* copy ROP */ - dst_pitch); - OUT_BATCH(dst_y << 16 | dst_x); - OUT_BATCH((dst_y+TILE_SIZE) << 16 | (dst_x+TILE_SIZE)); - OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); - OUT_BATCH(src_y << 16 | src_x); - OUT_BATCH(src_pitch); - OUT_RELOC(src->bo, I915_GEM_DOMAIN_RENDER, 0, 0); - ADVANCE_BATCH(); + emit_blt(src->bo, src->tiling, src->stride, src_x, src_y, + TILE_SIZE, TILE_SIZE, + dst->bo, dst->tiling, dst->stride, dst_x, dst_y); if (!(keep_gpu_busy_counter & 1) && !fence_storm) keep_gpu_busy(); @@ -293,20 +258,54 @@ } } -static void next_copyfunc(void) +static void render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y, + struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, + unsigned logical_tile_no) +{ + if (IS_GEN2(devid)) + gen2_render_copyfunc(src, src_x, src_y, + dst, dst_x, dst_y, + logical_tile_no); + else if (IS_GEN3(devid)) + gen3_render_copyfunc(src, src_x, src_y, + dst, dst_x, dst_y, + logical_tile_no); + else + blitter_copyfunc(src, src_x, src_y, + dst, dst_x, dst_y, + logical_tile_no); +} + +static void next_copyfunc(int tile) { - if (fence_storm) + if (fence_storm) { + if (tile == options.trace_tile) + printf(" using fence storm\n"); return; + } if (copyfunc_seq % 61 == 0) { + if (tile == options.trace_tile) + printf(" using fence storm\n"); fence_storm = num_fences; copyfunc = blitter_copyfunc; - } else if (copyfunc_seq % 17 == 0) + } else if (copyfunc_seq % 17 == 0) { + if (tile == options.trace_tile) + printf(" using cpu\n"); copyfunc = cpu_copyfunc; - else if (copyfunc_seq % 19 == 0) + } else if (copyfunc_seq % 19 == 0) { + if (tile == options.trace_tile) + printf(" using prw\n"); copyfunc = prw_copyfunc; - else + } else if (copyfunc_seq % 3 == 0 && options.use_render) { + if (tile == options.trace_tile) + printf(" using render\n"); + copyfunc = render_copyfunc; + } else { + if (tile == options.trace_tile) + printf(" using blitter\n"); copyfunc = blitter_copyfunc; + } copyfunc_seq++; } @@ -315,23 +314,26 @@ { uint32_t tmp_tile[TILE_SIZE*TILE_SIZE]; uint32_t seq = 0; - int i, j, k; - unsigned x, y; + int i, k; + unsigned tile, buf_idx, x, y; - for (i = 0; i < num_buffers; i++) { - for (j = 0; j < TILES_PER_BUF; j++) { - tile2xy(&buffers[current_set][i], j, &x, &y); - for (k = 0; k < TILE_SIZE*TILE_SIZE; k++) - tmp_tile[k] = seq++; - - cpucpy2d(tmp_tile, TILE_SIZE, 0, 0, - buffers[current_set][i].data, - buffers[current_set][i].stride / sizeof(uint32_t), - x, y, i*TILES_PER_BUF + j); - } + for (i = 0; i < num_total_tiles; i++) { + tile = i; + buf_idx = tile / TILES_PER_BUF; + tile %= TILES_PER_BUF; + + tile2xy(&buffers[current_set][buf_idx], tile, &x, &y); + + for (k = 0; k < TILE_SIZE*TILE_SIZE; k++) + tmp_tile[k] = seq++; + + cpucpy2d(tmp_tile, TILE_SIZE, 0, 0, + buffers[current_set][buf_idx].data, + buffers[current_set][buf_idx].stride / sizeof(uint32_t), + x, y, i); } - for (i = 0; i < num_buffers*TILES_PER_BUF; i++) + for (i = 0; i < num_total_tiles; i++) tile_permutation[i] = i; } @@ -340,7 +342,7 @@ uint32_t tmp_tile[TILE_SIZE*TILE_SIZE]; unsigned tile, buf_idx, x, y; int i; - for (i = 0; i < num_buffers*TILES_PER_BUF; i++) { + for (i = 0; i < num_total_tiles; i++) { tile = tile_permutation[i]; buf_idx = tile / TILES_PER_BUF; tile %= TILES_PER_BUF; @@ -355,19 +357,21 @@ } } -static void init_buffer(struct scratch_buf *buf) +static void init_buffer(struct scratch_buf *buf, unsigned size) { - buf->bo = drm_intel_bo_alloc(bufmgr, "tiled bo", SCRATCH_BUF_SIZE, 4096); + buf->bo = drm_intel_bo_alloc(bufmgr, "tiled bo", size, 4096); assert(buf->bo); buf->tiling = I915_TILING_NONE; buf->stride = 8192; - /* XXX for development of the shuffling */ -#if NO_HW - buf->data = malloc(SCRATCH_BUF_SIZE); -#else - drm_intel_gem_bo_map_gtt(buf->bo); - buf->data = buf->bo->virtual; -#endif + + if (options.no_hw) + buf->data = malloc(size); + else { + drm_intel_gem_bo_map_gtt(buf->bo); + buf->data = buf->bo->virtual; + } + + buf->num_tiles = size / TILE_BYTES; } static void permute_array(void *array, unsigned size, @@ -393,6 +397,7 @@ memcpy(&buf_arr[j], &tmp, sizeof(struct scratch_buf)); } + /* libdrm is to clever and prevents us from changin tiling of buffers already * used in relocations. */ static void set_tiling(drm_intel_bo *bo, unsigned *tiling, unsigned stride) @@ -408,7 +413,7 @@ */ set_tiling.handle = bo->handle; set_tiling.tiling_mode = *tiling; - set_tiling.stride = stride; + set_tiling.stride = tiling ? stride : 0; ret = ioctl(drm_fd, DRM_IOCTL_I915_GEM_SET_TILING, @@ -426,6 +431,12 @@ permute_array(buffers[set], num_buffers, exchange_buf); + if (current_set == 1 && options.gpu_busy_load == 0) { + gpu_busy_load++; + if (gpu_busy_load > 10) + gpu_busy_load = 6; + } + for (i = 0; i < num_buffers; i++) { r = random(); if ((r & 3) != 0) @@ -437,6 +448,8 @@ else buffers[set][i].tiling = I915_TILING_NONE; r >>= 2; + if (options.no_tiling) + buffers[set][i].tiling = I915_TILING_NONE; if (buffers[set][i].tiling == I915_TILING_NONE) { /* min 64 byte stride */ @@ -456,6 +469,11 @@ set_tiling(buffers[set][i].bo, &buffers[set][i].tiling, buffers[set][i].stride); + + if (i == options.trace_tile/TILES_PER_BUF) + printf("changing buffer %i containing tile %i: tiling %i, stride %i\n", i, + options.trace_tile, + buffers[set][i].tiling, buffers[set][i].stride); } } @@ -475,7 +493,7 @@ unsigned dst_tile, dst_buf_idx, dst_x, dst_y; struct scratch_buf *src_buf, *dst_buf; int i, idx; - for (i = 0; i < num_buffers*TILES_PER_BUF; i++) { + for (i = 0; i < num_total_tiles; i++) { /* tile_permutation is independant of current_permutation, so * abuse it to randomize the order of the src bos */ idx = tile_permutation[i]; @@ -491,20 +509,25 @@ tile2xy(dst_buf, dst_tile, &dst_x, &dst_y); -#if NO_HW - cpucpy2d(src_buf->data, - src_buf->stride / sizeof(uint32_t), - src_x, src_y, - dst_buf->data, - dst_buf->stride / sizeof(uint32_t), - dst_x, dst_y, - i); -#else - next_copyfunc(); + if (options.trace_tile == i) + printf("copying tile %i from %i (%i, %i) to %i (%i, %i)", i, + tile_permutation[i], src_buf_idx, src_tile, + permutation[idx], dst_buf_idx, dst_tile); + + if (options.no_hw) { + cpucpy2d(src_buf->data, + src_buf->stride / sizeof(uint32_t), + src_x, src_y, + dst_buf->data, + dst_buf->stride / sizeof(uint32_t), + dst_x, dst_y, + i); + } else { + next_copyfunc(i); - copyfunc(src_buf, src_x, src_y, dst_buf, dst_x, dst_y, - i); -#endif + copyfunc(src_buf, src_x, src_y, dst_buf, dst_x, dst_y, + i); + } } intel_batchbuffer_flush(batch); @@ -526,40 +549,137 @@ return val - 2; } -int main(int argc, char **argv) +static void parse_options(int argc, char **argv) { - int i, j; - unsigned *current_permutation, *tmp_permutation; + int c, tmp; + int option_index = 0; + static struct option long_options[] = { + {"no-hw", 0, 0, 'd'}, + {"buf-size", 1, 0, 's'}, + {"gpu-busy-load", 1, 0, 'g'}, + {"buffer-count", 1, 0, 'c'}, + {"trace-tile", 1, 0, 't'}, + {"disable-render", 0, 0, 'r'}, + {"untiled", 0, 0, 'u'} + }; + + options.scratch_buf_size = 256*4096; + options.no_hw = 0; + options.gpu_busy_load = 0; + options.num_buffers = 0; + options.trace_tile = -1; + options.use_render = 1; + options.no_tiling = 0; + + while((c = getopt_long(argc, argv, "ns:g:c:t:ru", + long_options, &option_index)) != -1) { + switch(c) { + case 'd': + options.no_hw = 1; + printf("no-hw debug mode\n"); + break; + case 's': + tmp = atoi(optarg); + if (tmp < TILE_SIZE*8192) + printf("scratch buffer size needs to be at least %i\n", + TILE_SIZE*8192); + else if (tmp & (tmp - 1)) { + printf("scratch buffer size needs to be a power-of-two\n"); + } else { + printf("fixed scratch buffer size to %u\n", tmp); + options.scratch_buf_size = tmp; + } + break; + case 'g': + tmp = atoi(optarg); + if (tmp < 0 || tmp > 10) + printf("gpu busy load needs to be bigger than 0 and smaller than 10\n"); + else { + printf("gpu busy load factor set to %i\n", tmp); + gpu_busy_load = options.gpu_busy_load = tmp; + } + break; + case 'c': + options.num_buffers = atoi(optarg); + printf("buffer count set to %i\n", options.num_buffers); + break; + case 't': + options.trace_tile = atoi(optarg); + printf("tracing tile %i\n", options.trace_tile); + break; + case 'r': + options.use_render = 0; + printf("disabling render copy\n"); + break; + case 'u': + options.no_tiling = 1; + printf("disabling tiling\n"); + break; + default: + printf("unkown command options\n"); + break; + } + } + + if (optind < argc) + printf("unkown command options\n"); +} + +static void init(void) +{ + int i; + unsigned tmp; drm_fd = drm_open_any(); - num_buffers = 2 * gem_aperture_size(drm_fd) / (1024*1024) / 3; - num_buffers /= 2; - printf("Using %d 1MiB buffers\n", num_buffers); + if (options.num_buffers == 0) { + tmp = gem_aperture_size(drm_fd); + tmp = tmp > 256*(1024*1024) ? 256*(1024*1024) : tmp; + num_buffers = 2 * tmp / options.scratch_buf_size / 3; + num_buffers /= 2; + printf("using %u buffers\n", num_buffers); + } else + num_buffers = options.num_buffers; bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096); drm_intel_bufmgr_gem_enable_reuse(bufmgr); + drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr); devid = intel_get_drm_devid(drm_fd); num_fences = get_num_fences(); batch = intel_batchbuffer_alloc(bufmgr, devid); busy_bo = drm_intel_bo_alloc(bufmgr, "tiled bo", BUSY_BUF_SIZE, 4096); for (i = 0; i < num_buffers; i++) { - init_buffer(&buffers[0][i]); - init_buffer(&buffers[1][i]); + init_buffer(&buffers[0][i], options.scratch_buf_size); + init_buffer(&buffers[1][i], options.scratch_buf_size); + + num_total_tiles += buffers[0][i].num_tiles; } current_set = 0; - current_permutation = malloc(MAX_BUFS*TILES_PER_BUF*sizeof(uint32_t)); - tmp_permutation = malloc(MAX_BUFS*TILES_PER_BUF*sizeof(uint32_t)); - assert(current_permutation); - assert(tmp_permutation); - /* just in case it helps reproducability */ srandom(0xdeadbeef); +} + +int main(int argc, char **argv) +{ + int i, j; + unsigned *current_permutation, *tmp_permutation; + + parse_options(argc, argv); + + init(); + + tile_permutation = malloc(num_total_tiles*sizeof(uint32_t)); + current_permutation = malloc(num_total_tiles*sizeof(uint32_t)); + tmp_permutation = malloc(num_total_tiles*sizeof(uint32_t)); + assert(tile_permutation); + assert(current_permutation); + assert(tmp_permutation); fan_out(); for (i = 0; i < 512; i++) { + printf("round %i\n", i); if (i % 64 == 63) { fan_in_and_check(); printf("everything correct after %i rounds\n", i + 1); @@ -568,16 +688,16 @@ target_set = (current_set + 1) & 1; init_set(target_set); - for (j = 0; j < num_buffers*TILES_PER_BUF; j++) + for (j = 0; j < num_total_tiles; j++) current_permutation[j] = j; - permute_array(current_permutation, num_buffers*TILES_PER_BUF, exchange_uint); + permute_array(current_permutation, num_total_tiles, exchange_uint); copy_tiles(current_permutation); - memcpy(tmp_permutation, tile_permutation, sizeof(tile_permutation)); + memcpy(tmp_permutation, tile_permutation, sizeof(unsigned)*num_total_tiles); /* accumulate the permutations */ - for (j = 0; j < num_buffers*TILES_PER_BUF; j++) + for (j = 0; j < num_total_tiles; j++) tile_permutation[j] = current_permutation[tmp_permutation[j]]; current_set = target_set; diff -Nru intel-gpu-tools-1.0.2+git20110324+d641f0f/tests/gem_stress.h intel-gpu-tools-1.0.2+git20110408+1c9ede3/tests/gem_stress.h --- intel-gpu-tools-1.0.2+git20110324+d641f0f/tests/gem_stress.h 1970-01-01 00:00:00.000000000 +0000 +++ intel-gpu-tools-1.0.2+git20110408+1c9ede3/tests/gem_stress.h 2011-04-08 19:58:59.000000000 +0000 @@ -0,0 +1,82 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "drm.h" +#include "i915_drm.h" +#include "drmtest.h" +#include "intel_bufmgr.h" +#include "intel_batchbuffer.h" +#include "intel_gpu_tools.h" + +struct scratch_buf { + drm_intel_bo *bo; + uint32_t stride; + uint32_t tiling; + uint32_t *data; + unsigned num_tiles; +}; + +struct option_struct { + unsigned scratch_buf_size; + unsigned num_buffers; + int trace_tile; + int no_hw; + int gpu_busy_load; + int use_render; + int no_tiling; +}; + +extern struct option_struct options; +extern drm_intel_bufmgr *bufmgr; +extern struct intel_batchbuffer *batch; +extern int drm_fd; +extern int devid; + +extern int fence_storm; + +#define MAX_BUFS 4096 +#define SCRATCH_BUF_SIZE 1024*1024 +#define BUSY_BUF_SIZE (256*4096) +#define TILE_SIZE 16 +#define TILE_BYTES (TILE_SIZE*TILE_SIZE*sizeof(uint32_t)) + +void keep_gpu_busy(void); + +static inline void emit_vertex(float f) +{ + union { float f; uint32_t ui; } u; + u.f = f; + OUT_BATCH(u.ui); +} + +static inline void emit_vertex_normalized(float f, float total) +{ + union { float f; uint32_t ui; } u; + u.f = f / total; + OUT_BATCH(u.ui); +} + +static inline unsigned buf_width(struct scratch_buf *buf) +{ + return buf->stride/sizeof(uint32_t); +} + +static inline unsigned buf_height(struct scratch_buf *buf) +{ + return options.scratch_buf_size/buf->stride; +} + +void gen3_render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y, + struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, + unsigned logical_tile_no); +void gen2_render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y, + struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, + unsigned logical_tile_no); diff -Nru intel-gpu-tools-1.0.2+git20110324+d641f0f/tests/gem_stress_i830.c intel-gpu-tools-1.0.2+git20110408+1c9ede3/tests/gem_stress_i830.c --- intel-gpu-tools-1.0.2+git20110324+d641f0f/tests/gem_stress_i830.c 1970-01-01 00:00:00.000000000 +0000 +++ intel-gpu-tools-1.0.2+git20110408+1c9ede3/tests/gem_stress_i830.c 2011-04-08 19:58:59.000000000 +0000 @@ -0,0 +1,354 @@ +#include "i830_reg.h" +#include "gem_stress.h" + +#define TB0C_LAST_STAGE (1 << 31) +#define TB0C_RESULT_SCALE_1X (0 << 29) +#define TB0C_RESULT_SCALE_2X (1 << 29) +#define TB0C_RESULT_SCALE_4X (2 << 29) +#define TB0C_OP_MODULE (3 << 25) +#define TB0C_OUTPUT_WRITE_CURRENT (0 << 24) +#define TB0C_OUTPUT_WRITE_ACCUM (1 << 24) +#define TB0C_ARG3_REPLICATE_ALPHA (1<<23) +#define TB0C_ARG3_INVERT (1<<22) +#define TB0C_ARG3_SEL_XXX +#define TB0C_ARG2_REPLICATE_ALPHA (1<<17) +#define TB0C_ARG2_INVERT (1<<16) +#define TB0C_ARG2_SEL_ONE (0 << 12) +#define TB0C_ARG2_SEL_FACTOR (1 << 12) +#define TB0C_ARG2_SEL_TEXEL0 (6 << 12) +#define TB0C_ARG2_SEL_TEXEL1 (7 << 12) +#define TB0C_ARG2_SEL_TEXEL2 (8 << 12) +#define TB0C_ARG2_SEL_TEXEL3 (9 << 12) +#define TB0C_ARG1_REPLICATE_ALPHA (1<<11) +#define TB0C_ARG1_INVERT (1<<10) +#define TB0C_ARG1_SEL_ONE (0 << 6) +#define TB0C_ARG1_SEL_TEXEL0 (6 << 6) +#define TB0C_ARG1_SEL_TEXEL1 (7 << 6) +#define TB0C_ARG1_SEL_TEXEL2 (8 << 6) +#define TB0C_ARG1_SEL_TEXEL3 (9 << 6) +#define TB0C_ARG0_REPLICATE_ALPHA (1<<5) +#define TB0C_ARG0_SEL_XXX + +#define TB0A_CTR_STAGE_ENABLE (1<<31) +#define TB0A_RESULT_SCALE_1X (0 << 29) +#define TB0A_RESULT_SCALE_2X (1 << 29) +#define TB0A_RESULT_SCALE_4X (2 << 29) +#define TB0A_OP_MODULE (3 << 25) +#define TB0A_OUTPUT_WRITE_CURRENT (0<<24) +#define TB0A_OUTPUT_WRITE_ACCUM (1<<24) +#define TB0A_CTR_STAGE_SEL_BITS_XXX +#define TB0A_ARG3_SEL_XXX +#define TB0A_ARG3_INVERT (1<<17) +#define TB0A_ARG2_INVERT (1<<16) +#define TB0A_ARG2_SEL_ONE (0 << 12) +#define TB0A_ARG2_SEL_TEXEL0 (6 << 12) +#define TB0A_ARG2_SEL_TEXEL1 (7 << 12) +#define TB0A_ARG2_SEL_TEXEL2 (8 << 12) +#define TB0A_ARG2_SEL_TEXEL3 (9 << 12) +#define TB0A_ARG1_INVERT (1<<10) +#define TB0A_ARG1_SEL_ONE (0 << 6) +#define TB0A_ARG1_SEL_TEXEL0 (6 << 6) +#define TB0A_ARG1_SEL_TEXEL1 (7 << 6) +#define TB0A_ARG1_SEL_TEXEL2 (8 << 6) +#define TB0A_ARG1_SEL_TEXEL3 (9 << 6) + +void gen2_render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y, + struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, + unsigned logical_tile_no) +{ + static unsigned keep_gpu_busy_counter = 0; + + /* check both edges of the fence usage */ + if (keep_gpu_busy_counter & 1) + keep_gpu_busy(); + + /* invariant state */ + { + OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(0)); + OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(1)); + OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(2)); + OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(3)); + + OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_Z_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_FOG_MODE_CMD); + OUT_BATCH(FOGFUNC_ENABLE | + FOG_LINEAR_CONST | FOGSRC_INDEX_Z | ENABLE_FOG_DENSITY); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + MAP_UNIT(0) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(0) | + ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(0)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + MAP_UNIT(1) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(1) | + ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(1)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + MAP_UNIT(2) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(2) | + ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(2)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + MAP_UNIT(3) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(3) | + ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(3)); + + OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); + OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(0)); + OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); + OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(1)); + OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); + OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(2)); + OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); + OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(3)); + + OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + ENABLE_TRI_STRIP_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | TRI_STRIP_PROVOKE_VRTX(2)); + + OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); + + OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_VERTEX_TRANSFORM); + OUT_BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE); + + OUT_BATCH(_3DSTATE_W_STATE_CMD); + OUT_BATCH(MAGIC_W_STATE_DWORD1); + OUT_BATCH(0x3f800000 /* 1.0 in IEEE float */ ); + + OUT_BATCH(_3DSTATE_COLOR_FACTOR_CMD); + OUT_BATCH(0x80808080); /* .5 required in alpha for GL_DOT3_RGBA_EXT */ + + OUT_BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD); + OUT_BATCH(TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) | + TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) | + TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) | + TEXBIND_SET0(TEXCOORDSRC_VTXSET_0)); + + /* copy from mesa */ + OUT_BATCH(_3DSTATE_FOG_COLOR_CMD | + FOG_COLOR_RED(0) | FOG_COLOR_GREEN(0) | FOG_COLOR_BLUE(0)); + + OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_MODES_1_CMD | + ENABLE_COLR_BLND_FUNC | + BLENDFUNC_ADD | + ENABLE_SRC_BLND_FACTOR | + SRC_BLND_FACT(BLENDFACTOR_ONE) | + ENABLE_DST_BLND_FACTOR | DST_BLND_FACT(BLENDFACTOR_ZERO)); + OUT_BATCH(_3DSTATE_MODES_2_CMD | ENABLE_GLOBAL_DEPTH_BIAS | GLOBAL_DEPTH_BIAS(0) | ENABLE_ALPHA_TEST_FUNC | ALPHA_TEST_FUNC(0) | /* always */ + ALPHA_REF_VALUE(0)); + OUT_BATCH(_3DSTATE_MODES_3_CMD | + ENABLE_DEPTH_TEST_FUNC | + DEPTH_TEST_FUNC(0x2) | /* COMPAREFUNC_LESS */ + ENABLE_ALPHA_SHADE_MODE | + ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_FOG_SHADE_MODE | + FOG_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_SPEC_SHADE_MODE | + SPEC_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_COLOR_SHADE_MODE | + COLOR_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_CULL_MODE | CULLMODE_NONE); + + OUT_BATCH(_3DSTATE_MODES_4_CMD | + ENABLE_LOGIC_OP_FUNC | + LOGIC_OP_FUNC(LOGICOP_COPY) | + ENABLE_STENCIL_TEST_MASK | + STENCIL_TEST_MASK(0xff) | + ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff)); + + OUT_BATCH(_3DSTATE_STENCIL_TEST_CMD | + ENABLE_STENCIL_PARMS | + STENCIL_FAIL_OP(0) | /* STENCILOP_KEEP */ + STENCIL_PASS_DEPTH_FAIL_OP(0) | /* STENCILOP_KEEP */ + STENCIL_PASS_DEPTH_PASS_OP(0) | /* STENCILOP_KEEP */ + ENABLE_STENCIL_TEST_FUNC | + STENCIL_TEST_FUNC(0) | /* COMPAREFUNC_ALWAYS */ + ENABLE_STENCIL_REF_VALUE | + STENCIL_REF_VALUE(0)); + + OUT_BATCH(_3DSTATE_MODES_5_CMD | + FLUSH_TEXTURE_CACHE | + ENABLE_SPRITE_POINT_TEX | SPRITE_POINT_TEX_OFF | + ENABLE_FIXED_LINE_WIDTH | FIXED_LINE_WIDTH(0x2) | /* 1.0 */ + ENABLE_FIXED_POINT_WIDTH | FIXED_POINT_WIDTH(1)); + + OUT_BATCH(_3DSTATE_STIPPLE); + + /* Set default blend state */ + OUT_BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) | + TEXPIPE_COLOR | + ENABLE_TEXOUTPUT_WRT_SEL | + TEXOP_OUTPUT_CURRENT | + DISABLE_TEX_CNTRL_STAGE | + TEXOP_SCALE_1X | + TEXOP_MODIFY_PARMS | TEXOP_LAST_STAGE | TEXBLENDOP_ARG1); + OUT_BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) | + TEXPIPE_ALPHA | + ENABLE_TEXOUTPUT_WRT_SEL | + TEXOP_OUTPUT_CURRENT | + TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1); + OUT_BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) | + TEXPIPE_COLOR | + TEXBLEND_ARG1 | + TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_DIFFUSE); + OUT_BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) | + TEXPIPE_ALPHA | + TEXBLEND_ARG1 | + TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_DIFFUSE); + + OUT_BATCH(_3DSTATE_AA_CMD | + AA_LINE_ECAAR_WIDTH_ENABLE | + AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | + AA_LINE_REGION_WIDTH_1_0 | AA_LINE_DISABLE); + } + + /* render target state */ + { + uint32_t tiling_bits = 0; + if (dst->tiling != I915_TILING_NONE) + tiling_bits = BUF_3D_TILED_SURFACE; + if (dst->tiling == I915_TILING_Y) + tiling_bits |= BUF_3D_TILE_WALK_Y; + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits | + BUF_3D_PITCH(dst->stride)); + OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(COLR_BUF_ARGB8888 | + DSTORG_HORT_BIAS(0x8) | + DSTORG_VERT_BIAS(0x8)); + + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(0); + OUT_BATCH(0); /* ymin, xmin */ + OUT_BATCH(DRAW_YMAX(buf_height(dst) - 1) | + DRAW_XMAX(buf_width(dst) - 1)); + OUT_BATCH(0); /* yorig, xorig */ + } + + /* dynamic state */ + { + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); + OUT_BATCH(1); /* number of coordinate sets */ + OUT_BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); + OUT_BATCH(S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD | + BLENDFACTOR_ONE << S8_SRC_BLEND_FACTOR_SHIFT | + BLENDFACTOR_ZERO << S8_DST_BLEND_FACTOR_SHIFT | + S8_ENABLE_COLOR_BUFFER_WRITE); + + OUT_BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD | DISABLE_INDPT_ALPHA_BLEND); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | + LOAD_TEXTURE_BLEND_STAGE(0) | 1); + OUT_BATCH(TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_MODULE | + TB0C_OUTPUT_WRITE_CURRENT | TB0C_ARG1_SEL_TEXEL0 | + TB0C_ARG2_SEL_ONE); + OUT_BATCH(TB0A_RESULT_SCALE_1X | TB0A_OP_MODULE | + TB0A_OUTPUT_WRITE_CURRENT | TB0A_ARG1_SEL_TEXEL0 | + TB0A_ARG2_SEL_ONE); + + OUT_BATCH(_3DSTATE_ENABLES_1_CMD | DISABLE_LOGIC_OP | + DISABLE_STENCIL_TEST | DISABLE_DEPTH_BIAS | + DISABLE_SPEC_ADD | DISABLE_FOG | DISABLE_ALPHA_TEST | + ENABLE_COLOR_BLEND | DISABLE_DEPTH_TEST); + /* We have to explicitly say we don't want write disabled */ + OUT_BATCH(_3DSTATE_ENABLES_2_CMD | ENABLE_COLOR_MASK | + DISABLE_STENCIL_WRITE | ENABLE_TEX_CACHE | + DISABLE_DITHER | ENABLE_COLOR_WRITE | DISABLE_DEPTH_WRITE); + + OUT_BATCH(_3DSTATE_VERTEX_FORMAT_2_CMD | + TEXCOORDFMT_2D << 0); + } + + /* sampler state */ + { + uint32_t tiling_bits = 0; + if (src->tiling != I915_TILING_NONE) + tiling_bits = TM0S1_TILED_SURFACE; + if (src->tiling == I915_TILING_Y) + tiling_bits |= TM0S1_TILE_WALK; + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | + LOAD_TEXTURE_MAP(0) | 4); + OUT_RELOC(src->bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); + OUT_BATCH((buf_height(src) - 1) << TM0S1_HEIGHT_SHIFT | + (buf_width(src) - 1) << TM0S1_WIDTH_SHIFT | + MAPSURF_32BIT | MT_32BIT_ARGB8888 | tiling_bits); + OUT_BATCH((src->stride / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D); + OUT_BATCH(FILTER_NEAREST << TM0S3_MAG_FILTER_SHIFT | + FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT | + MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT); + OUT_BATCH(0); /* default color */ + OUT_BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(0) | + ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | + TEXCOORDTYPE_CARTESIAN | + ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(TEXCOORDMODE_CLAMP_BORDER) | + ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(TEXCOORDMODE_CLAMP_BORDER)); + /* map texel stream */ + OUT_BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD); + OUT_BATCH(TEXBIND_SET0(TEXCOORDSRC_VTXSET_0) | + TEXBIND_SET1(TEXCOORDSRC_KEEP) | + TEXBIND_SET2(TEXCOORDSRC_KEEP) | + TEXBIND_SET3(TEXCOORDSRC_KEEP)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | (0 << 16) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(0) | + ENABLE_TEX_STREAM_MAP_IDX | TEX_STREAM_MAP_IDX(0)); + } + + OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST | (3*4 -1)); + emit_vertex(dst_x + TILE_SIZE); + emit_vertex(dst_y + TILE_SIZE); + emit_vertex_normalized(src_x + TILE_SIZE, buf_width(src)); + emit_vertex_normalized(src_y + TILE_SIZE, buf_height(src)); + + emit_vertex(dst_x); + emit_vertex(dst_y + TILE_SIZE); + emit_vertex_normalized(src_x, buf_width(src)); + emit_vertex_normalized(src_y + TILE_SIZE, buf_height(src)); + + emit_vertex(dst_x); + emit_vertex(dst_y); + emit_vertex_normalized(src_x, buf_width(src)); + emit_vertex_normalized(src_y, buf_height(src)); + + if (!(keep_gpu_busy_counter & 1)) + keep_gpu_busy(); + + keep_gpu_busy_counter++; + + intel_batchbuffer_flush(batch); +} diff -Nru intel-gpu-tools-1.0.2+git20110324+d641f0f/tests/gem_stress_i915.c intel-gpu-tools-1.0.2+git20110408+1c9ede3/tests/gem_stress_i915.c --- intel-gpu-tools-1.0.2+git20110324+d641f0f/tests/gem_stress_i915.c 1970-01-01 00:00:00.000000000 +0000 +++ intel-gpu-tools-1.0.2+git20110408+1c9ede3/tests/gem_stress_i915.c 2011-04-08 19:58:59.000000000 +0000 @@ -0,0 +1,197 @@ +#include "i915_reg.h" +#include "i915_3d.h" +#include "gem_stress.h" + +void gen3_render_copyfunc(struct scratch_buf *src, unsigned src_x, unsigned src_y, + struct scratch_buf *dst, unsigned dst_x, unsigned dst_y, + unsigned logical_tile_no) +{ + static unsigned keep_gpu_busy_counter = 0; + + /* check both edges of the fence usage */ + if (keep_gpu_busy_counter & 1) + keep_gpu_busy(); + + /* invariant state */ + { + OUT_BATCH(_3DSTATE_AA_CMD | + AA_LINE_ECAAR_WIDTH_ENABLE | + AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | AA_LINE_REGION_WIDTH_1_0); + OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | + IAB_MODIFY_ENABLE | + IAB_MODIFY_FUNC | (BLENDFUNC_ADD << IAB_FUNC_SHIFT) | + IAB_MODIFY_SRC_FACTOR | (BLENDFACT_ONE << + IAB_SRC_FACTOR_SHIFT) | + IAB_MODIFY_DST_FACTOR | (BLENDFACT_ZERO << + IAB_DST_FACTOR_SHIFT)); + OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); + OUT_BATCH(0); + OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); + OUT_BATCH(0); + OUT_BATCH(_3DSTATE_DFLT_Z_CMD); + OUT_BATCH(0); + OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7)); + OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | ENABLE_TEXKILL_3D_4D | TEXKILL_4D); + OUT_BATCH(_3DSTATE_MODES_4_CMD | + ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) | + ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff) | + ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff)); + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2); + OUT_BATCH(0x00000000); /* Disable texture coordinate wrap-shortest */ + OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | + S4_LINE_WIDTH_ONE | + S4_CULLMODE_NONE | + S4_VFMT_XY); + OUT_BATCH(0x00000000); /* Stencil. */ + OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); + OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE); + OUT_BATCH(_3DSTATE_LOAD_INDIRECT | 0); /* disable indirect state */ + OUT_BATCH(0); + OUT_BATCH(_3DSTATE_STIPPLE); + OUT_BATCH(0x00000000); + OUT_BATCH(_3DSTATE_BACKFACE_STENCIL_OPS | BFO_ENABLE_STENCIL_TWO_SIDE | 0); + } + + /* samler state */ + { +#define TEX_COUNT 1 + uint32_t tiling_bits = 0; + if (src->tiling != I915_TILING_NONE) + tiling_bits = MS3_TILED_SURFACE; + if (src->tiling == I915_TILING_Y) + tiling_bits |= MS3_TILE_WALK; + + OUT_BATCH(_3DSTATE_MAP_STATE | (3 * TEX_COUNT)); + OUT_BATCH((1 << TEX_COUNT) - 1); + OUT_RELOC(src->bo, I915_GEM_DOMAIN_SAMPLER, 0, 0); + OUT_BATCH(MAPSURF_32BIT | MT_32BIT_ARGB8888 | + tiling_bits | + (buf_height(src) - 1) << MS3_HEIGHT_SHIFT | + (buf_width(src) - 1) << MS3_WIDTH_SHIFT); + OUT_BATCH((src->stride/4-1) << MS4_PITCH_SHIFT); + + OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * TEX_COUNT)); + OUT_BATCH((1 << TEX_COUNT) - 1); + OUT_BATCH(MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT | + FILTER_NEAREST << SS2_MAG_FILTER_SHIFT | + FILTER_NEAREST << SS2_MIN_FILTER_SHIFT); + OUT_BATCH(SS3_NORMALIZED_COORDS | + TEXCOORDMODE_WRAP << SS3_TCX_ADDR_MODE_SHIFT | + TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT | + 0 << SS3_TEXTUREMAP_INDEX_SHIFT); + OUT_BATCH(0x00000000); + } + + /* render target state */ + { + uint32_t tiling_bits = 0; + if (dst->tiling != I915_TILING_NONE) + tiling_bits = BUF_3D_TILED_SURFACE; + if (dst->tiling == I915_TILING_Y) + tiling_bits |= BUF_3D_TILE_WALK_Y; + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(BUF_3D_ID_COLOR_BACK | tiling_bits | + BUF_3D_PITCH(dst->stride)); + OUT_RELOC(dst->bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0); + + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(COLR_BUF_ARGB8888 | + DSTORG_HORT_BIAS(0x8) | + DSTORG_VERT_BIAS(0x8)); + + /* draw rect is unconditional */ + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(0x00000000); + OUT_BATCH(0x00000000); /* ymin, xmin */ + OUT_BATCH(DRAW_YMAX(buf_height(dst) - 1) | + DRAW_XMAX(buf_width(dst) - 1)); + /* yorig, xorig (relate to color buffer?) */ + OUT_BATCH(0x00000000); + } + + /* texfmt */ + { + uint32_t ss2 = ~0; + ss2 &= ~S2_TEXCOORD_FMT(0, TEXCOORDFMT_NOT_PRESENT); + ss2 |= S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D); + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1); + OUT_BATCH(ss2); + OUT_BATCH(S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE | + BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT | + BLENDFACT_ONE << S6_CBUF_SRC_BLEND_FACT_SHIFT | + BLENDFACT_ZERO << S6_CBUF_DST_BLEND_FACT_SHIFT); + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(0) | I1_LOAD_S(1) | 1); + OUT_BATCH(0); /* no vbo */ + OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | + (4 << S1_VERTEX_PITCH_SHIFT)); + } + + /* frage shader */ + { + OUT_BATCH(_3DSTATE_PIXEL_SHADER_PROGRAM | (1 + 3*3 - 2)); + /* decl FS_T0 */ + OUT_BATCH(D0_DCL | + REG_TYPE(FS_T0) << D0_TYPE_SHIFT | + REG_NR(FS_T0) << D0_NR_SHIFT | + ((REG_TYPE(FS_T0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); + OUT_BATCH(0); + OUT_BATCH(0); + /* decl FS_S0 */ + OUT_BATCH(D0_DCL | + (REG_TYPE(FS_S0) << D0_TYPE_SHIFT) | + (REG_NR(FS_S0) << D0_NR_SHIFT) | + ((REG_TYPE(FS_S0) != REG_TYPE_S) ? D0_CHANNEL_ALL : 0)); + OUT_BATCH(0); + OUT_BATCH(0); + /* texld(FS_OC, FS_S0, FS_T0 */ + OUT_BATCH(T0_TEXLD | + (REG_TYPE(FS_OC) << T0_DEST_TYPE_SHIFT) | + (REG_NR(FS_OC) << T0_DEST_NR_SHIFT) | + (REG_NR(FS_S0) << T0_SAMPLER_NR_SHIFT)); + OUT_BATCH((REG_TYPE(FS_T0) << T1_ADDRESS_REG_TYPE_SHIFT) | + (REG_NR(FS_T0) << T1_ADDRESS_REG_NR_SHIFT)); + OUT_BATCH(0); + } + + OUT_BATCH(PRIM3D_RECTLIST | (3*4 - 1)); + emit_vertex(dst_x + TILE_SIZE); + emit_vertex(dst_y + TILE_SIZE); + emit_vertex_normalized(src_x + TILE_SIZE, buf_width(src)); + emit_vertex_normalized(src_y + TILE_SIZE, buf_height(src)); + + emit_vertex(dst_x); + emit_vertex(dst_y + TILE_SIZE); + emit_vertex_normalized(src_x, buf_width(src)); + emit_vertex_normalized(src_y + TILE_SIZE, buf_height(src)); + + emit_vertex(dst_x); + emit_vertex(dst_y); + emit_vertex_normalized(src_x, buf_width(src)); + emit_vertex_normalized(src_y, buf_height(src)); + + if (!(keep_gpu_busy_counter & 1)) + keep_gpu_busy(); + + keep_gpu_busy_counter++; + + intel_batchbuffer_flush(batch); +} + diff -Nru intel-gpu-tools-1.0.2+git20110324+d641f0f/tests/Makefile.am intel-gpu-tools-1.0.2+git20110408+1c9ede3/tests/Makefile.am --- intel-gpu-tools-1.0.2+git20110324+d641f0f/tests/Makefile.am 2011-03-24 19:00:04.000000000 +0000 +++ intel-gpu-tools-1.0.2+git20110408+1c9ede3/tests/Makefile.am 2011-04-08 19:58:59.000000000 +0000 @@ -1,7 +1,13 @@ -bin_PROGRAMS = \ +noinst_PROGRAMS = \ gem_stress \ $(NULL) +gem_stress_SOURCES = \ + gem_stress.c \ + gem_stress.h \ + gem_stress_i915.c \ + gem_stress_i830.c + TESTS = getversion \ getclient \ getstats \ diff -Nru intel-gpu-tools-1.0.2+git20110324+d641f0f/tests/Makefile.in intel-gpu-tools-1.0.2+git20110408+1c9ede3/tests/Makefile.in --- intel-gpu-tools-1.0.2+git20110324+d641f0f/tests/Makefile.in 2011-03-24 19:00:13.000000000 +0000 +++ intel-gpu-tools-1.0.2+git20110408+1c9ede3/tests/Makefile.in 2011-04-08 19:59:11.000000000 +0000 @@ -34,7 +34,7 @@ POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ -bin_PROGRAMS = gem_stress$(EXEEXT) +noinst_PROGRAMS = gem_stress$(EXEEXT) TESTS = getversion$(EXEEXT) getclient$(EXEEXT) getstats$(EXEEXT) \ gem_basic$(EXEEXT) gem_exec_nop$(EXEEXT) gem_exec_blt$(EXEEXT) \ gem_flink$(EXEEXT) gem_readwrite$(EXEEXT) \ @@ -74,8 +74,7 @@ gem_bad_blit$(EXEEXT) gem_fence_thrash$(EXEEXT) \ gem_gtt_speed$(EXEEXT) $(am__EXEEXT_1) am__EXEEXT_3 = gem_bad_batch$(EXEEXT) gem_hang$(EXEEXT) -am__installdirs = "$(DESTDIR)$(bindir)" -PROGRAMS = $(bin_PROGRAMS) +PROGRAMS = $(noinst_PROGRAMS) gem_bad_address_SOURCES = gem_bad_address.c gem_bad_address_OBJECTS = gem_bad_address.$(OBJEXT) gem_bad_address_LDADD = $(LDADD) @@ -187,8 +186,9 @@ gem_ringfill_DEPENDENCIES = ../lib/libintel_tools.la \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ $(am__DEPENDENCIES_2) -gem_stress_SOURCES = gem_stress.c -gem_stress_OBJECTS = gem_stress.$(OBJEXT) +am_gem_stress_OBJECTS = gem_stress.$(OBJEXT) gem_stress_i915.$(OBJEXT) \ + gem_stress_i830.$(OBJEXT) +gem_stress_OBJECTS = $(am_gem_stress_OBJECTS) gem_stress_LDADD = $(LDADD) gem_stress_DEPENDENCIES = ../lib/libintel_tools.la \ $(am__DEPENDENCIES_1) $(am__DEPENDENCIES_1) \ @@ -264,16 +264,18 @@ gem_exec_blt.c gem_exec_nop.c gem_fence_thrash.c gem_flink.c \ gem_gtt_speed.c gem_hang.c gem_largeobject.c \ gem_linear_blits.c gem_mmap.c gem_pread_after_blit.c \ - gem_pwrite.c gem_readwrite.c gem_ringfill.c gem_stress.c \ - gem_tiled_blits.c gem_tiled_fence_blits.c gem_tiled_pread.c \ - getclient.c getstats.c getversion.c testdisplay.c + gem_pwrite.c gem_readwrite.c gem_ringfill.c \ + $(gem_stress_SOURCES) gem_tiled_blits.c \ + gem_tiled_fence_blits.c gem_tiled_pread.c getclient.c \ + getstats.c getversion.c testdisplay.c DIST_SOURCES = gem_bad_address.c gem_bad_batch.c gem_bad_blit.c \ gem_basic.c gem_exec_blt.c gem_exec_nop.c gem_fence_thrash.c \ gem_flink.c gem_gtt_speed.c gem_hang.c gem_largeobject.c \ gem_linear_blits.c gem_mmap.c gem_pread_after_blit.c \ - gem_pwrite.c gem_readwrite.c gem_ringfill.c gem_stress.c \ - gem_tiled_blits.c gem_tiled_fence_blits.c gem_tiled_pread.c \ - getclient.c getstats.c getversion.c testdisplay.c + gem_pwrite.c gem_readwrite.c gem_ringfill.c \ + $(gem_stress_SOURCES) gem_tiled_blits.c \ + gem_tiled_fence_blits.c gem_tiled_pread.c getclient.c \ + getstats.c getversion.c testdisplay.c ETAGS = etags CTAGS = ctags am__tty_colors = \ @@ -423,6 +425,12 @@ top_build_prefix = @top_build_prefix@ top_builddir = @top_builddir@ top_srcdir = @top_srcdir@ +gem_stress_SOURCES = \ + gem_stress.c \ + gem_stress.h \ + gem_stress_i915.c \ + gem_stress_i830.c + HANG = \ gem_bad_batch \ gem_hang \ @@ -469,43 +477,9 @@ $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh $(am__aclocal_m4_deps): -install-binPROGRAMS: $(bin_PROGRAMS) - @$(NORMAL_INSTALL) - test -z "$(bindir)" || $(MKDIR_P) "$(DESTDIR)$(bindir)" - @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ - for p in $$list; do echo "$$p $$p"; done | \ - sed 's/$(EXEEXT)$$//' | \ - while read p p1; do if test -f $$p || test -f $$p1; \ - then echo "$$p"; echo "$$p"; else :; fi; \ - done | \ - sed -e 'p;s,.*/,,;n;h' -e 's|.*|.|' \ - -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \ - sed 'N;N;N;s,\n, ,g' | \ - $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \ - { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \ - if ($$2 == $$4) files[d] = files[d] " " $$1; \ - else { print "f", $$3 "/" $$4, $$1; } } \ - END { for (d in files) print "f", d, files[d] }' | \ - while read type dir files; do \ - if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \ - test -z "$$files" || { \ - echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(bindir)$$dir'"; \ - $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \ - } \ - ; done - -uninstall-binPROGRAMS: - @$(NORMAL_UNINSTALL) - @list='$(bin_PROGRAMS)'; test -n "$(bindir)" || list=; \ - files=`for p in $$list; do echo "$$p"; done | \ - sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \ - -e 's/$$/$(EXEEXT)/' `; \ - test -n "$$list" || exit 0; \ - echo " ( cd '$(DESTDIR)$(bindir)' && rm -f" $$files ")"; \ - cd "$(DESTDIR)$(bindir)" && rm -f $$files -clean-binPROGRAMS: - @list='$(bin_PROGRAMS)'; test -n "$$list" || exit 0; \ +clean-noinstPROGRAMS: + @list='$(noinst_PROGRAMS)'; test -n "$$list" || exit 0; \ echo " rm -f" $$list; \ rm -f $$list || exit $$?; \ test -n "$(EXEEXT)" || exit 0; \ @@ -612,6 +586,8 @@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gem_readwrite.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gem_ringfill.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gem_stress.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gem_stress_i830.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gem_stress_i915.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gem_tiled_blits.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gem_tiled_fence_blits.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gem_tiled_pread.Po@am__quote@ @@ -845,9 +821,6 @@ check: check-am all-am: Makefile $(PROGRAMS) installdirs: - for dir in "$(DESTDIR)$(bindir)"; do \ - test -z "$$dir" || $(MKDIR_P) "$$dir"; \ - done install: install-am install-exec: install-exec-am install-data: install-data-am @@ -876,7 +849,8 @@ @echo "it deletes files that may require special tools to rebuild." clean: clean-am -clean-am: clean-binPROGRAMS clean-generic clean-libtool mostlyclean-am +clean-am: clean-generic clean-libtool clean-noinstPROGRAMS \ + mostlyclean-am distclean: distclean-am -rm -rf ./$(DEPDIR) @@ -902,7 +876,7 @@ install-dvi-am: -install-exec-am: install-binPROGRAMS +install-exec-am: install-html: install-html-am @@ -942,15 +916,15 @@ ps-am: -uninstall-am: uninstall-binPROGRAMS +uninstall-am: .MAKE: check-am install-am install-strip .PHONY: CTAGS GTAGS all all-am check check-TESTS check-am clean \ - clean-binPROGRAMS clean-generic clean-libtool ctags distclean \ - distclean-compile distclean-generic distclean-libtool \ - distclean-tags distdir dvi dvi-am html html-am info info-am \ - install install-am install-binPROGRAMS install-data \ + clean-generic clean-libtool clean-noinstPROGRAMS ctags \ + distclean distclean-compile distclean-generic \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ install-data-am install-dvi install-dvi-am install-exec \ install-exec-am install-html install-html-am install-info \ install-info-am install-man install-pdf install-pdf-am \ @@ -958,7 +932,7 @@ installcheck-am installdirs maintainer-clean \ maintainer-clean-generic mostlyclean mostlyclean-compile \ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ - tags uninstall uninstall-am uninstall-binPROGRAMS + tags uninstall uninstall-am # Tell versions [3.59,3.63) of GNU make to not export all variables. diff -Nru intel-gpu-tools-1.0.2+git20110324+d641f0f/tools/intel_decode.c intel-gpu-tools-1.0.2+git20110408+1c9ede3/tools/intel_decode.c --- intel-gpu-tools-1.0.2+git20110324+d641f0f/tools/intel_decode.c 2011-03-24 19:00:04.000000000 +0000 +++ intel-gpu-tools-1.0.2+git20110408+1c9ede3/tools/intel_decode.c 2011-04-08 19:58:59.000000000 +0000 @@ -997,135 +997,134 @@ /* save vertex state for decode */ if (!IS_GEN2(devid)) { - if (word == 2) { - saved_s2_set = 1; - saved_s2 = data[i]; - } - if (word == 4) { - saved_s4_set = 1; - saved_s4 = data[i]; - } - } - - switch (word) { - case 0: - instr_out(data, hw_offset, i, "S0: vbo offset: 0x%08x%s\n", - data[i]&(~1),data[i]&1?", auto cache invalidate disabled":""); - break; - case 1: - instr_out(data, hw_offset, i, "S1: vertex width: %i, vertex pitch: %i\n", - (data[i]>>24)&0x3f,(data[i]>>16)&0x3f); - break; - case 2: - instr_out(data, hw_offset, i, "S2: texcoord formats: "); - for (int tex_num = 0; tex_num < 8; tex_num++) { - switch((data[i]>>tex_num*4)&0xf) { - case 0: fprintf(out, "%i=2D ", tex_num); break; - case 1: fprintf(out, "%i=3D ", tex_num); break; - case 2: fprintf(out, "%i=4D ", tex_num); break; - case 3: fprintf(out, "%i=1D ", tex_num); break; - case 4: fprintf(out, "%i=2D_16 ", tex_num); break; - case 5: fprintf(out, "%i=4D_16 ", tex_num); break; - case 0xf: fprintf(out, "%i=NP ", tex_num); break; + if (word == 2) { + saved_s2_set = 1; + saved_s2 = data[i]; } - } - fprintf(out, "\n"); - - break; - case 3: - instr_out(data, hw_offset, i, "S3: not documented\n", word); - break; - case 4: - { - char *cullmode = ""; - char *vfmt_xyzw = ""; - switch((data[i]>>13)&0x3) { - case 0: cullmode = "both"; break; - case 1: cullmode = "none"; break; - case 2: cullmode = "cw"; break; - case 3: cullmode = "ccw"; break; + if (word == 4) { + saved_s4_set = 1; + saved_s4 = data[i]; } - switch((data[i]>>6)&0x7) { - case 1: vfmt_xyzw = "XYZ,"; break; - case 2: vfmt_xyzw = "XYZW,"; break; - case 3: vfmt_xyzw = "XY,"; break; - case 4: vfmt_xyzw = "XYW,"; break; - } - instr_out(data, hw_offset, i, "S4: point_width=%i, line_width=%.1f," - "%s%s%s%s%s cullmode=%s, vfmt=%s%s%s%s%s%s " - "%s%s\n", - (data[i]>>23)&0x1ff, - ((data[i]>>19)&0xf) / 2.0, - data[i]&(0xf<<15)?" flatshade=":"", - data[i]&(1<<18)?"Alpha,":"", - data[i]&(1<<17)?"Fog,":"", - data[i]&(1<<16)?"Specular,":"", - data[i]&(1<<15)?"Color,":"", - cullmode, - data[i]&(1<<12)?"PointWidth,":"", - data[i]&(1<<11)?"SpecFog,":"", - data[i]&(1<<10)?"Color,":"", - data[i]&(1<<9)?"DepthOfs,":"", - vfmt_xyzw, - data[i]&(1<<9)?"FogParam,":"", - data[i]&(1<<5)?"force default diffuse, ":"", - data[i]&(1<<4)?"force default specular, ":"", - data[i]&(1<<3)?"local depth ofs enable, ":"", - data[i]&(1<<3)?"point sprite enable, ":"", - data[i]&(1<<3)?"line AA enable, ":""); - break; - } - case 5: - { - char *vfmt_xyzw = ""; - switch((data[i]>>6)&0x7) { - case 1: vfmt_xyzw = "XYZ,"; break; - case 2: vfmt_xyzw = "XYZW,"; break; - case 3: vfmt_xyzw = "XY,"; break; - case 4: vfmt_xyzw = "XYW,"; break; + + switch (word) { + case 0: + instr_out(data, hw_offset, i, "S0: vbo offset: 0x%08x%s\n", + data[i]&(~1),data[i]&1?", auto cache invalidate disabled":""); + break; + case 1: + instr_out(data, hw_offset, i, "S1: vertex width: %i, vertex pitch: %i\n", + (data[i]>>24)&0x3f,(data[i]>>16)&0x3f); + break; + case 2: + instr_out(data, hw_offset, i, "S2: texcoord formats: "); + for (int tex_num = 0; tex_num < 8; tex_num++) { + switch((data[i]>>tex_num*4)&0xf) { + case 0: fprintf(out, "%i=2D ", tex_num); break; + case 1: fprintf(out, "%i=3D ", tex_num); break; + case 2: fprintf(out, "%i=4D ", tex_num); break; + case 3: fprintf(out, "%i=1D ", tex_num); break; + case 4: fprintf(out, "%i=2D_16 ", tex_num); break; + case 5: fprintf(out, "%i=4D_16 ", tex_num); break; + case 0xf: fprintf(out, "%i=NP ", tex_num); break; + } + } + fprintf(out, "\n"); + + break; + case 3: + instr_out(data, hw_offset, i, "S3: not documented\n", word); + break; + case 4: + { + char *cullmode = ""; + char *vfmt_xyzw = ""; + switch((data[i]>>13)&0x3) { + case 0: cullmode = "both"; break; + case 1: cullmode = "none"; break; + case 2: cullmode = "cw"; break; + case 3: cullmode = "ccw"; break; + } + switch(data[i] & (7<<6 | 1<<2)) { + case 1<<6: vfmt_xyzw = "XYZ,"; break; + case 2<<6: vfmt_xyzw = "XYZW,"; break; + case 3<<6: vfmt_xyzw = "XY,"; break; + case 4<<6: vfmt_xyzw = "XYW,"; break; + case 1<<6 | 1<<2: vfmt_xyzw = "XYZF,"; break; + case 2<<6 | 1<<2: vfmt_xyzw = "XYZWF,"; break; + case 3<<6 | 1<<2: vfmt_xyzw = "XYF,"; break; + case 4<<6 | 1<<2: vfmt_xyzw = "XYWF,"; break; + } + instr_out(data, hw_offset, i, "S4: point_width=%i, line_width=%.1f," + "%s%s%s%s%s cullmode=%s, vfmt=%s%s%s%s%s%s " + "%s%s\n", + (data[i]>>23)&0x1ff, + ((data[i]>>19)&0xf) / 2.0, + data[i]&(0xf<<15)?" flatshade=":"", + data[i]&(1<<18)?"Alpha,":"", + data[i]&(1<<17)?"Fog,":"", + data[i]&(1<<16)?"Specular,":"", + data[i]&(1<<15)?"Color,":"", + cullmode, + data[i]&(1<<12)?"PointWidth,":"", + data[i]&(1<<11)?"SpecFog,":"", + data[i]&(1<<10)?"Color,":"", + data[i]&(1<<9)?"DepthOfs,":"", + vfmt_xyzw, + data[i]&(1<<9)?"FogParam,":"", + data[i]&(1<<5)?"force default diffuse, ":"", + data[i]&(1<<4)?"force default specular, ":"", + data[i]&(1<<3)?"local depth ofs enable, ":"", + data[i]&(1<<1)?"point sprite enable, ":"", + data[i]&(1<<0)?"line AA enable, ":""); + break; + } + case 5: + { + instr_out(data, hw_offset, i, "S5:%s%s%s%s%s" + "%s%s%s%s stencil_ref=0x%x, stencil_test=%s, " + "stencil_fail=%s, stencil_pass_z_fail=%s, " + "stencil_pass_z_pass=%s, %s%s%s%s\n", + data[i]&(0xf<<28)?" write_disable=":"", + data[i]&(1<<31)?"Alpha,":"", + data[i]&(1<<30)?"Red,":"", + data[i]&(1<<29)?"Green,":"", + data[i]&(1<<28)?"Blue,":"", + data[i]&(1<<27)?" force default point size,":"", + data[i]&(1<<26)?" last pixel enable,":"", + data[i]&(1<<25)?" global depth ofs enable,":"", + data[i]&(1<<24)?" fog enable,":"", + (data[i]>>16)&0xff, + decode_compare_func(data[i]>>13), + decode_stencil_op(data[i]>>10), + decode_stencil_op(data[i]>>7), + decode_stencil_op(data[i]>>4), + data[i]&(1<<3)?"stencil write enable, ":"", + data[i]&(1<<2)?"stencil test enable, ":"", + data[i]&(1<<1)?"color dither enable, ":"", + data[i]&(1<<0)?"logicop enable, ":""); + } + break; + case 6: + instr_out(data, hw_offset, i, "S6: %salpha_test=%s, alpha_ref=0x%x, " + "depth_test=%s, %ssrc_blnd_fct=%s, dst_blnd_fct=%s, " + "%s%stristrip_provoking_vertex=%i\n", + data[i]&(1<<31)?"alpha test enable, ":"", + decode_compare_func(data[i]>>28), + data[i]&(0xff<<20), + decode_compare_func(data[i]>>16), + data[i]&(1<<15)?"cbuf blend enable, ":"", + decode_blend_fact(data[i]>>8), + decode_blend_fact(data[i]>>4), + data[i]&(1<<3)?"depth write enable, ":"", + data[i]&(1<<2)?"cbuf write enable, ":"", + data[i]&(0x3)); + break; + case 7: + instr_out(data, hw_offset, i, "S7: depth offset constant: 0x%08x\n", data[i]); + break; } - instr_out(data, hw_offset, i, "S5:%s%s%s%s%s" - "%s%s%s%s stencil_ref=0x%x, stencil_test=%s, " - "stencil_fail=%s, stencil_pass_z_fail=%s, " - "stencil_pass_z_pass=%s, %s%s%s%s\n", - data[i]&(0xf<<28)?" write_disable=":"", - data[i]&(1<<31)?"Alpha,":"", - data[i]&(1<<30)?"Red,":"", - data[i]&(1<<29)?"Green,":"", - data[i]&(1<<28)?"Blue,":"", - data[i]&(1<<27)?" force default point size,":"", - data[i]&(1<<26)?" last pixel enable,":"", - data[i]&(1<<25)?" global depth ofs enable,":"", - data[i]&(1<<24)?" fog enable,":"", - (data[i]>>16)&0xff, - decode_compare_func(data[i]>>13), - decode_stencil_op(data[i]>>10), - decode_stencil_op(data[i]>>7), - decode_stencil_op(data[i]>>4), - data[i]&(1<<3)?"stencil write enable, ":"", - data[i]&(1<<2)?"stencil test enable, ":"", - data[i]&(1<<1)?"color dither enable, ":"", - data[i]&(1<<0)?"logicop enable, ":""); - } - break; - case 6: - instr_out(data, hw_offset, i, "S6: %salpha_test=%s, alpha_ref=0x%x, " - "depth_test=%s, %ssrc_blnd_fct=%s, dst_blnd_fct=%s, " - "%s%stristrip_provoking_vertex=%i\n", - data[i]&(1<<31)?"alpha test enable, ":"", - decode_compare_func(data[i]>>28), - data[i]&(0xff<<20), - decode_compare_func(data[i]>>16), - data[i]&(1<<15)?"cbuf blend enable, ":"", - decode_blend_fact(data[i]>>12), - decode_blend_fact(data[i]>>8), - data[i]&(1<<3)?"depth write enable, ":"", - data[i]&(1<<2)?"cbuf write enable, ":"", - data[i]&(0x3)); - break; - case 7: - instr_out(data, hw_offset, i, "S7: depth offset constant: 0x%08x\n", data[i]); - break; + } else { + instr_out(data, hw_offset, i, "S%d: 0x%08x\n", i, data[i]); } i++; }