diff -Nru xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/.lastcommit xserver-xorg-video-ati-7.1.99+git20130730.6a278369/.lastcommit --- xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/.lastcommit 2013-07-10 15:44:03.000000000 +0000 +++ xserver-xorg-video-ati-7.1.99+git20130730.6a278369/.lastcommit 2013-07-30 17:27:13.000000000 +0000 @@ -1 +1 @@ -commit c16c59f8f9b6aa7a4a6a6465582ad98f02a3606a +commit 6a278369c05a298a4367306d986467a9ceacae8c diff -Nru xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/ChangeLog xserver-xorg-video-ati-7.1.99+git20130730.6a278369/ChangeLog --- xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/ChangeLog 2013-07-10 15:44:11.000000000 +0000 +++ xserver-xorg-video-ati-7.1.99+git20130730.6a278369/ChangeLog 2013-07-30 17:27:22.000000000 +0000 @@ -1,9 +1,78 @@ -commit 2ae465b8dc18d69e0012dd9b3c4568fac952e3cf +commit 8e9453ff52d9e481123e9dc5fac5ecaa2e51bbfc Author: Robert Hooker -Date: Wed Jul 10 11:44:03 2013 -0400 +Date: Tue Jul 30 13:27:13 2013 -0400 Add debian tree from origin/ubuntu +commit 6a278369c05a298a4367306d986467a9ceacae8c +Author: Raul Fernandes +Date: Tue Jul 30 09:26:05 2013 -0400 + + EXA/6xx/7xx: optimize non-overlapping Copy + + In case dst and src rectangles of a Copy operation in the same surface + don't overlap, it is safe to skip the scratch surface. This is a + common case. + + Based on evergreen/ni patch from Grigori Goronzy. + + Signed-off-by: Alex Deucher + +commit 4375a6e75e5d41139be7031a0dee58c057ecbd07 +Author: Grigori Goronzy +Date: Mon Jul 22 02:30:28 2013 +0200 + + EXA/evergreen/ni: accelerate PictOpOver with component alpha + + Subpixel text rendering is typically done with a solid src and a + pixmap mask. Traditionally, this cannot be accelerated in a single + pass and requires two passes [1]. However, we can cheat a little + with a constant blend color. 
+ + We can use: + + const.A = src.A / src.A + const.R = src.R / src.A + const.G = src.G / src.A + const.B = src.B / src.A + + dst.A = const.A * (src.A * mask.A) + (1 - (src.A * mask.A)) * dst.A + dst.R = const.R * (src.A * mask.R) + (1 - (src.A * mask.R)) * dst.R + dst.G = const.G * (src.A * mask.G) + (1 - (src.A * mask.G)) * dst.G + dst.B = const.B * (src.A * mask.B) + (1 - (src.A * mask.B)) * dst.B + + This only needs a single source value. src.A is cancelled down in + the right places. + + [1] http://anholt.livejournal.com/32058.html + +commit 94d0d14914a025525a0766669b556eaa6681def7 +Author: Grigori Goronzy +Date: Thu Jul 18 16:06:23 2013 +0200 + + EXA/evergreen/ni: fast solid pixmap support + + Solid pixmaps are currently implemented with scratch pixmaps, which + is slow. This replaces the hack with a proper implementation. The + Composite shader can now either sample a src/mask or use a constant + value. + +commit 5bb04351c43a91a1d60348b7293544da05d75e72 +Author: Grigori Goronzy +Date: Fri Jul 27 17:31:53 2012 +0200 + + EXA/evergreen/ni: optimize non-overlapping Copy + + In case dst and src rectangles of a Copy operation in the same surface + don't overlap, it is safe to skip the scratch surface. This is a + common case. 
+ +commit c08e09b7bec441c4bf93b4cae4de1260754bf940 +Author: Grigori Goronzy +Date: Sat May 18 13:46:03 2013 +0200 + + Fix RADEON_FALLBACK logging + commit c16c59f8f9b6aa7a4a6a6465582ad98f02a3606a Author: Mark Kettenis Date: Sun Jul 7 13:44:13 2013 +0200 diff -Nru xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/debian/changelog xserver-xorg-video-ati-7.1.99+git20130730.6a278369/debian/changelog --- xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/debian/changelog 2013-07-30 17:31:22.000000000 +0000 +++ xserver-xorg-video-ati-7.1.99+git20130730.6a278369/debian/changelog 2013-07-30 17:31:23.000000000 +0000 @@ -1,14 +1,20 @@ -xserver-xorg-video-ati (1:7.1.99+git20130710.c16c59f8-0ubuntu0sarvatt~precise) precise; urgency=critical +xserver-xorg-video-ati (1:7.1.99+git20130730.6a278369-0ubuntu0sarvatt~precise) precise; urgency=medium - * Checkout from git 20130710 (master branch) up to commit - c16c59f8f9b6aa7a4a6a6465582ad98f02a3606a + * Checkout from git 20130730 (master branch) up to commit + 6a278369c05a298a4367306d986467a9ceacae8c * Only added debian/ tree from origin/ubuntu * hook: Drop fixup-close.patch (upstream) * hook: Drop drmmode-fail.patch (upstream) * Forcing Build-depends to xserver-xorg-dev 2:1.12 - * + Drop reverse-prime.patch for precise. + * + Drop reverse-prime.patch for 12.04. 
- -- Robert Hooker Wed, 10 Jul 2013 11:44:11 -0400 + -- Robert Hooker Tue, 30 Jul 2013 13:27:22 -0400 + +xserver-xorg-video-ati (1:7.1.0-0ubuntu2.1) raring; urgency=critical + + * Rebuild for xserver 1.14 + + -- Robert Hooker Fri, 22 Mar 2013 11:27:25 -0400 xserver-xorg-video-ati (1:7.1.0-0ubuntu2) raring; urgency=low diff -Nru xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/src/cayman_shader.c xserver-xorg-video-ati-7.1.99+git20130730.6a278369/src/cayman_shader.c --- xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/src/cayman_shader.c 2013-05-31 16:34:11.000000000 +0000 +++ xserver-xorg-video-ati-7.1.99+git20130730.6a278369/src/cayman_shader.c 2013-07-30 17:27:13.000000000 +0000 @@ -2495,17 +2495,44 @@ int i = 0; /* 0 */ - shader[i++] = CF_DWORD0(ADDR(3), + /* call interp-fetch-mask if boolean1 == true */ + shader[i++] = CF_DWORD0(ADDR(12), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), + CF_CONST(1), COND(SQ_CF_COND_BOOL), I_COUNT(0), VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_CALL), BARRIER(0)); + /* 1 */ - shader[i++] = CF_DWORD0(ADDR(8), + /* call read-constant-mask if boolean1 == false */ + shader[i++] = CF_DWORD0(ADDR(15), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(1), + COND(SQ_CF_COND_NOT_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_CALL), + BARRIER(0)); + + /* 2 */ + /* call interp-fetch-src if boolean0 == true */ + shader[i++] = CF_DWORD0(ADDR(7), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_CALL), + BARRIER(0)); + + /* 3 */ + /* call read-constant-src if boolean0 == false */ + shader[i++] = CF_DWORD0(ADDR(10), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -2514,7 +2541,41 @@ VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_CALL), BARRIER(0)); - /* 
2 - end */ + /* 4 */ + /* src IN mask (GPR0 := GPR0 .* GPR1) */ + shader[i++] = CF_ALU_DWORD0(ADDR(17), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 5 */ + /* export pixel data */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + + /* 6 */ + /* end of program */ shader[i++] = CF_DWORD0(ADDR(0), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), @@ -2524,33 +2585,53 @@ VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_END), BARRIER(1)); - /* 3 - mask sub */ - shader[i++] = CF_ALU_DWORD0(ADDR(12), + + /* subroutine interp-fetch-src */ + + /* 7 */ + /* interpolate src */ + shader[i++] = CF_ALU_DWORD0(ADDR(21), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_NOP)); shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), KCACHE_ADDR0(0), KCACHE_ADDR1(0), - I_COUNT(8), + I_COUNT(4), ALT_CONST(0), CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 4 */ - shader[i++] = CF_DWORD0(ADDR(28), + /* 8 */ + /* texture fetch src into GPR0 */ + shader[i++] = CF_DWORD0(ADDR(26), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), - I_COUNT(2), + I_COUNT(1), VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_TC), BARRIER(1)); - /* 5 */ - shader[i++] = CF_ALU_DWORD0(ADDR(20), + /* 9 */ + /* return */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = 
CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_RETURN), + BARRIER(0)); + + /* subroutine read-constant-src */ + + /* 10 */ + /* read constants into GPR0 */ + shader[i++] = CF_ALU_DWORD0(ADDR(28), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); @@ -2558,29 +2639,13 @@ KCACHE_ADDR0(0), KCACHE_ADDR1(0), I_COUNT(4), - ALT_CONST(0), + ALT_CONST(1), CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 6 */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), - TYPE(SQ_EXPORT_PIXEL), - RW_GPR(2), - RW_REL(ABSOLUTE), - INDEX_GPR(0), - ELEM_SIZE(1)); - - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), - BURST_COUNT(1), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_EXPORT_DONE), - MARK(0), - BARRIER(1)); - /* 7 */ + /* 11 */ + /* return */ shader[i++] = CF_DWORD0(ADDR(0), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), @@ -2589,10 +2654,13 @@ I_COUNT(0), VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_RETURN), - BARRIER(1)); + BARRIER(0)); + + /* subroutine interp-fetch-mask */ - /* 8 - non-mask sub */ - shader[i++] = CF_ALU_DWORD0(ADDR(24), + /* 12 */ + /* interpolate mask */ + shader[i++] = CF_ALU_DWORD0(ADDR(32), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_NOP)); @@ -2604,8 +2672,10 @@ CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 9 */ - shader[i++] = CF_DWORD0(ADDR(32), + + /* 13 */ + /* texture fetch mask into GPR1 */ + shader[i++] = CF_DWORD0(ADDR(36), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -2615,24 +2685,37 @@ CF_INST(SQ_CF_INST_TC), BARRIER(1)); - /* 10 */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), - TYPE(SQ_EXPORT_PIXEL), - RW_GPR(0), - RW_REL(ABSOLUTE), - INDEX_GPR(0), - ELEM_SIZE(1)); - shader[i++] = 
CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), - BURST_COUNT(1), - VALID_PIXEL_MODE(0), - CF_INST(SQ_CF_INST_EXPORT_DONE), - MARK(0), - BARRIER(1)); + /* 14 */ + /* return */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + CF_INST(SQ_CF_INST_RETURN), + BARRIER(0)); - /* 11 */ + /* subroutine read-constant-mask */ + + /* 15 */ + /* read constants into GPR1 */ + shader[i++] = CF_ALU_DWORD0(ADDR(38), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(1), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 16 */ + /* return */ shader[i++] = CF_DWORD0(ADDR(0), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), @@ -2641,18 +2724,21 @@ I_COUNT(0), VALID_PIXEL_MODE(0), CF_INST(SQ_CF_INST_RETURN), - BARRIER(1)); + BARRIER(0)); + + /* ALU clauses */ - /* 12 interpolate src tex coords - mask */ + /* 17 */ + /* MUL gpr[0].x gpr[0].x gpr[1].x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), + SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), + INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2661,22 +2747,24 @@ UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_INTERP_XY), - BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(1), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), - CLAMP(0)); - /* 13 */ + CLAMP(1)); + + /* 18 */ + /* MUL 
gpr[0].y gpr[0].y gpr[1].y */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), + SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_Y), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), + INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2685,67 +2773,70 @@ UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_INTERP_XY), - BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(1), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), - CLAMP(0)); - /* 14 */ + CLAMP(1)); + /* 19 */ + /* MUL gpr[0].z gpr[0].z gpr[1].z */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), + SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_Z), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), + INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0), - WRITE_MASK(0), + WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_INTERP_XY), - BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(1), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), - CLAMP(0)); - /* 15 */ + CLAMP(1)); + /* 20 */ + /* MUL gpr[0].w gpr[0].w gpr[1].w */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), + SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_W), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), + INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); shader[i++] = 
ALU_DWORD1_OP2(SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0), - WRITE_MASK(0), + WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_INTERP_XY), - BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(1), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), - CLAMP(0)); + CLAMP(1)); - /* 16 interpolate mask tex coords */ + /* 21 */ + /* INTERP_XY GPR0.x, GPR0.y PARAM0.x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2764,12 +2855,13 @@ DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(0)); - /* 17 */ + /* 22 */ + /* INTERP_XY GPR0.y, GPR0.x PARAM0.x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2788,12 +2880,13 @@ DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(0)); - /* 18 */ + /* 23 */ + /* INTERP_XY GPR0.z, GPR0.y PARAM0.x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2812,12 +2905,14 @@ DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(0)); - /* 19 */ + + /* 24 */ + /* INTERP_XY GPR0.w, GPR0.x PARAM0.x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2837,17 +2932,53 @@ DST_ELEM(ELEM_W), CLAMP(0)); - /* 20 - alu 0 */ - /* MUL gpr[2].x gpr[0].x gpr[1].x */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + /* 25 */ + shader[i++] = 0; + 
shader[i++] = 0; + + /* 26/27 */ + /* SAMPLE RID=0 GPR0, GPR0 */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(0), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_Z), + DST_SEL_W(SQ_SEL_W), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + + /* 28 */ + /* MOV GPR0.x, KC4.x */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), + INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2856,23 +2987,24 @@ UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(1)); - /* 21 - alu 1 */ - /* MUL gpr[2].y gpr[0].y gpr[1].y */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + + /* 29 */ + /* MOV GPR0.y, KC4.y */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), + INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = 
ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2881,23 +3013,24 @@ UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(1)); - /* 22 - alu 2 */ - /* MUL gpr[2].z gpr[0].z gpr[1].z */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + + /* 30 */ + /* MOV GPR0.z, KC4.z */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), + INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2906,23 +3039,24 @@ UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(1)); - /* 23 - alu 3 */ - /* MUL gpr[2].w gpr[0].w gpr[1].w */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + + /* 31 */ + /* MOV GPR0.w, KC4.w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), + INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2931,19 +3065,20 @@ UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(1)); - /* 24 - interpolate tex coords - non-mask */ + /* 32 */ + /* INTERP_XY GPR1.x, PARAM1 */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 
0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2958,16 +3093,17 @@ OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_INTERP_XY), BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(0), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(0)); - /* 25 */ + /* 33 */ + /* INTERP_XY GPR1.y, PARAM1 */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2982,16 +3118,17 @@ OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_INTERP_XY), BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(0), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(0)); - /* 26 */ + /* 34 */ + /* INTERP_XY GPR1.z, PARAM1 */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -3006,16 +3143,17 @@ OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_INTERP_XY), BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(0), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(0)); - /* 27 */ + /* 35 */ + /* INTERP_XY GPR1.w, PARAM1 */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -3030,16 +3168,17 @@ OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_INTERP_XY), BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(0), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(0)); - /* 28/29 - src - mask */ + /* 36/37 */ + /* SAMPLE RID=1 GPR1, GPR1 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), INST_MOD(0), FETCH_WHOLE_QUAD(0), - RESOURCE_ID(0), + RESOURCE_ID(1), SRC_GPR(1), 
SRC_REL(ABSOLUTE), ALT_CONST(0), @@ -3059,36 +3198,6 @@ shader[i++] = TEX_DWORD2(OFFSET_X(0), OFFSET_Y(0), OFFSET_Z(0), - SAMPLER_ID(0), - SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1)); - shader[i++] = TEX_DWORD_PAD; - /* 30/31 - mask */ - shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), - INST_MOD(0), - FETCH_WHOLE_QUAD(0), - RESOURCE_ID(1), - SRC_GPR(0), - SRC_REL(ABSOLUTE), - ALT_CONST(0), - RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), - SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); - shader[i++] = TEX_DWORD1(DST_GPR(0), - DST_REL(ABSOLUTE), - DST_SEL_X(SQ_SEL_X), - DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_Z), - DST_SEL_W(SQ_SEL_W), - LOD_BIAS(0), - COORD_TYPE_X(TEX_NORMALIZED), - COORD_TYPE_Y(TEX_NORMALIZED), - COORD_TYPE_Z(TEX_NORMALIZED), - COORD_TYPE_W(TEX_NORMALIZED)); - shader[i++] = TEX_DWORD2(OFFSET_X(0), - OFFSET_Y(0), - OFFSET_Z(0), SAMPLER_ID(1), SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y), @@ -3096,36 +3205,109 @@ SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 32/33 - src - non-mask */ - shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), - INST_MOD(0), - FETCH_WHOLE_QUAD(0), - RESOURCE_ID(0), - SRC_GPR(0), - SRC_REL(ABSOLUTE), - ALT_CONST(0), - RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), - SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); - shader[i++] = TEX_DWORD1(DST_GPR(0), - DST_REL(ABSOLUTE), - DST_SEL_X(SQ_SEL_X), - DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_Z), - DST_SEL_W(SQ_SEL_W), - LOD_BIAS(0), - COORD_TYPE_X(TEX_NORMALIZED), - COORD_TYPE_Y(TEX_NORMALIZED), - COORD_TYPE_Z(TEX_NORMALIZED), - COORD_TYPE_W(TEX_NORMALIZED)); - shader[i++] = TEX_DWORD2(OFFSET_X(0), - OFFSET_Y(0), - OFFSET_Z(0), - SAMPLER_ID(0), - SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1)); - shader[i++] = TEX_DWORD_PAD; + /* 38 */ + /* MOV GPR1.x, KC5.x */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 
0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + + /* 39 */ + /* MOV GPR1.y, KC5.y */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + + /* 40 */ + /* MOV GPR1.z, KC5.z */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + + /* 41 */ + /* MOV GPR1.w, KC5.w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + 
UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); return i; } diff -Nru xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/src/evergreen_accel.c xserver-xorg-video-ati-7.1.99+git20130730.6a278369/src/evergreen_accel.c --- xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/src/evergreen_accel.c 2013-05-31 16:34:11.000000000 +0000 +++ xserver-xorg-video-ati-7.1.99+git20130730.6a278369/src/evergreen_accel.c 2013-07-30 17:27:13.000000000 +0000 @@ -335,7 +335,19 @@ (CB_NORMAL << CB_COLOR_CONTROL__MODE_shift))); EREG(CB_BLEND0_CONTROL, cb_conf->blendcntl); END_BATCH(); +} +void evergreen_set_blend_color(ScrnInfoPtr pScrn, float *color) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + + BEGIN_BATCH(2 + 4); + PACK0(CB_BLEND_RED, 4); + EFLOAT(color[0]); /* R */ + EFLOAT(color[1]); /* G */ + EFLOAT(color[2]); /* B */ + EFLOAT(color[3]); /* A */ + END_BATCH(); } static void diff -Nru xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/src/evergreen_exa.c xserver-xorg-video-ati-7.1.99+git20130730.6a278369/src/evergreen_exa.c --- xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/src/evergreen_exa.c 2013-05-31 16:34:11.000000000 +0000 +++ xserver-xorg-video-ati-7.1.99+git20130730.6a278369/src/evergreen_exa.c 2013-07-30 17:27:13.000000000 +0000 @@ -575,7 +575,12 @@ if (accel_state->vsync) RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h); - if (accel_state->same_surface && accel_state->copy_area) { + if (accel_state->same_surface && + (srcX + w <= dstX || dstX + w <= srcX || srcY + h <= dstY || dstY + h <= srcY)) { + EVERGREENDoPrepareCopy(pScrn); + EVERGREENAppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); + EVERGREENDoCopyVline(pDst); + } else if (accel_state->same_surface && accel_state->copy_area) { uint32_t orig_dst_domain = accel_state->dst_obj.domain; uint32_t orig_src_domain = 
accel_state->src_obj[0].domain; uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags; @@ -699,6 +704,14 @@ } else if (dblend == (BLEND_ONE_MINUS_SRC_ALPHA << COLOR_DESTBLEND_shift)) { dblend = (BLEND_ONE_MINUS_SRC_COLOR << COLOR_DESTBLEND_shift); } + + /* With some tricks, we can still accelerate PictOpOver with solid src. + * This is commonly used for text rendering, so it's worth the extra + * effort. + */ + if (sblend == (BLEND_ONE << COLOR_SRCBLEND_shift)) { + sblend = (BLEND_CONSTANT_COLOR << COLOR_SRCBLEND_shift); + } } return sblend | dblend; @@ -772,10 +785,9 @@ return TRUE; } -static void EVERGREENXFormSetup(PicturePtr pPict, PixmapPtr pPix, +static void EVERGREENXFormSetup(PicturePtr pPict, ScrnInfoPtr pScrn, int unit, float *vs_alu_consts) { - ScrnInfoPtr pScrn = xf86ScreenToScrn(pPix->drawable.pScreen); RADEONInfoPtr info = RADEONPTR(pScrn); struct radeon_accel_state *accel_state = info->accel_state; int const_offset = unit * 8; @@ -1091,12 +1103,17 @@ /* Check if it's component alpha that relies on a source alpha and * on the source value. We can only get one of those into the * single source value that we get to blend with. + * + * We can cheat a bit if the src is solid, though. PictOpOver + * can use the constant blend color to sneak a second blend + * source in. 
*/ if (EVERGREENBlendOp[op].src_alpha && (EVERGREENBlendOp[op].blend_cntl & COLOR_SRCBLEND_mask) != (BLEND_ZERO << COLOR_SRCBLEND_shift)) { - RADEON_FALLBACK(("Component alpha not supported with source " - "alpha and source value blending.\n")); + if (pSrcPicture->pDrawable || op != 3) + RADEON_FALLBACK(("Component alpha not supported with source " + "alpha and source value blending.\n")); } } @@ -1113,6 +1130,139 @@ } +static void EVERGREENSetSolidConsts(ScrnInfoPtr pScrn, float *buf, int format, uint32_t fg, int unit) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + float pix_r = 0, pix_g = 0, pix_b = 0, pix_a = 0; + + uint32_t w = (fg >> 24) & 0xff; + uint32_t z = (fg >> 16) & 0xff; + uint32_t y = (fg >> 8) & 0xff; + uint32_t x = (fg >> 0) & 0xff; + float xf = (float)x / 255; /* R */ + float yf = (float)y / 255; /* G */ + float zf = (float)z / 255; /* B */ + float wf = (float)w / 255; /* A */ + + /* component swizzles */ + switch (format) { + case PICT_a1r5g5b5: + case PICT_a8r8g8b8: + pix_r = zf; /* R */ + pix_g = yf; /* G */ + pix_b = xf; /* B */ + pix_a = wf; /* A */ + break; + case PICT_a8b8g8r8: + pix_r = xf; /* R */ + pix_g = yf; /* G */ + pix_b = zf; /* B */ + pix_a = wf; /* A */ + break; + case PICT_x8b8g8r8: + pix_r = xf; /* R */ + pix_g = yf; /* G */ + pix_b = zf; /* B */ + pix_a = 1.0; /* A */ + break; + case PICT_b8g8r8a8: + pix_r = yf; /* R */ + pix_g = zf; /* G */ + pix_b = wf; /* B */ + pix_a = xf; /* A */ + break; + case PICT_b8g8r8x8: + pix_r = yf; /* R */ + pix_g = zf; /* G */ + pix_b = wf; /* B */ + pix_a = 1.0; /* A */ + break; + case PICT_x1r5g5b5: + case PICT_x8r8g8b8: + case PICT_r5g6b5: + pix_r = zf; /* R */ + pix_g = yf; /* G */ + pix_b = xf; /* B */ + pix_a = 1.0; /* A */ + break; + case PICT_a8: + pix_r = 0.0; /* R */ + pix_g = 0.0; /* G */ + pix_b = 0.0; /* B */ + pix_a = xf; /* A */ + break; + default: + ErrorF("Bad format 0x%x\n", format); + } + + if (unit == 0) { + if 
(!accel_state->msk_pic) { + if (PICT_FORMAT_RGB(format) == 0) { + pix_r = 0.0; + pix_g = 0.0; + pix_b = 0.0; + } + + if (PICT_FORMAT_A(format) == 0) + pix_a = 1.0; + } else { + if (accel_state->component_alpha) { + if (accel_state->src_alpha) { + /* required for PictOpOver */ + float cblend[4] = { pix_r / pix_a, pix_g / pix_a, + pix_b / pix_a, pix_a / pix_a }; + evergreen_set_blend_color(pScrn, cblend); + + if (PICT_FORMAT_A(format) == 0) { + pix_r = 1.0; + pix_g = 1.0; + pix_b = 1.0; + pix_a = 1.0; + } else { + pix_r = pix_a; + pix_g = pix_a; + pix_b = pix_a; + } + } else { + if (PICT_FORMAT_A(format) == 0) + pix_a = 1.0; + } + } else { + if (PICT_FORMAT_RGB(format) == 0) { + pix_r = 0; + pix_g = 0; + pix_b = 0; + } + + if (PICT_FORMAT_A(format) == 0) + pix_a = 1.0; + } + } + } else { + if (accel_state->component_alpha) { + if (PICT_FORMAT_A(format) == 0) + pix_a = 1.0; + } else { + if (PICT_FORMAT_A(format) == 0) { + pix_r = 1.0; + pix_g = 1.0; + pix_b = 1.0; + pix_a = 1.0; + } else { + pix_r = pix_a; + pix_g = pix_a; + pix_b = pix_a; + } + } + } + + buf[0] = pix_r; + buf[1] = pix_g; + buf[2] = pix_b; + buf[3] = pix_a; +} + static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, PicturePtr pDstPicture, PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) @@ -1127,30 +1277,26 @@ const_config_t vs_const_conf; struct r600_accel_object src_obj, mask_obj, dst_obj; float *cbuf; + uint32_t ps_bool_consts = 0; if (pDst->drawable.bitsPerPixel < 8 || (pSrc && pSrc->drawable.bitsPerPixel < 8)) return FALSE; - if (!pSrc) { - pSrc = RADEONSolidPixmap(pScreen, pSrcPicture->pSourcePict->solidFill.color); - if (!pSrc) - RADEON_FALLBACK("Failed to create solid scratch pixmap\n"); + if (pSrc) { + src_obj.bo = radeon_get_pixmap_bo(pSrc); + src_obj.surface = radeon_get_pixmap_surface(pSrc); + src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc); + src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); + src_obj.width = 
pSrc->drawable.width; + src_obj.height = pSrc->drawable.height; + src_obj.bpp = pSrc->drawable.bitsPerPixel; + src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; } dst_obj.bo = radeon_get_pixmap_bo(pDst); - src_obj.bo = radeon_get_pixmap_bo(pSrc); dst_obj.surface = radeon_get_pixmap_surface(pDst); - src_obj.surface = radeon_get_pixmap_surface(pSrc); dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst); - src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc); - src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); - - src_obj.width = pSrc->drawable.width; - src_obj.height = pSrc->drawable.height; - src_obj.bpp = pSrc->drawable.bitsPerPixel; - src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; - dst_obj.width = pDst->drawable.width; dst_obj.height = pDst->drawable.height; dst_obj.bpp = pDst->drawable.bitsPerPixel; @@ -1160,30 +1306,16 @@ dst_obj.domain = RADEON_GEM_DOMAIN_VRAM; if (pMaskPicture) { - if (!pMask) { - pMask = RADEONSolidPixmap(pScreen, pMaskPicture->pSourcePict->solidFill.color); - if (!pMask) { - if (!pSrcPicture->pDrawable) - pScreen->DestroyPixmap(pSrc); - RADEON_FALLBACK("Failed to create solid scratch pixmap\n"); - } + if (pMask) { + mask_obj.bo = radeon_get_pixmap_bo(pMask); + mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask); + mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8); + mask_obj.surface = radeon_get_pixmap_surface(pMask); + mask_obj.width = pMask->drawable.width; + mask_obj.height = pMask->drawable.height; + mask_obj.bpp = pMask->drawable.bitsPerPixel; + mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; } - mask_obj.bo = radeon_get_pixmap_bo(pMask); - mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask); - mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8); - mask_obj.surface = radeon_get_pixmap_surface(pMask); - mask_obj.width = 
pMask->drawable.width; - mask_obj.height = pMask->drawable.height; - mask_obj.bpp = pMask->drawable.bitsPerPixel; - mask_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT; - - if (!R600SetAccelState(pScrn, - &src_obj, - &mask_obj, - &dst_obj, - accel_state->comp_vs_offset, accel_state->comp_ps_offset, - 3, 0xffffffff)) - return FALSE; accel_state->msk_pic = pMaskPicture; if (pMaskPicture->componentAlpha) { @@ -1197,19 +1329,19 @@ accel_state->src_alpha = FALSE; } } else { - if (!R600SetAccelState(pScrn, - &src_obj, - NULL, - &dst_obj, - accel_state->comp_vs_offset, accel_state->comp_ps_offset, - 3, 0xffffffff)) - return FALSE; - accel_state->msk_pic = NULL; accel_state->component_alpha = FALSE; accel_state->src_alpha = FALSE; } + if (!R600SetAccelState(pScrn, + pSrc ? &src_obj : NULL, + (pMaskPicture && pMask) ? &mask_obj : NULL, + &dst_obj, + accel_state->comp_vs_offset, accel_state->comp_ps_offset, + 3, 0xffffffff)) + return FALSE; + if (!EVERGREENGetDestFormat(pDstPicture, &dst_format)) return FALSE; @@ -1233,11 +1365,14 @@ evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height); - if (!EVERGREENTextureSetup(pSrcPicture, pSrc, 0)) { - radeon_ib_discard(pScrn); - radeon_cs_flush_indirect(pScrn); - return FALSE; - } + if (pSrc) { + if (!EVERGREENTextureSetup(pSrcPicture, pSrc, 0)) { + radeon_ib_discard(pScrn); + radeon_cs_flush_indirect(pScrn); + return FALSE; + } + } else + accel_state->is_transform[0] = FALSE; if (pMask) { if (!EVERGREENTextureSetup(pMaskPicture, pMask, 1)) { @@ -1248,12 +1383,16 @@ } else accel_state->is_transform[1] = FALSE; + if (pSrc) + ps_bool_consts |= (1 << 0); + if (pMask) + ps_bool_consts |= (1 << 1); + evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, ps_bool_consts); + if (pMask) { evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (1 << 0)); - evergreen_set_bool_consts(pScrn, 
SQ_BOOL_CONST_ps, (1 << 0)); } else { evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_vs, (0 << 0)); - evergreen_set_bool_consts(pScrn, SQ_BOOL_CONST_ps, (0 << 0)); } /* Shader */ @@ -1266,7 +1405,7 @@ ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.shader_size = accel_state->ps_size; - ps_conf.num_gprs = 3; + ps_conf.num_gprs = 2; ps_conf.stack_size = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; @@ -1341,9 +1480,27 @@ vs_const_conf.const_addr = accel_state->cbuf.vb_offset; vs_const_conf.cpu_ptr = (uint32_t *)(char *)cbuf; - EVERGREENXFormSetup(pSrcPicture, pSrc, 0, cbuf); + EVERGREENXFormSetup(pSrcPicture, pScrn, 0, cbuf); if (pMask) - EVERGREENXFormSetup(pMaskPicture, pMask, 1, cbuf); + EVERGREENXFormSetup(pMaskPicture, pScrn, 1, cbuf); + + if (!pSrc) { + /* solid src color */ + EVERGREENSetSolidConsts(pScrn, &cbuf[16], pSrcPicture->format, + pSrcPicture->pSourcePict->solidFill.color, 0); + } + + if (!pMaskPicture) { + /* use identity constant if there is no mask */ + cbuf[20] = 1.0; + cbuf[21] = 1.0; + cbuf[22] = 1.0; + cbuf[23] = 1.0; + } else if (!pMask) { + /* solid mask color */ + EVERGREENSetSolidConsts(pScrn, &cbuf[20], pMaskPicture->format, + pMaskPicture->pSourcePict->solidFill.color, 1); + } radeon_vbo_commit(pScrn, &accel_state->cbuf); evergreen_set_alu_consts(pScrn, &vs_const_conf, RADEON_GEM_DOMAIN_GTT); @@ -1372,7 +1529,7 @@ accel_state->vline_y1, accel_state->vline_y2); - vtx_size = accel_state->msk_pic ? 24 : 16; + vtx_size = accel_state->msk_pix ? 
24 : 16; evergreen_finish_op(pScrn, vtx_size); } @@ -1385,12 +1542,6 @@ struct radeon_accel_state *accel_state = info->accel_state; EVERGREENFinishComposite(pScrn, pDst, accel_state); - - if (!accel_state->src_pic->pDrawable) - pScreen->DestroyPixmap(accel_state->src_pix); - - if (accel_state->msk_pic && !accel_state->msk_pic->pDrawable) - pScreen->DestroyPixmap(accel_state->msk_pix); } static void EVERGREENComposite(PixmapPtr pDst, @@ -1419,7 +1570,7 @@ if (accel_state->vsync) RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h); - if (accel_state->msk_pic) { + if (accel_state->msk_pix) { vb = radeon_vbo_space(pScrn, &accel_state->vbo, 24); diff -Nru xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/src/evergreen_shader.c xserver-xorg-video-ati-7.1.99+git20130730.6a278369/src/evergreen_shader.c --- xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/src/evergreen_shader.c 2013-05-31 16:34:11.000000000 +0000 +++ xserver-xorg-video-ati-7.1.99+git20130730.6a278369/src/evergreen_shader.c 2013-07-30 17:27:13.000000000 +0000 @@ -2472,15 +2472,16 @@ } /* comp ps --------------------------------------- */ -int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader) +int evergreen_comp_ps(RADEONChipFamily ChipSet, uint32_t *shader) { int i = 0; /* 0 */ - shader[i++] = CF_DWORD0(ADDR(3), + /* call interp-fetch-mask if boolean1 == true */ + shader[i++] = CF_DWORD0(ADDR(11), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), + CF_CONST(1), COND(SQ_CF_COND_BOOL), I_COUNT(0), VALID_PIXEL_MODE(0), @@ -2488,11 +2489,13 @@ CF_INST(SQ_CF_INST_CALL), WHOLE_QUAD_MODE(0), BARRIER(0)); + /* 1 */ - shader[i++] = CF_DWORD0(ADDR(8), + /* call read-constant-mask if boolean1 == false */ + shader[i++] = CF_DWORD0(ADDR(14), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), - CF_CONST(0), + CF_CONST(1), COND(SQ_CF_COND_NOT_BOOL), I_COUNT(0), VALID_PIXEL_MODE(0), @@ -2500,48 +2503,118 @@ 
CF_INST(SQ_CF_INST_CALL), WHOLE_QUAD_MODE(0), BARRIER(0)); + /* 2 */ - shader[i++] = CF_DWORD0(ADDR(0), - JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + /* call interp-fetch-src if boolean0 == true */ + shader[i++] = CF_DWORD0(ADDR(6), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), - COND(SQ_CF_COND_ACTIVE), + COND(SQ_CF_COND_BOOL), I_COUNT(0), VALID_PIXEL_MODE(0), - END_OF_PROGRAM(1), - CF_INST(SQ_CF_INST_NOP), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_CALL), WHOLE_QUAD_MODE(0), - BARRIER(1)); + BARRIER(0)); - /* 3 - mask sub */ - shader[i++] = CF_ALU_DWORD0(ADDR(12), + /* 3 */ + /* call read-constant-src if boolean0 == false */ + shader[i++] = CF_DWORD0(ADDR(9), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_NOT_BOOL), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_CALL), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + /* 4 */ + /* src IN mask (GPR2 := GPR1 .* GPR0) */ + shader[i++] = CF_ALU_DWORD0(ADDR(16), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_NOP)); shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), KCACHE_ADDR0(0), KCACHE_ADDR1(0), - I_COUNT(8), + I_COUNT(4), ALT_CONST(0), CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 4 */ - shader[i++] = CF_DWORD0(ADDR(28), + /* 5 */ + /* export pixel data */ + shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), + TYPE(SQ_EXPORT_PIXEL), + RW_GPR(0), + RW_REL(ABSOLUTE), + INDEX_GPR(0), + ELEM_SIZE(1)); + shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_Z), + SRC_SEL_W(SQ_SEL_W), + BURST_COUNT(1), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(1), + CF_INST(SQ_CF_INST_EXPORT_DONE), + MARK(0), + BARRIER(1)); + + /* subroutine interp-fetch-src */ + + /* 6 */ + /* interpolate src */ + shader[i++] = CF_ALU_DWORD0(ADDR(20), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + 
KCACHE_MODE0(SQ_CF_KCACHE_NOP)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(0), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 7 */ + /* texture fetch src into GPR0 */ + shader[i++] = CF_DWORD0(ADDR(24), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), COND(SQ_CF_COND_ACTIVE), - I_COUNT(2), + I_COUNT(1), VALID_PIXEL_MODE(0), END_OF_PROGRAM(0), CF_INST(SQ_CF_INST_TC), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 5 */ - shader[i++] = CF_ALU_DWORD0(ADDR(20), + /* 8 */ + /* return */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(0)); + + /* subroutine read-constant-src */ + + /* 9 */ + /* read constants into GPR0 */ + shader[i++] = CF_ALU_DWORD0(ADDR(26), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); @@ -2549,30 +2622,13 @@ KCACHE_ADDR0(0), KCACHE_ADDR1(0), I_COUNT(4), - ALT_CONST(0), + ALT_CONST(1), CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 6 */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), - TYPE(SQ_EXPORT_PIXEL), - RW_GPR(2), - RW_REL(ABSOLUTE), - INDEX_GPR(0), - ELEM_SIZE(1)); - - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), - BURST_COUNT(1), - VALID_PIXEL_MODE(0), - END_OF_PROGRAM(0), - CF_INST(SQ_CF_INST_EXPORT_DONE), - MARK(0), - BARRIER(1)); - /* 7 */ + /* 10 */ + /* return */ shader[i++] = CF_DWORD0(ADDR(0), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), @@ -2583,10 +2639,13 @@ END_OF_PROGRAM(0), CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), - BARRIER(1)); + BARRIER(0)); + + /* subroutine 
interp-fetch-mask */ - /* 8 - non-mask sub */ - shader[i++] = CF_ALU_DWORD0(ADDR(24), + /* 11 */ + /* interpolate mask */ + shader[i++] = CF_ALU_DWORD0(ADDR(30), KCACHE_BANK0(0), KCACHE_BANK1(0), KCACHE_MODE0(SQ_CF_KCACHE_NOP)); @@ -2598,8 +2657,10 @@ CF_INST(SQ_CF_INST_ALU), WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 9 */ - shader[i++] = CF_DWORD0(ADDR(32), + + /* 12 */ + /* texture fetch mask into GPR1 */ + shader[i++] = CF_DWORD0(ADDR(34), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), CF_CONST(0), @@ -2611,25 +2672,39 @@ WHOLE_QUAD_MODE(0), BARRIER(1)); - /* 10 */ - shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0), - TYPE(SQ_EXPORT_PIXEL), - RW_GPR(0), - RW_REL(ABSOLUTE), - INDEX_GPR(0), - ELEM_SIZE(1)); - shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_Z), - SRC_SEL_W(SQ_SEL_W), - BURST_COUNT(1), - VALID_PIXEL_MODE(0), - END_OF_PROGRAM(0), - CF_INST(SQ_CF_INST_EXPORT_DONE), - MARK(0), - BARRIER(1)); + /* 13 */ + /* return */ + shader[i++] = CF_DWORD0(ADDR(0), + JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); + shader[i++] = CF_DWORD1(POP_COUNT(0), + CF_CONST(0), + COND(SQ_CF_COND_ACTIVE), + I_COUNT(0), + VALID_PIXEL_MODE(0), + END_OF_PROGRAM(0), + CF_INST(SQ_CF_INST_RETURN), + WHOLE_QUAD_MODE(0), + BARRIER(0)); - /* 11 */ + /* subroutine read-constant-src */ + + /* 14 */ + /* read constants into GPR1 */ + shader[i++] = CF_ALU_DWORD0(ADDR(36), + KCACHE_BANK0(0), + KCACHE_BANK1(0), + KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1)); + shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP), + KCACHE_ADDR0(0), + KCACHE_ADDR1(0), + I_COUNT(4), + ALT_CONST(1), + CF_INST(SQ_CF_INST_ALU), + WHOLE_QUAD_MODE(0), + BARRIER(1)); + + /* 15 */ + /* return */ shader[i++] = CF_DWORD0(ADDR(0), JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A)); shader[i++] = CF_DWORD1(POP_COUNT(0), @@ -2640,18 +2715,21 @@ END_OF_PROGRAM(0), CF_INST(SQ_CF_INST_RETURN), WHOLE_QUAD_MODE(0), - BARRIER(1)); + 
BARRIER(0)); + + /* ALU clauses */ - /* 12 interpolate src tex coords - mask */ + /* 16 */ + /* MUL gpr[0].x gpr[0].x gpr[1].x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), + SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), + INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2660,22 +2738,24 @@ UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_INTERP_XY), - BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(1), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), - CLAMP(0)); - /* 13 */ + CLAMP(1)); + + /* 17 */ + /* MUL gpr[0].y gpr[0].y gpr[1].y */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), + SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_Y), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), + INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2684,67 +2764,70 @@ UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_INTERP_XY), - BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(1), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), - CLAMP(0)); - /* 14 */ + CLAMP(1)); + /* 18 */ + /* MUL gpr[0].z gpr[0].z gpr[1].z */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_Y), + SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_Z), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), + INDEX_MODE(SQ_INDEX_LOOP), 
PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0), - WRITE_MASK(0), + WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_INTERP_XY), - BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(1), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), - CLAMP(0)); - /* 15 */ + CLAMP(1)); + /* 19 */ + /* MUL gpr[0].w gpr[0].w gpr[1].w */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), - SRC0_ELEM(ELEM_X), + SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_GPR_BASE + 1), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_X), + SRC1_ELEM(ELEM_W), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_AR_X), + INDEX_MODE(SQ_INDEX_LOOP), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), SRC1_ABS(0), UPDATE_EXECUTE_MASK(0), UPDATE_PRED(0), - WRITE_MASK(0), + WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_INTERP_XY), - BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(1), + ALU_INST(SQ_OP2_INST_MUL), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), - CLAMP(0)); + CLAMP(1)); - /* 16 interpolate mask tex coords */ + /* 20 */ + /* INTERP_XY GPR0.x, GPR0.y PARAM0.x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2763,12 +2846,13 @@ DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(0)); - /* 17 */ + /* 21 */ + /* INTERP_XY GPR0.y, GPR0.x PARAM0.x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2787,12 +2871,13 @@ DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(0)); - /* 18 */ + /* 22 */ 
+ /* INTERP_XY GPR0.z, GPR0.y PARAM0.x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2811,12 +2896,14 @@ DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(0)); - /* 19 */ + + /* 23 */ + /* INTERP_XY GPR0.w, GPR0.x PARAM0.x */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 1), + SRC1_SEL(ALU_SRC_PARAM_BASE + 0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2836,17 +2923,49 @@ DST_ELEM(ELEM_W), CLAMP(0)); - /* 20 - alu 0 */ - /* MUL gpr[2].x gpr[0].x gpr[1].x */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + /* 24/25 */ + /* SAMPLE RID=0 GPR0, GPR0 */ + shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), + INST_MOD(0), + FETCH_WHOLE_QUAD(0), + RESOURCE_ID(0), + SRC_GPR(0), + SRC_REL(ABSOLUTE), + ALT_CONST(0), + RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), + SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); + shader[i++] = TEX_DWORD1(DST_GPR(0), + DST_REL(ABSOLUTE), + DST_SEL_X(SQ_SEL_X), + DST_SEL_Y(SQ_SEL_Y), + DST_SEL_Z(SQ_SEL_Z), + DST_SEL_W(SQ_SEL_W), + LOD_BIAS(0), + COORD_TYPE_X(TEX_NORMALIZED), + COORD_TYPE_Y(TEX_NORMALIZED), + COORD_TYPE_Z(TEX_NORMALIZED), + COORD_TYPE_W(TEX_NORMALIZED)); + shader[i++] = TEX_DWORD2(OFFSET_X(0), + OFFSET_Y(0), + OFFSET_Z(0), + SAMPLER_ID(0), + SRC_SEL_X(SQ_SEL_X), + SRC_SEL_Y(SQ_SEL_Y), + SRC_SEL_Z(SQ_SEL_0), + SRC_SEL_W(SQ_SEL_1)); + shader[i++] = TEX_DWORD_PAD; + + /* 26 */ + /* MOV GPR0.x, KC4.x */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), + INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = 
ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2855,23 +2974,24 @@ UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(1)); - /* 21 - alu 1 */ - /* MUL gpr[2].y gpr[0].y gpr[1].y */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + + /* 27 */ + /* MOV GPR0.y, KC4.y */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Y), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), + INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2880,23 +3000,24 @@ UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(1)); - /* 22 - alu 2 */ - /* MUL gpr[2].z gpr[0].z gpr[1].z */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + + /* 28 */ + /* MOV GPR0.z, KC4.z */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Z), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_Z), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), + INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(0)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2905,23 +3026,24 @@ UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(1)); - /* 23 - alu 3 */ - /* MUL gpr[2].w gpr[0].w gpr[1].w */ - shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), + + /* 29 */ + /* MOV 
GPR0.w, KC4.w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 4), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_W), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_GPR_BASE + 1), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), SRC1_REL(ABSOLUTE), - SRC1_ELEM(ELEM_W), + SRC1_ELEM(ELEM_X), SRC1_NEG(0), - INDEX_MODE(SQ_INDEX_LOOP), + INDEX_MODE(SQ_INDEX_AR_X), PRED_SEL(SQ_PRED_SEL_OFF), LAST(1)); shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), @@ -2930,19 +3052,20 @@ UPDATE_PRED(0), WRITE_MASK(1), OMOD(SQ_ALU_OMOD_OFF), - ALU_INST(SQ_OP2_INST_MUL), + ALU_INST(SQ_OP2_INST_MOV), BANK_SWIZZLE(SQ_ALU_VEC_012), - DST_GPR(2), + DST_GPR(0), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(1)); - /* 24 - interpolate tex coords - non-mask */ + /* 30 */ + /* INTERP_XY GPR1.x, PARAM1 */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2957,16 +3080,17 @@ OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_INTERP_XY), BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(0), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_X), CLAMP(0)); - /* 25 */ + /* 31 */ + /* INTERP_XY GPR1.y, PARAM1 */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -2981,16 +3105,17 @@ OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_INTERP_XY), BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(0), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Y), CLAMP(0)); - /* 26 */ + /* 32 */ + /* INTERP_XY GPR1.z, PARAM1 */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_Y), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -3005,16 +3130,17 @@ OMOD(SQ_ALU_OMOD_OFF), 
ALU_INST(SQ_OP2_INST_INTERP_XY), BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(0), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_Z), CLAMP(0)); - /* 27 */ + /* 33 */ + /* INTERP_XY GPR1.w, PARAM1 */ shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0), SRC0_REL(ABSOLUTE), SRC0_ELEM(ELEM_X), SRC0_NEG(0), - SRC1_SEL(ALU_SRC_PARAM_BASE + 0), + SRC1_SEL(ALU_SRC_PARAM_BASE + 1), SRC1_REL(ABSOLUTE), SRC1_ELEM(ELEM_X), SRC1_NEG(0), @@ -3029,16 +3155,17 @@ OMOD(SQ_ALU_OMOD_OFF), ALU_INST(SQ_OP2_INST_INTERP_XY), BANK_SWIZZLE(SQ_ALU_VEC_210), - DST_GPR(0), + DST_GPR(1), DST_REL(ABSOLUTE), DST_ELEM(ELEM_W), CLAMP(0)); - /* 28/29 - src - mask */ + /* 34/35 */ + /* SAMPLE RID=1 GPR1, GPR1 */ shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), INST_MOD(0), FETCH_WHOLE_QUAD(0), - RESOURCE_ID(0), + RESOURCE_ID(1), SRC_GPR(1), SRC_REL(ABSOLUTE), ALT_CONST(0), @@ -3058,36 +3185,6 @@ shader[i++] = TEX_DWORD2(OFFSET_X(0), OFFSET_Y(0), OFFSET_Z(0), - SAMPLER_ID(0), - SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1)); - shader[i++] = TEX_DWORD_PAD; - /* 30/31 - mask */ - shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), - INST_MOD(0), - FETCH_WHOLE_QUAD(0), - RESOURCE_ID(1), - SRC_GPR(0), - SRC_REL(ABSOLUTE), - ALT_CONST(0), - RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), - SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); - shader[i++] = TEX_DWORD1(DST_GPR(0), - DST_REL(ABSOLUTE), - DST_SEL_X(SQ_SEL_X), - DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_Z), - DST_SEL_W(SQ_SEL_W), - LOD_BIAS(0), - COORD_TYPE_X(TEX_NORMALIZED), - COORD_TYPE_Y(TEX_NORMALIZED), - COORD_TYPE_Z(TEX_NORMALIZED), - COORD_TYPE_W(TEX_NORMALIZED)); - shader[i++] = TEX_DWORD2(OFFSET_X(0), - OFFSET_Y(0), - OFFSET_Z(0), SAMPLER_ID(1), SRC_SEL_X(SQ_SEL_X), SRC_SEL_Y(SQ_SEL_Y), @@ -3095,36 +3192,109 @@ SRC_SEL_W(SQ_SEL_1)); shader[i++] = TEX_DWORD_PAD; - /* 32/33 - src - non-mask */ - shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE), - INST_MOD(0), - FETCH_WHOLE_QUAD(0), - RESOURCE_ID(0), - 
SRC_GPR(0), - SRC_REL(ABSOLUTE), - ALT_CONST(0), - RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE), - SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE)); - shader[i++] = TEX_DWORD1(DST_GPR(0), - DST_REL(ABSOLUTE), - DST_SEL_X(SQ_SEL_X), - DST_SEL_Y(SQ_SEL_Y), - DST_SEL_Z(SQ_SEL_Z), - DST_SEL_W(SQ_SEL_W), - LOD_BIAS(0), - COORD_TYPE_X(TEX_NORMALIZED), - COORD_TYPE_Y(TEX_NORMALIZED), - COORD_TYPE_Z(TEX_NORMALIZED), - COORD_TYPE_W(TEX_NORMALIZED)); - shader[i++] = TEX_DWORD2(OFFSET_X(0), - OFFSET_Y(0), - OFFSET_Z(0), - SAMPLER_ID(0), - SRC_SEL_X(SQ_SEL_X), - SRC_SEL_Y(SQ_SEL_Y), - SRC_SEL_Z(SQ_SEL_0), - SRC_SEL_W(SQ_SEL_1)); - shader[i++] = TEX_DWORD_PAD; + /* 36 */ + /* MOV GPR1.x, KC5.x */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_X), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_X), + CLAMP(1)); + + /* 37 */ + /* MOV GPR1.y, KC5.y */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Y), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Y), + CLAMP(1)); + + /* 38 */ + /* MOV GPR1.z, KC5.z */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_Z), + SRC0_NEG(0), + 
SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(0)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_Z), + CLAMP(1)); + + /* 39 */ + /* MOV GPR1.w, KC5.w */ + shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 5), + SRC0_REL(ABSOLUTE), + SRC0_ELEM(ELEM_W), + SRC0_NEG(0), + SRC1_SEL(ALU_SRC_GPR_BASE + 0), + SRC1_REL(ABSOLUTE), + SRC1_ELEM(ELEM_X), + SRC1_NEG(0), + INDEX_MODE(SQ_INDEX_AR_X), + PRED_SEL(SQ_PRED_SEL_OFF), + LAST(1)); + shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0), + SRC1_ABS(0), + UPDATE_EXECUTE_MASK(0), + UPDATE_PRED(0), + WRITE_MASK(1), + OMOD(SQ_ALU_OMOD_OFF), + ALU_INST(SQ_OP2_INST_MOV), + BANK_SWIZZLE(SQ_ALU_VEC_012), + DST_GPR(1), + DST_REL(ABSOLUTE), + DST_ELEM(ELEM_W), + CLAMP(1)); return i; } diff -Nru xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/src/evergreen_state.h xserver-xorg-video-ati-7.1.99+git20130730.6a278369/src/evergreen_state.h --- xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/src/evergreen_state.h 2013-05-31 16:34:11.000000000 +0000 +++ xserver-xorg-video-ati-7.1.99+git20130730.6a278369/src/evergreen_state.h 2013-07-30 17:27:13.000000000 +0000 @@ -297,6 +297,8 @@ void evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain); void +evergreen_set_blend_color(ScrnInfoPtr pScrn, float *color); +void evergreen_cp_wait_vline_sync(ScrnInfoPtr pScrn, PixmapPtr pPix, xf86CrtcPtr crtc, int start, int stop); void evergreen_set_spi(ScrnInfoPtr pScrn, int vs_export_count, int num_interp); diff -Nru xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/src/r600_exa.c xserver-xorg-video-ati-7.1.99+git20130730.6a278369/src/r600_exa.c --- 
xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/src/r600_exa.c 2013-07-10 15:44:02.000000000 +0000 +++ xserver-xorg-video-ati-7.1.99+git20130730.6a278369/src/r600_exa.c 2013-07-30 17:27:13.000000000 +0000 @@ -643,7 +643,12 @@ if (accel_state->vsync) RADEONVlineHelperSet(pScrn, dstX, dstY, dstX + w, dstY + h); - if (accel_state->same_surface && accel_state->copy_area) { + if (accel_state->same_surface && + (srcX + w <= dstX || dstX + w <= srcX || srcY + h <= dstY || dstY + h <= srcY)) { + R600DoPrepareCopy(pScrn); + R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); + R600DoCopyVline(pDst); + } else if (accel_state->same_surface && accel_state->copy_area) { uint32_t orig_dst_domain = accel_state->dst_obj.domain; uint32_t orig_src_domain = accel_state->src_obj[0].domain; uint32_t orig_src_tiling_flags = accel_state->src_obj[0].tiling_flags; @@ -1179,7 +1184,7 @@ if (!pSrc) { pSrc = RADEONSolidPixmap(pScreen, pSrcPicture->pSourcePict->solidFill.color); if (!pSrc) - RADEON_FALLBACK("Failed to create solid scratch pixmap\n"); + RADEON_FALLBACK(("Failed to create solid scratch pixmap\n")); } dst_obj.bo = radeon_get_pixmap_bo(pDst); @@ -1211,7 +1216,7 @@ if (!pMask) { if (!pSrcPicture->pDrawable) pScreen->DestroyPixmap(pSrc); - RADEON_FALLBACK("Failed to create solid scratch pixmap\n"); + RADEON_FALLBACK(("Failed to create solid scratch pixmap\n")); } } diff -Nru xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/src/radeon_exa_render.c xserver-xorg-video-ati-7.1.99+git20130730.6a278369/src/radeon_exa_render.c --- xserver-xorg-video-ati-7.1.99+git20130710.c16c59f8/src/radeon_exa_render.c 2013-05-31 16:34:11.000000000 +0000 +++ xserver-xorg-video-ati-7.1.99+git20130730.6a278369/src/radeon_exa_render.c 2013-07-30 17:27:13.000000000 +0000 @@ -602,7 +602,7 @@ if (!pSrc) { pSrc = RADEONSolidPixmap(pScreen, cpu_to_le32(pSrcPicture->pSourcePict->solidFill.color)); if (!pSrc) - RADEON_FALLBACK("Failed to create solid scratch pixmap\n"); + RADEON_FALLBACK(("Failed to 
create solid scratch pixmap\n")); } if (((dst_pitch >> pixel_shift) & 0x7) != 0) @@ -616,7 +616,7 @@ if (!pMask) { if (!pSrcPicture->pDrawable) pScreen->DestroyPixmap(pSrc); - RADEON_FALLBACK("Failed to create solid scratch pixmap\n"); + RADEON_FALLBACK(("Failed to create solid scratch pixmap\n")); } } @@ -967,7 +967,7 @@ if (!pSrc) { pSrc = RADEONSolidPixmap(pScreen, cpu_to_le32(pSrcPicture->pSourcePict->solidFill.color)); if (!pSrc) - RADEON_FALLBACK("Failed to create solid scratch pixmap\n"); + RADEON_FALLBACK(("Failed to create solid scratch pixmap\n")); } if (!RADEONSetupSourceTile(pSrcPicture, pSrc, FALSE, TRUE)) @@ -978,7 +978,7 @@ if (!pMask) { if (!pSrcPicture->pDrawable) pScreen->DestroyPixmap(pSrc); - RADEON_FALLBACK("Failed to create solid scratch pixmap\n"); + RADEON_FALLBACK(("Failed to create solid scratch pixmap\n")); } } @@ -1459,7 +1459,7 @@ if (!pSrc) { pSrc = RADEONSolidPixmap(pScreen, cpu_to_le32(pSrcPicture->pSourcePict->solidFill.color)); if (!pSrc) - RADEON_FALLBACK("Failed to create solid scratch pixmap\n"); + RADEON_FALLBACK(("Failed to create solid scratch pixmap\n")); } if (!RADEONSetupSourceTile(pSrcPicture, pSrc, TRUE, FALSE)) @@ -1470,7 +1470,7 @@ if (!pMask) { if (!pSrcPicture->pDrawable) pScreen->DestroyPixmap(pSrc); - RADEON_FALLBACK("Failed to create solid scratch pixmap\n"); + RADEON_FALLBACK(("Failed to create solid scratch pixmap\n")); } }