diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/.lastcommit xserver-xorg-video-intel-2.21.11+git20130701.7d916398/.lastcommit --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/.lastcommit 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/.lastcommit 2013-07-01 14:03:55.000000000 +0000 @@ -1 +1 @@ -commit 60d716b53993b08a2a00c22f523c575e62e0a18d +commit 7d9163983ea2e960c0a7b55266fcc532b9c6e382 diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/ChangeLog xserver-xorg-video-intel-2.21.11+git20130701.7d916398/ChangeLog --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/ChangeLog 2013-06-27 14:40:03.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/ChangeLog 2013-07-01 14:04:07.000000000 +0000 @@ -1,9 +1,321 @@ -commit d3bb9dd9545410016a6400e300e5b44cef28631d +commit 3373e5633faaa079ccba513cf37de6dde8cb4e27 Author: Robert Hooker -Date: Thu Jun 27 10:39:52 2013 -0400 +Date: Mon Jul 1 10:03:55 2013 -0400 Add debian tree from origin/ubuntu +commit 7d9163983ea2e960c0a7b55266fcc532b9c6e382 +Author: Chris Wilson +Date: Sun Jun 30 15:26:57 2013 +0100 + + 2.21.11 release + +commit 5005bd2d52ab64cbeae099d512d0b65be6c4abaa +Author: Chris Wilson +Date: Sun Jun 30 15:02:19 2013 +0100 + + intel: Fix failure code for reporting !drmCheckModesetingSupported + + The new function returns the fd, not a Bool, so the error code must now + be -1. + + Signed-off-by: Chris Wilson + +commit f8738d7b4cc1c624d4390ef9ce7426ba457d7dd3 +Author: Jonathan Gray +Date: Sun Jun 30 19:37:45 2013 +1000 + + intel: replace direct ioctl use with drm{Set, Drop}Master + + Use drmSetMaster/drmDropMaster instead of calling the ioctls + directly. Fixes compilation on OpenBSD where these ioctls + aren't defined. 
+ + Signed-off-by: Jonathan Gray + +commit 40301e6d03f6e8d2d2d01e6bb9f1754a7e543a08 +Author: Chris Wilson +Date: Sun Jun 30 11:12:34 2013 +0100 + + sna: Store the path used to open the device and pass to DRI + + Avoid having to search the device tree once again in order to simply + recover the path we used to open the device. + + Signed-off-by: Chris Wilson + +commit 17da58f904e75d434aaf71e297e15d41153ba954 +Author: Chris Wilson +Date: Sun Jun 30 11:01:49 2013 +0100 + + sna: Replace conflicting drmDropMaster + + Calling drmDropMaster twice along the CloseScreen path is not a good + idea. + + Signed-off-by: Chris Wilson + +commit 3a787da7e888da7e9943be94bd1cb177fe1495ab +Author: Chris Wilson +Date: Sat Jun 29 21:57:20 2013 +0100 + + sna: Allow tiled memcpy on i386 + + With the split into per-swizzle functions, and with the forced + optimisation levels, it appears that i386 doesn't suffer so badly and + the tiled memcpy are a viable method. + + Signed-off-by: Chris Wilson + +commit 1d9941a7c003587c0bd732fb8b21fee5cefa6f87 +Author: Chris Wilson +Date: Sat Jun 29 21:56:13 2013 +0100 + + sna: Add the Ofast option to the critical memcpy routines + + Always enable gcc to fully optimize the core memcpy routines (provided + that optimisations are not entirely disabled, for instance for + debugging). + + Signed-off-by: Chris Wilson + +commit 84c190db33142f3c1ec347ec0bf87f77ce132d36 +Author: Chris Wilson +Date: Sat Jun 29 19:06:40 2013 +0100 + + sna: Fix get_image_inplace to use the pixmap offset + + The inplace routine assumed that the region to be read was already in + pixmap coordinates. Making it so makes the code easier, so do it. + + Signed-off-by: Chris Wilson + +commit c7d246ba6f750ee080c38ccc5603d01fcf7fce92 +Author: Chris Wilson +Date: Sat Jun 29 16:31:34 2013 +0100 + + sna: Move the clone discard into free-gpu + + Rather than peppering the discard manually before the call to free the + GPU bo, always discard the COW when we actually free the GPU bo. 
+ + Signed-off-by: Chris Wilson + +commit 6ab2a3acf71b5204c399c7649e5601c93a99f25f +Author: Chris Wilson +Date: Sat Jun 29 15:04:09 2013 +0100 + + sna: Improve checks for coherent access through CPU mappings + + Refactor the CPU mapping tests to a single function, and remember to + test for a pending GPU write (i.e. bo->exec). + + Signed-off-by: Chris Wilson + +commit 9026bb954646c0425360c2236e26c79d097142cd +Author: Chris Wilson +Date: Fri Jun 28 15:59:17 2013 +0100 + + sna: Inspect the dirty boxes when querying whether damage contains a rectangle + + This helps in the cases where we have subtracted a small number of + rectangles from an all-damage pixmap (such as a number of successive + GetImage, PutImage operations). The danger is that we end up searching a + long list of dirty boxes - maybe just search the first chunk if that + becomes noticeable? + + Signed-off-by: Chris Wilson + +commit d635e05c9dd26a397ccf958be091b56d1075e923 +Author: Chris Wilson +Date: Fri Jun 28 19:24:06 2013 +0100 + + sna: Promote assert(!priv->mapped) along migration paths + + With the advent of the more permissive mapping schemes and finer damage + tracking, we are more liable to have pixmaps mapped and so can reach the + upload path with the pixmap still mapped. 
+ + Signed-off-by: Chris Wilson + +commit 2d40500851a7c6d857b17258e4989ddf7401cfbc +Author: Chris Wilson +Date: Fri Jun 28 15:12:13 2013 +0100 + + sna: Add asserts around applying clears + + Signed-off-by: Chris Wilson + +commit e3ad737ef9d33e924b206741949d59224bfef566 +Author: Chris Wilson +Date: Fri Jun 28 15:33:54 2013 +0100 + + sna: Use inplace CPU mapping readback for GetImage on linear buffers + + Signed-off-by: Chris Wilson + +commit 626b5e541663f838475eaef2c1bc3ae4d4848165 +Author: Chris Wilson +Date: Fri Jun 28 14:24:10 2013 +0100 + + sna: Add debug control for disabling accelerated GetImage + + Signed-off-by: Chris Wilson + +commit 2356579cdff36adf58fb69894f646a6e63053a15 +Author: Chris Wilson +Date: Fri Jun 28 13:18:00 2013 +0100 + + sna: Assert that the kernel tiling mode matches our bo + + Signed-off-by: Chris Wilson + +commit e979d32bb71fef7341ceb9c2b2e80c6dfa50a7b3 +Author: Chris Wilson +Date: Fri Jun 28 10:59:23 2013 +0100 + + sna/gen2+: Consider precision in render operation placement + + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=66297 + Signed-off-by: Chris Wilson + +commit 541f816815e392db9e798d2f940029d82a6b2e0b +Author: Chris Wilson +Date: Fri Jun 28 10:14:16 2013 +0100 + + sna: Markup when a gradient is opaque + + Signed-off-by: Chris Wilson + +commit 34e6366f2a61c145445056e5fc6c483999c0402c +Author: Chris Wilson +Date: Fri Jun 28 10:03:47 2013 +0100 + + sna/blt: Remove a pair of leftover asserts + + As the variable they were inspecting was removed, the asserts are now + breaking the debug build. 
+ + Signed-off-by: Chris Wilson + +commit 0c93a0cf41cbfe88b18e9e69bd97bd75cf2404bd +Author: Chris Wilson +Date: Fri Jun 28 00:35:14 2013 +0100 + + sna: Compensate redirect drawing subrectangle inside an offset pixmap + + Reported-by: Clemens Eisserer + Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=66249 + Signed-off-by: Chris Wilson + +commit e33fbd6d7ff2a9efcc14974a05b5e1eb01bbce34 +Author: Chris Wilson +Date: Fri Jun 28 08:53:32 2013 +0100 + + sna/blt: Refine op placement logic for handling current source location + + Signed-off-by: Chris Wilson + +commit 48b5ac11a0737f65de2e6290308bd37017cc29a9 +Author: Chris Wilson +Date: Thu Jun 27 21:39:30 2013 +0100 + + intel: Use fcntl to try and set CLOEXEC if the open(O_CLOEXEC) fails + + As suggested by Arkadiusz Miskiewicz. + + Signed-off-by: Chris Wilson + +commit 9b3e5c211451ac07bd96cd997ac714bcbe1809b0 +Author: Chris Wilson +Date: Thu Jun 27 21:08:05 2013 +0100 + + intel: #ifdef O_CLOEXEC for compilation on squeeze + + If the system doesn't support O_CLOEXEC, then we simply can't use it. + + Signed-off-by: Chris Wilson + +commit caf43fcadb0fcb3d342543f1e7dd78ee2314a627 +Author: Chris Wilson +Date: Thu Jun 27 16:52:21 2013 +0100 + + sna: Enable memcpy_from_tiled for the IO paths + + Should you ever need to read back from a tiled surface and for whatever + reason do not have a CPU bo to accelerate the operation, maybe we could + use the manual tiling instead (as it is useful elsewhere). + + Signed-off-by: Chris Wilson + +commit b5e85e495e55e2537d305b7bebacdf6f97b66199 +Author: Roy.Li +Date: Thu Jun 27 14:10:14 2013 +0800 + + uxa: fix the compilation error with xorg-xserver <= 1.10 + + struct _Screen has no canDoBGNoneRoot when ABI_VIDEODRV_VERSION is less than 10.0 + + Signed-off-by: Roy.Li + +commit 41715af4d009bfcb351946ddaa3a3ea3767a1429 +Author: Chris Wilson +Date: Thu Jun 27 16:36:52 2013 +0100 + + configure: SNA supports the old Xorgs + + So allow it to be compiled by default for older Xorgs as well. 
+ + Signed-off-by: Chris Wilson + +commit 7ce487617445c81f0178823de8896a2b73bbaaf1 +Author: Chris Wilson +Date: Thu Jun 27 16:08:43 2013 +0100 + + sna: Trim the large object threshold + + Be kinder to smaller machines by lowering the threshold at which treat + an object as huge and worthy of avoiding duplication. + + Signed-off-by: Chris Wilson + +commit 31467e18d2ccdc42b0601b43b581524859de1373 +Author: Chris Wilson +Date: Thu Jun 27 16:07:36 2013 +0100 + + sna: Prefer operating inplace with a very large GPU bo + + As we strive to only keep one copy when working with very large objects, + so try operating inplace on a mapping for CPU operations with a large + GPU bo. + + Signed-off-by: Chris Wilson + +commit b615ce97ec43ea8fe02e995244c757138abcb2de +Author: Chris Wilson +Date: Thu Jun 27 10:45:22 2013 +0100 + + sna: Add a fast path for reading back from tiled X bo + + This is lower latency than the double copy incurred for first moving the + bo to the CPU and then copying it back - but due to the less efficient + tiled memcpy, it has lower throughput. So x11perf -shmget500 suffers + (by about 30%) but real world applications improve by about 2x. + + Signed-off-by: Chris Wilson + +commit 6493c8c65f93ad2554c2512a07ba640e966fd026 +Author: Chris Wilson +Date: Thu Jun 27 10:45:13 2013 +0100 + + sna: Implement memcpy_from_tiled functions (for X-tiling only atm) + + To provide symmetry with the ability to write into an X-tiled mapping of + a bo, we add the memcpy_from_tiled to be able to read back from the same + bo. 
+ + Signed-off-by: Chris Wilson + commit 60d716b53993b08a2a00c22f523c575e62e0a18d Author: Chris Wilson Date: Thu Jun 27 14:11:00 2013 +0100 diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/NEWS xserver-xorg-video-intel-2.21.11+git20130701.7d916398/NEWS --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/NEWS 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/NEWS 2013-07-01 14:03:55.000000000 +0000 @@ -1,3 +1,55 @@ +Release 2.21.11 (2013-06-30) +============================ +An eventful week. What started with a regression with some builds of +firefox on some machines lead ultimately to the discovery of an older +kernel bug. Aside from the work to fix the image bug and a few other +older bugs that were reported and resolved this week, there is also a +(hopefully) subtle change to the initial configuration of displays. In +the absence of user overrides in xorg.conf, the DDX will try to preserve +the same display configuration as used by the kernel, which hopefully +will be the same configuration as setup by the BIOS. The result should +be a boot sequence that does not resize at all (aka fastboot) - until +the display manager takes over and loads a completely different +configuration! 
+ + * Add reference counting of drmMaster for ZaphodHeads + https://bugs.freedesktop.org/show_bug.cgi?id=66041 + + * Add a GPU flush before changing blend modes on Ironlake + https://bugs.freedesktop.org/show_bug.cgi?id=51422 + + * Fix occasional missing images for inplace uploads + [regression from 2.21.10] + https://bugs.freedesktop.org/show_bug.cgi?id=66059 + + * Add missing utility files to the tarball and remove a few unused ones + + * Initialise PolyPoint operand state before calling miWideDash + https://bugs.freedesktop.org/show_bug.cgi?id=66104 + + * Fix redirection handling for rendering into large surfaces + https://bugs.freedesktop.org/show_bug.cgi?id=66168 + https://bugs.freedesktop.org/show_bug.cgi?id=66249 + + * Fix compilation of UXA with xorg-xserver < 1.10 + [regression from 2.20.0] + + * Fix consideration of gradients for deciding when to migrate render + operations + [performance regression from 2.21.10, the bug itself is older] + https://bugs.freedesktop.org/show_bug.cgi?id=66297 + +Also fixed this week was: + +commit 22fd5ca947b58901927d100d2b1aa0f1672b3435 +Author: Chris Wilson +Date: Fri Jun 28 16:54:08 2013 +0100 + + drm/i915: Only clear write-domains after a successful wait-seqno + +which affects kernels 3.7 - 3.10, coming to a stable kernel near you soon. + + Release 2.21.10 (2013-06-22) ============================ Fixes missing support for Xv (with the textured video adaptor) on diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/configure xserver-xorg-video-intel-2.21.11+git20130701.7d916398/configure --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/configure 2013-06-27 14:40:01.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/configure 2013-07-01 14:04:05.000000000 +0000 @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for xf86-video-intel 2.21.10. +# Generated by GNU Autoconf 2.69 for xf86-video-intel 2.21.11. 
# # Report bugs to . # @@ -591,8 +591,8 @@ # Identity of this package. PACKAGE_NAME='xf86-video-intel' PACKAGE_TARNAME='xf86-video-intel' -PACKAGE_VERSION='2.21.10' -PACKAGE_STRING='xf86-video-intel 2.21.10' +PACKAGE_VERSION='2.21.11' +PACKAGE_STRING='xf86-video-intel 2.21.11' PACKAGE_BUGREPORT='https://bugs.freedesktop.org/enter_bug.cgi?product=xorg' PACKAGE_URL='' @@ -1454,7 +1454,7 @@ # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures xf86-video-intel 2.21.10 to adapt to many kinds of systems. +\`configure' configures xf86-video-intel 2.21.11 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1525,7 +1525,7 @@ if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of xf86-video-intel 2.21.10:";; + short | recursive ) echo "Configuration of xf86-video-intel 2.21.11:";; esac cat <<\_ACEOF @@ -1714,7 +1714,7 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -xf86-video-intel configure 2.21.10 +xf86-video-intel configure 2.21.11 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -2129,7 +2129,7 @@ This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by xf86-video-intel $as_me 2.21.10, which was +It was created by xf86-video-intel $as_me 2.21.11, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -2950,7 +2950,7 @@ # Define the identity of the package. 
PACKAGE='xf86-video-intel' - VERSION='2.21.10' + VERSION='2.21.11' cat >>confdefs.h <<_ACEOF @@ -18576,9 +18576,6 @@ fi -if test "x$SNA" = "xauto" && pkg-config --exists "xorg-server >= 1.10"; then - SNA=yes -fi if test "x$SNA" != "xno"; then $as_echo "#define USE_SNA 1" >>confdefs.h @@ -20790,7 +20787,7 @@ # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by xf86-video-intel $as_me 2.21.10, which was +This file was extended by xf86-video-intel $as_me 2.21.11, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -20856,7 +20853,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -xf86-video-intel config.status 2.21.10 +xf86-video-intel config.status 2.21.11 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/configure.ac xserver-xorg-video-intel-2.21.11+git20130701.7d916398/configure.ac --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/configure.ac 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/configure.ac 2013-07-01 14:03:55.000000000 +0000 @@ -23,7 +23,7 @@ # Initialize Autoconf AC_PREREQ([2.60]) AC_INIT([xf86-video-intel], - [2.21.10], + [2.21.11], [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg], [xf86-video-intel]) AC_CONFIG_SRCDIR([Makefile.am]) @@ -203,9 +203,6 @@ [SNA="$enableval"], [SNA=auto]) -if test "x$SNA" = "xauto" && pkg-config --exists "xorg-server >= 1.10"; then - SNA=yes -fi if test "x$SNA" != "xno"; then AC_DEFINE(USE_SNA, 1, [Enable SNA support]) AC_CHECK_HEADERS([sys/sysinfo.h]) diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/debian/changelog xserver-xorg-video-intel-2.21.11+git20130701.7d916398/debian/changelog --- 
xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/debian/changelog 2013-07-01 14:12:57.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/debian/changelog 2013-07-01 14:12:57.000000000 +0000 @@ -1,12 +1,12 @@ -xserver-xorg-video-intel (2:2.21.10+git20130627.60d716b5-0ubuntu0sarvatt~quantal) quantal; urgency=critical +xserver-xorg-video-intel (2:2.21.11+git20130701.7d916398-0ubuntu0sarvatt~quantal) quantal; urgency=critical - * Checkout from git 20130627 (master branch) up to commit - 60d716b53993b08a2a00c22f523c575e62e0a18d + * Checkout from git 20130701 (master branch) up to commit + 7d9163983ea2e960c0a7b55266fcc532b9c6e382 * Only added debian/ tree from origin/ubuntu * hook: Refresh 0002-Update-manpage-for-new-accelmethod-option.patch * hook: Drop sna-make-sure-the-source-is-coherent.diff (upstream) - -- Robert Hooker Thu, 27 Jun 2013 10:40:03 -0400 + -- Robert Hooker Mon, 01 Jul 2013 10:04:07 -0400 xserver-xorg-video-intel (2:2.21.9-0ubuntu2) saucy; urgency=low diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/intel_device.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/intel_device.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/intel_device.c 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/intel_device.c 2013-07-01 14:03:55.000000000 +0000 @@ -42,6 +42,7 @@ #include "intel_driver.h" struct intel_device { + char *path; int fd; int open_count; int master_count; @@ -62,11 +63,29 @@ xf86GetEntityPrivate(scrn->entityList[0], intel_device_key)->ptr = dev; } -static int __intel_open_device(const struct pci_device *pci, const char *path) +static int fd_set_cloexec(int fd) +{ + int flags; + + if (fd == -1) + return fd; + +#ifdef FD_CLOEXEC + flags = fcntl(fd, F_GETFD); + if (flags != -1) { + flags |= FD_CLOEXEC; + fcntl(fd, F_SETFD, flags); + } +#endif + + return fd; +} + +static int __intel_open_device(const struct pci_device *pci, char **path) { 
int fd; - if (path == NULL) { + if (*path == NULL) { char id[20]; int ret; @@ -79,14 +98,28 @@ if (xf86LoadKernelModule("i915")) ret = drmCheckModesettingSupported(id); if (ret) - return FALSE; + return -1; /* Be nice to the user and load fbcon too */ (void)xf86LoadKernelModule("fbcon"); } fd = drmOpen(NULL, id); - } else - fd = open(path, O_RDWR | O_CLOEXEC); + if (fd != -1) { + *path = drmGetDeviceNameFromFd(fd); + if (*path == NULL) { + close(fd); + fd = -1; + } + } + } else { +#ifdef O_CLOEXEC + fd = open(*path, O_RDWR | O_CLOEXEC); +#else + fd = -1; +#endif + if (fd == -1) + fd = fd_set_cloexec(open(*path, O_RDWR)); + } return fd; } @@ -96,6 +129,7 @@ const char *path) { struct intel_device *dev; + char *local_path; int fd; if (intel_device_key == -1) @@ -107,16 +141,20 @@ if (dev) return dev->fd; - fd = __intel_open_device(pci, path); + local_path = path ? strdup(path) : NULL; + + fd = __intel_open_device(pci, &local_path); if (fd == -1) return -1; dev = malloc(sizeof(*dev)); if (dev == NULL) { + free(local_path); close(fd); return -1; } + dev->path = local_path; dev->fd = fd; dev->open_count = 0; dev->master_count = 0; @@ -165,6 +203,13 @@ return dev->fd; } +const char *intel_get_device_name(ScrnInfoPtr scrn) +{ + struct intel_device *dev = intel_device(scrn); + assert(dev && dev->path); + return dev->path; +} + int intel_get_master(ScrnInfoPtr scrn) { struct intel_device *dev = intel_device(scrn); @@ -177,7 +222,7 @@ int retry = 2000; do { - ret = ioctl(dev->fd, DRM_IOCTL_SET_MASTER); + ret = drmSetMaster(dev->fd); if (ret == 0) break; usleep(1000); @@ -197,8 +242,8 @@ ret = 0; assert(dev->master_count); if (--dev->master_count == 0) { - assert(ioctl(dev->fd, DRM_IOCTL_SET_MASTER) == 0); - ret = ioctl(dev->fd, DRM_IOCTL_DROP_MASTER); + assert(drmSetMaster(dev->fd) == 0); + ret = drmDropMaster(dev->fd); } return ret; @@ -211,6 +256,7 @@ intel_set_device(scrn, NULL); drmClose(dev->fd); + free(dev->path); free(dev); } } @@ -228,5 +274,6 @@ 
intel_set_device(scrn, NULL); drmClose(dev->fd); + free(dev->path); free(dev); } diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/intel_display.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/intel_display.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/intel_display.c 2013-05-29 15:53:40.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/intel_display.c 2013-07-01 14:03:55.000000000 +0000 @@ -2113,7 +2113,9 @@ 0, 0, scrn->virtualX, scrn->virtualY); intel->uxa_driver->done_copy(dst); +#if ABI_VIDEODRV_VERSION >= SET_ABI_VERSION(10, 0) pScreen->canDoBGNoneRoot = TRUE; +#endif cleanup_dst: (*pScreen->DestroyPixmap)(dst); diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/intel_driver.h xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/intel_driver.h --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/intel_driver.h 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/intel_driver.h 2013-07-01 14:03:55.000000000 +0000 @@ -313,6 +313,7 @@ int intel_open_device(int entity_num, const struct pci_device *pci, const char *path); int intel_get_device(ScrnInfoPtr scrn); +const char *intel_get_device_name(ScrnInfoPtr scrn); int intel_get_master(ScrnInfoPtr scrn); int intel_put_master(ScrnInfoPtr scrn); void intel_put_device(ScrnInfoPtr scrn); diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/blt.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/blt.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/blt.c 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/blt.c 2013-07-01 14:03:55.000000000 +0000 @@ -277,6 +277,70 @@ } } +static fast_memcpy void +memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + 
uint16_t width, uint16_t height) +{ + const unsigned tile_width = 512; + const unsigned tile_height = 8; + const unsigned tile_size = 4096; + + const unsigned cpp = bpp / 8; + const unsigned stride_tiles = src_stride / tile_width; + const unsigned swizzle_pixels = tile_width / cpp; + const unsigned tile_pixels = ffs(tile_width / cpp) - 1; + const unsigned tile_mask = (1 << tile_pixels) - 1; + + unsigned x, y; + + DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", + __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); + + dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; + + for (y = 0; y < height; ++y) { + const uint32_t sy = y + src_y; + const uint32_t tile_row = + (sy / tile_height * stride_tiles * tile_size + + (sy & (tile_height-1)) * tile_width); + uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; + uint32_t sx = src_x, offset; + + x = width * cpp; + if (sx & (swizzle_pixels - 1)) { + const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); + const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; + offset = tile_row + + (sx >> tile_pixels) * tile_size + + (sx & tile_mask) * cpp; + memcpy(dst_row, (const char *)src + offset, length * cpp); + + dst_row += length * cpp; + x -= length * cpp; + sx += length; + } + while (x >= 512) { + assert((sx & tile_mask) == 0); + offset = tile_row + (sx >> tile_pixels) * tile_size; + + memcpy(dst_row, (const char *)src + offset, 512); + + dst_row += 512; + x -= 512; + sx += swizzle_pixels; + } + if (x) { + offset = tile_row + + (sx >> tile_pixels) * tile_size + + (sx & tile_mask) * cpp; + memcpy(dst_row, (const char *)src + offset, x); + } + } +} + fast_memcpy static void memcpy_to_tiled_x__swizzle_9(const void *src, void *dst, int bpp, int32_t src_stride, int32_t dst_stride, @@ -347,6 +411,75 @@ } fast_memcpy static void +memcpy_from_tiled_x__swizzle_9(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + 
int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height) +{ + const unsigned tile_width = 512; + const unsigned tile_height = 8; + const unsigned tile_size = 4096; + + const unsigned cpp = bpp / 8; + const unsigned stride_tiles = src_stride / tile_width; + const unsigned swizzle_pixels = 64 / cpp; + const unsigned tile_pixels = ffs(tile_width / cpp) - 1; + const unsigned tile_mask = (1 << tile_pixels) - 1; + + unsigned x, y; + + DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", + __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); + + dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; + + for (y = 0; y < height; ++y) { + const uint32_t sy = y + src_y; + const uint32_t tile_row = + (sy / tile_height * stride_tiles * tile_size + + (sy & (tile_height-1)) * tile_width); + uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; + uint32_t sx = src_x, offset; + + x = width * cpp; + if (sx & (swizzle_pixels - 1)) { + const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); + const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; + offset = tile_row + + (sx >> tile_pixels) * tile_size + + (sx & tile_mask) * cpp; + offset ^= (offset >> 3) & 64; + + memcpy(dst_row, (const char *)src + offset, length * cpp); + + dst_row += length * cpp; + x -= length * cpp; + sx += length; + } + while (x >= 64) { + offset = tile_row + + (sx >> tile_pixels) * tile_size + + (sx & tile_mask) * cpp; + offset ^= (offset >> 3) & 64; + + memcpy(dst_row, (const char *)src + offset, 64); + + dst_row += 64; + x -= 64; + sx += swizzle_pixels; + } + if (x) { + offset = tile_row + + (sx >> tile_pixels) * tile_size + + (sx & tile_mask) * cpp; + offset ^= (offset >> 3) & 64; + memcpy(dst_row, (const char *)src + offset, x); + } + } +} + +fast_memcpy static void memcpy_to_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp, int32_t src_stride, int32_t dst_stride, int16_t 
src_x, int16_t src_y, @@ -416,6 +549,75 @@ } fast_memcpy static void +memcpy_from_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height) +{ + const unsigned tile_width = 512; + const unsigned tile_height = 8; + const unsigned tile_size = 4096; + + const unsigned cpp = bpp / 8; + const unsigned stride_tiles = src_stride / tile_width; + const unsigned swizzle_pixels = 64 / cpp; + const unsigned tile_pixels = ffs(tile_width / cpp) - 1; + const unsigned tile_mask = (1 << tile_pixels) - 1; + + unsigned x, y; + + DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", + __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); + + dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; + + for (y = 0; y < height; ++y) { + const uint32_t sy = y + src_y; + const uint32_t tile_row = + (sy / tile_height * stride_tiles * tile_size + + (sy & (tile_height-1)) * tile_width); + uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; + uint32_t sx = src_x, offset; + + x = width * cpp; + if (sx & (swizzle_pixels - 1)) { + const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); + const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; + offset = tile_row + + (sx >> tile_pixels) * tile_size + + (sx & tile_mask) * cpp; + offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; + + memcpy(dst_row, (const char *)src + offset, length * cpp); + + dst_row += length * cpp; + x -= length * cpp; + sx += length; + } + while (x >= 64) { + offset = tile_row + + (sx >> tile_pixels) * tile_size + + (sx & tile_mask) * cpp; + offset ^= ((offset ^ (offset >> 1)) >> 3) & 64; + + memcpy(dst_row, (const char *)src + offset, 64); + + dst_row += 64; + x -= 64; + sx += swizzle_pixels; + } + if (x) { + offset = tile_row + + (sx >> tile_pixels) * tile_size + + (sx & tile_mask) * cpp; + offset ^= 
((offset ^ (offset >> 1)) >> 3) & 64; + memcpy(dst_row, (const char *)src + offset, x); + } + } +} + +fast_memcpy static void memcpy_to_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp, int32_t src_stride, int32_t dst_stride, int16_t src_x, int16_t src_y, @@ -483,7 +685,75 @@ } } -void choose_memcpy_to_tiled_x(struct kgem *kgem, int swizzling) +fast_memcpy static void +memcpy_from_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height) +{ + const unsigned tile_width = 512; + const unsigned tile_height = 8; + const unsigned tile_size = 4096; + + const unsigned cpp = bpp / 8; + const unsigned stride_tiles = src_stride / tile_width; + const unsigned swizzle_pixels = 64 / cpp; + const unsigned tile_pixels = ffs(tile_width / cpp) - 1; + const unsigned tile_mask = (1 << tile_pixels) - 1; + + unsigned x, y; + + DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", + __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); + + dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp; + + for (y = 0; y < height; ++y) { + const uint32_t sy = y + src_y; + const uint32_t tile_row = + (sy / tile_height * stride_tiles * tile_size + + (sy & (tile_height-1)) * tile_width); + uint8_t *dst_row = (uint8_t *)dst + dst_stride * y; + uint32_t sx = src_x, offset; + + x = width * cpp; + if (sx & (swizzle_pixels - 1)) { + const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels); + const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx; + offset = tile_row + + (sx >> tile_pixels) * tile_size + + (sx & tile_mask) * cpp; + offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; + memcpy(dst_row, (const char *)src + offset, length * cpp); + + dst_row += length * cpp; + x -= length * cpp; + sx += length; + } + while (x >= 64) { + offset = tile_row + + (sx >> tile_pixels) * 
tile_size + + (sx & tile_mask) * cpp; + offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; + + memcpy(dst_row, (const char *)src + offset, 64); + + dst_row += 64; + x -= 64; + sx += swizzle_pixels; + } + if (x) { + offset = tile_row + + (sx >> tile_pixels) * tile_size + + (sx & tile_mask) * cpp; + offset ^= ((offset ^ (offset >> 2)) >> 3) & 64; + memcpy(dst_row, (const char *)src + offset, x); + } + } +} + +void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling) { switch (swizzling) { default: @@ -492,18 +762,22 @@ case I915_BIT_6_SWIZZLE_NONE: DBG(("%s: no swizzling\n", __FUNCTION__)); kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0; + kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_0; break; case I915_BIT_6_SWIZZLE_9: DBG(("%s: 6^9 swizzling\n", __FUNCTION__)); kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9; + kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9; break; case I915_BIT_6_SWIZZLE_9_10: DBG(("%s: 6^9^10 swizzling\n", __FUNCTION__)); kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_10; + kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9_10; break; case I915_BIT_6_SWIZZLE_9_11: DBG(("%s: 6^9^11 swizzling\n", __FUNCTION__)); kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_11; + kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9_11; break; } } diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/compiler.h xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/compiler.h --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/compiler.h 2013-05-29 15:53:40.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/compiler.h 2013-07-01 14:03:55.000000000 +0000 @@ -66,7 +66,7 @@ #endif #if HAS_GCC(4, 5) && defined(__OPTIMIZE__) -#define fast_memcpy __attribute__((target("inline-all-stringops"))) +#define fast_memcpy __attribute__((optimize("Ofast"))) __attribute__((target("inline-all-stringops"))) #else #define fast_memcpy #endif diff 
-Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/gen2_render.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/gen2_render.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/gen2_render.c 2013-06-07 13:28:26.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/gen2_render.c 2013-07-01 14:03:55.000000000 +0000 @@ -1632,7 +1632,7 @@ } static bool -is_unhandled_gradient(PicturePtr picture) +is_unhandled_gradient(PicturePtr picture, bool precise) { if (picture->pDrawable) return false; @@ -1642,7 +1642,7 @@ case SourcePictTypeLinear: return false; default: - return true; + return precise; } } @@ -1678,12 +1678,12 @@ } static bool -source_fallback(PicturePtr p, PixmapPtr pixmap) +source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) { if (sna_picture_is_solid(p, NULL)) return false; - if (is_unhandled_gradient(p) || !gen2_check_repeat(p)) + if (is_unhandled_gradient(p, precise) || !gen2_check_repeat(p)) return true; if (pixmap && source_is_busy(pixmap)) @@ -1712,11 +1712,13 @@ dst_pixmap = get_drawable_pixmap(dst->pDrawable); src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; - src_fallback = source_fallback(src, src_pixmap); + src_fallback = source_fallback(src, src_pixmap, + dst->polyMode == PolyModePrecise); if (mask) { mask_pixmap = mask->pDrawable ? 
get_drawable_pixmap(mask->pDrawable) : NULL; - mask_fallback = source_fallback(mask, mask_pixmap); + mask_fallback = source_fallback(mask, mask_pixmap, + dst->polyMode == PolyModePrecise); } else { mask_pixmap = NULL; mask_fallback = NULL; diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/gen3_render.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/gen3_render.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/gen3_render.c 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/gen3_render.c 2013-07-01 14:03:55.000000000 +0000 @@ -3307,11 +3307,30 @@ } static bool -source_fallback(PicturePtr p, PixmapPtr pixmap) +is_unhandled_gradient(PicturePtr picture, bool precise) +{ + if (picture->pDrawable) + return false; + + switch (picture->pSourcePict->type) { + case SourcePictTypeSolidFill: + case SourcePictTypeLinear: + case SourcePictTypeRadial: + return false; + default: + return precise; + } +} + +static bool +source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) { if (sna_picture_is_solid(p, NULL)) return false; + if (is_unhandled_gradient(p, precise)) + return true; + if (!gen3_check_xformat(p) || !gen3_check_repeat(p)) return true; @@ -3342,11 +3361,13 @@ dst_pixmap = get_drawable_pixmap(dst->pDrawable); src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; - src_fallback = source_fallback(src, src_pixmap); + src_fallback = source_fallback(src, src_pixmap, + dst->polyMode == PolyModePrecise); if (mask) { mask_pixmap = mask->pDrawable ? 
get_drawable_pixmap(mask->pDrawable) : NULL; - mask_fallback = source_fallback(mask, mask_pixmap); + mask_fallback = source_fallback(mask, mask_pixmap, + dst->polyMode == PolyModePrecise); } else { mask_pixmap = NULL; mask_fallback = false; diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/gen4_render.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/gen4_render.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/gen4_render.c 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/gen4_render.c 2013-07-01 14:03:55.000000000 +0000 @@ -1635,14 +1635,14 @@ } static bool -check_gradient(PicturePtr picture) +check_gradient(PicturePtr picture, bool precise) { switch (picture->pSourcePict->type) { case SourcePictTypeSolidFill: case SourcePictTypeLinear: return false; default: - return true; + return precise; } } @@ -1679,13 +1679,13 @@ } static bool -source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap) +source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise) { if (sna_picture_is_solid(p, NULL)) return false; if (p->pSourcePict) - return check_gradient(p); + return check_gradient(p, precise); if (!gen4_check_repeat(p) || !gen4_check_format(p->format)) return true; @@ -1717,11 +1717,13 @@ dst_pixmap = get_drawable_pixmap(dst->pDrawable); src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; - src_fallback = source_fallback(sna, src, src_pixmap); + src_fallback = source_fallback(sna, src, src_pixmap, + dst->polyMode == PolyModePrecise); if (mask) { mask_pixmap = mask->pDrawable ? 
get_drawable_pixmap(mask->pDrawable) : NULL; - mask_fallback = source_fallback(sna, mask, mask_pixmap); + mask_fallback = source_fallback(sna, mask, mask_pixmap, + dst->polyMode == PolyModePrecise); } else { mask_pixmap = NULL; mask_fallback = false; diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/gen5_render.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/gen5_render.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/gen5_render.c 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/gen5_render.c 2013-07-01 14:03:55.000000000 +0000 @@ -1611,7 +1611,7 @@ } static bool -is_gradient(PicturePtr picture) +is_gradient(PicturePtr picture, bool precise) { if (picture->pDrawable) return false; @@ -1621,7 +1621,7 @@ case SourcePictTypeLinear: return false; default: - return true; + return precise; } } @@ -1658,12 +1658,12 @@ } static bool -source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap) +source_fallback(struct sna *sna, PicturePtr p, PixmapPtr pixmap, bool precise) { if (sna_picture_is_solid(p, NULL)) return false; - if (is_gradient(p) || + if (is_gradient(p, precise) || !gen5_check_repeat(p) || !gen5_check_format(p->format)) return true; @@ -1694,11 +1694,13 @@ dst_pixmap = get_drawable_pixmap(dst->pDrawable); src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; - src_fallback = source_fallback(sna, src, src_pixmap); + src_fallback = source_fallback(sna, src, src_pixmap, + dst->polyMode == PolyModePrecise); if (mask) { mask_pixmap = mask->pDrawable ? 
get_drawable_pixmap(mask->pDrawable) : NULL; - mask_fallback = source_fallback(sna, mask, mask_pixmap); + mask_fallback = source_fallback(sna, mask, mask_pixmap, + dst->polyMode == PolyModePrecise); } else { mask_pixmap = NULL; mask_fallback = false; diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/gen6_render.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/gen6_render.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/gen6_render.c 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/gen6_render.c 2013-07-01 14:03:55.000000000 +0000 @@ -1931,7 +1931,7 @@ } static bool -check_gradient(PicturePtr picture) +check_gradient(PicturePtr picture, bool precise) { if (picture->pDrawable) return false; @@ -1941,7 +1941,7 @@ case SourcePictTypeLinear: return false; default: - return true; + return precise; } } @@ -1974,13 +1974,13 @@ } static bool -source_fallback(PicturePtr p, PixmapPtr pixmap) +source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) { if (sna_picture_is_solid(p, NULL)) return false; if (p->pSourcePict) - return check_gradient(p); + return check_gradient(p, precise); if (!gen6_check_repeat(p) || !gen6_check_format(p->format)) return true; @@ -2011,11 +2011,13 @@ dst_pixmap = get_drawable_pixmap(dst->pDrawable); src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; - src_fallback = source_fallback(src, src_pixmap); + src_fallback = source_fallback(src, src_pixmap, + dst->polyMode == PolyModePrecise); if (mask) { mask_pixmap = mask->pDrawable ? 
get_drawable_pixmap(mask->pDrawable) : NULL; - mask_fallback = source_fallback(mask, mask_pixmap); + mask_fallback = source_fallback(mask, mask_pixmap, + dst->polyMode == PolyModePrecise); } else { mask_pixmap = NULL; mask_fallback = false; diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/gen7_render.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/gen7_render.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/gen7_render.c 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/gen7_render.c 2013-07-01 14:03:55.000000000 +0000 @@ -2138,7 +2138,7 @@ } static bool -check_gradient(PicturePtr picture) +check_gradient(PicturePtr picture, bool precise) { if (picture->pDrawable) return false; @@ -2148,7 +2148,7 @@ case SourcePictTypeLinear: return false; default: - return true; + return precise; } } @@ -2181,13 +2181,13 @@ } static bool -source_fallback(PicturePtr p, PixmapPtr pixmap) +source_fallback(PicturePtr p, PixmapPtr pixmap, bool precise) { if (sna_picture_is_solid(p, NULL)) return false; if (p->pSourcePict) - return check_gradient(p); + return check_gradient(p, precise); if (!gen7_check_repeat(p) || !gen7_check_format(p->format)) return true; @@ -2218,11 +2218,13 @@ dst_pixmap = get_drawable_pixmap(dst->pDrawable); src_pixmap = src->pDrawable ? get_drawable_pixmap(src->pDrawable) : NULL; - src_fallback = source_fallback(src, src_pixmap); + src_fallback = source_fallback(src, src_pixmap, + dst->polyMode == PolyModePrecise); if (mask) { mask_pixmap = mask->pDrawable ? 
get_drawable_pixmap(mask->pDrawable) : NULL; - mask_fallback = source_fallback(mask, mask_pixmap); + mask_fallback = source_fallback(mask, mask_pixmap, + dst->polyMode == PolyModePrecise); } else { mask_pixmap = NULL; mask_fallback = false; diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/kgem.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/kgem.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/kgem.c 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/kgem.c 2013-07-01 14:03:55.000000000 +0000 @@ -182,6 +182,23 @@ #define debug_alloc__bo(k, b) #endif +#ifndef NDEBUG +static void assert_tiling(struct kgem *kgem, struct kgem_bo *bo) +{ + struct drm_i915_gem_get_tiling tiling; + + assert(bo); + + VG_CLEAR(tiling); + tiling.handle = bo->handle; + tiling.tiling_mode = -1; + (void)drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling); + assert(tiling.tiling_mode == bo->tiling); +} +#else +#define assert_tiling(kgem, bo) +#endif + static void kgem_sna_reset(struct kgem *kgem) { struct sna *sna = container_of(kgem, struct sna, kgem); @@ -968,15 +985,6 @@ { struct drm_i915_gem_get_tiling tiling; -#ifndef __x86_64__ - /* Between a register starved compiler emitting attrocious code - * and the extra overhead in the kernel for managing the tight - * 32-bit address space, unless we have a 64-bit system, - * using memcpy_to_tiled_x() is extremely slow. 
- */ - return; -#endif - if (kgem->gen < 050) /* bit17 swizzling :( */ return; @@ -991,7 +999,7 @@ if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling)) goto out; - choose_memcpy_to_tiled_x(kgem, tiling.swizzle_mode); + choose_memcpy_tiled_x(kgem, tiling.swizzle_mode); out: gem_close(kgem->fd, tiling.handle); } @@ -1205,8 +1213,8 @@ kgem->max_upload_tile_size = kgem->aperture_low; kgem->large_object_size = MAX_CACHE_SIZE; - if (kgem->large_object_size > kgem->max_gpu_size) - kgem->large_object_size = kgem->max_gpu_size; + if (kgem->large_object_size > half_gpu_max) + kgem->large_object_size = half_gpu_max; if (kgem->max_copy_tile_size > kgem->aperture_high/2) kgem->max_copy_tile_size = kgem->aperture_high/2; if (kgem->max_copy_tile_size > kgem->aperture_low) @@ -1573,6 +1581,7 @@ assert(!bo->flush); assert(!bo->needs_flush); assert(list_is_empty(&bo->vma)); + assert_tiling(kgem, bo); ASSERT_IDLE(kgem, bo->handle); kgem->need_expire = true; @@ -1835,6 +1844,7 @@ assert(bo->refcnt == 0); assert(!bo->purged); assert(bo->proxy == NULL); + assert_tiling(kgem, bo); bo->binding.offset = 0; @@ -3121,6 +3131,7 @@ list_del(&bo->request); bo->delta = 0; + assert_tiling(kgem, bo); return bo; discard: @@ -3203,6 +3214,7 @@ __FUNCTION__, bo->handle, num_pages(bo))); assert(use_active || bo->domain != DOMAIN_GPU); assert(!bo->needs_flush); + assert_tiling(kgem, bo); ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active); return bo; } @@ -3291,6 +3303,7 @@ assert(list_is_empty(&bo->list)); assert(use_active || bo->domain != DOMAIN_GPU); assert(!bo->needs_flush || use_active); + assert_tiling(kgem, bo); ASSERT_MAYBE_IDLE(kgem, bo->handle, !use_active); return bo; } @@ -3621,6 +3634,7 @@ unsigned int size; assert(bo->tiling); + assert_tiling(kgem, bo); assert(kgem->gen < 040); if (kgem->gen < 030) @@ -3671,6 +3685,7 @@ assert(bo->delta); assert(!bo->purged); assert(!bo->flush); + assert_tiling(kgem, bo); if (size > num_pages(bo) || num_pages(bo) > 2*size) continue; @@ -3694,6 
+3709,7 @@ DBG((" 1:from scanout: pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; return bo; } @@ -3713,6 +3729,7 @@ assert(!bo->scanout); assert(bo->refcnt == 0); assert(bo->reusable); + assert_tiling(kgem, bo); if (kgem->gen < 040) { if (bo->pitch < pitch) { @@ -3745,6 +3762,7 @@ DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; bo->flush = true; return bo; @@ -3756,6 +3774,7 @@ assert(bo->refcnt == 0); assert(bo->reusable); assert(!bo->scanout); + assert_tiling(kgem, bo); if (size > num_pages(bo)) continue; @@ -3784,6 +3803,7 @@ DBG((" 1:from large inactive: pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; return bo; } @@ -3809,6 +3829,7 @@ assert(bo->rq == NULL); assert(list_is_empty(&bo->request)); assert(bo->flush == false); + assert_tiling(kgem, bo); if (size > num_pages(bo)) { DBG(("inactive too small: %d < %d\n", @@ -3842,6 +3863,7 @@ assert(bo->domain != DOMAIN_GPU); ASSERT_IDLE(kgem, bo->handle); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; return bo; } @@ -3877,6 +3899,7 @@ assert(bo->tiling == tiling); assert(bo->flush == false); assert(!bo->scanout); + assert_tiling(kgem, bo); if (kgem->gen < 040) { if (bo->pitch < pitch) { @@ -3909,6 +3932,7 @@ DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= 
kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; return bo; } @@ -3921,6 +3945,7 @@ assert(!bo->scanout); assert(bo->tiling == tiling); assert(bo->flush == false); + assert_tiling(kgem, bo); if (num_pages(bo) < size) continue; @@ -3933,6 +3958,7 @@ DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; return bo; } @@ -3951,6 +3977,7 @@ assert(bo->reusable); assert(!bo->scanout); assert(bo->flush == false); + assert_tiling(kgem, bo); if (num_pages(bo) < size) continue; @@ -3969,6 +3996,7 @@ DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; return bo; } @@ -3992,6 +4020,7 @@ assert(bo->reusable); assert(!bo->scanout); assert(bo->flush == false); + assert_tiling(kgem, bo); if (bo->tiling) { if (bo->pitch < pitch) { @@ -4013,6 +4042,7 @@ DBG((" 1:from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", bo->pitch, bo->tiling, bo->handle, bo->unique_id)); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; return bo; } @@ -4033,6 +4063,7 @@ assert(bo->reusable); assert(!bo->scanout); assert(bo->flush == false); + assert_tiling(kgem, bo); if (size > num_pages(bo)) { DBG(("inactive too small: %d < %d\n", @@ -4070,6 +4101,7 @@ assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU); ASSERT_MAYBE_IDLE(kgem, bo->handle, flags & CREATE_INACTIVE); assert(bo->pitch*kgem_aligned_height(kgem, height, bo->tiling) <= kgem_bo_size(bo)); + assert_tiling(kgem, bo); bo->refcnt = 1; return bo; } @@ -4124,6 +4156,7 @@ } assert(bytes(bo) >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling)); + 
assert_tiling(kgem, bo); debug_alloc__bo(kgem, bo); @@ -4154,6 +4187,7 @@ return bo; assert(bo->tiling == I915_TILING_NONE); + assert_tiling(kgem, bo); if (kgem_bo_map__cpu(kgem, bo) == NULL) { kgem_bo_destroy(kgem, bo); @@ -4175,6 +4209,7 @@ bo = search_snoop_cache(kgem, NUM_PAGES(size), 0); if (bo) { assert(bo->tiling == I915_TILING_NONE); + assert_tiling(kgem, bo); assert(bo->snoop); bo->refcnt = 1; bo->pitch = stride; @@ -4188,6 +4223,7 @@ return NULL; assert(bo->tiling == I915_TILING_NONE); + assert_tiling(kgem, bo); if (!gem_set_cacheing(kgem->fd, bo->handle, SNOOPED)) { kgem_bo_destroy(kgem, bo); @@ -4385,6 +4421,7 @@ if (kgem->aperture + num_pages(bo) > kgem->aperture_high) return false; + assert_tiling(kgem, bo); if (kgem->gen < 040 && bo->tiling != I915_TILING_NONE) { if (kgem->nfence >= kgem->fence_max) return false; @@ -4431,6 +4468,7 @@ if (needs_semaphore(kgem, bo)) return false; + assert_tiling(kgem, bo); num_pages += num_pages(bo); num_exec++; if (kgem->gen < 040 && bo->tiling) { @@ -4618,6 +4656,7 @@ assert(bo->proxy == NULL); assert(list_is_empty(&bo->list)); assert(!IS_USER_MAP(bo->map)); + assert_tiling(kgem, bo); if (bo->tiling == I915_TILING_NONE && !bo->scanout && kgem->has_llc) { DBG(("%s: converting request for GTT map into CPU map\n", @@ -4662,6 +4701,7 @@ assert(list_is_empty(&bo->list)); assert(!IS_USER_MAP(bo->map)); assert(bo->exec == NULL); + assert_tiling(kgem, bo); if (bo->tiling == I915_TILING_NONE && !bo->scanout && (kgem->has_llc || bo->domain == DOMAIN_CPU)) { @@ -4729,6 +4769,7 @@ assert(bo->exec == NULL); assert(list_is_empty(&bo->list)); assert(!IS_USER_MAP(bo->map)); + assert_tiling(kgem, bo); if (IS_CPU_MAP(bo->map)) kgem_bo_release_map(kgem, bo); diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/kgem.h xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/kgem.h --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/kgem.h 2013-06-27 14:39:52.000000000 +0000 +++ 
xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/kgem.h 2013-07-01 14:03:55.000000000 +0000 @@ -201,6 +201,11 @@ int16_t src_x, int16_t src_y, int16_t dst_x, int16_t dst_y, uint16_t width, uint16_t height); + void (*memcpy_from_tiled_x)(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height); uint16_t reloc__self[256]; uint32_t batch[64*1024-8] page_aligned; @@ -559,6 +564,22 @@ return kgem_bo_size(bo) <= kgem->aperture_mappable / 4; } +static inline bool kgem_bo_can_map__cpu(struct kgem *kgem, + struct kgem_bo *bo, + bool write) +{ + if (bo->scanout) + return false; + + if (kgem->has_llc) + return true; + + if (bo->domain != DOMAIN_CPU) + return false; + + return !write || bo->exec == NULL; +} + static inline bool kgem_bo_is_snoop(struct kgem_bo *bo) { assert(bo->refcnt); @@ -713,6 +734,21 @@ width, height); } -void choose_memcpy_to_tiled_x(struct kgem *kgem, int swizzling); +static inline void +memcpy_from_tiled_x(struct kgem *kgem, + const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height) +{ + return kgem->memcpy_from_tiled_x(src, dst, bpp, + src_stride, dst_stride, + src_x, src_y, + dst_x, dst_y, + width, height); +} + +void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling); #endif /* KGEM_H */ diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna.h xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna.h --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna.h 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna.h 2013-07-01 14:03:55.000000000 +0000 @@ -293,7 +293,6 @@ bool dri_available; bool dri_open; - char *deviceName; /* Broken-out options. 
*/ OptionInfoPtr Options; diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_accel.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_accel.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_accel.c 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_accel.c 2013-07-01 14:03:55.000000000 +0000 @@ -77,6 +77,7 @@ #define ACCEL_FILL_SPANS 1 #define ACCEL_SET_SPANS 1 #define ACCEL_PUT_IMAGE 1 +#define ACCEL_GET_IMAGE 1 #define ACCEL_COPY_AREA 1 #define ACCEL_COPY_PLANE 1 #define ACCEL_COPY_WINDOW 1 @@ -422,6 +423,9 @@ static void sna_pixmap_free_gpu(struct sna *sna, struct sna_pixmap *priv) { assert(priv->gpu_damage == NULL || priv->gpu_bo); + + if (priv->cow) + sna_pixmap_undo_cow(sna, priv, 0); assert(priv->cow == NULL); sna_damage_destroy(&priv->gpu_damage); @@ -1402,7 +1406,8 @@ } static inline bool has_coherent_map(struct sna *sna, - struct kgem_bo *bo) + struct kgem_bo *bo, + unsigned flags) { assert(bo->map); @@ -1412,7 +1417,7 @@ if (bo->tiling != I915_TILING_NONE) return false; - return bo->domain == DOMAIN_CPU || sna->kgem.has_llc; + return kgem_bo_can_map__cpu(&sna->kgem, bo, flags & MOVE_WRITE); } static inline bool has_coherent_ptr(struct sna_pixmap *priv) @@ -1436,7 +1441,7 @@ static inline bool pixmap_inplace(struct sna *sna, PixmapPtr pixmap, struct sna_pixmap *priv, - bool write_only) + unsigned flags) { if (FORCE_INPLACE) return FORCE_INPLACE > 0; @@ -1445,9 +1450,9 @@ return false; if (priv->mapped) - return has_coherent_map(sna, priv->gpu_bo); + return has_coherent_map(sna, priv->gpu_bo, flags); - if (!write_only && priv->cpu_damage) + if (flags & MOVE_READ && priv->cpu_damage) return false; return (pixmap->devKind * pixmap->drawable.height >> 12) > @@ -1577,7 +1582,6 @@ flags)); assert(priv->gpu_bo == cow->bo); - assert(!priv->mapped); assert(cow->refcnt); list_del(&priv->cow_list); @@ -1669,6 +1673,10 @@ assert(priv->gpu_bo); 
kgem_bo_destroy(&sna->kgem, priv->gpu_bo); priv->gpu_bo = bo; + if (priv->gpu_bo == NULL && priv->mapped) { + priv->pixmap->devPrivate.ptr = NULL; + priv->mapped = false; + } } priv->cow = NULL; @@ -1785,6 +1793,12 @@ return true; } + if (priv->create & KGEM_CAN_CREATE_LARGE) { + DBG(("%s: large object, has GPU? %d\n", + __FUNCTION__, priv->gpu_bo)); + return priv->gpu_bo != NULL; + } + if (flags & MOVE_WRITE && priv->gpu_bo&&kgem_bo_is_busy(priv->gpu_bo)) { DBG(("%s: no, GPU is busy, so stage write\n", __FUNCTION__)); return false; @@ -1848,7 +1862,7 @@ if (!priv->mapped) goto skip_inplace_map; - assert(has_coherent_map(sna, priv->gpu_bo)); + assert(has_coherent_map(sna, priv->gpu_bo, flags)); pixmap->devKind = priv->gpu_bo->pitch; assert(priv->gpu_bo->proxy == NULL); @@ -1896,7 +1910,7 @@ assert(priv->gpu_bo == NULL || priv->gpu_bo->proxy == NULL); if (operate_inplace(priv, flags) && - pixmap_inplace(sna, pixmap, priv, (flags & MOVE_READ) == 0) && + pixmap_inplace(sna, pixmap, priv, flags) && sna_pixmap_create_mappable_gpu(pixmap, (flags & MOVE_READ) == 0)) { DBG(("%s: try to operate inplace (GTT)\n", __FUNCTION__)); assert(priv->cow == NULL || (flags & MOVE_WRITE) == 0); @@ -1908,7 +1922,7 @@ pixmap->devPrivate.ptr = kgem_bo_map(&sna->kgem, priv->gpu_bo); priv->mapped = pixmap->devPrivate.ptr != NULL; if (priv->mapped) { - assert(has_coherent_map(sna, priv->gpu_bo)); + assert(has_coherent_map(sna, priv->gpu_bo, flags)); pixmap->devKind = priv->gpu_bo->pitch; if (flags & MOVE_WRITE) { assert(priv->gpu_bo->proxy == NULL); @@ -1936,7 +1950,7 @@ } if (priv->gpu_damage && priv->cpu_damage == NULL && !priv->cow && - (flags & MOVE_READ || priv->gpu_bo->domain == DOMAIN_CPU || sna->kgem.has_llc) && + (flags & MOVE_READ || kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, flags & MOVE_WRITE)) && priv->gpu_bo->tiling == I915_TILING_NONE && ((flags & (MOVE_WRITE | MOVE_ASYNC_HINT)) == 0 || !__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))) { @@ -1999,7 +2013,7 @@ if 
(priv->clear_color == 0 || pixmap->drawable.bitsPerPixel == 8 || - priv->clear_color == (1 << pixmap->drawable.bitsPerPixel) - 1) { + priv->clear_color == (1 << pixmap->drawable.depth) - 1) { memset(pixmap->devPrivate.ptr, priv->clear_color, pixmap->devKind * pixmap->drawable.height); } else { @@ -2058,8 +2072,6 @@ pixmap->drawable.width, pixmap->drawable.height); assert(priv->gpu_damage == NULL); - if (priv->cow) - sna_pixmap_undo_cow(sna, priv, 0); sna_pixmap_free_gpu(sna, priv); if (priv->flush) { @@ -2072,14 +2084,14 @@ if (flags & MOVE_WRITE) { assert(DAMAGE_IS_ALL(priv->cpu_damage)); assert(priv->gpu_damage == NULL); + assert(priv->gpu_bo == NULL || priv->gpu_bo->proxy == NULL); if (priv->cow) sna_pixmap_undo_cow(sna, priv, 0); - priv->source_count = SOURCE_BIAS; - assert(priv->gpu_bo == NULL || priv->gpu_bo->proxy == NULL); - if (priv->gpu_bo && priv->gpu_bo->domain != DOMAIN_GPU) { - DBG(("%s: discarding inactive GPU bo\n", __FUNCTION__)); + if (priv->gpu_bo && priv->gpu_bo->rq == NULL) { + DBG(("%s: discarding idle GPU bo\n", __FUNCTION__)); sna_pixmap_free_gpu(sna, priv); } + priv->source_count = SOURCE_BIAS; } if (priv->cpu_bo) { @@ -2091,10 +2103,6 @@ assert(pixmap->devPrivate.ptr == (void *)((unsigned long)priv->cpu_bo->map & ~3)); assert((flags & MOVE_WRITE) == 0 || !kgem_bo_is_busy(priv->cpu_bo)); } - if (flags & MOVE_WRITE) { - DBG(("%s: discarding GPU bo in favour of CPU bo\n", __FUNCTION__)); - sna_pixmap_free_gpu(sna, priv); - } } priv->cpu = (flags & (MOVE_INPLACE_HINT | MOVE_ASYNC_HINT)) == 0 && @@ -2151,7 +2159,7 @@ PixmapPtr pixmap, RegionPtr region, struct sna_pixmap *priv, - bool write_only) + unsigned flags) { assert_pixmap_damage(pixmap); @@ -2161,7 +2169,7 @@ if (wedged(sna) && !priv->pinned) return false; - if ((priv->cpu || !write_only) && + if ((priv->cpu || flags & MOVE_READ) && region_overlaps_damage(region, priv->cpu_damage, 0, 0)) { DBG(("%s: no, uncovered CPU damage pending\n", __FUNCTION__)); return false; @@ -2174,7 +2182,7 @@ 
if (priv->mapped) { DBG(("%s: yes, already mapped, continuiung\n", __FUNCTION__)); - return has_coherent_map(sna, priv->gpu_bo); + return has_coherent_map(sna, priv->gpu_bo, flags); } if (priv->flush) { @@ -2247,6 +2255,7 @@ pixmap->drawable.height)) { DBG(("%s: pixmap=%ld all damaged on CPU\n", __FUNCTION__, pixmap->drawable.serialNumber)); + assert(!priv->clear); sna_damage_destroy(&priv->gpu_damage); @@ -2261,8 +2270,9 @@ } if (USE_INPLACE && - (flags & (MOVE_READ | MOVE_ASYNC_HINT)) == 0 && - (priv->flush || box_inplace(pixmap, ®ion->extents))) { + (priv->create & KGEM_CAN_CREATE_LARGE || + ((flags & (MOVE_READ | MOVE_ASYNC_HINT)) == 0 && + (priv->flush || box_inplace(pixmap, ®ion->extents))))) { DBG(("%s: marking for inplace hint (%d, %d)\n", __FUNCTION__, priv->flush, box_inplace(pixmap, ®ion->extents))); flags |= MOVE_INPLACE_HINT; @@ -2289,7 +2299,7 @@ } if (operate_inplace(priv, flags) && - region_inplace(sna, pixmap, region, priv, (flags & MOVE_READ) == 0) && + region_inplace(sna, pixmap, region, priv, flags) && sna_pixmap_create_mappable_gpu(pixmap, false)) { DBG(("%s: try to operate inplace\n", __FUNCTION__)); assert(priv->cow == NULL || (flags & MOVE_WRITE) == 0); @@ -2300,10 +2310,11 @@ pixmap->devPrivate.ptr = kgem_bo_map(&sna->kgem, priv->gpu_bo); priv->mapped = pixmap->devPrivate.ptr != NULL; if (priv->mapped) { - assert(has_coherent_map(sna, priv->gpu_bo)); + assert(has_coherent_map(sna, priv->gpu_bo, flags)); pixmap->devKind = priv->gpu_bo->pitch; if (flags & MOVE_WRITE) { if (!DAMAGE_IS_ALL(priv->gpu_damage)) { + assert(!priv->clear); sna_damage_add(&priv->gpu_damage, region); if (sna_damage_is_all(&priv->gpu_damage, pixmap->drawable.width, @@ -2346,23 +2357,25 @@ sna_damage_contains_box__no_reduce(priv->gpu_damage, ®ion->extents)) && priv->gpu_bo->tiling == I915_TILING_NONE && - (priv->gpu_bo->domain == DOMAIN_CPU || sna->kgem.has_llc) && + kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, flags & MOVE_WRITE) && ((flags & (MOVE_WRITE | 
MOVE_ASYNC_HINT)) == 0 || !__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))) { - DBG(("%s: try to operate inplace (CPU)\n", __FUNCTION__)); + DBG(("%s: try to operate inplace (CPU), read? %d, write? %d\n", + __FUNCTION__, !!(flags & MOVE_READ), !!(flags & MOVE_WRITE))); assert(priv->cow == NULL || (flags & MOVE_WRITE) == 0); assert(!priv->mapped); pixmap->devPrivate.ptr = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); if (pixmap->devPrivate.ptr != NULL) { - assert(has_coherent_map(sna, priv->gpu_bo)); + assert(has_coherent_map(sna, priv->gpu_bo, flags)); assert(IS_CPU_MAP(priv->gpu_bo->map)); pixmap->devKind = priv->gpu_bo->pitch; priv->cpu = true; priv->mapped = true; if (flags & MOVE_WRITE) { if (!DAMAGE_IS_ALL(priv->gpu_damage)) { + assert(!priv->clear); sna_damage_add(&priv->gpu_damage, region); if (sna_damage_is_all(&priv->gpu_damage, pixmap->drawable.width, @@ -2423,6 +2436,8 @@ int n = RegionNumRects(region); BoxPtr box = RegionRects(region); + assert(DAMAGE_IS_ALL(priv->gpu_damage)); + DBG(("%s: pending clear, doing partial fill\n", __FUNCTION__)); if (priv->cpu_bo) { DBG(("%s: syncing CPU bo\n", __FUNCTION__)); @@ -2673,11 +2688,8 @@ pixmap->drawable.width, pixmap->drawable.height); if (DAMAGE_IS_ALL(priv->cpu_damage)) { - if (priv->gpu_bo) { - DBG(("%s: replaced entire pixmap\n", - __FUNCTION__)); - sna_pixmap_free_gpu(sna, priv); - } + DBG(("%s: replaced entire pixmap\n", __FUNCTION__)); + sna_pixmap_free_gpu(sna, priv); } if (priv->flush) { assert(!priv->shm); @@ -2695,6 +2707,7 @@ assert(priv->gpu_bo == NULL || priv->gpu_bo->proxy == NULL); assert(priv->gpu_bo || priv->gpu_damage == NULL); assert(!priv->flush || !list_is_empty(&priv->flush_list)); + assert(!priv->clear); } if ((flags & MOVE_ASYNC_HINT) == 0 && priv->cpu_bo) { DBG(("%s: syncing cpu bo\n", __FUNCTION__)); @@ -2938,11 +2951,11 @@ box, n, 0); } if (!ok) { - assert(!priv->mapped); - if (pixmap->devPrivate.ptr == NULL) { + if (priv->mapped || pixmap->devPrivate.ptr == NULL) { assert(priv->ptr && 
priv->stride); pixmap->devPrivate.ptr = PTR(priv->ptr); pixmap->devKind = priv->stride; + priv->mapped = false; } if (n == 1 && !priv->pinned && box->x1 <= 0 && box->y1 <= 0 && @@ -2977,11 +2990,11 @@ box, 1, 0); } if (!ok) { - assert(!priv->mapped); - if (pixmap->devPrivate.ptr == NULL) { + if (priv->mapped || pixmap->devPrivate.ptr == NULL) { assert(priv->ptr && priv->stride); pixmap->devPrivate.ptr = PTR(priv->ptr); pixmap->devKind = priv->stride; + priv->mapped = false; } ok = sna_write_boxes(sna, pixmap, priv->gpu_bo, 0, 0, @@ -3008,11 +3021,11 @@ box, n, 0); } if (!ok) { - assert(!priv->mapped); - if (pixmap->devPrivate.ptr == NULL) { + if (priv->mapped || pixmap->devPrivate.ptr == NULL) { assert(priv->ptr && priv->stride); pixmap->devPrivate.ptr = PTR(priv->ptr); pixmap->devKind = priv->stride; + priv->mapped = false; } ok = sna_write_boxes(sna, pixmap, priv->gpu_bo, 0, 0, @@ -3938,16 +3951,17 @@ struct kgem_bo *bo = priv->gpu_bo; assert(bo); - if (priv->cow) - return false; - - if (bo->tiling != I915_TILING_X) + if (priv->cow) { + DBG(("%s: no, has COW\n", __FUNCTION__)); return false; + } - if (bo->scanout) + if (bo->tiling != I915_TILING_X) { + DBG(("%s: no, uses %d tiling\n", __FUNCTION__, bo->tiling)); return false; + } - return bo->domain == DOMAIN_CPU || kgem->has_llc; + return kgem_bo_can_map__cpu(kgem, bo, true); } static bool @@ -4012,8 +4026,6 @@ if (priv->gpu_bo && (replaces || priv->gpu_bo->proxy)) { DBG(("%s: discarding cached upload proxy\n", __FUNCTION__)); - if (priv->cow) - sna_pixmap_undo_cow(sna, priv, 0); sna_pixmap_free_gpu(sna, priv); } @@ -4025,10 +4037,11 @@ return false; assert(priv->gpu_bo->tiling == I915_TILING_X); - if (__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)) + if ((priv->create & KGEM_CAN_CREATE_LARGE) == 0 && + __kgem_bo_is_busy(&sna->kgem, priv->gpu_bo)) return false; - dst = __kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); + dst = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); if (dst == NULL) return false; @@ -4048,9 +4061,9 
@@ box->x2 - box->x1, box->y2 - box->y1); box++; } while (--n); - __kgem_bo_unmap__cpu(&sna->kgem, priv->gpu_bo, dst); if (!DAMAGE_IS_ALL(priv->gpu_damage)) { + assert(!priv->clear); if (replaces) { sna_damage_all(&priv->gpu_damage, pixmap->drawable.width, @@ -4740,6 +4753,7 @@ } if (!DAMAGE_IS_ALL(priv->gpu_damage)) { + assert(!priv->clear); RegionTranslate(region, tx, ty); sna_damage_add(&priv->gpu_damage, region); } @@ -4945,7 +4959,7 @@ hint = source_prefer_gpu(sna, src_priv, region, src_dx, src_dy) ?: region_inplace(sna, dst_pixmap, region, - dst_priv, alu_overwrites(alu)); + dst_priv, alu_overwrites(alu) ? MOVE_WRITE : MOVE_READ | MOVE_WRITE); if (dst_priv->cpu_damage && alu_overwrites(alu)) { DBG(("%s: overwritting CPU damage\n", __FUNCTION__)); if (region_subsumes_damage(region, dst_priv->cpu_damage)) { @@ -4977,7 +4991,7 @@ &region->extents, &damage); if (bo) { if (src_priv && src_priv->clear) { - DBG(("%s: applying src clear[%08x] to dst\n", + DBG(("%s: applying src clear [%08x] to dst\n", __FUNCTION__, src_priv->clear_color)); if (n == 1) { if (replaces) @@ -4995,6 +5009,10 @@ } if (replaces && bo == dst_priv->gpu_bo) { + DBG(("%s: marking dst handle=%d as all clear [%08x]\n", + __FUNCTION__, + dst_priv->gpu_bo->handle, + src_priv->clear_color)); dst_priv->clear = true; dst_priv->clear_color = src_priv->clear_color; sna_damage_all(&dst_priv->gpu_damage, @@ -5292,8 +5310,10 @@ assert(dst_priv->clear == false); dst_priv->cpu = false; if (damage) { + assert(!dst_priv->clear); assert(dst_priv->gpu_bo); assert(dst_priv->gpu_bo->proxy == NULL); + assert(*damage == dst_priv->gpu_damage); if (replaces) { sna_damage_destroy(&dst_priv->cpu_damage); sna_damage_all(&dst_priv->gpu_damage, @@ -14259,12 +14279,11 @@ } static bool -sna_get_image_blt(DrawablePtr drawable, +sna_get_image_blt(PixmapPtr pixmap, RegionPtr region, char *dst, unsigned flags) { - PixmapPtr pixmap = get_drawable_pixmap(drawable); struct sna_pixmap *priv = sna_pixmap(pixmap); struct sna *sna = 
to_sna_from_pixmap(pixmap); struct kgem_bo *dst_bo; @@ -14278,10 +14297,17 @@ int w = region->extents.x2 - region->extents.x1; int h = region->extents.y2 - region->extents.y1; + DBG(("%s: applying clear [%08x]\n", + __FUNCTION__, priv->clear_color)); + assert(DAMAGE_IS_ALL(priv->gpu_damage)); + assert(priv->cpu_damage == NULL); + pitch = PixmapBytePad(w, pixmap->drawable.depth); if (priv->clear_color == 0 || pixmap->drawable.bitsPerPixel == 8 || - priv->clear_color == (1U << pixmap->drawable.bitsPerPixel) - 1) { + priv->clear_color == (1U << pixmap->drawable.depth) - 1) { + DBG(("%s: memset clear [%02x]\n", + __FUNCTION__, priv->clear_color & 0xff)); memset(dst, priv->clear_color, pitch * h); } else { pixman_fill((uint32_t *)dst, @@ -14316,21 +14342,17 @@ DBG(("%s: download through a temporary map\n", __FUNCTION__)); pitch = PixmapBytePad(region->extents.x2 - region->extents.x1, - drawable->depth); + pixmap->drawable.depth); dst_bo = kgem_create_map(&sna->kgem, dst, pitch * (region->extents.y2 - region->extents.y1), false); if (dst_bo) { - int16_t dx, dy; - dst_bo->flush = true; dst_bo->pitch = pitch; kgem_bo_mark_unreusable(dst_bo); - get_drawable_deltas(drawable, pixmap, &dx, &dy); - ok = sna->render.copy_boxes(sna, GXcopy, - pixmap, priv->gpu_bo, dx, dy, + pixmap, priv->gpu_bo, 0, 0, pixmap, dst_bo, -region->extents.x1, -region->extents.y1, @@ -14345,6 +14367,71 @@ return ok; } +static bool +sna_get_image_inplace(PixmapPtr pixmap, + RegionPtr region, + char *dst, + unsigned flags) +{ + struct sna_pixmap *priv = sna_pixmap(pixmap); + struct sna *sna = to_sna_from_pixmap(pixmap); + char *src; + + if (priv == NULL || priv->gpu_bo == NULL) + return false; + + switch (priv->gpu_bo->tiling) { + case I915_TILING_Y: + return false; + case I915_TILING_X: + if (!sna->kgem.memcpy_from_tiled_x) + return false; + default: + break; + } + + if (!kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, false)) + return false; + + if (priv->gpu_damage == NULL || + 
!(DAMAGE_IS_ALL(priv->gpu_damage) || + sna_damage_contains_box__no_reduce(priv->gpu_damage, + &region->extents))) + return false; + + src = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo); + if (src == NULL) + return false; + + kgem_bo_sync__cpu_full(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC); + + if (priv->gpu_bo->tiling) { + DBG(("%s: download through a tiled CPU map\n", __FUNCTION__)); + memcpy_from_tiled_x(&sna->kgem, src, dst, + pixmap->drawable.bitsPerPixel, + priv->gpu_bo->pitch, + PixmapBytePad(region->extents.x2 - region->extents.x1, + pixmap->drawable.depth), + region->extents.x1, region->extents.y1, + 0, 0, + region->extents.x2 - region->extents.x1, + region->extents.y2 - region->extents.y1); + } else { + DBG(("%s: download through a linear CPU map\n", __FUNCTION__)); + memcpy_blt(src, dst, + pixmap->drawable.bitsPerPixel, + priv->gpu_bo->pitch, + PixmapBytePad(region->extents.x2 - region->extents.x1, + pixmap->drawable.depth), + region->extents.x1, region->extents.y1, + 0, 0, + region->extents.x2 - region->extents.x1, + region->extents.y2 - region->extents.y1); + } + + return true; +} + static void sna_get_image(DrawablePtr drawable, int x, int y, int w, int h, @@ -14353,22 +14440,14 @@ { RegionRec region; unsigned int flags; - bool can_blt; if (!fbDrawableEnabled(drawable)) return; - DBG(("%s (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); - - region.extents.x1 = x + drawable->x; - region.extents.y1 = y + drawable->y; - region.extents.x2 = region.extents.x1 + w; - region.extents.y2 = region.extents.y1 + h; - region.data = NULL; - - can_blt = format == ZPixmap && - drawable->bitsPerPixel >= 8 && - PM_IS_SOLID(drawable, mask); + DBG(("%s: pixmap=%ld (%d, %d)x(%d, %d), format=%d, mask=%lx, depth=%d\n", + __FUNCTION__, + (long)get_drawable_pixmap(drawable)->drawable.serialNumber, + x, y, w, h, format, mask, drawable->depth)); flags = MOVE_READ; if ((w | h) == 1) @@ -14376,29 +14455,49 @@ if (w == drawable->width) flags |= MOVE_WHOLE_HINT; - if (can_blt && 
sna_get_image_blt(drawable, &region, dst, flags)) - return; - - if (!sna_drawable_move_region_to_cpu(drawable, &region, flags)) - return; - - if (can_blt) { + if (ACCEL_GET_IMAGE && + !FORCE_FALLBACK && + format == ZPixmap && + drawable->bitsPerPixel >= 8 && + PM_IS_SOLID(drawable, mask)) { PixmapPtr pixmap = get_drawable_pixmap(drawable); int16_t dx, dy; + get_drawable_deltas(drawable, pixmap, &dx, &dy); + region.extents.x1 = x + drawable->x + dx; + region.extents.y1 = y + drawable->y + dy; + region.extents.x2 = region.extents.x1 + w; + region.extents.y2 = region.extents.y1 + h; + region.data = NULL; + + if (sna_get_image_blt(pixmap, &region, dst, flags)) + return; + + if (sna_get_image_inplace(pixmap, &region, dst, flags)) + return; + + if (!sna_drawable_move_region_to_cpu(&pixmap->drawable, + &region, flags)) + return; + DBG(("%s: copy box (%d, %d), (%d, %d)\n", __FUNCTION__, region.extents.x1, region.extents.y1, region.extents.x2, region.extents.y2)); - get_drawable_deltas(drawable, pixmap, &dx, &dy); assert(has_coherent_ptr(sna_pixmap(pixmap))); memcpy_blt(pixmap->devPrivate.ptr, dst, drawable->bitsPerPixel, pixmap->devKind, PixmapBytePad(w, drawable->depth), - region.extents.x1 + dx, - region.extents.y1 + dy, - 0, 0, w, h); - } else - fbGetImage(drawable, x, y, w, h, format, mask, dst); + region.extents.x1, region.extents.y1, 0, 0, w, h); + } else { + region.extents.x1 = x + drawable->x; + region.extents.y1 = y + drawable->y; + region.extents.x2 = region.extents.x1 + w; + region.extents.y2 = region.extents.y1 + h; + region.data = NULL; + + if (sna_drawable_move_region_to_cpu(drawable, &region, flags)) + fbGetImage(drawable, x, y, w, h, format, mask, dst); + } } static void diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_blt.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_blt.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_blt.c 2013-06-27 14:39:52.000000000 +0000 +++ 
xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_blt.c 2013-07-01 14:03:55.000000000 +0000 @@ -1537,11 +1537,7 @@ assert(op->dst.bo); assert(kgem_bo_can_blt(&sna->kgem, op->dst.bo)); - - if (!kgem_bo_can_blt(&sna->kgem, bo)) { - DBG(("%s: fallback -- can't blt from source\n", __FUNCTION__)); - return false; - } + assert(kgem_bo_can_blt(&sna->kgem, bo)); if (!kgem_check_many_bo_fenced(&sna->kgem, op->dst.bo, bo, NULL)) { kgem_submit(&sna->kgem); @@ -1919,16 +1915,8 @@ struct sna_composite_op *op, uint32_t alpha_fixup) { - PixmapPtr src = op->u.blt.src_pixmap; - DBG(("%s\n", __FUNCTION__)); - if (!sna_pixmap_move_to_cpu(src, MOVE_READ)) - return false; - - assert(src->devKind); - assert(src->devPrivate.ptr); - if (op->dst.bo) { assert(op->dst.bo == sna_pixmap(op->dst.pixmap)->gpu_bo); if (alpha_fixup) { @@ -1958,15 +1946,6 @@ return true; } -static bool source_is_gpu(PixmapPtr pixmap, const BoxRec *box) -{ - struct sna_pixmap *priv = sna_pixmap(pixmap); - if (priv == NULL) - return false; - return sna_damage_contains_box(priv->gpu_damage, - box) != PIXMAN_REGION_OUT; -} - #define alphaless(format) PICT_FORMAT(PICT_FORMAT_BPP(format), \ PICT_FORMAT_TYPE(format), \ 0, \ @@ -2043,10 +2022,13 @@ return prepare_blt_nop(sna, tmp); hint = 0; - if (can_render(sna)) + if (can_render(sna)) { hint |= PREFER_GPU; - if (dst->pCompositeClip->data == NULL) - hint |= IGNORE_CPU; + if (sna_pixmap(tmp->dst.pixmap)->gpu_bo) + hint |= FORCE_GPU; + if (dst->pCompositeClip->data == NULL) + hint |= IGNORE_CPU; + } tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &dst_box, &tmp->damage); if (tmp->dst.bo && !kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) { @@ -2086,11 +2068,17 @@ color = get_solid_color(src, tmp->dst.format); fill: + if (color == 0) + goto clear; + hint = 0; - if (can_render(sna)) + if (can_render(sna)) { hint |= PREFER_GPU; - if (dst->pCompositeClip->data == NULL) - hint |= IGNORE_CPU; + if (sna_pixmap(tmp->dst.pixmap)->gpu_bo) + hint |= FORCE_GPU; + if 
(dst->pCompositeClip->data == NULL) + hint |= IGNORE_CPU; + } tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &dst_box, &tmp->damage); if (tmp->dst.bo && !kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) { @@ -2144,7 +2132,7 @@ if ((x >= src->pDrawable->width || y >= src->pDrawable->height || - x + width <= 0 || + x + width <= 0 || y + height <= 0) && (!src->repeat || src->repeatType == RepeatNone)) { DBG(("%s: source is outside of valid area, converting to clear\n", @@ -2173,7 +2161,7 @@ /* XXX tiling? fixup extend none? */ if (x < 0 || y < 0 || - x + width > src->pDrawable->width || + x + width > src->pDrawable->width || y + height > src->pDrawable->height) { DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d, repeat=%d\n", __FUNCTION__, @@ -2211,78 +2199,88 @@ __FUNCTION__, tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup)); - ret = false; src_box.x1 = x; src_box.y1 = y; src_box.x2 = x + width; src_box.y2 = y + height; - bo = NULL; + bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true); + if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) { + DBG(("%s: can not blit from src size=%dx%d, tiling? %d, pitch? 
%d\n", + __FUNCTION__, + src_pixmap->drawable.width < sna->render.max_3d_size, + src_pixmap->drawable.height < sna->render.max_3d_size, + bo->tiling, bo->pitch)); + + if (src_pixmap->drawable.width <= sna->render.max_3d_size && + src_pixmap->drawable.height <= sna->render.max_3d_size && + bo->pitch <= sna->render.max_3d_pitch) + { + return false; + } + + bo = NULL; + } hint = 0; - if (can_render(sna)) + if (bo || can_render(sna)) { hint |= PREFER_GPU; - if (dst->pCompositeClip->data == NULL) - hint |= IGNORE_CPU; - if (source_is_gpu(src_pixmap, &src_box)) - hint |= FORCE_GPU; - + if (dst->pCompositeClip->data == NULL) + hint |= IGNORE_CPU; + if (bo) + hint |= FORCE_GPU; + } tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint, &dst_box, &tmp->damage); - if (tmp->dst.bo && !kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) { - DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n", - __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch)); - return false; - } - if (tmp->dst.bo || source_is_gpu(src_pixmap, &src_box)) - bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true); + ret = false; if (bo) { - if (!tmp->dst.bo) - tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, - FORCE_GPU | PREFER_GPU, - &dst_box, - &tmp->damage); - if (!tmp->dst.bo) { DBG(("%s: fallback -- unaccelerated read back\n", __FUNCTION__)); + if (!kgem_bo_is_busy(bo)) + goto put; } else if (bo->snoop && tmp->dst.bo->snoop) { DBG(("%s: fallback -- can not copy between snooped bo\n", __FUNCTION__)); + goto put; + } else if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) { + DBG(("%s: fallback -- unaccelerated upload\n", + __FUNCTION__)); + if (!kgem_bo_is_busy(tmp->dst.bo) && + !kgem_bo_is_busy(bo)) + goto put; } else { ret = prepare_blt_copy(sna, tmp, bo, alpha_fixup); if (fallback && !ret) goto put; } } else { + RegionRec region; + put: - if (tmp->dst.bo) { - struct sna_pixmap *priv = sna_pixmap(tmp->dst.pixmap); - if (tmp->dst.bo == priv->cpu_bo) { - assert(kgem_bo_is_busy(tmp->dst.bo)); - 
tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, - FORCE_GPU | PREFER_GPU, - &dst_box, - &tmp->damage); - if (tmp->dst.bo == priv->cpu_bo) { - DBG(("%s: forcing the stall to overwrite a busy CPU bo\n", __FUNCTION__)); - tmp->dst.bo = NULL; - tmp->damage = NULL; - } - } + if (tmp->dst.bo == sna_pixmap(tmp->dst.pixmap)->cpu_bo) { + tmp->dst.bo = NULL; + tmp->damage = NULL; } if (tmp->dst.bo == NULL) { - RegionRec region; + hint = MOVE_INPLACE_HINT | MOVE_WRITE; + if (dst->pCompositeClip->data) + hint |= MOVE_READ; region.extents = dst_box; region.data = NULL; - - if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, - MOVE_INPLACE_HINT | MOVE_READ | MOVE_WRITE)) + if (!sna_drawable_move_region_to_cpu(dst->pDrawable, + &region, hint)) return false; } + region.extents = src_box; + region.data = NULL; + if (!sna_drawable_move_region_to_cpu(&src_pixmap->drawable, + &region, MOVE_READ)) + return false; + ret = prepare_blt_put(sna, tmp, alpha_fixup); } diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_damage.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_damage.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_damage.c 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_damage.c 2013-07-01 14:03:55.000000000 +0000 @@ -1332,18 +1332,59 @@ } #endif +static bool box_overlaps(const BoxRec *a, const BoxRec *b) +{ + return (a->x1 < b->x2 && a->x2 > b->x1 && + a->y1 < b->y2 && a->y2 > b->y1); +} + bool _sna_damage_contains_box__no_reduce(const struct sna_damage *damage, const BoxRec *box) { + struct sna_damage_box *iter; + int ret; + assert(damage && damage->mode != DAMAGE_ALL); - if (damage->mode == DAMAGE_SUBTRACT) + if (!sna_damage_overlaps_box(damage, box)) return false; - if (!sna_damage_overlaps_box(damage, box)) + ret = pixman_region_contains_rectangle(&damage->region, (BoxPtr)box); + if (!damage->dirty) + return ret == PIXMAN_REGION_IN; + + if 
(damage->mode == DAMAGE_ADD) { + if (ret == PIXMAN_REGION_IN) + return true; + + list_for_each_entry(iter, &damage->embedded_box.list, list) { + BoxPtr b; + int n; + + b = (BoxPtr)(iter + 1); + for (n = 0; n < iter->size; n++) { + if (box_contains(&b[n], box)) + return true; + } + } + return false; + } else { + if (ret != PIXMAN_REGION_IN) + return false; + + list_for_each_entry(iter, &damage->embedded_box.list, list) { + BoxPtr b; + int n; + + b = (BoxPtr)(iter + 1); + for (n = 0; n < iter->size; n++) { + if (box_overlaps(&b[n], box)) + return false; + } + } - return pixman_region_contains_rectangle((RegionPtr)&damage->region, - (BoxPtr)box) == PIXMAN_REGION_IN; + return true; + } } static bool __sna_damage_intersect(struct sna_damage *damage, diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_dri.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_dri.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_dri.c 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_dri.c 2013-07-01 14:03:55.000000000 +0000 @@ -2445,11 +2445,10 @@ return false; } - sna->deviceName = drmGetDeviceNameFromFd(sna->kgem.fd); memset(&info, '\0', sizeof(info)); info.fd = sna->kgem.fd; info.driverName = dri_driver_name(sna); - info.deviceName = sna->deviceName; + info.deviceName = intel_get_device_name(sna->scrn); DBG(("%s: loading dri driver '%s' [gen=%d] for device '%s'\n", __FUNCTION__, info.driverName, sna->kgem.gen, info.deviceName)); @@ -2487,5 +2486,4 @@ { DBG(("%s()\n", __FUNCTION__)); DRI2CloseScreen(screen); - drmFree(sna->deviceName); } diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_driver.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_driver.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_driver.c 2013-06-27 14:39:52.000000000 +0000 +++ 
xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_driver.c 2013-07-01 14:03:55.000000000 +0000 @@ -734,6 +734,8 @@ DBG(("%s\n", __FUNCTION__)); + /* XXX Note that we will leak kernel resources if !vtSema */ + xf86_hide_cursors(scrn); sna_uevent_fini(scrn); @@ -749,8 +751,10 @@ sna->front = NULL; } - drmDropMaster(sna->kgem.fd); - scrn->vtSema = FALSE; + if (scrn->vtSema) { + intel_put_master(scrn); + scrn->vtSema = FALSE; + } xf86_cursors_fini(screen); @@ -773,7 +777,6 @@ free(depths); free(screen->visuals); - intel_put_master(xf86ScreenToScrn(screen)); return TRUE; } diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_gradient.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_gradient.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_gradient.c 2013-05-29 15:53:40.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_gradient.c 2013-07-01 14:03:55.000000000 +0000 @@ -34,6 +34,19 @@ #define xFixedToDouble(f) pixman_fixed_to_double(f) +bool +sna_gradient_is_opaque(const PictGradient *gradient) +{ + int n; + + for (n = 0; n < gradient->nstops; n++) { + if (gradient->stops[n].color.alpha < 0xff00) + return false; + } + + return true; +} + static int sna_gradient_sample_width(PictGradient *gradient) { diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_io.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_io.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_io.c 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_io.c 2013-07-01 14:03:55.000000000 +0000 @@ -53,6 +53,52 @@ upload_too_large(sna, width, height)); } +static bool bo_inplace_tiled(struct kgem *kgem, struct kgem_bo *bo, bool write) +{ + if (bo->tiling != I915_TILING_X) + return false; + + return kgem_bo_can_map__cpu(kgem, bo, write); +} + +static bool download_inplace__tiled(struct 
kgem *kgem, struct kgem_bo *bo) +{ + if (!kgem->memcpy_from_tiled_x) + return false; + + return bo_inplace_tiled(kgem, bo, false); +} + +static bool +read_boxes_inplace__tiled(struct kgem *kgem, + struct kgem_bo *bo, int16_t src_dx, int16_t src_dy, + PixmapPtr pixmap, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n) +{ + int bpp = pixmap->drawable.bitsPerPixel; + void *src, *dst = pixmap->devPrivate.ptr; + int src_pitch = bo->pitch; + int dst_pitch = pixmap->devKind; + + assert(bo->tiling == I915_TILING_X); + + src = __kgem_bo_map__cpu(kgem, bo); + if (src == NULL) + return false; + + kgem_bo_sync__cpu_full(kgem, bo, 0); + do { + memcpy_from_tiled_x(kgem, src, dst, bpp, src_pitch, dst_pitch, + box->x1 + src_dx, box->y1 + src_dy, + box->x1 + dst_dx, box->y1 + dst_dy, + box->x2 - box->x1, box->y2 - box->y1); + box++; + } while (--n); + __kgem_bo_unmap__cpu(kgem, bo, src); + + return true; +} + static void read_boxes_inplace(struct kgem *kgem, struct kgem_bo *bo, int16_t src_dx, int16_t src_dy, PixmapPtr pixmap, int16_t dst_dx, int16_t dst_dy, @@ -63,6 +109,12 @@ int src_pitch = bo->pitch; int dst_pitch = pixmap->devKind; + if (download_inplace__tiled(kgem, bo) && + read_boxes_inplace__tiled(kgem, bo, src_dx, src_dy, + pixmap, dst_dx, dst_dy, + box, n)) + return; + DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling)); if (!kgem_bo_can_map(kgem, bo)) @@ -106,7 +158,7 @@ if (unlikely(kgem->wedged)) return true; - if (!kgem_bo_can_map(kgem, bo)) + if (!kgem_bo_can_map(kgem, bo) && !download_inplace__tiled(kgem, bo)) return false; if (FORCE_INPLACE) @@ -115,7 +167,9 @@ if (kgem->can_blt_cpu && kgem->max_cpu_size) return false; - return !__kgem_bo_is_busy(kgem, bo) || bo->tiling == I915_TILING_NONE; + return !__kgem_bo_is_busy(kgem, bo) || + bo->tiling == I915_TILING_NONE || + download_inplace__tiled(kgem, bo); } void sna_read_boxes(struct sna *sna, @@ -480,13 +534,7 @@ if (!kgem->memcpy_to_tiled_x) return false; - if (bo->tiling != I915_TILING_X) - return 
false; - - if (bo->scanout) - return false; - - return bo->domain == DOMAIN_CPU || kgem->has_llc; + return bo_inplace_tiled(kgem, bo, true); } static bool diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_render.c xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_render.c --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_render.c 2013-06-27 14:39:52.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_render.c 2013-07-01 14:03:55.000000000 +0000 @@ -332,7 +332,7 @@ return NULL; } - if (priv->cpu_bo->snoop && priv->source_count > SOURCE_BIAS) { + if (!blt && priv->cpu_bo->snoop && priv->source_count > SOURCE_BIAS) { DBG(("%s: promoting snooped CPU bo due to reuse\n", __FUNCTION__)); return NULL; @@ -1480,8 +1480,13 @@ return -1; } + channel->is_opaque = sna_gradient_is_opaque(picture->pSourcePict); + channel->pict_format = + channel->is_opaque ? PIXMAN_x8r8g8b8 : PIXMAN_a8r8g8b8; + DBG(("%s: gradient is opaque? 
%d, selecting format %08x\n", + __FUNCTION__, channel->is_opaque, channel->pict_format)); assert(channel->card_format == -1); - channel->pict_format = PIXMAN_a8r8g8b8; + channel->bo = kgem_create_buffer_2d(&sna->kgem, w2, h2, 32, KGEM_BUFFER_WRITE_INPLACE, @@ -1492,7 +1497,7 @@ return 0; } - dst = pixman_image_create_bits(PIXMAN_a8r8g8b8, + dst = pixman_image_create_bits(channel->pict_format, w2, h2, ptr, channel->bo->pitch); if (!dst) { kgem_bo_destroy(&sna->kgem, channel->bo); @@ -1911,10 +1916,15 @@ DBG(("%s: dst pitch (%d) fits within render pipeline (%d)\n", __FUNCTION__, op->dst.bo->pitch, sna->render.max_3d_pitch)); - box.x1 = x; - box.x2 = bound(x, width); - box.y1 = y; - box.y2 = bound(y, height); + box.x1 = x + op->dst.x; + box.x2 = bound(box.x1, width); + box.y1 = y + op->dst.y; + box.y2 = bound(box.y1, height); + + if (box.x1 < 0) + box.x1 = 0; + if (box.y1 < 0) + box.y1 = 0; /* Ensure we align to an even tile row */ if (op->dst.bo->tiling) { diff -Nru xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_render.h xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_render.h --- xserver-xorg-video-intel-2.21.10+git20130627.60d716b5/src/sna/sna_render.h 2013-06-07 13:28:26.000000000 +0000 +++ xserver-xorg-video-intel-2.21.11+git20130701.7d916398/src/sna/sna_render.h 2013-07-01 14:03:55.000000000 +0000 @@ -531,6 +531,9 @@ sna_render_get_gradient(struct sna *sna, PictGradient *pattern); +bool +sna_gradient_is_opaque(const PictGradient *gradient); + uint32_t sna_rgba_for_color(uint32_t color, int depth); uint32_t sna_rgba_to_color(uint32_t rgba, uint32_t format); bool sna_get_rgba_from_pixel(uint32_t pixel,