--- linux-rt-2.6.29.5.orig/debian/NOTES +++ linux-rt-2.6.29.5/debian/NOTES @@ -0,0 +1,4 @@ +eSCO patch removed. Replaced upstream with a disable_esco module parm. +airprime: Module gone, use option driver instead +AppArmor: Patch is all there and ported. Ooops when enabled, so default + off (still can be enabled apparmor=1) --- linux-rt-2.6.29.5.orig/debian/rules +++ linux-rt-2.6.29.5/debian/rules @@ -0,0 +1,81 @@ +#!/usr/bin/make -f +# +# debian/rules for Ubuntu linux +# +# Use this however you want, just give credit where credit is due. +# +# Copyright (c) 2007 Ben Collins +# + +# dpkg-buildpackage passes options that are incompatible +# with the kernel build. +unexport CFLAGS +unexport LDFLAGS + +# This is the debhelper compatibility version to use. +export DH_COMPAT=4 +export LC_ALL=C +export SHELL=/bin/bash -e + +# Common variables for all architectures +include debian/rules.d/0-common-vars.mk + +# Pull in some arch specific stuff +include debian/rules.d/$(arch).mk + +# Maintainer targets +include debian/rules.d/1-maintainer.mk + +# Debian Build System targets +binary: binary-indep binary-arch + +build: build-arch build-indep + +clean: unpatch debian/control + dh_testdir + dh_testroot + dh_clean + + # d-i stuff + rm -rf modules kernel-versions package-list + rm -rf debian/d-i-$(arch) + + # normal build junk + rm -rf debian/abi/$(release)-$(revision) + rm -rf $(builddir) + rm -f $(stampdir)/stamp-* + rm -rf debian/linux-* + + # This gets rid of the d-i packages in control + cp -f debian/control.stub debian/control + +# Builds the image, arch headers and debug packages +include debian/rules.d/2-binary-arch.mk + +# Rules for building the udebs (debian-installer) +#include debian/rules.d/5-udebs.mk + +# Builds the source, doc and linux-headers indep packages +include debian/rules.d/3-binary-indep.mk + +# Various checks to be performed on builds +include debian/rules.d/4-checks.mk + +# Misc stuff +debian/control.stub: debian/scripts/control-create \ + debian/control.stub.in \ + debian/changelog \ + $(wildcard debian/control.d/* debian/sub-flavours/*.vars) +# for i in debian/control.stub.in; do + for i in debian/control.stub.in; do \ + new=`echo $$i | sed 's/\.in$$//'`; \ + cat $$i | sed -e 's/PKGVER/$(release)/g' -e 's/ABINUM/$(abinum)/g' > \ + $$new; \ + done + flavours="$(wildcard debian/control.d/vars.* debian/sub-flavours/*.vars)";\ + for i in $$flavours; do \ + $(SHELL) debian/scripts/control-create $$i | \ + sed -e 's/PKGVER/$(release)/g' -e 's/ABINUM/$(abinum)/g' >> \ + debian/control.stub; \ + done + cp debian/control.stub debian/control --- linux-rt-2.6.29.5.orig/debian/control.stub +++ linux-rt-2.6.29.5/debian/control.stub @@ -0,0 +1,72 @@ +Source: linux-rt +Section: devel +Priority: optional +Maintainer: Alessio Igor Bogani +Standards-Version: 3.6.1 +Build-Depends: debhelper (>= 3), module-init-tools, kernel-wedge (>= 2.24ubuntu1), makedumpfile [!armel], quilt +Build-Depends-Indep: xmlto, docbook-utils, gs, transfig, bzip2, sharutils + +Package: linux-rt-headers-2.6.29.5-1 +Architecture: all +Section: devel +Priority: optional +Depends: coreutils | fileutils (>= 4.0) +Provides: linux-rt-headers, linux-rt-headers-2.6 +Description: Header files related to Linux kernel version 2.6.29.5 + This package provides kernel header files for version 2.6.29.5, for sites + that want the latest kernel headers. 
Please read + /usr/share/doc/linux-headers-2.6.29.5-1/debian.README.gz for details + +Package: linux-image-2.6.29.5-1-rt +Architecture: i386 amd64 +Section: base +Priority: optional +Pre-Depends: dpkg (>= 1.10.24) +Provides: linux-image, linux-image-2.6, fuse-module, kvm-api-4, redhat-cluster-modules, ivtv-modules, ndiswrapper-modules-1.9 +Depends: initramfs-tools (>= 0.36ubuntu6), coreutils | fileutils (>= 4.0), module-init-tools (>= 3.3-pre11-4ubuntu3) +Conflicts: hotplug (<< 0.0.20040105-1) +Recommends: grub | lilo (>= 19.1) +Suggests: fdutils, linux-doc-2.6.29.5 | linux-source-2.6.29.5 +Description: Linux kernel image for version 2.6.29.5 on Ingo Molnar's full real time preemption patch (2.6.28-rt) + This package contains the Linux kernel image for version 2.6.29.5 on + Ingo Molnar's full real time preemption patch (2.6.28-rt). + . + Also includes the corresponding System.map file, the modules built by the + packager, and scripts that try to ensure that the system is not left in an + unbootable state after an update. + . + Supports Generic processors. + . + Geared toward real time systems. + . + You likely do not want to install this package directly. Instead, install + the linux-rt meta-package, which will ensure that upgrades work + correctly, and that supporting packages are also installed. + +Package: linux-headers-2.6.29.5-1-rt +Architecture: i386 amd64 +Section: devel +Priority: optional +Depends: coreutils | fileutils (>= 4.0), linux-rt-headers-2.6.29.5-1, ${shlibs:Depends} +Provides: linux-headers, linux-headers-2.6 +Description: Linux kernel headers for version 2.6.29.5 on Ingo Molnar's full real time preemption patch (2.6.28-rt) + This package provides kernel header files for version 2.6.29.5 on + Ingo Molnar's full real time preemption patch (2.6.28-rt). + . + This is for sites that want the latest kernel headers. Please read + /usr/share/doc/linux-headers-2.6.29.5-1/debian.README.gz for details. + +Package: linux-image-debug-2.6.29.5-1-rt +Architecture: i386 amd64 +Section: devel +Priority: optional +Provides: linux-debug +Description: Linux kernel debug image for version 2.6.29.5 on Ingo Molnar's full real time preemption patch (2.6.28-rt) + This package provides a kernel debug image for version 2.6.29.5 on + Ingo Molnar's full real time preemption patch (2.6.28-rt). + . + This is for sites that wish to debug the kernel. + . + The kernel image contained in this package is NOT meant to be booted. It + is uncompressed and unstripped. This package also includes the + unstripped modules. --- linux-rt-2.6.29.5.orig/debian/copyright +++ linux-rt-2.6.29.5/debian/copyright @@ -0,0 +1,30 @@ +This is the Ubuntu prepackaged version of the Linux kernel. +Linux was written by Linus Torvalds +and others. + +This package was put together by the Ubuntu Kernel Team, from +sources retrieved from upstream linux git. +The sources may be found at most Linux ftp sites, including +ftp://ftp.kernel.org/pub/linux/kernel/ + +This package is currently maintained by the +Ubuntu Kernel Team + +Linux is copyrighted by Linus Torvalds and others. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 dated June, 1991. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + 02111-1307, USA. + +On Ubuntu Linux systems, the complete text of the GNU General +Public License v2 can be found in `/usr/share/common-licenses/GPL-2'. --- linux-rt-2.6.29.5.orig/debian/compat +++ linux-rt-2.6.29.5/debian/compat @@ -0,0 +1 @@ +4 --- linux-rt-2.6.29.5.orig/debian/changelog.historical +++ linux-rt-2.6.29.5/debian/changelog.historical @@ -0,0 +1,5745 @@ +linux (2.6.24-19.33) UNRELEASED; urgency=low + + CHANGELOG: Do not edit directly. Autogenerated at release. + CHANGELOG: Use the printchanges target to see the curent changes. + CHANGELOG: Use the insertchanges target to create the final log. + + -- Tim Gardner Sun, 04 May 2008 20:22:21 -0600 + +linux (2.6.24-18.32) hardy-security; urgency=low + + * CVE-2007-6694: [POWERPC] CHRP: Fix possible NULL pointer dereference + * fix SMP ordering hole in fcntl_setlk() (CVE-2008-1669) + * Fix dnotify/close race (CVE-2008-1375) + * tehuti: check register size (CVE-2008-1675) + * tehuti: move ioctl perm check closer to function start (CVE-2008-1675) + + -- Ben Collins Mon, 19 May 2008 16:50:11 +0000 + +linux (2.6.24-17.31) hardy; urgency=low + + [Alessio Igor Bogani] + + * rt: Fix mutex in the toshiba_acpi driver + * rt: Updated configuration files + + [Ben Collins] + + * build: Fix revert detection in git-ubuntu-log + * SAUCE: Re-add eeprom_bad_csum_allow module-param + - LP: #60388 + + [Stefan Bader] + + * Pulled updates to openvz custom build. Fixes openvz 'refuses to boot' problem. + - LP: #210672 + * sched: retain vruntime, fix delayed key events when CONFIG_FAIR_GROUP_SCHED. + - LP: #218516 + * UBUNTU: SAUCE: Add blacklist support to fix Belkin bluetooth dongle. + - LP: #140511 + + [Tim Gardner] + + * Enable CONFIG_ISCSI_TCP for -virtual + - LP: #218215 + * build: Add fancontrol modules to powerpc64-smp debian installer + * Fix Xen Dom0/DomU bridging + - LP: #218126 + * TSC Clocksource can cause hangs and time jumps + - LP: #221351 + * Kernel should use CONFIG_FAIR_CGROUP_SCHED. Fixes high load issues + with pulseaudio. + - LP: #188226 + + [Upstream Kernel Changes] + + * KVM: MMU: prepopulate guest pages after write-protecting + - LP: #221032 + + -- Tim Gardner Fri, 11 Apr 2008 07:59:10 -0600 + +linux (2.6.24-16.30) hardy; urgency=low + + * Fix amd64/i386 ABI and module check FTBS by creating an ignore + and ignore.modules in the ABI directory. + + -- Tim Gardner Wed, 09 Apr 2008 21:58:25 -0600 + +linux (2.6.24-16.29) hardy; urgency=low + + [Stephan Bader] + + * UBUNTU: SAUCE: mmc: Increase power_up deleay to fix TI readers + + [Alessio Igor Bogani] + + * rt: Updated configuration files + + [Chuck Short] + + * Xen updates for vitrio changes. + + [Tim Gardner] + + * openvz updates for vitrio changes. + + -- Tim Gardner Tue, 08 Apr 2008 21:48:16 -0600 + +linux (2.6.24-16.28) hardy; urgency=low + + [Tim Gardner] + + * Revert "UBUNTU: x86: tsc prevent time going backwards" + + [Kees Cook] + + * AppArmor: implement mmap_min_addr check as done in mainline. + + [Soren Hansen] + + * Bring our virtio code up to date with 2.6.25-rc7 + + [Upstream Kernel Changes] + + * Ubuntu: Revert all our virtio changes + * lguest: Reboot support + * lguest: adapt launcher to per-cpuness + * virtio: Implement skb_partial_csum_set, for setting partial csums on + untrusted packets. + * virtio: simplify config mechanism. 
+ * virtio: explicit enable_cb/disable_cb rather than callback return. + * virtio: configuration change callback + * virtio: Fix vring_init/vring_size to take unsigned long + * virtio: clarify NO_NOTIFY flag usage + * virtio: remove unused id field from struct virtio_blk_outhdr + * virtio: Net header needs hdr_len + * virtio: Tweak virtio_net defines + * virtio: populate network rings in the probe routine, not open + * virtio: reset function + * virtio: handle interrupts after callbacks turned off + * virtio: Use the sg_phys convenience function. + * virtio: Allow virtio to be modular and used by modules + * virtnet: remove double ether_setup + * virtio: flush buffers on open + * virtio: free transmit skbs when notified, not on next xmit. + * virtio_net: parametrize the napi_weight for virtio receive queue. + * virtio_blk: provide getgeo + * virtio_blk: Dont waste major numbers + * virtio_blk: implement naming for vda-vdz,vdaa-vdzz,vdaaa-vdzzz + * virtio: PCI device + * virtio: Use PCI revision field to indicate virtio PCI ABI version + * virtio: balloon driver + * virtio net: fix oops on interface-up + * virtio: add missing #include + * virtio: fix race in enable_cb + * virtio: handle > 2 billion page balloon targets + * virtio_net: Fix oops on early interrupts - introduced by virtio reset + code + * lguest: Do not append space to guests kernel command line + * virtio: Use spin_lock_irqsave/restore for virtio-pci + * virtio: Fix sysfs bits to have proper block symlink + * virtio: Enable netpoll interface for netconsole logging + * virtio_pci: unregister virtio device at device remove + * lguest: Add puppies which where previously missing. + * lguest: lguest.txt documentation fix + * lguest: Don't need comment terminator before disk section. + * virtio_pci iomem annotations + * virtio_net: remove overzealous printk + * virtio: remove overzealous BUG_ON. + + -- Tim Gardner Tue, 08 Apr 2008 11:53:49 -0600 + +linux (2.6.24-15.27) hardy; urgency=low + + [Alan Stern] + + * usb-storage: don't access beyond the end of the sg buffer + - LP: #204922 + + [Mario Limonciello] + + * Enable Reset and SCO workaround on Dell 410 BT adapter + + [Tim Gardner] + + * Enable CONFIG_E1000 in the i386 virtual image. + - LP: #205646 + + [Thomas Gleixner] + + * x86: tsc prevent time going backwards + + [Matthew Garrett] + + * Fix framebuffer fonts on non-x86 platforms + + -- Tim Gardner Fri, 04 Apr 2008 08:14:49 -0600 + +linux (2.6.24-15.26) hardy; urgency=low + + [Colin Ian King] + + * airprime.c supports more devices + - LP: #208250 + + [Kees Cook] + + * AppArmor: get latest batch of upstream fixes into Hardy (svn 1160) + + [Stefan Bader] + + * ACPI: fix boot oops regression in kernel + - LP: #207014 + + [Tim Gardner] + + * Enable CGROUPS for non x86/x86_64 arches, all flavours. + - LP: #188226 + + -- Tim Gardner Thu, 03 Apr 2008 07:00:29 -0600 + +linux (2.6.24-14.25) hardy; urgency=low + + [Mario Limonciello] + + * Resolve sky2 race condition leading to failed suspends + - LP: #210877 + + [Tim Gardner] + + * Copy drivers/media internal header files into header + package for external LUM compilation. This paves the + way for LP #202065. 
+ + -- Tim Gardner Wed, 02 Apr 2008 08:28:32 -0600 + +linux (2.6.24-14.24) hardy; urgency=low + + [Amit Kucheria] + + * LPIA: Update from moblin + * LPIA: Fix reboot problem after S3/S4 + * LPIA: Integrate latest Dabney thermal patches + * LPIA: Change-umd_dbg-debug-level-to-KERN_INFO + * LPIA: Compile modules into kernel to save on boot time + * LPIA: lots of Dabney CONFIG options dissapeared + * LPIA: Purge nonexistent config options + + [Jay Chetty] + + * UBUNTU:USBC:Integrated USBC 2.0.0.32L.0009 + + [Misha Zhilin] + + * USB: ehci: handle large bulk URBs correctly (again) + - LP: #204857 + + [Tim Gardner] + + * frame buffer regression - screen blank except for blinking cursor after + fbcon vtswitch + - LP: #201591 + * Blacklist Bluetooth Dell Wireless 370 for SCO MTU + - LP: #209715 + * Set CONFIG_FAIR_CGROUP_SCHED for server flavours. + - LP: #188226 + * Add DMI IO_DELAY support. + - LP: #200057 + + -- Tim Gardner Mon, 31 Mar 2008 11:19:49 -0600 + +linux (2.6.24-13.23) hardy; urgency=low + + [Alessio Igor Bogani] + + * rt: Updated configuration files + + [Ben Collins] + + * openvz: New custom flavour for OpenVZ + * config: Disable IDE AMD driver in favor of PATA version + - LP: #181561 + * config: Disable IDE VIA driver in favor of PATA version + - LP: #181561 + * drivers/video: Restore gutsy backlight dimming behavior + - LP: #205261 + * build/config: Enable CONFIG_CIFS_WEAK_PW_HASH + - LP: #202445 + + [Colin Ian King] + + * SAUCE: Add support for version 4 of Chelsio NICs in cxgb3 driver + - LP: #201893 + + [Kees Cook] + + * AppArmor: re-add missing "type" field in syslog reports. + - LP: #202888 + * kvm: reset TSS on x86_64 to avoid ioperm bitmap corruption + - LP: #144900 + + [Stefan Bader] + + * USB: EHCI: add separate IAA watchdog timer + - LP: #198619 + * SAUCE: Always use SCO protocol (disable eSCO support) + - LP: #39414 + * PM: Introduce PM_EVENT_HIBERNATE callback state + - LP: #201086 + + [Tim Gardner] + + * Disable DRM suspend/resume on pre-915 Intel chips + - LP: #207496 + * frame buffer regression - screen blank except for blinking cursor after fbcon + vtswitch + - LP: #201591 + + -- Tim Gardner Wed, 19 Mar 2008 10:05:05 -0400 + +linux (2.6.24-12.22) hardy; urgency=low + + [Ben Collins] + + * custom/rt: Disable toshiba_acpi, since it isn't compatible + + -- Ben Collins Wed, 12 Mar 2008 14:38:59 -0400 + +linux (2.6.24-12.21) hardy; urgency=low + + [Ben Collins] + + * build: Fix vesafb module inclusion into initrd subdir + - LP: #129910 + * net/bluetooth: POWERBOOK => APPLE, fix for apple keyboard patch + * custom/xen: Remove asix portion of xen patch, breaks driver + - LP: #199296 + + [Colin Ian King] + + * SAUCE: fix Udma not fully available in Acer 1694 Wlmi + - LP: #187121 + * SAUCE: Update toshiba_acpi.c to version 0.19a + - LP: #77026 + + [Stefan Bader] + + * x86: Clear DF before calling signal handler + * Enable FN key on Apple aluminum bluetooth keyboard + - LP: #162083 + + -- Ben Collins Tue, 11 Mar 2008 13:20:49 -0400 + +linux (2.6.24-12.20) hardy; urgency=low + + [Ben Collins] + + * Enable CONFIG_SOUND at least, so alsa build in lum works + - LP: #200338 + + -- Ben Collins Mon, 10 Mar 2008 08:15:00 -0400 + +linux (2.6.24-12.19) hardy; urgency=low + + * Re-upload of -12.18 to fix build failures + * Fixup binary-custom configs + * Fixup xen patch to cope with kvm changes + + [Amit Kucheria] + + * Move Marvell 8686 and 8688 to LUM + * Poulsbo: Sync patches with moblin/ume-hardy tree + * Break if a patch fails to apply + * SAUCE: implement smarter atime updates 
support + - LP: #199427 + * Enable USB_PERSIST to allow devices with /root on usb to work with + suspend + * Enable USB_PERSIST across the board + + [Ben Collins] + + * build/config: Really fix ide on smp ppc configs + * build/configs: Enable relatime config option for all flavors + * build/abi: Ignore ide-core module for ppc, moved to built-in + + [Colin Ian King] + + * fix reversed logic for bbuild check leads to -j1 default + - LP: #197040 + * Enable IDE_PMAC for powerpc-smp + - LP: #196686 + * Disable CONFIG_USB_OHCI_HCD_SSB + - LP: #182716 + * SAUCE: fix arcmsr + archttp64 calls dma_free_coherent() with irqs + disabled - dmesg filled with warnings + - LP: #194207 + + [Jorge Boncompte [DTI2]] + + * Fix Messed multicast lists after dev_mc_sync/unsync + - LP: #193468 + + [Stefan Bader] + + * Add support for Apple Aluminium keyboards. + - LP: #162083 + * SAUCE: Restore VT fonts on switch + + [Upstream Kernel Changes] + + * [NET]: Messed multicast lists after dev_mc_sync/unsync + * KVM: x86 emulator: add support for group decoding + * KVM: x86 emulator: group decoding for group 1A + * KVM: x86 emulator: Group decoding for group 3 + * KVM: x86 emulator: Group decoding for groups 4 and 5 + * KVM: x86 emulator: add group 7 decoding + * KVM: constify function pointer tables + * KVM: Only x86 has pio + * KVM: x86 emulator: group decoding for group 1 instructions + * KVM: MMU: Decouple mmio from shadow page tables + * KVM: Limit vcpu mmap size to one page on non-x86 + * KVM: VMX: Enable Virtual Processor Identification (VPID) + * KVM: Use CONFIG_PREEMPT_NOTIFIERS around struct preempt_notifier + * KVM: Disable pagefaults during copy_from_user_inatomic() + * KVM: make EFER_RESERVED_BITS configurable for architecture code + * KVM: align valid EFER bits with the features of the host system + * KVM: allow access to EFER in 32bit KVM + * kvm: i386 fix + * KVM: export information about NPT to generic x86 code + * KVM: MMU: make the __nonpaging_map function generic + * KVM: export the load_pdptrs() function to modules + * KVM: MMU: add TDP support to the KVM MMU + * KVM: x86 emulator: Fix 'jmp abs' + * KVM: x86 emulator: fix group 5 decoding + * KVM: Fix kvm_arch_vcpu_ioctl_set_sregs so that set_cr0 works properly + * KVM: Make the supported cpuid list a host property rather than a vm + property + * KVM: emulate access to MSR_IA32_MCG_CTL + * KVM: remove the usage of the mmap_sem for the protection of the memory + slots. 
+ * KVM: SVM: allocate the MSR permission map per VCPU + * KVM: make MMU_DEBUG compile again + * KVM: paravirtualized clocksource: host part + * KVM: Add missing semicolon + * KVM: x86 emulator: add ad_mask static inline + * KVM: x86 emulator: make register_address, address_mask static inlines + * KVM: x86 emulator: make register_address_increment and JMP_REL static + inlines + * KVM: Add API to retrieve the number of supported vcpus per vm + * KVM: Increase vcpu count to 16 + * KVM: Add API for determining the number of supported memory slots + * KVM: Increase the number of user memory slots per vm + * KVM: Add stat counter for hypercalls + * KVM: x86 emulator: fix sparse warnings in x86_emulate.c + * KVM: sparse fixes for kvm/x86.c + * KVM: Implement dummy values for MSR_PERF_STATUS + * KVM: MMU: ignore zapped root pagetables + * KVM: call write_guest_time as soon as we register the paravirt clock + * KVM: MMU: large page support + * KVM: Prefix control register accessors with kvm_ to avoid namespace + pollution + * KVM: Avoid infinite-frequency local apic timer + * KVM: Route irq 0 to vcpu 0 exclusively + * KVM: SVM: add support for Nested Paging + * KVM: SVM: enable LBR virtualization + * KVM: SVM: make iopm_base static + * KVM: SVM: let init_vmcb() take struct vcpu_svm as parameter + * KVM: VMX: fix typo in VMX header define + * KVM: SVM: fix Windows XP 64 bit installation crash + * KVM: VMX: Fix invalid opcode of VPID + * KVM: VMX: Handle machines without EFER + * KVM: move alloc_apic_access_page() outside of non-preemptable region + * KVM: VMX: unifdef the EFER specific code + * KVM: SVM: move feature detection to hardware setup code + * KVM: Export include/linux/kvm.h only if $ARCH actually supports KVM + * dlm: fix rcom_names message to self + * virtio: Net header needs hdr_len + + -- Tim Gardner Mon, 03 Mar 2008 07:07:16 -0700 + +linux (2.6.24-11.17) hardy; urgency=low + + [Alan Cox] + + * Pull in fixes for pata_it821x. + - LP: #106931 + + [Alessio Igor Bogani] + + * rt: Synchronized with upstream (2.6.24.3-rt3) + * rt: Updated configuration files + + [Amit Kucheria] + + * Add AGP support for Radeon Mobility 9000 chipset + - LP: #178634 + * Bluetooth: SCO flow control to enable bluetooth headsets + + [Ben Collins] + + * binary: Include vesafs in initrd subdir, should fix vga= usage + + [Colin Ian King] + + * AMD SB700 south bridge support patches + - LP: #195354 + * BCM4311 Revision 2 fix + - LP: #184600 + + [Mauro Carvalho Chehab] + + * V4L/DVB (6753): Fix vivi to support non-zero minor node + + [Tim Gardner] + + * Merged 2.6.24.3 + * Add atl1 to d-i bits. 
+ - LP: #159561 + * SAUCE: Add xpad support for RedOctane Guitar Hero + - LP: #196745 + + [Upstream Kernel Changes] + + * DVB: cx23885: add missing subsystem ID for Hauppauge HVR1800 Retail + * slab: fix bootstrap on memoryless node + * vm audit: add VM_DONTEXPAND to mmap for drivers that need it + (CVE-2008-0007) + * USB: keyspan: Fix oops + * usb gadget: fix fsl_usb2_udc potential OOPS + * USB: CP2101 New Device IDs + * USB: add support for 4348:5523 WinChipHead USB->RS 232 adapter + * USB: Sierra - Add support for Aircard 881U + * USB: Adding YC Cable USB Serial device to pl2303 + * USB: sierra driver - add devices + * USB: ftdi_sio - enabling multiple ELV devices, adding EM1010PC + * USB: ftdi-sio: Patch to add vendor/device id for ATK_16IC CCD + * USB: sierra: add support for Onda H600/Zte MF330 datacard to USB Driver + for Sierra Wireless + * USB: remove duplicate entry in Option driver and Pl2303 driver for + Huawei modem + * USB: pl2303: add support for RATOC REX-USB60F + * USB: ftdi driver - add support for optical probe device + * USB: use GFP_NOIO in reset path + * USB: Variant of the Dell Wireless 5520 driver + * USB: storage: Add unusual_dev for HP r707 + * USB: fix usbtest halt check on big endian systems + * USB: handle idVendor of 0x0000 + * USB: Fix usb_serial_driver structure for Kobil cardreader driver. + * forcedeth: mac address mcp77/79 + * lockdep: annotate epoll + * sys_remap_file_pages: fix ->vm_file accounting + * PCI: Fix fakephp deadlock + * ACPI: update ACPI blacklist + * x86: restore correct module name for apm + * sky2: restore multicast addresses after recovery + * sky2: fix for WOL on some devices + * b43: Fix suspend/resume + * b43: Drop packets we are not able to encrypt + * b43: Fix dma-slot resource leakage + * b43legacy: fix PIO crash + * b43legacy: fix suspend/resume + * b43legacy: drop packets we are not able to encrypt + * b43legacy: fix DMA slot resource leakage + * selinux: fix labeling of /proc/net inodes + * b43: Reject new firmware early + * sched: let +nice tasks have smaller impact + * sched: fix high wake up latencies with FAIR_USER_SCHED + * fix writev regression: pan hanging unkillable and un-straceable + * Driver core: Revert "Fix Firmware class name collision" + * drm: the drm really should call pci_set_master.. + * splice: missing user pointer access verification (CVE-2008-0009/10) + * Linux 2.6.24.1 + * splice: fix user pointer access in get_iovec_page_array() + * Linux 2.6.24.2 + * ACPI: video: Rationalise ACPI backlight implementation + * ACPI: video: Ignore ACPI video devices that aren't present in hardware + * SPARC/SPARC64: Fix usage of .section .sched.text in assembler code. + * NETFILTER: nf_conntrack_tcp: conntrack reopening fix + * NFS: Fix a potential file corruption issue when writing + * inotify: fix check for one-shot watches before destroying them + * hugetlb: add locking for overcommit sysctl + * XFS: Fix oops in xfs_file_readdir() + * Fix dl2k constants + * SCSI: sd: handle bad lba in sense information + * TCP: Fix a bug in strategy_allowed_congestion_control + * TC: oops in em_meta + * SELinux: Fix double free in selinux_netlbl_sock_setsid() + * PKT_SCHED: ematch: oops from uninitialized variable (resend) + * NET: Add if_addrlabel.h to sanitized headers. + * IPV4: fib_trie: apply fixes from fib_hash + * IPV4: fib: fix route replacement, fib_info is shared + * IPCOMP: Fix reception of incompressible packets + * IPCOMP: Fetch nexthdr before ipch is destroyed + * INET_DIAG: Fix inet_diag_lock_handler error path. 
+ * INET: Prevent out-of-sync truesize on ip_fragment slow path + * BLUETOOTH: Add conn add/del workqueues to avoid connection fail. + * AUDIT: Increase skb->truesize in audit_expand + * Be more robust about bad arguments in get_user_pages() + * Disable G5 NAP mode during SMU commands on U3 + * hrtimer: fix *rmtp handling in hrtimer_nanosleep() + * hrtimer: fix *rmtp/restarts handling in compat_sys_nanosleep() + * SLUB: Deal with annoying gcc warning on kfree() + * hrtimer: check relative timeouts for overflow + * hrtimer: catch expired CLOCK_REALTIME timers early + * genirq: do not leave interupts enabled on free_irq + * S390: Fix futex_atomic_cmpxchg_std inline assembly. + * USB: fix pm counter leak in usblp + * SCSI: gdth: scan for scsi devices + * PCMCIA: Fix station address detection in smc + * POWERPC: Revert chrp_pci_fixup_vt8231_ata devinit to fix libata on + pegasos + * bonding: fix NULL pointer deref in startup processing + * x86_64: CPA, fix cache attribute inconsistency bug + * Linux 2.6.24.3 + + -- Tim Gardner Mon, 25 Feb 2008 12:28:13 -0700 + +linux (2.6.24-10.16) hardy; urgency=low + + [Alessio Igor Bogani] + + * rt: Synchronized with upstream (2.6.24.2-rt2) + * rt: Updated configuration files + + [Eric Piel] + + * SAUCE: ACPI: Allow custom DSDT tables to be loaded from initramfs + Amit Kucheria consolidated the DSDT patch with another fix that + ifdefs symbols required when BLK_DEV_INITR is disabled. + + [Stefan Bader] + + * Add Optiarc DVD drive to audio quirks list. + - LP: #186664 + * Update drm and i915 drm driver to fix suspend issues. + - LP: #189260 + + [Tim Gardner] + + * Fix FTBS without BLK_DEV_INITRD + - LP: #193507 + * 64 bit CPA cache attribute bug + - LP: #193736 + * Implemented default EDD control + + [Upstream Kernel Changes] + + * bonding: fix NULL pointer deref in startup processing + * dlm: bind connections from known local address when using TCP + * dlm: proper prototypes + * dlm: don't print common non-errors + * dlm: use dlm prefix on alloc and free functions + * dlm: close othercons + * dlm: align midcomms message buffer + * dlm: swap bytes for rcom lock reply + * dlm: use fixed errno values in messages + * dlm: clear ast_type when removing from astqueue + * dlm: recover locks waiting for overlap replies + * dlm: another call to confirm_master in receive_request_reply + * dlm: reject messages from non-members + * dlm: validate messages before processing + * dlm: reject normal unlock when lock is waiting for lookup + * dlm: limit dir lookup loop + * dlm: fix possible use-after-free + * dlm: change error message to debug + * dlm: keep cached master rsbs during recovery + * dlm: Sanity check namelen before copying it + * dlm: clean ups + * dlm: static initialization improvements + * dlm: use proper C for dlm/requestqueue stuff (and fix alignment bug) + * dlm: dlm_process_incoming_buffer() fixes + * dlm: do not byteswap rcom_lock + * dlm: do not byteswap rcom_config + * dlm: use proper type for ->ls_recover_buf + * dlm: missing length check in check_config() + * dlm: validate data in dlm_recover_directory() + * dlm: verify that places expecting rcom_lock have packet long enough + * dlm: receive_rcom_lock_args() overflow check + * dlm: make find_rsb() fail gracefully when namelen is too large + * dlm: fix overflows when copying from ->m_extra to lvb + * dlm: fix dlm_dir_lookup() handling of too long names + * dlm: dlm/user.c input validation fixes + * dlm: proper types for asts and basts + * dlm: eliminate astparam type casting + * dlm: add __init and 
__exit marks to init and exit functions + * virtio: Use PCI revision field to indicate virtio PCI ABI version + + -- Tim Gardner Tue, 19 Feb 2008 09:57:18 -0700 + +linux (2.6.24-9.15) hardy; urgency=low + + [Alessio Igor Bogani] + + * rt: Fix FTBS + * rt: Updated configuration files + + [Tim Gardner] + + * SAUCE: make /dev/kmem a config option + * SAUCE: x86: introduce /dev/mem restrictions with a config option + * Fixed CGROUP FTBS caused by AppArmor patch. + * Enabled CGROUP and CPUSETS for server flavor. + - LP: #182434 + + [Colin King] + + * Turn on /proc/acpi/alarm for x86_64 (amd64) + - LP: #186297 + + [Upstream Kernel Changes] + + * Ubuntu: LatencyTOP infrastructure patch + + -- Tim Gardner Thu, 14 Feb 2008 13:34:55 -0700 + +linux (2.6.24-8.14) hardy; urgency=low + + [cking] + + * Support Novatel U727 EVDO modem: Add pid and vid to + drivers/usb/serial/airprime.c + - LP: #150996 + * Enable speedstep for sonoma processors. + - LP: #132271 + + [Stefan Bader] + + * SAUCE: Export dm_disk function of device-mapper + + -- Tim Gardner Wed, 13 Feb 2008 21:47:18 -0700 + +linux (2.6.24-8.13) hardy; urgency=low + + [Soren Hansen] + + * Add missing iscsi modules to kernel udebs + + [Stefan Bader] + + * Lower message level for PCI memory and I/O allocation. + + [Tim Gardner] + + * Enabled IP_ADVANCED_ROUTER and IP_MULTIPLE_TABLES in sparc, hppa + - LP: #189560 + * Compile RealTek 8139 using PIO method. + - LP: #90271 + * Add WD WD800ADFS NCQ horkage quirk support. + - LP: #147858 + + [Upstream Kernel Changes] + + * Introduce WEXT scan capabilities + * DVB: cx23885: add missing subsystem ID for Hauppauge HVR1800 Retail + * slab: fix bootstrap on memoryless node + * vm audit: add VM_DONTEXPAND to mmap for drivers that need it + (CVE-2008-0007) + * USB: keyspan: Fix oops + * usb gadget: fix fsl_usb2_udc potential OOPS + * USB: CP2101 New Device IDs + * USB: add support for 4348:5523 WinChipHead USB->RS 232 adapter + * USB: Sierra - Add support for Aircard 881U + * USB: Adding YC Cable USB Serial device to pl2303 + * USB: sierra driver - add devices + * USB: ftdi_sio - enabling multiple ELV devices, adding EM1010PC + * USB: ftdi-sio: Patch to add vendor/device id for ATK_16IC CCD + * USB: sierra: add support for Onda H600/Zte MF330 datacard to USB Driver + for Sierra Wireless + * USB: remove duplicate entry in Option driver and Pl2303 driver for + Huawei modem + * USB: pl2303: add support for RATOC REX-USB60F + * USB: ftdi driver - add support for optical probe device + * USB: use GFP_NOIO in reset path + * USB: Variant of the Dell Wireless 5520 driver + * USB: storage: Add unusual_dev for HP r707 + * USB: fix usbtest halt check on big endian systems + * USB: handle idVendor of 0x0000 + * forcedeth: mac address mcp77/79 + * lockdep: annotate epoll + * sys_remap_file_pages: fix ->vm_file accounting + * PCI: Fix fakephp deadlock + * ACPI: update ACPI blacklist + * x86: restore correct module name for apm + * sky2: restore multicast addresses after recovery + * sky2: fix for WOL on some devices + * b43: Fix suspend/resume + * b43: Drop packets we are not able to encrypt + * b43: Fix dma-slot resource leakage + * b43legacy: fix PIO crash + * b43legacy: fix suspend/resume + * b43legacy: drop packets we are not able to encrypt + * b43legacy: fix DMA slot resource leakage + * selinux: fix labeling of /proc/net inodes + * b43: Reject new firmware early + * sched: let +nice tasks have smaller impact + * sched: fix high wake up latencies with FAIR_USER_SCHED + * fix writev regression: pan hanging unkillable 
and un-straceable + * Driver core: Revert "Fix Firmware class name collision" + * drm: the drm really should call pci_set_master.. + * splice: missing user pointer access verification (CVE-2008-0009/10) + * Linux 2.6.24.1 + * splice: fix user pointer access in get_iovec_page_array() + * Linux 2.6.24.2 + + -- Tim Gardner Thu, 07 Feb 2008 06:50:13 -0700 + +linux (2.6.24-7.12) hardy; urgency=low + + [Jay Chetty] + + * Added patch to fix legacy USB interrupt issue + * Enabled Poulsbo PATA udma5 support + * Add touchscreen doubleclick workaround + + [Amit Kucheria] + + * Add AGP support for Radeon Mobility 9000 chipset + - LP: #178634 + + [Soren Hansen] + + * Add virtio modules to the relevant udebs + * Add missing "?" for virtio modules in storage-core-modules + + [Stefan Bader] + + * Added vendor id for Dell 5720 broadband modem + + -- Jay Chetty Wed, 06 Feb 2008 14:13:41 -0800 + +linux (2.6.24-7.11) hardy; urgency=low + + [Jay Chetty] + + * poulsbo: Add a 100ms delay for SiB workaround + + [Tim Gardner] + + * -6.10 should have been an ABI bump, but due to incomplete build testing + went undetected. + + -- Tim Gardner Mon, 04 Feb 2008 19:13:52 -0700 + +linux (2.6.24-6.10) hardy; urgency=low + + [Alessio Igor Bogani] + + * rt: Synced with upstream, removed old kvm related patches and updated + configurations files. + + [Chuck Short] + + * SAUCE: Enable Xen + + [Soren Hansen] + + * Update kvm driver to kvm-60. + * Added CONFIG_ARCH_SUPPORTS_KVM=y for lpia, i386, and amd64 + * Add rtl8139 driver to -virtual flavour + + [Stefan Bader] + + * Fix usb_serial_driver structure for Kobil cardreader driver. + - LP: #183109 + * Lower warning level of pci resource allocation messages. + - LP: #159241 + + [Tim Gardner] + + * Enabled CONFIG_BLK_DEV_IDE_PMAC + - LP: #185862 + * Add virtio config options to lpiacompat. + * SAUCE: Export symbols for aufs (in lum). + * Enabled Xen + + [Upstream Kernel Changes] + + * KVM: mmu: add missing dirty page tracking cases + * KVM: Move virtualization deactivation from CPU_DEAD state to + CPU_DOWN_PREPARE + * KVM: Cosmetics + * KVM: vmx: hack set_cr0_no_modeswitch() to actually do modeswitch + * KVM: Use ARRAY_SIZE macro instead of manual calculation. 
+ * KVM: Use page_private()/set_page_private() apis + * KVM: add MSR based hypercall API + * KVM: Add host hypercall support for vmx + * KVM: Add hypercall host support for svm + * KVM: Wire up hypercall handlers to a central arch-independent location + * KVM: svm: init cr0 with the wp bit set + * KVM: SVM: intercept SMI to handle it at host level + * KVM: More 0 -> NULL conversions + * kvm, dirty pages log: adding some calls to mark_page_dirty() + * KVM: Add internal filesystem for generating inodes + * KVM: Create an inode per virtual machine + * KVM: Rename some kvm_dev_ioctl_*() functions to kvm_vm_ioctl_*() + * KVM: Move kvm_vm_ioctl_create_vcpu() around + * KVM: Per-vcpu inodes + * KVM: Bump API version + * .gitignore: ignore emacs backup files (*~) + * kvm: dirty pages log: fix bitmap size/access calculation + * kvm: move do_remove_write_access() up + * kvm: dirty page logging: remove write access permissions when + dirty-page-logging is enabled + * KVM: Add missing calls to mark_page_dirty() + * KVM: Fix dirty page log bitmap size/access calculation + * kvm: move do_remove_write_access() up + * KVM: Remove write access permissions when dirty-page-logging is enabled + * KVM: Fix bogus failure in kvm.ko module initialization + * KVM: Move kvmfs magic number to + * KVM: Unset kvm_arch_ops if arch module loading failed + * KVM: Fix guest register corruption on paravirt hypercall + * KVM: Use the generic skip_emulated_instruction() in hypercall code + * KVM: Use own minor number + * KVM: Fix guest sysenter on vmx + * KVM: Export + * KVM: Fix bogus sign extension in mmu mapping audit + * KVM: MMU: Fix guest writes to nonpae pde + * KVM: MMU: Fix host memory corruption on i386 with >= 4GB ram + * KVM: trivial whitespace fixes + * KVM: always reload segment selectors + * KVM: Remove extraneous guest entry on mmio read + * added KVM_GET_MEM_MAP ioctl to get the memory bitmap for a memory slot + * KVM: Prevent system selectors leaking into guest on real->protected + mode transition on vmx + * KVM: Use a shared page for kernel/user communication when runing a vcpu + * KVM: Do not communicate to userspace through cpu registers during PIO + * KVM: Initialize PIO I/O count + * KVM: Handle cpuid in the kernel instead of punting to userspace + * KVM: Remove the 'emulated' field from the userspace interface + * KVM: Remove minor wart from KVM_CREATE_VCPU ioctl + * KVM: Renumber ioctls + * KVM: Add method to check for backwards-compatible API extensions + * KVM: Allow userspace to process hypercalls which have no kernel handler + * KVM: Fold kvm_run::exit_type into kvm_run::exit_reason + * KVM: Add a special exit reason when exiting due to an interrupt + * KVM: Initialize the apic_base msr on svm too + * KVM: Add guest mode signal mask + * KVM: Allow kernel to select size of mmap() buffer + * KVM: Future-proof argument-less ioctls + * KVM: Avoid guest virtual addresses in string pio userspace interface + * KVM: MMU: Remove unnecessary check for pdptr access + * KVM: MMU: Remove global pte tracking + * KVM: Workaround vmx inability to virtualize the reset state + * KVM: Remove set_cr0_no_modeswitch() arch op + * KVM: Modify guest segments after potentially switching modes + * KVM: Hack real-mode segments on vmx from KVM_SET_SREGS + * KVM: Don't allow the guest to turn off the cpu cache + * KVM: Remove unused and write-only variables + * KVM: Handle writes to MCG_STATUS msr + * KVM: MMU: Fix hugepage pdes mapping same physical address with + different access + * KVM: SVM: Ensure timestamp counter 
monotonicity + * KVM: Remove unused function + * KVM: Remove debug message + * KVM: x86 emulator: fix bit string operations operand size + * KVM: SVM: enable LBRV virtualization if available + * Add mmu cache clear function + * KVM: Simply gfn_to_page() + * KVM: Add physical memory aliasing feature + * KVM: Add fpu get/set operations + * KVM: Use kernel-standard types + * KVM: Fix overflow bug in overflow detection code + * KVM: Fix memory leak on pio completion + * KVM: Handle partial pae pdptr + * KVM: Fix string pio when count == 0 + * KVM: Use slab caches to allocate mmu data structures + * KVM: Retry sleeping allocation if atomic allocation fails + * KVM: Fix pio completion + * KVM: SVM: Report hardware exit reason to userspace instead of dmesg + * KVM: Handle guest page faults when emulating mmio + * KVM: VMX: Reduce unnecessary saving of host msrs + * KVM: Fix off-by-one when writing to a nonpae guest pde + * KVM: VMX: Don't switch 64-bit msrs for 32-bit guests + * KVM: Fold drivers/kvm/kvm_vmx.h into drivers/kvm/vmx.c + * KVM: VMX: Only save/restore MSR_K6_STAR if necessary + * KVM: Per-vcpu statistics + * KVM: Silence compile warning on i386 + * KVM: Allow passing 64-bit values to the emulated read/write API + * KVM: Lazy FPU support for SVM + * KVM: Fix msr-avoidance regression on Core processors + * KVM: Don't complain about cpu erratum AA15 + * KVM: Document MSR_K6_STAR's special place in the msr index array + * KVM: MMU: Avoid heavy ASSERT at non debug mode. + * KVM: Initialize cr0 to indicate an fpu is present + * KVM: We want asserts on debug builds, not release + * KVM: Avoid unused function warning due to assertion removal + * KVM: VMX: Avoid unnecessary vcpu_load()/vcpu_put() cycles + * KVM: Move need_resched() check to common code + * KVM: VMX: Properly shadow the CR0 register in the vcpu struct + * KVM: VMX: Add lazy FPU support for VT + * KVM: fix an if() condition + * KVM: SVM: Only save/restore MSRs when needed + * KVM: Remove trailing whitespace + * KVM: Remove extraneous guest entry on mmio read + * KVM: Don't require explicit indication of completion of mmio or pio + * KVM: Remove unused 'instruction_length' + * KVM: VMX: Enable io bitmaps to avoid IO port 0x80 VMEXITs + * KVM: SVM: Allow direct guest access to PC debug port + * KVM: Fix RMW mmio handling + * KVM: Assume that writes smaller than 4 bytes are to non-pagetable pages + * KVM: Avoid saving and restoring some host CPU state on lightweight + vmexit + * KVM: Unindent some code + * KVM: Reduce misfirings of the fork detector + * KVM: Be more careful restoring fs on lightweight vmexit + * KVM: Unify kvm_mmu_pre_write() and kvm_mmu_post_write() + * KVM: MMU: Respect nonpae pagetable quadrant when zapping ptes + * KVM: Update shadow pte on write to guest pte + * KVM: Increase mmu shadow cache to 1024 pages + * KVM: Fix potential guest state leak into host + * KVM: Prevent guest fpu state from leaking into the host + * KVM: Move some more msr mangling into vmx_save_host_state() + * KVM: Rationalize exception bitmap usage + * KVM: Consolidate guest fpu activation and deactivation + * KVM: Ensure host cr0.ts is saved + * KVM: Set cr0.mp for guests + * KVM: Implement IA32_EBL_CR_POWERON msr + * KVM: MMU: Simplify kvm_mmu_free_page() a tiny bit + * KVM: MMU: Store shadow page tables as kernel virtual addresses, not + physical + * KVM: VMX: Only reload guest msrs if they are already loaded + * KVM: Avoid corrupting tr in real mode + * KVM: Fix vmx I/O bitmap initialization on highmem systems + * KVM: Remove merge 
artifact + * KVM: VMX: Use local labels in inline assembly + * KVM: VMX: Handle #SS faults from real mode + * KVM: VMX: Avoid saving and restoring msrs on lightweight vmexit + * KVM: VMX: Compile-fix for 32-bit hosts + * KVM: VMX: Cleanup redundant code in MSR set + * KVM: VMX: Fix a typo which mixes X86_64 and CONFIG_X86_64 + * KVM: VMX: Avoid saving and restoring msr_efer on lightweight vmexit + * KVM: VMX: Remove warnings on i386 + * Use menuconfig objects II - KVM/Virt + * KVM: x86 emulator: implement wbinvd + * KVM: Fix includes + * KVM: Use symbolic constants instead of magic numbers + * KVM: MMU: Use slab caches for shadow pages and their headers + * KVM: MMU: Simplify fetch() a little bit + * KVM: MMU: Move set_pte_common() to pte width dependent code + * KVM: MMU: Pass the guest pde to set_pte_common + * KVM: MMU: Fold fix_read_pf() into set_pte_common() + * KVM: MMU: Fold fix_write_pf() into set_pte_common() + * KVM: Move shadow pte modifications from set_pte/set_pde to + set_pde_common() + * KVM: Make shadow pte updates atomic + * KVM: MMU: Make setting shadow ptes atomic on i386 + * KVM: MMU: Remove cr0.wp tricks + * KVM: MMU: Simpify accessed/dirty/present/nx bit handling + * KVM: MMU: Don't cache guest access bits in the shadow page table + * KVM: MMU: Remove unused large page marker + * KVM: VMX: Fix asm constraint + * KVM: Lazy guest cr3 switching + * KVM: Replace C code with call to ARRAY_SIZE() macro. + * KVM: Remove unnecessary initialization and checks in mark_page_dirty() + * KVM: Fix vcpu freeing for guest smp + * KVM: Fix adding an smp virtual machine to the vm list + * KVM: Enable guest smp + * KVM: Move duplicate halt handling code into kvm_main.c + * KVM: Emulate hlt on real mode for Intel + * KVM: Keep an upper bound of initialized vcpus + * KVM: Flush remote tlbs when reducing shadow pte permissions + * KVM: SVM: Replace memset(, 0, PAGESIZE) with clear_page() + * KVM: VMX: Replace memset(, 0, PAGESIZE) with clear_page() + * KVM: Require a cpu which can set 64-bit values atomically + * KVM: Initialize the BSP bit in the APIC_BASE msr correctly + * KVM: VMX: Ensure vcpu time stamp counter is monotonous + * KVM: Bring local tree in line with origin + * KVM: Implement emulation of "pop reg" instruction (opcode 0x58-0x5f) + * KVM: Implement emulation of instruction "ret" (opcode 0xc3) + * KVM: Adds support for in-kernel mmio handlers + * KVM: VMX: Fix interrupt checking on lightweight exit + * KVM: Add support for in-kernel pio handlers + * KVM: Fix x86 emulator writeback + * KVM: Avoid useless memory write when possible + * KVM: VMX: Reinitialize the real-mode tss when entering real mode + * KVM: MMU: Fix Wrong tlb flush order + * KVM: VMX: Remove unnecessary code in vmx_tlb_flush() + * KVM: SVM: Reliably detect if SVM was disabled by BIOS + * KVM: Remove kvmfs in favor of the anonymous inodes source + * KVM: Clean up #includes + * KVM: Fix svm availability check miscompile on i386 + * HOTPLUG: Add CPU_DYING notifier + * HOTPLUG: Adapt cpuset hotplug callback to CPU_DYING + * HOTPLUG: Adapt thermal throttle to CPU_DYING + * SMP: Implement on_cpu() + * KVM: Keep track of which cpus have virtualization enabled + * KVM: Tune hotplug/suspend IPIs + * KVM: Use CPU_DYING for disabling virtualization + * KVM: MMU: Store nx bit for large page shadows + * KVM: Fix *nopage() in kvm_main.c + * KVM: SMP: Add vcpu_id field in struct vcpu + * KVM - add hypercall nr to kvm_run + * KVM:: Future-proof the exit information union ABI + * KVM: In-kernel string pio write support + * 
KVM: Fix memory slot management functions for guest smp + * KVM: x86 emulator: implement rdmsr and wrmsr + * KVM: Trivial: /dev/kvm interface is no longer experimental. + * KVM: Trivial: Remove unused struct cpu_user_regs declaration + * KVM: Trivial: Make decode_register() static + * KVM: Trivial: Comment spelling may escape grep + * KVM: Trivial: Avoid hardware_disable predeclaration + * KVM: Trivial: Use standard CR0 flags macros from asm/cpu-features.h + * Use standard CR3 flags, tighten checking + * Use standard CR4 flags, tighten checking + * KVM: Trivial: Use standard BITMAP macros, open-code userspace-exposed + header + * KVM: Set exit_reason to KVM_EXIT_MMIO where run->mmio is initialized. + * KVM: Use standard CR8 flags, and fix TPR definition + * KVM: MMU: Fix oopses with SLUB + * KVM: x86 emulator: fix cmov for writeback changes + * KVM: MMU: Fix cleaning up the shadow page allocation cache + * KVM: Require CONFIG_ANON_INODES + * KVM: x86 emulator: fix faulty check for two-byte opcode + * KVM: Correctly handle writes crossing a page boundary + * KVM: Fix unlikely kvm_create vs decache_vcpus_on_cpu race + * KVM: Hoist kvm_mmu_reload() out of the critical section + * KVM: Fix removal of nx capability from guest cpuid + * KVM: Move gfn_to_page out of kmap/unmap pairs + * KVM: disable writeback for 0x0f 0x01 instructions. + * KVM: VMX: Import some constants of vmcs from IA32 SDM + * KVM: Remove dead code in the cmpxchg instruction emulation + * KVM: load_pdptrs() cleanups + * KVM: Remove arch specific components from the general code + * KVM: Dynamically allocate vcpus + * KVM: VMX: Improve the method of writing vmcs control + * KVM: Use the scheduler preemption notifiers to make kvm preemptible + * KVM: Convert vm lock to a mutex + * KVM: fx_init() needs preemption disabled while it plays with the FPU + state + * KVM: VMX: pass vcpu_vmx internally + * KVM: Remove three magic numbers + * KVM: SVM: de-containization + * KVM: SVM: internal function name cleanup + * KVM: x86 emulator: disable writeback for debug register instructions + * KVM: Change the emulator_{read,write,cmpxchg}_* functions to take a + vcpu + * KVM: Remove kvm_{read,write}_guest() + * KVM: Use kmem cache for allocating vcpus + * KVM: Use alignment properties of vcpu to simplify FPU ops + * KVM: kvm_vm_ioctl_get_dirty_log restore "nothing dirty" optimization + * KVM: VMX: Add cpu consistency check + * KVM: Don't assign vcpu->cr3 if it's invalid: check first, set last + * KVM: Cleanup mark_page_dirty + * KVM: SVM: Make set_msr_interception more reliable + * KVM: Remove redundant alloc_vmcs_cpu declaration + * KVM: Fix defined but not used warning in drivers/kvm/vmx.c + * KVM: Remove stat_set from debugfs + * KVM: Remove unneeded kvm_dev_open and kvm_dev_release functions. 
+ * KVM: Add and use pr_unimpl for standard formatting of unimplemented + features + * KVM: Use kmem_cache_free for kmem_cache_zalloc'ed objects + * KVM: VMX: Remove a duplicated ia32e mode vm entry control + * KVM: Remove useless assignment + * KVM: Cleanup string I/O instruction emulation + * KVM: Clean up kvm_setup_pio() + * KVM: VMX: Don't require cr8 load/store exit capability when running on + 32-bit + * KVM: Close minor race in signal handling + * KVM: Communicate cr8 changes to userspace + * KVM: x86 emulator: implement 'and $imm, %{al|ax|eax}' + * KVM: x86 emulator: implement 'jmp rel' instruction (opcode 0xe9) + * KVM: x86 emulator: Implement 'jmp rel short' instruction (opcode 0xeb) + * KVM: x86 emulator: implement 'push reg' (opcodes 0x50-0x57) + * KVM: VMX: allow rmode_tss_base() to work with >2G of guest memory + * KVM: Avoid calling smp_call_function_single() with interrupts disabled + * KVM: MMU: Fix rare oops on guest context switch + * KVM: Support more memory slots + * KVM: X86 emulator: fix 'push reg' writeback + * KVM: VMX: Split segments reload in vmx_load_host_state() + * KVM: Add support for in-kernel PIC emulation + * KVM: Define and use cr8 access functions + * KVM: Emulate local APIC in kernel + * KVM: In-kernel I/O APIC model + * KVM: Emulate hlt in the kernel + * KVM: Protect in-kernel pio using kvm->lock + * KVM: Add get/set irqchip ioctls for in-kernel PIC live migration + support + * KVM: Bypass irq_pending get/set when using in kernel irqchip + * KVM: in-kernel IOAPIC save and restore support + * KVM: in-kernel LAPIC save and restore support + * KVM: pending irq save/restore + * KVM: VMX: Use shadow TPR/cr8 for 64-bits guests + * KVM: Keep track of missed timer irq injections + * KVM: Migrate lapic hrtimer when vcpu moves to another cpu + * KVM: disable tpr/cr8 sync when in-kernel APIC is used + * KVM: VMX: Fix tpr threshold updating + * KVM: deliver PIC interrupt only to vcpu0 + * KVM: round robin for APIC lowest priority delivery mode + * KVM: enable in-kernel APIC INIT/SIPI handling + * KVM: Set the ET flag in CR0 after initializing FX + * KVM: Remove the unused invlpg member of struct kvm_arch_ops. + * KVM: Clean up unloved invlpg emulation + * KVM: Keep control regs in sync + * KVM: Hoist SVM's get_cs_db_l_bits into core code. 
+ * KVM: Simplify memory allocation + * KVM: Rename kvm_arch_ops to kvm_x86_ops + * KVM: Fix lapic 64-bit division on 32-bit hosts + * KVM: fix apic timer migration when inactive + * KVM: MMU: Don't do GFP_NOWAIT allocations + * KVM: Remove smp_processor_id() in kvm_vcpu_kick() + * KVM: VMX: Move vm entry failure handling to the exit handler + * KVM: Move main vcpu loop into subarch independent code + * KVM: Fix link error to "genapic" + * KVM: VMX: Fix exit qualification width on i386 + * KVM: x86 emulator: push imm8 + * KVM: x86 emulator: call near + * KVM: x86 emulator: pushf + * KVM: Improve emulation failure reporting + * KVM: VMX: Prevent setting CPU_BASED_TPR_SHADOW on i386 host + * KVM: x86 emulator: sort opcodes into ascending order + * KVM: x86 emulator: imlpement jump conditional relative + * KVM: X86 emulator: jump conditional short + * KVM: x86 emulator: lea + * KVM: x86 emulator: jmp abs + * KVM: x86 emulator: fix src, dst value initialization + * KVM: x86 emulator: popf + * KVM: Skip pio instruction when it is emulated, not executed + * KVM: fix PIC interrupt delivery on different APIC conditions + * KVM: Fix kvm_vcpu_ioctl_get_sregs() warning on i386 + * KVM: Remove errant printk() in kvm_vcpu_ioctl_get_sregs() + * KVM: Fix virtualization menu help text + * KVM: x86 emulator: Add vmmcall/vmcall to x86_emulate (v3) + * KVM: Refactor hypercall infrastructure (v3) + * KVM: x86 emulator: remove unused functions + * KVM: x86 emulator: move all x86_emulate_memop() to a structure + * KVM: x86 emulator: move all decoding process to function + x86_decode_insn() + * KVM: emulate_instruction() calls now x86_decode_insn() and + x86_emulate_insn() + * KVM: Call x86_decode_insn() only when needed + * KVM: Fix ioapic level-triggered interrupt redelivery + * KVM: Fix #UD exception delivery + * KVM: VMX: Further reduce efer reloads + * KVM: VMX: Fix build on i386 due to EFER_LMA not defined + * KVM: Fix ioapic.c compilation failure due to missing include + * KVM: x86 emulator: fix merge screwup due to emulator split + * KVM: x85 emulator: Correct inconcistency in between cr2 and ctxt->cr2. + * KVM: Avoid redelivery of edge-triggered irq if it is already in service + * KVM: Implement ioapic irq polarity bit + * KVM: x86 emulator: fix repne/repnz decoding + * KVM: Fix host oops due to guest changing efer + * KVM: Fix ioapic edge-triggered interrupts + * KVM: MMU: Set shadow pte atomically in mmu_pte_write_zap_pte() + * KVM: Allow not-present guest page faults to bypass kvm + * KVM: MMU: Make flooding detection work when guest page faults are + bypassed + * KVM: MMU: Ignore reserved bits in cr3 in non-pae mode + * KVM: x86 emulator: split some decoding into functions for readability + * KVM: x86 emulator: remove _eflags and use directly ctxt->eflags. 
+ * KVM: x86 emulator: Remove no_wb, use dst.type = OP_NONE instead + * KVM: x86_emulator: no writeback for bt + * KVM: apic round robin cleanup + * KVM: Purify x86_decode_insn() error case management + * KVM: x86 emulator: Any legacy prefix after a REX prefix nullifies its + effect + * i386: Expose IOAPIC register definitions even if CONFIG_X86_IO_APIC is + not set + * KVM: x86 emulator: On a pop instruction, don't restore ECX and EIP on + error + * KVM: x86 emulator: remove unused variable + * KVM: VMX: Don't clear the vmcs if the vcpu is not loaded on any + processor + * KVM: VMX: Simplify vcpu_clear() + * KVM: Remove the usage of paeg->private field by rmap + * KVM: x86 emulator: Correct management of REP prefix + * KVM: Add general accessors to read and write guest memory + * KVM: Allow dynamic allocation of the mmu shadow cache size + * KVM: Check I/O APIC indirect index before writing + * KVM: Add kvm_free_lapic() to pair with kvm_create_lapic() + * KVM: Hoist kvm_create_lapic() into kvm_vcpu_init() + * KVM: Remove gratuitous casts from lapic.c + * KVM: CodingStyle cleanup + * KVM: VMX: Handle NMIs before enabling interrupts and preemption + * KVM: Support assigning userspace memory to the guest + * KVM: Export PIC reset for kernel device reset + * KVM: Split IOAPIC reset function and export for kernel RESET + * KVM: VMX: Reset mmu context when entering real mode + * KVM: Replace enum by #define + * KVM: Move x86 msr handling to new files x86.[ch] + * KVM: MMU: Clean up MMU functions to take struct kvm when appropriate + * KVM: MMU: More struct kvm_vcpu -> struct kvm cleanups + * KVM: Move guest pte dirty bit management to the guest pagetable walker + * KVM: MMU: Fix nx access bit for huge pages + * KVM: MMU: Disable write access on clean large pages + * KVM: MMU: Instatiate real-mode shadows as user writable shadows + * KVM: MMU: Move dirty bit updates to a separate function + * KVM: MMU: When updating the dirty bit, inform the mmu about it + * KVM: Portability: split kvm_vcpu_ioctl + * KVM: Restore missing #include + * KVM: Add some \n in ioapic_debug() + * KVM: x86 emulator: implement 'movnti mem, reg' + * KVM: MMU: Call update_dirty_bit() without disabling preemption + * KVM: Move apic timer interrupt backlog processing to common code + * KVM: Move interrupt injection out of interrupt disabled section + * KVM: Rename KVM_TLB_FLUSH to KVM_REQ_TLB_FLUSH + * KVM: VMX: Force vm86 mode if setting flags during real mode + * KVM: MMU: Simplify page table walker + * KVM: Actually move the interrupt injection code out of the critical + section + * KVM: x86 emulator: cmc, clc, cli, sti + * KVM: x86 emulator: use a defined flag definition + * KVM: x86 emulator: fix access registers for instructions with ModR/M + byte and Mod = 3 + * KVM: MMU: Add rmap_next(), a helper for walking kvm rmaps + * KVM: MMU: Keep a reverse mapping of non-writable translations + * KVM: MMU: Make gfn_to_page() always safe + * KVM: Partial swapping of guest memory + * KVM: VMX: Initialize vcpu with preemption enabled + * KVM: Use virtual cpu accounting if available for guest times. 
+ * KVM: Move kvm_guest_exit() after local_irq_enable() + * KVM: MMU: Fix dirty bit pte gpa calculation + * KVM: Allocate userspace memory for older userspace + * KVM: Portability: Split kvm_vcpu into arch dependent and independent + parts (part 1) + * KVM: Fix local apic timer divide by zero + * KVM: Move vmx_vcpu_reset() out of vmx_vcpu_setup() + * KVM: Add a might_sleep() annotation to gfn_to_page() + * KVM: VMX: vmx_vcpu_setup(): remove unused variable. + * KVM: Per-architecture hypercall definitions + * KVM: Use new smp_call_function_mask() in kvm_flush_remote_tlbs() + * KVM: Unmap kernel-allocated memory on slot destruction + * KVM: Export memory slot allocation mechanism + * KVM: Add kernel-internal memory slots + * KVM: Add ioctl to tss address from userspace, + * KVM: x86 emulator: fix 'push imm8' emulation + * KVM: VMX: Let gcc to choose which registers to save (x86_64) + * KVM: VMX: Let gcc to choose which registers to save (i386) + * KVM: SVM: Let gcc to choose which registers to save (x86_64) + * KVM: SVM: Let gcc to choose which registers to save (i386) + * KVM: x86 emulator: invd instruction + * KVM: SVM: Intercept the 'invd' and 'wbinvd' instructions + * KVM: x86 emulator: don't depend on cr2 for mov abs emulation + * KVM: Move page fault processing to common code + * KVM: MMU: Topup the mmu memory preallocation caches before emulating an + insn + * KVM: Portability: Split kvm_vm_ioctl v3 + * KVM: Portability: Move memory segmentation to x86.c + * KVM: Portability: move get/set_apic_base to x86.c + * KVM: Portability: Move control register helper functions to x86.c + * KVM: VMX: Enable memory mapped TPR shadow (FlexPriority) + * KVM: Fix gfn_to_page() acquiring mmap_sem twice + * KVM: Portability: Move kvm_get/set_msr[_common] to x86.c + * KVM: Portability: Move x86 emulation and mmio device hook to x86.c + * KVM: Portability: Move pio emulation functions to x86.c + * KVM: x86 emulator: Extract the common code of SrcReg and DstReg + * KVM: x86 emulator: centralize decoding of one-byte register access + insns + * KVM: Simplify decode_register_operand() calling convention + * KVM: Make mark_page_dirty() work for aliased pages too. 
+ * KVM: x86 emulator: Hoist modrm and abs decoding into separate functions + * KVM: Portability: Make exported debugfs data architecture-specific + * KVM: Portability: Move x86 instruction emulation code to x86.c + * KVM: Portability: Move x86 FPU handling to x86.c + * KVM: Portability: Move x86 vcpu ioctl handlers to x86.c + * KVM: x86 emulator: Move one-byte insns with reg operand into one-byte + section + * KVM: VMX: Fix repeated allocation of apic access page on smp + * KVM: SVM: Fix SMP with kernel apic + * KVM: Add make_page_dirty() to kvm_clear_guest_page() + * KVM: SVM: Defer nmi processing until switch to host state is complete + * KVM: VMX: Avoid reloading host efer on cpus that don't have it + * KVM: VMX: Use vmx to inject real interrupts + * KVM: Go back to atomically injecting interrupts + * KVM: VMX: Comment VMX primary/secondary exec ctl definitions + * KVM: VMX: wbinvd exiting + * KVM: x86 emulator: fix JMP_REL + * KVM: x86 emulator: fix the saving of of the eip value + * KVM: x86 emulator: remove 8 bytes operands emulator for call near + instruction + * KVM: Simplify CPU_TASKS_FROZEN cpu notifier handling + * KVM: add kvm_is_error_hva() + * KVM: introduce gfn_to_hva() + * KVM: Change kvm_{read,write}_guest() to use copy_{from,to}_user() + * KVM: Portability: Move some includes to x86.c + * KVM: Portability: Move kvm_x86_ops to x86.c + * KVM: Portability: Add vcpu and hardware management arch hooks + * KVM: Portability: Combine kvm_init and kvm_init_x86 + * KVM: Portability: Move x86 specific code from kvm_init() to kvm_arch() + * KVM: x86 emulator: modify 'lods', and 'stos' not to depend on CR2 + * KVM: Portability: move KVM_CHECK_EXTENSION + * KVM: VMX: Consolidate register usage in vmx_vcpu_run() + * KVM: Portability: Make kvm_vcpu_ioctl_translate arch dependent + * KVM: x86 emulator: Rename 'cr2' to 'memop' + * KVM: Remove ptr comparisons to 0 + * KVM: Remove __init attributes for kvm_init_debug and kvm_init_msr_list + * KVM: Portability: Add two hooks to handle kvm_create and destroy vm + * KVM: Replace 'light_exits' stat with 'host_state_reload' + * KVM: Add fpu_reload counter + * KVM: Add instruction emulation statistics + * KVM: Extend stats support for VM stats + * KVM: MMU: Add some mmu statistics + * KVM: x86 emulator: Use emulator_write_emulated and not + emulator_write_std + * KVM: Make unloading of FPU state when putting vcpu arch-independent + * KVM: SVM: Disable Lazy FPU optimization + * KVM: Portability: Move kvm_vcpu_ioctl_get_dirty_log to arch-specific + file + * KVM: Portability: MMU initialization and teardown split + * KVM: Portability: Move some macro definitions from kvm.h to x86.h + * KVM: Portability: Move struct kvm_x86_ops definition to x86.h + * KVM: Portability: Move vcpu regs enumeration definition to x86.h + * KVM: Move some static inline functions out from kvm.h into x86.h + * KVM: Portability: Move some function declarations to x86.h + * KVM: VMX: Force seg.base == (seg.sel << 4) in real mode + * KVM: MMU: Change guest pte access to kvm_{read,write}_guest() + * kvm: simplify kvm_clear_guest_page() + * KVM: Add missing #include + * KVM: MMU: Remove unused variable + * KVM: Remove unused "rmap_overflow" variable + * KVM: Correct consistent typo: "destory" -> "destroy" + * KVM: Move misplaced comment + * KVM: Portability: Move kvm_memory_alias to asm/kvm.h + * KVM: Portability: Move x86 pic strutctures + * KVM: Portability: Move kvm_regs to + * KVM: Portability: Move structure lapic_state to + * KVM: Portability: Move kvm_segment & kvm_dtable 
structure to + + * KVM: Portability: Move kvm_sregs and msr structures to + * KVM: Portability: Move cpuid structures to + * KVM: Export include/asm-x86/kvm.h + * KVM: MMU: Fix potential memory leak with smp real-mode + * KVM: MMU: Selectively set PageDirty when releasing guest memory + * KVM: x86 emulator: retire ->write_std() + * KVM: x86 emulator: prefetch up to 15 bytes of the instruction executed + * KVM: SVM: Fix FPU leak and re-enable lazy FPU switching + * KVM: Recalculate mmu pages needed for every memory region change + * KVM: Portability: Split kvm_set_memory_region() to have an arch + callout + * KVM: Split vcpu creation to avoid vcpu_load() before preemption setup + * KVM: MMU: Implement guest page fault bypass for nonpae + * KVM: Add statistic for remote tlb flushes + * KVM: MMU: Avoid unnecessary remote tlb flushes when guest updates a pte + * KVM: Add parentheses to silence gcc + * KVM: Don't bother the mmu if cr3 load doesn't change cr3 + * KVM: MMU: Code cleanup + * KVM: MMU: Introduce and use gpte_to_gfn() + * KVM: MMU: Move pse36 handling to the guest walker + * KVM: MMU: Remove extra gaddr parameter from set_pte_common() + * KVM: MMU: Remove set_pde() + * KVM: MMU: Adjust page_header_update_slot() to accept a gfn instead of a + gpa + * KVM: MMU: Introduce gfn_to_gpa() + * KVM: MMU: Simplify nonpaging_map() + * KVM: MMU: Remove gva_to_hpa() + * KVM: Remove gpa_to_hpa() + * KVM: MMU: Rename variable of type 'struct kvm_mmu_page *' + * KVM: MMU: Rename 'release_page' + * KVM: Disallow fork() and similar games when using a VM + * KVM: Enhance guest cpuid management + * KVM: Replace private 'struct segment descriptor' by x86's desc_struct + * KVM: Remove segment_descriptor, part 2 + * KVM: Fix compile error on i386 + * KVM: VMX: Read & store IDT_VECTORING_INFO_FIELD + * KVM: Fix faults during injection of real-mode interrupts + * KVM: x86 emulator: Fix instruction fetch cache hit check + * KVM: VMX: Remove the secondary execute control dependency on irqchip + * KVM: Portability: Move unalias_gfn to arch dependent file + * KVM: x86 emulator: Make a distinction between repeat prefixes F3 and F2 + * KVM: x86 emulator: address size and operand size overrides are sticky + * KVM: Remove desc.h include in kvm_main.c + * KVM: Revert segment_descriptor.h removal + * KVM: Remove misleading check for mmio during event injection + * KVM: MMU: mark pages that were inserted to the shadow pages table as + accessed + * KVM: x86 emulator: rename REP_REPE_PREFIX + * KVM: x86 emulator: cmps instruction + * KVM: Add ifdef in irqchip struct for x86 only structures + * KVM: Fix cpuid2 killing 32-bit guests on non-NX machines + * KVM: x86 emulator: Move rep processing before instruction execution + * KVM: x86 emulator: unify two switches + * KVM: x86 emulator: unify four switch statements into two + * KVM: Don't bypass the mmu if in pae and pdptrs changed + * KVM: Portability: Move KVM_INTERRUPT vcpu ioctl to x86.c + * KVM: Correct kvm_init() error paths not freeing bad_pge. 
+ * KVM: Export include/linux/kvm.h only if $ARCH actually supports KVM + * KVM: SVM: Remove KVM specific defines for MSR_EFER + * KVM: Replace kvm_lapic with kvm_vcpu in ioapic/lapic interface + * KVM: Replace dest_Lowest_Prio and dest_Fixed with self-defined macros + * KVM: Extend ioapic code to support iosapic + * KVM: Portability: Move address types to their own header file + * KVM: Portability: Move IO device definitions to its own header file + * KVM: Portability: Stop including x86-specific headers in kvm_main.c + * KVM: Portability: Create kvm_arch_vcpu_runnable() function + * KVM: Convert KVM from ->nopage() to ->fault() + * KVM: MMU: Remove unused prev_shadow_ent variable from fetch() + * KVM: Generalize exception injection mechanism + * KVM: Replace page fault injection by the generalized exception queue + * KVM: Replace #GP injection by the generalized exception queue + * KVM: Use generalized exception queue for injecting #UD + * KVM: x86 emulator: fix eflags preparation for emulation + * KVM: VMX: Avoid exit when setting cr8 if the local apic is in the + kernel + * KVM: SVM: Emulate read/write access to cr8 + * KVM: x86 emulator: Fix stack instructions on 64-bit mode + * KVM: SVM: Trap access to the cr8 register + * KVM: VMX: Fix cr8 exit optimization + * KVM: MMU: Use cmpxchg for pte updates on walk_addr() + * KVM: MMU: Simplify calculation of pte access + * KVM: MMU: Set nx bit correctly on shadow ptes + * KVM: MMU: Move pte access calculation into a helper function + * KVM: MMU: Fix inherited permissions for emulated guest pte updates + * KVM: MMU: No need to pick up nx bit from guest pte + * KVM: MMU: Pass pte dirty flag to set_pte() instead of calculating it + on-site + * KVM: MMU: Remove walker argument to set_pte() + * KVM: MMU: Move set_pte() into guest paging mode independent code + * KVM: MMU: Adjust mmu_set_spte() debug code for gpte removal + * KVM: MMU: Use mmu_set_spte() for real-mode shadows + * KVM: SVM: Exit to userspace if write to cr8 and not using in-kernel + apic + * KVM: SVM: support writing 0 to K8 performance counter control registers + * KVM: MMU: Fix kunmap_atomic() call in cmpxchg_gpte() + * KVM: MMU: Fix SMP shadow instantiation race + * KVM: LAPIC: minor debugging compile fix + * KVM: MMU: emulated cmpxchg8b should be atomic on i386 + * KVM: Fix bad kunmap_atomic() paramerter inm cmpxchg emulation + * KVM: Make cmpxchg emulation compile on i386 + * KVM: Another cmpxchg i386 compile fix + * KVM: Another cmpxchg emulation compile fix + * KVM: Another cmpxchg emulation compile fix + * KVM: Portability: Move kvm{pic,ioapic} accesors to x86 specific code + * KVM: Portability: Introduce kvm_vcpu_arch + * KVM: Portability: Split mmu-related static inline functions to mmu.h + * KVM: Portability: Move kvm_vcpu definition back to kvm.h + * KVM: Portability: Expand the KVM_VCPU_COMM in kvm_vcpu structure. 
+ * KVM: Portability: Move kvm_vcpu_stat to x86.h + * KVM: Portability: Move memslot aliases to new struct kvm_arch + * KVM: Portability: Move mmu-related fields to kvm_arch + * KVM: Portability: move vpic and vioapic to kvm_arch + * KVM: Portability: Move round_robin_prev_vcpu and tss_addr to kvm_arch + * KVM: Portability: Move kvm_vm_stat to x86.h + * KVM: VMX: Add printk_ratelimit in vmx_intr_assist + * KVM: Move arch dependent files to new directory arch/x86/kvm/ + * KVM: Move drivers/kvm/* to virt/kvm/ + * KVM: Fix compile error in asm/kvm_host.h + * KVM: Move irqchip declarations into new ioapic.h and lapic.h + * KVM: Move ioapic code to common directory. + * KVM: Move kvm_vcpu_kick() to x86.c + * KVM: Expose ioapic to ia64 save/restore APIs + * KVM: MMU: Coalesce remote tlb flushes + * KVM: MMU: Add cache miss statistic + * KVM: Print data for unimplemented wrmsr + * KVM: Ensure pages are copied on write + * KVM: MMU: Fix cmpxchg8b emulation on i386 (again) + * KVM: x86 emulator: Add vmmcall/vmcall to x86_emulate (v3) + * KVM: Refactor hypercall infrastructure (v3) + * KVM: x86 emulator: remove unused functions + * KVM: x86 emulator: move all x86_emulate_memop() to a structure + * KVM: x86 emulator: move all decoding process to function + x86_decode_insn() + * KVM: emulate_instruction() calls now x86_decode_insn() and + x86_emulate_insn() + * KVM: Call x86_decode_insn() only when needed + * KVM: VMX: Further reduce efer reloads + * KVM: Allow not-present guest page faults to bypass kvm + * KVM: MMU: Make flooding detection work when guest page faults are + bypassed + * KVM: MMU: Ignore reserved bits in cr3 in non-pae mode + * KVM: x86 emulator: split some decoding into functions for readability + * KVM: x86 emulator: remove _eflags and use directly ctxt->eflags. 
+ * KVM: x86 emulator: Remove no_wb, use dst.type = OP_NONE instead + * KVM: x86_emulator: no writeback for bt + * KVM: Purify x86_decode_insn() error case management + * KVM: x86 emulator: Any legacy prefix after a REX prefix nullifies its + effect + * KVM: VMX: Don't clear the vmcs if the vcpu is not loaded on any + processor + * KVM: VMX: Simplify vcpu_clear() + * KVM: Remove the usage of page->private field by rmap + * KVM: Add general accessors to read and write guest memory + * KVM: Allow dynamic allocation of the mmu shadow cache size + * KVM: Add kvm_free_lapic() to pair with kvm_create_lapic() + * KVM: Hoist kvm_create_lapic() into kvm_vcpu_init() + * KVM: Remove gratuitous casts from lapic.c + * KVM: CodingStyle cleanup + * KVM: Support assigning userspace memory to the guest + * KVM: Move x86 msr handling to new files x86.[ch] + * KVM: MMU: Clean up MMU functions to take struct kvm when appropriate + * KVM: MMU: More struct kvm_vcpu -> struct kvm cleanups + * KVM: Move guest pte dirty bit management to the guest pagetable walker + * KVM: MMU: Fix nx access bit for huge pages + * KVM: MMU: Disable write access on clean large pages + * KVM: MMU: Instantiate real-mode shadows as user writable shadows + * KVM: MMU: Move dirty bit updates to a separate function + * KVM: MMU: When updating the dirty bit, inform the mmu about it + * KVM: Portability: split kvm_vcpu_ioctl + * KVM: apic round robin cleanup + * KVM: Add some \n in ioapic_debug() + * KVM: Move apic timer interrupt backlog processing to common code + * KVM: Rename KVM_TLB_FLUSH to KVM_REQ_TLB_FLUSH + * KVM: x86 emulator: Implement emulation of instruction: inc & dec + * KVM: MMU: Simplify page table walker + * KVM: x86 emulator: cmc, clc, cli, sti + * KVM: MMU: Add rmap_next(), a helper for walking kvm rmaps + * KVM: MMU: Keep a reverse mapping of non-writable translations + * KVM: MMU: Make gfn_to_page() always safe + * KVM: MMU: Partial swapping of guest memory + * KVM: Use virtual cpu accounting if available for guest times. 
+ * KVM: Allocate userspace memory for older userspace + * KVM: Portability: Split kvm_vcpu into arch dependent and independent + parts (part 1) + * KVM: Move vmx_vcpu_reset() out of vmx_vcpu_setup() + * KVM: Add a might_sleep() annotation to gfn_to_page() + * KVM: Export PIC reset for kernel device reset + * KVM: Split IOAPIC reset function and export for kernel RESET + * KVM: Per-architecture hypercall definitions + * KVM: Unmap kernel-allocated memory on slot destruction + * KVM: Export memory slot allocation mechanism + * KVM: Add kernel-internal memory slots + * KVM: Add ioctl to tss address from userspace, + * KVM: VMX: Let gcc to choose which registers to save (x86_64) + * KVM: VMX: Let gcc to choose which registers to save (i386) + * KVM: SVM: Let gcc to choose which registers to save (x86_64) + * KVM: SVM: Let gcc to choose which registers to save (i386) + * KVM: x86 emulator: don't depend on cr2 for mov abs emulation + * KVM: Move page fault processing to common code + * KVM: MMU: Topup the mmu memory preallocation caches before emulating an + insn + * KVM: Portability: Split kvm_vm_ioctl v3 + * KVM: Portability: Move memory segmentation to x86.c + * KVM: Portability: move get/set_apic_base to x86.c + * KVM: Portability: Move control register helper functions to x86.c + * KVM: VMX: Enable memory mapped TPR shadow (FlexPriority) + * KVM: Fix gfn_to_page() acquiring mmap_sem twice + * KVM: Portability: Move kvm_get/set_msr[_common] to x86.c + * KVM: Portability: Move x86 emulation and mmio device hook to x86.c + * KVM: Portability: Move pio emulation functions to x86.c + * KVM: x86 emulator: Extract the common code of SrcReg and DstReg + * KVM: x86 emulator: centralize decoding of one-byte register access + insns + * KVM: Simplify decode_register_operand() calling convention + * KVM: Make mark_page_dirty() work for aliased pages too. 
+ * KVM: x86 emulator: Hoist modrm and abs decoding into separate functions + * KVM: Portability: Make exported debugfs data architecture-specific + * KVM: Portability: Move x86 instruction emulation code to x86.c + * KVM: Portability: Move x86 FPU handling to x86.c + * KVM: Portability: Move x86 vcpu ioctl handlers to x86.c + * KVM: Add make_page_dirty() to kvm_clear_guest_page() + * KVM: VMX: Use vmx to inject real-mode interrupts + * KVM: VMX: Read & store IDT_VECTORING_INFO_FIELD + * KVM: Fix faults during injection of real-mode interrupts + * KVM: VMX: Comment VMX primary/secondary exec ctl definitions + * KVM: VMX: wbinvd exiting + * KVM: x86 emulator: remove 8 bytes operands emulator for call near + instruction + * KVM: Simplify CPU_TASKS_FROZEN cpu notifier handling + * KVM: add kvm_is_error_hva() + * KVM: introduce gfn_to_hva() + * KVM: Change kvm_{read,write}_guest() to use copy_{from,to}_user() + * KVM: Portability: Move some includes to x86.c + * KVM: Portability: Move kvm_x86_ops to x86.c + * KVM: Portability: Add vcpu and hardware management arch hooks + * KVM: Portability: Combine kvm_init and kvm_init_x86 + * KVM: Portability: Move x86 specific code from kvm_init() to kvm_arch() + * KVM: x86 emulator: modify 'lods', and 'stos' not to depend on CR2 + * KVM: Portability: move KVM_CHECK_EXTENSION + * KVM: VMX: Consolidate register usage in vmx_vcpu_run() + * KVM: Portability: Make kvm_vcpu_ioctl_translate arch dependent + * KVM: Remove ptr comparisons to 0 + * KVM: Remove __init attributes for kvm_init_debug and kvm_init_msr_list + * KVM: Portability: Add two hooks to handle kvm_create and destroy vm + * KVM: Replace 'light_exits' stat with 'host_state_reload' + * KVM: Add fpu_reload counter + * KVM: Add instruction emulation statistics + * KVM: Extend stats support for VM stats + * KVM: MMU: Add some mmu statistics + * KVM: Make unloading of FPU state when putting vcpu arch-independent + * KVM: Portability: Move kvm_vcpu_ioctl_get_dirty_log to arch-specific + file + * KVM: Portability: MMU initialization and teardown split + * KVM: Portability: Move some macro definitions from kvm.h to x86.h + * KVM: Portability: Move struct kvm_x86_ops definition to x86.h + * KVM: Portability: Move vcpu regs enumeration definition to x86.h + * KVM: Move some static inline functions out from kvm.h into x86.h + * KVM: Portability: Move some function declarations to x86.h + * KVM: VMX: Force seg.base == (seg.sel << 4) in real mode + * KVM: MMU: Change guest pte access to kvm_{read,write}_guest() + * KVM: Simplify kvm_clear_guest_page() + * KVM: Add missing #include + * KVM: MMU: Remove unused variable + * KVM: Remove unused "rmap_overflow" variable + * KVM: Correct consistent typo: "destory" -> "destroy" + * KVM: Move misplaced comment + * KVM: Portability: Move kvm_memory_alias to asm/kvm.h + * KVM: Portability: Move x86 pic strutctures + * KVM: Portability: Move kvm_regs to + * KVM: Portability: Move structure lapic_state to + * KVM: Portability: Move kvm_segment & kvm_dtable structure to + + * KVM: Portability: Move kvm_sregs and msr structures to + * KVM: Portability: Move cpuid structures to + * KVM: Export include/asm-x86/kvm.h + * KVM: MMU: Fix potential memory leak with smp real-mode + * KVM: MMU: Selectively set PageDirty when releasing guest memory + * KVM: x86 emulator: retire ->write_std() + * KVM: x86 emulator: prefetch up to 15 bytes of the instruction executed + * KVM: Recalculate mmu pages needed for every memory region change + * KVM: Portability: Split kvm_set_memory_region() 
to have an arch + callout + * KVM: Split vcpu creation to avoid vcpu_load() before preemption setup + * KVM: MMU: Implement guest page fault bypass for nonpae + * KVM: Add statistic for remote tlb flushes + * KVM: MMU: Avoid unnecessary remote tlb flushes when guest updates a pte + * KVM: Don't bother the mmu if cr3 load doesn't change cr3 + * KVM: MMU: Code cleanup + * KVM: MMU: Introduce and use gpte_to_gfn() + * KVM: MMU: Move pse36 handling to the guest walker + * KVM: MMU: Remove extra gaddr parameter from set_pte_common() + * KVM: MMU: Remove set_pde() + * KVM: MMU: Adjust page_header_update_slot() to accept a gfn instead of a + gpa + * KVM: MMU: Introduce gfn_to_gpa() + * KVM: MMU: Simplify nonpaging_map() + * KVM: MMU: Remove gva_to_hpa() + * KVM: Remove gpa_to_hpa() + * KVM: MMU: Rename variables of type 'struct kvm_mmu_page *' + * KVM: MMU: Rename 'release_page' + * KVM: Disallow fork() and similar games when using a VM + * KVM: Enhance guest cpuid management + * KVM: VMX: Remove the secondary execute control dependency on irqchip + * KVM: Portability: Move unalias_gfn to arch dependent file + * KVM: x86 emulator: Make a distinction between repeat prefixes F3 and F2 + * KVM: x86 emulator: address size and operand size overrides are sticky + * KVM: Remove misleading check for mmio during event injection + * KVM: MMU: mark pages that were inserted to the shadow pages table as + accessed + * KVM: x86 emulator: rename REP_REPE_PREFIX + * KVM: x86 emulator: Rename 'cr2' to 'memop' + * KVM: x86 emulator: cmps instruction + * KVM: Add ifdef in irqchip struct for x86 only structures + * KVM: Fix cpuid2 killing 32-bit guests on non-NX machines + * KVM: x86 emulator: Move rep processing before instruction execution + * KVM: x86 emulator: unify two switches + * KVM: x86 emulator: unify four switch statements into two + * KVM: Portability: Move KVM_INTERRUPT vcpu ioctl to x86.c + * KVM: Correct kvm_init() error paths not freeing bad_pge. 
+ * KVM: Export include/linux/kvm.h only if $ARCH actually supports KVM + * KVM: SVM: Remove KVM specific defines for MSR_EFER + * KVM: Replace kvm_lapic with kvm_vcpu in ioapic/lapic interface + * KVM: Replace dest_Lowest_Prio and dest_Fixed with self-defined macros + * KVM: Extend ioapic code to support iosapic + * KVM: Portability: Move address types to their own header file + * KVM: Portability: Move IO device definitions to its own header file + * KVM: Portability: Stop including x86-specific headers in kvm_main.c + * KVM: Portability: Create kvm_arch_vcpu_runnable() function + * KVM: Convert KVM from ->nopage() to ->fault() + * KVM: MMU: Remove unused prev_shadow_ent variable from fetch() + * KVM: Generalize exception injection mechanism + * KVM: Replace page fault injection by the generalized exception queue + * KVM: Replace #GP injection by the generalized exception queue + * KVM: Use generalized exception queue for injecting #UD + * KVM: x86 emulator: fix eflags preparation for emulation + * KVM: VMX: Avoid exit when setting cr8 if the local apic is in the + kernel + * KVM: SVM: Emulate read/write access to cr8 + * KVM: x86 emulator: Fix stack instructions on 64-bit mode + * KVM: SVM: Trap access to the cr8 register + * KVM: VMX: Fix cr8 exit optimization + * KVM: MMU: Use cmpxchg for pte updates on walk_addr() + * KVM: MMU: Simplify calculation of pte access + * KVM: MMU: Set nx bit correctly on shadow ptes + * KVM: MMU: Move pte access calculation into a helper function + * KVM: MMU: Fix inherited permissions for emulated guest pte updates + * KVM: MMU: No need to pick up nx bit from guest pte + * KVM: MMU: Pass pte dirty flag to set_pte() instead of calculating it + on-site + * KVM: MMU: Remove walker argument to set_pte() + * KVM: MMU: Move set_pte() into guest paging mode independent code + * KVM: MMU: Adjust mmu_set_spte() debug code for gpte removal + * KVM: MMU: Use mmu_set_spte() for real-mode shadows + * KVM: SVM: Exit to userspace if write to cr8 and not using in-kernel + apic + * KVM: MMU: Fix SMP shadow instantiation race + * KVM: LAPIC: minor debugging compile fix + * KVM: SVM: support writing 0 to K8 performance counter control registers + * KVM: MMU: emulated cmpxchg8b should be atomic on i386 + * KVM: Portability: Move kvm{pic,ioapic} accesors to x86 specific code + * KVM: Portability: Introduce kvm_vcpu_arch + * KVM: Portability: Split mmu-related static inline functions to mmu.h + * KVM: Portability: Move kvm_vcpu definition back to kvm.h + * KVM: Portability: Expand the KVM_VCPU_COMM in kvm_vcpu structure. + * KVM: Portability: Move kvm_vcpu_stat to x86.h + * KVM: Portability: Move memslot aliases to new struct kvm_arch + * KVM: Portability: Move mmu-related fields to kvm_arch + * KVM: Portability: move vpic and vioapic to kvm_arch + * KVM: Portability: Move round_robin_prev_vcpu and tss_addr to kvm_arch + * KVM: Portability: Move kvm_vm_stat to x86.h + * KVM: VMX: Add printk_ratelimit in vmx_intr_assist + * KVM: Move arch dependent files to new directory arch/x86/kvm/ + * KVM: Move drivers/kvm/* to virt/kvm/ + * KVM: Move irqchip declarations into new ioapic.h and lapic.h + * KVM: Move ioapic code to common directory. 
+ * KVM: Move kvm_vcpu_kick() to x86.c + * KVM: Expose ioapic to ia64 save/restore APIs + * KVM: MMU: Coalesce remote tlb flushes + * KVM: MMU: Add cache miss statistic + * KVM: Print data for unimplemented wrmsr + * KVM: Ensure pages are copied on write + * KVM: local APIC TPR access reporting facility + * KVM: Accelerated apic support + * KVM: Disable vapic support on Intel machines with FlexPriority + * KVM: MMU: Concurrent guest walkers + * KVM: Add kvm_read_guest_atomic() + * KVM: MMU: Avoid calling gfn_to_page() in mmu_set_spte() + * KVM: MMU: Switch to mmu spinlock + * KVM: MMU: Move kvm_free_some_pages() into critical section + * KVM: MMU: Broaden scope of mmap_sem to include actual mapping + * KVM: MMU: Fix recursive locking of mmap_sem() + * KVM: Fix unbalanced mmap_sem operations in cmpxchg8b emulation + * KVM: Mark vapic page as dirty for save/restore/migrate + * KVM: x86 emulator: Only allow VMCALL/VMMCALL trapped by #UD + * KVM: MMU: Update shadow ptes on partial guest pte writes + * KVM: MMU: Simplify hash table indexing + * KVM: Portability: Move kvm_fpu to asm-x86/kvm.h + * KVM: MMU: Fix dirty page setting for pages removed from rmap + * KVM: Initialize the mmu caches only after verifying cpu support + * KVM: Fix unbounded preemption latency + * KVM: Put kvm_para.h include outside __KERNEL__ + * KVM: Move apic timer migration away from critical section + * KVM: SVM: Fix lazy FPU switching + * KVM: MMU: Fix gpa truncation when reading a pte + * [GFS2] Handle multiple glock demote requests + * [GFS2] Clean up internal read function + * [GFS2] Use ->page_mkwrite() for mmap() + * [GFS2] Remove useless i_cache from inodes + * [GFS2] Remove unused field in struct gfs2_inode + * [GFS2] Add gfs2_is_writeback() + * [GFS2] Introduce gfs2_set_aops() + * [GFS2] Split gfs2_writepage into three cases + * [GFS2] Add writepages for GFS2 jdata + * [GFS2] Don't hold page lock when starting transaction + * [GFS2] Use correct include file in ops_address.c + * [GFS2] Remove unused variables + * [GFS2] Remove "reclaim limit" + * [GFS2] Add sync_page to metadata address space operations + * [GFS2] Reorder writeback for glock sync + * [GFS2] Remove flags no longer required + * [GFS2] Given device ID rather than s_id in "id" sysfs file + * [GFS2] check kthread_should_stop when waiting + * [GFS2] Don't add glocks to the journal + * [GFS2] Use atomic_t for journal free blocks counter + * [GFS2] Move gfs2_logd into log.c + * [GFS2] Don't periodically update the jindex + * [GFS2] Check for installation of mount helpers for DLM mounts + * [GFS2] tidy up error message + * [GFS2] Fix runtime issue with UP kernels + * [GFS2] remove unnecessary permission checks + * [GFS2] Fix build warnings + * [GFS2] Remove unrequired code + * [GFS2] Remove lock methods for lock_nolock protocol + * [GFS2] patch to check for recursive lock requests in gfs2_rename code + path + * [GFS2] Remove unused variable + * [GFS2] use pid for plock owner for nfs clients + * [GFS2] Remove function gfs2_get_block + * [GFS2] Journal extent mapping + * [GFS2] Get rid of useless "found" variable in quota.c + * [GFS2] Run through full bitmaps quicker in gfs2_bitfit + * [GFS2] Reorganize function gfs2_glmutex_lock + * [GFS2] Only fetch the dinode once in block_map + * [GFS2] Function meta_read optimization + * [GFS2] Incremental patch to fix compiler warning + * [GFS2] Eliminate the no longer needed sd_statfs_mutex + * [GFS2] Minor correction + * [GFS2] Fix log block mapper + * [GFS2] Remove unused variable + * [GFS2] Allow page migration 
for writeback and ordered pages + * [GFS2] Initialize extent_list earlier + * [GFS2] Fix problems relating to execution of files on GFS2 + * [GFS2] Fix assert in log code + * [GFS2] Reduce inode size by moving i_alloc out of line + * [GFS2] Remove unneeded i_spin + * [GFS2] gfs2_alloc_required performance + * [GFS2] Fix write alloc required shortcut calculation + * [GFS2] Fix typo + * [GFS2] Fix page_mkwrite truncation race path + * [GFS2] Lockup on error + * [GFS2] Allow journal recovery on read-only mount + + -- Tim Gardner Sun, 27 Jan 2008 20:37:18 -0700 + +linux (2.6.24-5.9) hardy; urgency=low + + [Amit Kucheria] + + * Fix LPIA FTBFS due to virtio Ignore: yes + + [Upstream Kernel Changes] + + * ACPI: processor: Fix null pointer dereference in throttling + * [SPARC64]: Fix of section mismatch warnings. + * [SPARC64]: Fix section error in sparcspkr + * [SPARC]: Constify function pointer tables. + * [BLUETOOTH]: Move children of connection device to NULL before + connection down. + * [TULIP] DMFE: Fix SROM parsing regression. + * [IPV4]: Add missing skb->truesize increment in ip_append_page(). + * iwlwifi: fix possible read attempt on ucode that is not available + * [NETNS]: Re-export init_net via EXPORT_SYMBOL. + * [INET]: Fix truesize setting in ip_append_data + * sis190: add cmos ram access code for the SiS19x/968 chipset pair + * sis190: remove duplicate INIT_WORK + * sis190: mdio operation failure is not correctly detected + * sis190: scheduling while atomic error + * Update ctime and mtime for memory-mapped files + * [SCSI] initio: fix module hangs on loading + * xen: disable vcpu_info placement for now + * agp/intel: add support for E7221 chipset + * drm/i915: add support for E7221 chipset + * DMI: move dmi_available declaration to linux/dmi.h + * DMI: create dmi_get_slot() + * ACPI: create acpi_dmi_dump() + * ACPI: on OSI(Linux), print needed DMI rather than requesting dmidecode + output + * ACPI: Delete Intel Customer Reference Board (CRB) from OSI(Linux) DMI + list + * ACPI: make _OSI(Linux) console messages smarter + * ACPI: Add ThinkPad R61, ThinkPad T61 to OSI(Linux) white-list + * ACPI: DMI blacklist to reduce console warnings on OSI(Linux) systems. + * ACPI: EC: fix dmesg spam regression + * ACPI: EC: add leading zeros to debug messages + * Pull bugzilla-9747 into release branch + * Pull bugzilla-8459 into release branch + * Pull bugzilla-9798 into release branch + * Pull dmi-2.6.24 into release branch + * [SPARC64]: Partially revert "Constify function pointer tables." + * lockdep: fix kernel crash on module unload + * sysctl: kill binary sysctl KERN_PPC_L2CR + * fix hugepages leak due to pagetable page sharing + * spi: omap2_mcspi PIO RX fix + * Linux 2.6.24 + + -- Tim Gardner Fri, 25 Jan 2008 01:44:27 -0700 + +linux (2.6.24-5.8) hardy; urgency=low + + [Alessio Igor Bogani] + + * rt: Update to 2.6.24-rc8-rt1 + * rt: Update configuration files + + [Amit Kucheria] + + * Asix: fix breakage caused in 2.6.24-rc7 + * Add CONFIG_CPUSETS to server-related flavours + - LP: #182434 + + [Chuck Short] + + * SAUCE: ata: blacklist FUJITSU MHW2160BH PL + - LP: #175834 + + [Kees Cook] + + * AppArmor: updated patch series to upstream SVN 1079. + + [Soren Hansen] + + * Updated configs to enable virtio stuff Ignore: yes + + [Stefan Bader] + + * Enabled CONFIG_BSD_PROCESS_ACCT=y for sparc. + - LP: #176587 + * Enable CONFIG_AUDITSYSCALL=y. + - LP: #140784 + * Added CONFIG_AUDIT_SYSCALL=y to custom lpia(compat) + * Enabled CONFIG_HUGETLBFS=y for i386/server amd64/server and ia64. 
+ * Lower priority of pnpacpi resource messages to warning level. + - LP: #159241 + * Fix the messed up message level of pnpacpi parser. + + [Tim Gardner] + + * Start new release, bump ABI to -5 + * Disabled iwlwifi preperatory to moving it to l-u-m. + * Enabled CONFIG_USB_SERIAL_KEYSPAN + * Disabled CONFIG_CGROUPS. + * Virtio config settings for -rt. + * Re-enable IWLWIFI in the kernel. + * Fixed -rt saa7134-core.c FTBS + + [Upstream Kernel Changes] + + * Input: Handle EV_PWR type of input caps in input_set_capability. + * Input: jornada680_kbd - fix default keymap + * increase PNP_MAX_PORT to 40 from 24 + * sched: fix gcc warnings + * leds: Fix leds_list_lock locking issues + * leds: Fix locomo LED driver oops + * x86: fix asm-x86/byteorder.h for userspace export + * x86: fix asm-x86/msr.h for user-space export + * ACPI: EC: Enable boot EC before bus_scan + * ACPI: Make sysfs interface in ACPI power optional. + * fix lguest rmmod "bad pgd" + * slub: provide /proc/slabinfo + * [POWERPC] Fix build failure on Cell when CONFIG_SPU_FS=y + * slub: register slabinfo to procfs + * [SCSI] scsi_sysfs: restore prep_fn when ULD is removed + * Unify /proc/slabinfo configuration + * scsi: revert "[SCSI] Get rid of scsi_cmnd->done" + * restrict reading from /proc//maps to those who share ->mm or can + ptrace pid + * Fix kernel/ptrace.c compile problem (missing "may_attach()") + * hwmon: (w83627ehf) Be more careful when changing VID input level + * NFS: Fix a possible Oops in fs/nfs/super.c + * NFSv4: Fix circular locking dependency in nfs4_kill_renewd + * NFS: add newline to kernel warning message in auth_gss code + * NFSv4: nfs4_open_confirm must not set the open_owner as confirmed on + error + * NFSv4: Fix open_to_lock_owner sequenceid allocation... + * gameport: don't export functions that are static inline + * Input: spitzkbd - fix suspend key handling + * Input: pass EV_PWR events to event handlers + * [ARM] 4735/1: Unbreak pxa25x suspend/resume + * IB/srp: Fix list corruption/oops on module reload + * Console is utf-8 by default + * [IA64] Update Altix BTE error return status patch + * [IA64] Update Altix nofault code + * [X25]: Add missing x25_neigh_put + * [XFRM]: Do not define km_migrate() if !CONFIG_XFRM_MIGRATE + * [CASSINI]: Fix endianness bug. + * [CASSINI]: Revert 'dont touch page_count'. + * [CASSINI]: Program parent Intel31154 bridge when necessary. + * [CASSINI]: Set skb->truesize properly on receive packets. + * [CASSINI]: Fix two obvious NAPI bugs. + * [CASSINI]: Bump driver version and release date. + * [INET]: Fix netdev renaming and inet address labels + * [CONNECTOR]: Return proper error code in cn_call_callback() + * [ISDN] i4l: 'NO CARRIER' message lost after ldisc flush + * [ISDN]: i4l: Fix DLE handling for i4l-audio + * fix: using joysticks in 32 bit applications on 64 bit systems + * [ARM] 4691/1: add missing i2c_board_info struct for at91rm9200 + * hda_intel suspend latency: shorten codec read + * CPU hotplug: fix cpu_is_offline() on !CONFIG_HOTPLUG_CPU + * Linux 2.6.24-rc7 + * sh: Fix argument page dcache flushing regression. + * V4L/DVB (6944a): Fix Regression VIDIOCGMBUF ioctl hangs on bttv driver + * V4L/DVB (6916): ivtv: udelay has to be changed *after* the eeprom was + read, not before + * [MIPS] Move inclusing of kernel/time/Kconfig menu to appropriate place + * [MIPS] Alchemy: Fix use of __init code bug exposed by modpost warning + * [MIPS] Fix IP32 breakage + * [MIPS] Assume R4000/R4400 newer than 3.0 don't have the mfc0 count bug + * [MIPS] Fix CONFIG_BOOT_RAW. 
+ * ACPI: Reintroduce run time configurable max_cstate for !CPU_IDLE case + * core dump: real_parent ppid + * acct: real_parent ppid + * IB/mlx4: Fix value of pkey_index in QP1 completions + * IB/srp: Release transport before removing host + * x86: fix do_fork_idle section mismatch + * spi_bitbang: always grab lock with irqs blocked + * fat: optimize fat_count_free_clusters() + * KEYS: fix macro + * md: fix data corruption when a degraded raid5 array is reshaped + * xip: fix get_zeroed_page with __GFP_HIGHMEM + * eCryptfs: fix dentry handling on create error, unlink, and inode + destroy + * vmcoreinfo: add the array length of "free_list" for filtering free + pages + * dmi-id: fix for __you_cannot_kmalloc_that_much failure + * snd_mixer_oss_build_input(): fix for __you_cannot_kmalloc_that_much + failure with gcc-3.2 + * Fix crash with FLAT_MEMORY and ARCH_PFN_OFFSET != 0 + * hfs: handle more on-disk corruptions without oopsing + * pl2303: Fix mode switching regression + * futex: Prevent stale futex owner when interrupted/timeout + * [NIU]: Fix slowpath interrupt handling. + * [NIU]: Missing ->last_rx update. + * [NIU]: Fix potentially stuck TCP socket send queues. + * [NIU]: Update driver version and release date. + * [IPV4] raw: Strengthen check on validity of iph->ihl + * [IPV4] ipconfig: Fix regression in ip command line processing + * [NET]: Fix netx-eth.c compilation. + * [METH]: Fix MAC address handling. + * [TULIP]: NAPI full quantum bug. + * [ATM]: [nicstar] delay irq setup until card is configured + * [SCTP]: Fix the name of the authentication event. + * [SCTP]: Correctly handle AUTH parameters in unexpected INIT + * [SCTP]: Add back the code that accounted for FORWARD_TSN parameter in + INIT. + * [IRDA]: irda_create() nuke user triggable printk + * b43: Fix rxheader channel parsing + * [NET]: Do not grab device reference when scheduling a NAPI poll. + * [NET]: Add NAPI_STATE_DISABLE. + * [NET]: Do not check netif_running() and carrier state in ->poll() + * ssb: Fix probing of PCI cores if PCI and PCIE core is available + * mac80211: return an error when SIWRATE doesn't match any rate + * [NETXEN]: Fix ->poll() done logic. + * [NET]: Fix drivers to handle napi_disable() disabling interrupts. + * [NET]: Stop polling when napi_disable() is pending. + * [NET]: Make ->poll() breakout consistent in Intel ethernet drivers. + * [NET] Intel ethernet drivers: update MAINTAINERS + * [NET]: kaweth was forgotten in msec switchover of usb_start_wait_urb + * [IPV4] ROUTE: ip_rt_dump() is unecessary slow + * [NET]: Clone the sk_buff 'iif' field in __skb_clone() + * [LRO] Fix lro_mgr->features checks + * [NET]: mcs7830 passes msecs instead of jiffies to usb_control_msg + * [FORCEDETH]: Fix reversing the MAC address on suspend. + * [XFRM]: xfrm_algo_clone() allocates too much memory + * [SOCK]: Adds a rcu_dereference() in sk_filter + * [CONNECTOR]: Don't touch queue dev after decrement of ref count. + * [IPV6]: IPV6_MULTICAST_IF setting is ignored on link-local connect() + * [ATM]: Check IP header validity in mpc_send_packet + * show_task: real_parent + * [SCSI] qla1280: fix 32 bit segment code + * [NIU]: Support for Marvell PHY + * [NEIGH]: Fix race between neigh_parms_release and neightbl_fill_parms + * [IPV4] ROUTE: fix rcu_dereference() uses in /proc/net/rt_cache + * [AX25]: Kill user triggable printks. 
+ * [ARM] pxa: silence warnings from cpu_is_xxx() macros + * [POWERPC] efika: add phy-handle property for fec_mpc52xx + * [ARM] vfp: fix fuitod/fsitod instructions + * [CRYPTO] padlock: Fix alignment fault in aes_crypt_copy + * rt2x00: Allow rt61 to catch up after a missing tx report + * rt2x00: Corectly initialize rt2500usb MAC + * rt2x00: Put 802.11 data on 4 byte boundary + * NFSv4: Give the lock stateid its own sequence queue + * sata_qstor: use hardreset instead of softreset + * libata-sff: PCI IRQ handling fix + * pata_pdc202xx_old: Further fixups + * pata_ixp4xx_cf: fix compilation introduced by ata_port_desc() + conversion + * libata-pmp: 4726 hates SRST + * libata-pmp: propagate timeout to host link + * libata: don't normalize UNKNOWN to NONE after reset + * Update kernel parameter document for libata DMA mode setting knobs. + * sata_sil24: prevent hba lockup when pass-through ATA commands are used + * ide: workaround suspend bug for ACPI IDE + * ide: fix cable detection for SATA bridges + * trm290: do hook dma_host_{on,off} methods (take 2) + * libata and starting/stopping ATAPI floppy devices + * ACPI : Not register gsi for PCI IDE controller in legacy mode + * ACPICA: fix acpi_serialize hang regression + * sh: Force __access_ok() to obey address space limit. + * [AX25] af_ax25: Possible circular locking. + * ACPI: apply quirk_ich6_lpc_acpi to more ICH8 and ICH9 + * [POWERPC] Fix CPU hotplug when using the SLB shadow buffer + * [BLUETOOTH]: rfcomm tty BUG_ON() code fix + * [BLUETOOTH]: Always send explicit hci_ll wake-up acks. + * [DECNET] ROUTE: fix rcu_dereference() uses in /proc/net/decnet_cache + * [VLAN]: nested VLAN: fix lockdep's recursive locking warning + * [MACVLAN]: Prevent nesting macvlan devices + * [NETFILTER]: ip6t_eui64: Fixes calculation of Universal/Local bit + * [NETFILTER]: xt_helper: Do not bypass RCU + * [XFS] fix unaligned access in readdir + * Don't blatt first element of prv in sg_chain() + * loop: fix bad bio_alloc() nr_iovec request + * block: fix blktrace timestamps + * blktrace: kill the unneeded initcall + * V4L/DVB (6999): ivtv: stick to udelay=10 after all + * V4L/DVB (7001): av7110: fix section mismatch + * [MIPS] Wrong CONFIG option prevents setup of DMA zone. + * [MIPS] pnx8xxx: move to clocksource + * [MIPS] Malta: Fix software reset on big endian + * [MIPS] Lasat: Fix built in separate object directory. 
+ * [MIPS] Replace 40c7869b693b18412491fdcff64682215b739f9e kludge + * Pull bugzilla-5637 into release branch + * Pull bugzilla-8171 into release branch + * Pull bugzilla-8973 into release branch + * PM: ACPI and APM must not be enabled at the same time + * Pull bugzilla-9194 into release branch + * Pull bugzilla-9494 into release branch + * Pull bugzilla-9535 into release branch + * Pull bugzilla-9627 into release branch + * Pull bugzilla-9683 into release branch + * IDE: terminate ACPI DMI list + * cache invalidation error for buffered write + * ps3fb: prevent use after free of fb_info + * ps3fb: fix deadlock on kexec() + * [NETFILTER]: bridge: fix double POST_ROUTING invocation + * xircom_cb endianness fixes + * de4x5 fixes + * endianness noise in tulip_core + * netxen: update MAINTAINERS + * netxen: update driver version + * netxen: stop second phy correctly + * netxen: optimize tx handling + * netxen: fix byte-swapping in tx and rx + * 3c509: PnP resource management fix + * Fixed a small typo in the loopback driver + * ip1000: menu location change + * r8169: fix missing loop variable increment + * [usb netdev] asix: fix regression + * fs_enet: check for phydev existence in the ethtool handlers + * Use access mode instead of open flags to determine needed permissions + * sky2: large memory workaround. + * sky2: remove check for PCI wakeup setting from BIOS + * spidernet MAINTAINERship update + * pnpacpi: print resource shortage message only once + * Pull bugzilla-9535 into release branch + * [SPARC]: Make gettimeofday() monotonic again. + * [SPARC64]: Fix build with SPARSEMEM_VMEMMAP disabled. + * remove task_ppid_nr_ns + * knfsd: Allow NFSv2/3 WRITE calls to succeed when krb5i etc is used. + * Input: improve Kconfig help entries for HP Jornada devices + * [TOKENRING]: rif_timer not initialized properly + * modules: de-mutex more symbol lookup paths in the module code + * w1: decrement slave counter only in ->release() callback + * Kick CPUS that might be sleeping in cpus_idle_wait + * TPM: fix suspend and resume failure + * MAINTAINERS: email update and add missing entry + * quicklists: Only consider memory that can be used with GFP_KERNEL + * macintosh: fix fabrication of caplock key events + * scsi/qla2xxx/qla_os.c section fix + * cciss: section mismatch + * advansys: fix section mismatch warning + * hugetlbfs: fix quota leak + * s3c2410fb: fix incorrect argument type in resume function + * CRIS: define __ARCH_WANT_SYS_RT_SIGSUSPEND in unistd.h for CRIS + * CRIS v10: correct do_signal to fix oops and clean up signal handling in + general + * CRIS v10: kernel/time.c needs to include linux/vmstat.h to compile + * uvesafb: fix section mismatch warnings + * CRIS v10: driver for ds1302 needs to include cris-specific i2c.h + * OSS msnd: fix array overflows + * i2c-omap: Fix NULL pointer dereferencing + * i2c: Spelling fixes + * i2c: Driver IDs are optional + * i2c-sibyte: Fix an error path + * fix the "remove task_ppid_nr_ns" commit + * [MIPS] Kconfig fixes for BCM47XX platform + * [MIPS] Cobalt: Fix ethernet interrupts for RaQ1 + * [MIPS] Cobalt: Qube1 has no serial port so don't use it + * [MIPS] Cacheops.h: Fix typo. 
+ * ata_piix: ignore ATA_DMA_ERR on vmware ich4 + * sata_sil24: fix stupid typo + * sata_sil24: freeze on non-dev errors reported via CERR + * libata: relocate sdev->manage_start_stop configuration + * [POWERPC] Fix boot failure on POWER6 + * x86: fix boot crash on HIGHMEM4G && SPARSEMEM + * x86: asm-x86/msr.h: pull in linux/types.h + * x86: fix RTC_AIE with CONFIG_HPET_EMULATE_RTC + * Fix ARM profiling/instrumentation configuration + * Fix Blackfin HARDWARE_PM support + * libata fixes for sparse-found problems + * [libata] pata_bf54x: checkpatch fixes + * [libata] core checkpatch fix + * libata: correct handling of TSS DVD + * [IA64] Fix unaligned handler for floating point instructions with base + update + * Linux 2.6.24-rc8 + * lockdep: fix internal double unlock during self-test + * lockdep: fix workqueue creation API lockdep interaction + * lockdep: more hardirq annotations for notify_die() + * hostap: section mismatch warning + * wireless/libertas support for 88w8385 sdio older revision + * ipw2200: fix typo in kerneldoc + * b43: fix use-after-free rfkill bug + * rt2x00: Fix ieee80211 payload alignment + * sysfs: make sysfs_lookup() return ERR_PTR(-ENOENT) on failed lookup + * sysfs: fix bugs in sysfs_rename/move_dir() + * Use access mode instead of open flags to determine needed permissions + (CVE-2008-0001) + * IB/ipath: Fix receiving UD messages with immediate data + * [NET]: Fix TX timeout regression in Intel drivers. + * [NIU]: Fix 1G PHY link state handling. + * [SPARC64]: Fix hypervisor TLB operation error reporting. + * Input: mousedev - handle mice that use absolute coordinates + * Input: usbtouchscreen - fix buffer overflow, make more egalax work + * Input: psmouse - fix potential memory leak in psmouse_connect() + * Input: psmouse - fix input_dev leak in lifebook driver + * Input: ALPS - fix sync loss on Acer Aspire 5720ZG + * ipg: balance locking in irq handler + * ipg: plug Tx completion leak + * ipg: fix queue stop condition in the xmit handler + * ipg: fix Tx completion irq request + * cpufreq: Initialise default governor before use + * hfs: fix coverity-found null deref + * pnpacpi: print resource shortage message only once (more) + * CRIS v10: vmlinux.lds.S: ix kernel oops on boot and use common defines + * mm: fix section mismatch warning in page_alloc.c + * jbd: do not try lock_acquire after handle made invalid + * alpha: fix conversion from denormal float to double + * #ifdef very expensive debug check in page fault path + * Fix unbalanced helper_lock in kernel/kmod.c + * fix wrong sized spinlock flags argument + * bonding: fix locking in sysfs primary/active selection + * bonding: fix ASSERT_RTNL that produces spurious warnings + * bonding: fix locking during alb failover and slave removal + * bonding: release slaves when master removed via sysfs + * bonding: Fix up parameter parsing + * bonding: fix lock ordering for rtnl and bonding_rwsem + * bonding: Don't hold lock when calling rtnl_unlock + * Documentation: add a guideline for hard_start_xmit method + * atl1: fix frame length bug + * S2io: Fixed synchronization between scheduling of napi with card reset + and close + * dscc4 endian fixes + * wan/lmc bitfields fixes + * sbni endian fixes + * 3c574, 3c515 bitfields abuse + * dl2k: BMCR_t fixes + * dl2k: ANAR, ANLPAR fixes + * dl2k: BMSR fixes + * dl2k: MSCR, MSSR, ESR, PHY_SCR fixes + * dl2k: the rest + * Replace cpmac fix + * [WATCHDOG] Revert "Stop looking for device as soon as one is found" + * [WATCHDOG] clarify watchdog operation in documentation + * x86: 
add support for the latest Intel processors to Oprofile + * Selecting LGUEST should turn on Guest support, as in 2.6.23. + * ARM: OMAP1: Keymap fix for f-sample and p2-sample + * ARM: OMAP1: Fix compile for board-nokia770 + * pata_pdc202xx_old: Fix crashes with ATAPI + * arch: Ignore arch/i386 and arch/x86_64 + * Remove bogus duplicate CONFIG_LGUEST_GUEST entry. + * [ARM] pxa: don't rely on r2 being preserved over a function call + * [ARM] 4748/1: dca: source drivers/dca/Kconfig in arch/arm/Kconfig to + fix warning + * rfkill: call rfkill_led_trigger_unregister() on error + * [IPV6]: Mischecked tw match in __inet6_check_established. + * [IPV4] fib_hash: fix duplicated route issue + * [IPV4] fib_trie: fix duplicated route issue + * [NET]: Fix interrupt semaphore corruption in Intel drivers. + * [IPV4] FIB_HASH : Avoid unecessary loop in fn_hash_dump_zone() + * [IPV6] ROUTE: Make sending algorithm more friendly with RFC 4861. + * [NETFILTER]: bridge-netfilter: fix net_device refcnt leaks + * [NEIGH]: Revert 'Fix race between neigh_parms_release and + neightbl_fill_parms' + * [IrDA]: af_irda memory leak fixes + * [ATM] atm/idt77105.c: Fix section mismatch. + * [ATM] atm/suni.c: Fix section mismatch. + * [AF_KEY]: Fix skb leak on pfkey_send_migrate() error + * [NET]: rtnl_link: fix use-after-free + * [IPV6]: ICMP6_MIB_OUTMSGS increment duplicated + * [IPV6]: RFC 2011 compatibility broken + * [ICMP]: ICMP_MIB_OUTMSGS increment duplicated + * selinux: fix memory leak in netlabel code + * [MIPS] SMTC: Fix build error. + * [MIPS] Malta: Fix reading the PCI clock frequency on big-endian + * tc35815: Use irq number for tc35815-mac platform device id + * keyspan: fix oops + * hrtimer: fix section mismatch + * timer: fix section mismatch + * CRIS: add missed local_irq_restore call + * s3c2410_fb: fix line length calculation + * Fix filesystem capability support + * sched: group scheduler, set uid share fix + * hwmon: (it87) request only Environment Controller ports + * W1: w1_therm.c ds18b20 decode freezing temperatures correctly + * W1: w1_therm.c is flagging 0C etc as invalid + * rcu: fix section mismatch + * Fix file references in documentation and Kconfig + * x86: GEODE fix a race condition in the MFGPT timer tick + * virtnet: remove double ether_setup + * virtio:simplify-config-mechanism + * virtio: An entropy device, as suggested by hpa. + * virtio: Export vring functions for modules to use + * virtio: Put the virtio under the virtualization menu + * virtio:pci-device + * Fix vring_init/vring_size to take unsigned long + * virtio:vring-kick-when-empty + * virtio:explicit-callback-disable + * virtio:net-flush-queue-on-init + * virtio:net-fix-xmit-skb-free-real + * Parametrize the napi_weight for virtio receive queue. + * Handle module unload Add the device release function. + * Update all status fields on driver unload + * Make virtio modules GPL + * Make virtio_pci license be GPL2+ + * Use Qumranet donated PCI vendor/device IDs + * virtio:more-interrupt-suppression + * Reboot Implemented + * lguest:reboot-fix + * introduce vcpu struct + * adapt lguest launcher to per-cpuness + * initialize vcpu + * per-cpu run guest + * make write() operation smp aware + * make hypercalls use the vcpu struct + * per-vcpu lguest timers + * per-vcpu interrupt processing. + * map_switcher_in_guest() per-vcpu + * make emulate_insn receive a vcpu struct. + * make registers per-vcpu + * replace lguest_arch with lg_cpu_arch. 
+ * per-vcpu lguest task management + * makes special fields be per-vcpu + * make pending notifications per-vcpu + * per-vcpu lguest pgdir management + + -- Tim Gardner Thu, 17 Jan 2008 14:45:01 -0700 + +linux (2.6.24-4.7) hardy; urgency=low + + [Amit Kucheria] + + * Poulsbo: Add SD8686 and 8688 WLAN drivers + * Poulsbo: Mass update of patches to be identical to those on moblin + * SAUCE: make fc transport removal of target configurable OriginalAuthor: + Michael Reed sgi.com> OriginalLocation: + http://thread.gmane.org/gmane.linux.scsi/25318 Bug: 163075 + + [Fabio M. Di Nitto] + + * Fix handling of gcc-4.1 for powerpc and ia64 + + [Tim Gardner] + + * Re-engineered architecture specific linux-headers compiler version + dependencies. + * Doh! Changed header-depends to header_depends. + + -- Tim Gardner Fri, 11 Jan 2008 07:10:46 -0700 + +linux (2.6.24-4.6) hardy; urgency=low + + [Alessio Igor Bogani] + + * Fix -rt build FTBS. + + [Amit Kucheria] + + * LPIACOMPAT: Update thermal patches to be inline with lpia flavour + * Poulsbo: Add USB Controller patch and corresponding config change + + [Fabio M. Di Nitto] + + * Enable aoe and nbd modules on hppa Ignore: yes + * Fix ia64 build by using gcc-4.1 + + [Tim Gardner] + + * Enable JFFS2 LZO compression. + - LP: #178343 + * Remove IS_G33 special handling. + - LP: #174367 + * Enabled CONFIG_SECURITY_CAPABILITIES and + CONFIG_SECURITY_FILE_CAPABILITIES + - LP: #95089 + * Enabled CONFIG_TASKSTATS and CONFIG_TASK_IO_ACCOUNTING + * Turned CONFIG_SECURITY_FILE_CAPABILITIES back off. + * Enabled CONFIG_B43LEGACY=m + * Enabled CONFIG_SCSI_QLOGIC_1280=m + * Enabled CONFIG_FUSION=y for virtual + * USB bluetooth device 0x0e5e:0x6622 floods errors to syslog + - LP: #152689 + * Removed lpia from d-i. + * Added ia64 modules. + * Added hppa32/64 modules. + + [Upstream Kernel Changes] + + * DMI autoload dcdbas on all Dell systems. + * sched: fix gcc warnings + * leds: Fix leds_list_lock locking issues + * leds: Fix locomo LED driver oops + * x86: fix asm-x86/byteorder.h for userspace export + * x86: fix asm-x86/msr.h for user-space export + * fix lguest rmmod "bad pgd" + * slub: provide /proc/slabinfo + * [POWERPC] Fix build failure on Cell when CONFIG_SPU_FS=y + * slub: register slabinfo to procfs + * [SCSI] scsi_sysfs: restore prep_fn when ULD is removed + * Unify /proc/slabinfo configuration + * scsi: revert "[SCSI] Get rid of scsi_cmnd->done" + * restrict reading from /proc//maps to those who share ->mm or can + ptrace pid + * Fix kernel/ptrace.c compile problem (missing "may_attach()") + * hwmon: (w83627ehf) Be more careful when changing VID input level + * NFS: Fix a possible Oops in fs/nfs/super.c + * NFSv4: Fix circular locking dependency in nfs4_kill_renewd + * NFS: add newline to kernel warning message in auth_gss code + * NFSv4: nfs4_open_confirm must not set the open_owner as confirmed on + error + * NFSv4: Fix open_to_lock_owner sequenceid allocation... + * IB/srp: Fix list corruption/oops on module reload + * Console is utf-8 by default + * [IA64] Update Altix BTE error return status patch + * [IA64] Update Altix nofault code + * [X25]: Add missing x25_neigh_put + * [XFRM]: Do not define km_migrate() if !CONFIG_XFRM_MIGRATE + * [CASSINI]: Fix endianness bug. + * [CASSINI]: Revert 'dont touch page_count'. + * [CASSINI]: Program parent Intel31154 bridge when necessary. + * [CASSINI]: Set skb->truesize properly on receive packets. + * [CASSINI]: Fix two obvious NAPI bugs. + * [CASSINI]: Bump driver version and release date. 
+ * [INET]: Fix netdev renaming and inet address labels + * [CONNECTOR]: Return proper error code in cn_call_callback() + * [ISDN] i4l: 'NO CARRIER' message lost after ldisc flush + * [ISDN]: i4l: Fix DLE handling for i4l-audio + * fix: using joysticks in 32 bit applications on 64 bit systems + * hda_intel suspend latency: shorten codec read + * CPU hotplug: fix cpu_is_offline() on !CONFIG_HOTPLUG_CPU + * Linux 2.6.24-rc7 + * PIE executable randomization (upstream cherry pick by kees) + + -- Tim Gardner Fri, 04 Jan 2008 07:15:47 -0700 + +linux (2.6.24-3.5) hardy; urgency=low + + [Alessio Igor Bogani] + + * rt: Fix rt preempt patchset version + * Updated README file for binary custom flavours + * Fix -rt build FTBS. + * rt: Update configuration files + + [Tim Gardner] + + * SAUCE: Add extra headers to linux-libc-dev + + [Upstream Kernel Changes] + + * [WATCHDOG] at32ap700x_wdt: add support for boot status and add fix for + silicon errata + * [WATCHDOG] Stop looking for device as soon as one is found + * [WATCHDOG] bfin_wdt, remove SPIN_LOCK_UNLOCKED + * [WATCHDOG] Sbus: cpwatchdog, remove SPIN_LOCK_UNLOCKED + * [WATCHDOG] IT8212F watchdog driver + * ACPI: acpiphp: Remove dmesg spam on device remove + * [WATCHDOG] ipmi: add the standard watchdog timeout ioctls + * [WATCHDOG] add Nano 7240 driver + * ACPI: battery: fix ACPI battery technology reporting + * [ARM] 4667/1: CM-X270 fixes + * [ARM] 4690/1: PXA: fix CKEN corruption in PXA27x AC97 cold reset code + * [IPV6] XFRM: Fix auditing rt6i_flags; use RTF_xxx flags instead of + RTCF_xxx. + * [IPV4]: Swap the ifa allocation with the"ipv4_devconf_setall" call + * [IPv4] ESP: Discard dummy packets introduced in rfc4303 + * [IPv6] ESP: Discard dummy packets introduced in rfc4303 + * [UM]: Fix use of skb after netif_rx + * [XTENSA]: Fix use of skb after netif_rx + * [S390]: Fix use of skb after netif_rx + * [BNX2]: Add PHY_DIS_EARLY_DAC workaround. + * [BNX2]: Fix RX packet rot. + * [BNX2]: Update version to 1.6.9. + * [NET]: Fix wrong comments for unregister_net* + * [VLAN]: Fix potential race in vlan_cleanup_module vs + vlan_ioctl_handler. + * [IPSEC]: Fix potential dst leak in xfrm_lookup + * V4L/DVB (6485): ivtv: fix compile warning + * V4L/DVB (6540): em28xx: fix failing autodetection after the reboot + * V4L/DVB (6542): Fix S-video mode on tvp5150 + * V4L/DVB (6579): Fix bug #8824: Correct support for Diseqc on tda10086 + * V4L/DVB (6581): Fix: avoids negative vma usage count + * V4L/DVB (6601): V4L: videobuf-core locking fixes and comments + * V4L/DVB (6602): V4L: Convert videobuf drivers to videobuf_stop + * V4L/DVB (6615): V4L: Fix VIDIOCGMBUF locking in saa7146 + * V4L/DVB (6629): zl10353: fix default adc_clock and TRL nominal rate + calculation + * V4L/DVB (6666): saa7134-alsa: fix period handling + * V4L/DVB (6684): Complement va_start() with va_end() + style fixes + * V4L/DVB (6686): saa7134: fix composite over s-video input on the Tevion + MD 9717 + * V4L/DVB (6690): saa7134: fix ignored interrupts + * V4L/DVB (6751): V4L: Memory leak! Fix count in videobuf-vmalloc mmap + * V4L/DVB (6746): saa7134-dvb: fix tuning for WinTV HVR-1110 + * V4L/DVB (6750): Fix in-kernel compilation for cxusb + * V4L/DVB (6733): DVB: Compile 3000MC-specific DIB code only for + CONFIG_DVB_DIB3000MC + * V4L/DVB (6794): Fix compilation when dib3000mc is compiled as a module + * NFS: Fix NFS mountpoint crossing... 
+ * V4L/DVB (6796): ivtv/ section fix + * V4L/DVB (6797): bt8xx/ section fixes + * NFSv2/v3: Fix a memory leak when using -onolock + * V4L/DVB (6609): Re-adds lock safe videobuf_read_start + * i2c: Delete an outdated piece of documentation + * i2c-gpio: Initialize adapter class + * i2c: Add missing spaces in split log messages + * i2c/isp1301_omap: Build fix + * [SERIAL] sparc: Infrastructure to fix section mismatch bugs. + * NFS: Fix an Oops in NFS unmount + * sdhci: describe quirks + * sdhci: don't warn about sdhci 2.0 controllers + * sdhci: use PIO when DMA can't satisfy the request + * sdhci: support JMicron JMB38x chips + * mmc: remove unused 'mode' from the mmc_host structure + * IB/ehca: Return correct number of SGEs for SRQ + * IB/ehca: Serialize HCA-related hCalls if necessary + * ide-scsi: add ide_scsi_hex_dump() helper + * ide: add missing checks for control register existence + * ide: deprecate CONFIG_BLK_DEV_OFFBOARD + * ide: fix ide_scan_pcibus() error message + * ide: coding style fixes for drivers/ide/setup-pci.c + * ide: add /sys/bus/ide/devices/*/{model,firmware,serial} sysfs entries + * ide: DMA reporting and validity checking fixes (take 3) + * ide-cd: remove dead post_transform_command() + * pdc202xx_new: fix Promise TX4 support + * hpt366: fix HPT37x PIO mode timings (take 2) + * ide: remove dead code from __ide_dma_test_irq() + * ide: remove stale changelog from ide-disk.c + * ide: remove stale changelog from ide-probe.c + * ide: fix ->io_32bit race in set_io_32bit() + * MAINTAINERS: update the NFS CLIENT entry + * V4L/DVB (6803): buf-core.c locking fixes + * [SPARC64]: Fix two kernel linear mapping setup bugs. + * IB/ehca: Fix lock flag variable location, bump version number + * kbuild: re-enable Makefile generation in a new O=... directory + * V4L/DVB (6798): saa7134: enable LNA in analog mode for Hauppauge WinTV + HVR-1110 + * V4L/DVB (6814): Makefile: always enter video/ + * V4L/DVB (6819): i2c: fix drivers/media/video/bt866.c + * V4L/DVB (6820): s5h1409: QAM SNR related fixes + * ACPI: video_device_list corruption + * ACPI: fix modpost warnings + * ACPI: thinkpad-acpi: fix lenovo keymap for brightness + * Pull thinkpad-2.6.24 into release branch + * Pull battery-2.6.24 into release branch + * [POWERPC] Fix typo #ifdef -> #ifndef + * [POWERPC] Kill non-existent symbols from ksyms and commproc.h + * [POWRPC] CPM2: Eliminate section mismatch warning in cpm2_reset(). + * [POWERPC] 82xx: mpc8272ads, pq2fads: Update defconfig with + CONFIG_FS_ENET_MDIO_FCC + * [POWERPC] iSeries: don't printk with HV spinlock held + * [POWERPC] Fix rounding bug in emulation for double float operating + * [POWERPC] Make PS3_SYS_MANAGER default y, not m + * [MIPS] time: Set up Cobalt's mips_hpt_frequency + * [MIPS] Alchemy: fix PCI resource conflict + * [MIPS] Alchemy: fix off by two error in __fixup_bigphys_addr() + * [MIPS] Atlas, Malta: Don't free firmware memory on free_initmem. + * [MIPS] PCI: Make pcibios_fixup_device_resources ignore legacy + resources. + * [MIPS] time: Delete weak definition of plat_time_init() due to gcc bug. + * [MIPS] Ensure that ST0_FR is never set on a 32 bit kernel + * [SPARC32]: Silence sparc32 warnings on missing syscalls. 
+ * Pull hotplug into release branch + * ACPI: SBS: Reset alarm bit + * ACPI: SBS: Ignore alarms coming from unknown devices + * ACPI: SBS: Return rate in mW if capacity in mWh + * Pull bugzilla-9362 into release branch + * sky2: RX lockup fix + * sundance fixes + * starfire VLAN fix + * e100: free IRQ to remove warningwhenrebooting + * hamachi endianness fixes + * drivers/net/sis190.c section fix + * drivers/net/s2io.c section fixes + * ucc_geth: minor whitespace fix + * net: smc911x: shut up compiler warnings + * Net: ibm_newemac, remove SPIN_LOCK_UNLOCKED + * ixgb: make sure jumbos stay enabled after reset + * [NETFILTER]: ctnetlink: set expected bit for related conntracks + * [NETFILTER]: ip_tables: fix compat copy race + * [XFRM]: Display the audited SPI value in host byte order. + * [NETFILTER]: xt_hashlimit should use time_after_eq() + * [TIPC]: Fix semaphore handling. + * [SYNCPPP]: Endianness and 64bit fixes. + * [NETFILTER]: bridge: fix missing link layer headers on outgoing routed + packets + * [ATM]: Fix compiler warning noise with FORE200E driver + * [IPV4]: Updates to nfsroot documentation + * [BRIDGE]: Assign random address. + * [IPV6]: Fix the return value of ipv6_getsockopt + * [IPV4]: Make tcp_input_metrics() get minimum RTO via tcp_rto_min() + * [AX25]: Locking dependencies fix in ax25_disconnect(). + * [SCTP]: Flush fragment queue when exiting partial delivery. + * [IRDA]: Race between open and disconnect in irda-usb. + * [IRDA]: mcs7780 needs to free allocated rx buffer. + * [IRDA]: irlmp_unregister_link() needs to free lsaps. + * [IRDA]: stir4200 fixes. + * [IRDA]: irda parameters warning fixes. + * [S390] pud_present/pmd_present bug. + * [ARM] 4710/1: Fix coprocessor 14 usage for debug messages via ICEDCC + * [ARM] 4694/1: IXP4xx: Update clockevent support for shutdown and resume + * kobject: fix the documentation of how kobject_set_name works + * tipar: remove obsolete module + * HOWTO: Change man-page maintainer address for Japanese HOWTO + * Add Documentation for FAIR_USER_SCHED sysfs files + * HOWTO: change addresses of maintainer and lxr url for Korean HOWTO + * add stable_api_nonsense.txt in korean + * HOWTO: update misspelling and word incorrected + * PCI: Restore PCI expansion ROM P2P prefetch window creation + * USB: sierra: fix product id + * usb-storage: Fix devices that cannot handle 32k transfers + * USB: cp2101: new device id + * USB: option: Bind to the correct interface of the Huawei E220 + * usb.h: fix kernel-doc warning + * USB: fix locking loop by avoiding flush_scheduled_work + * USB: use IRQF_DISABLED for HCD interrupt handlers + * USB: at91_udc: correct hanging while disconnecting usb cable + * usb: Remove broken optimisation in OHCI IRQ handler + * USB: revert portions of "UNUSUAL_DEV: Sync up some reported devices + from Ubuntu" + * ocfs2: fix exit-while-locked bug in ocfs2_queue_orphans() + * ocfs2: Don't panic when truncating an empty extent + * ocfs2: Allow for debugging of transaction extends + * ocfs2: Re-journal buffers after transaction extend + * pcnet_cs: add new id + * ucc_geth: really fix section mismatch + * sis190 endianness + * libertas: add Dan Williams as maintainer + * zd1211rw: Fix alignment problems + * wireless/ipw2200.c: add __dev{init,exit} annotations + * ieee80211_rate: missed unlock + * iwlwifi3945/4965: fix rate control algo reference leak + * libertas: select WIRELESS_EXT + * bcm43xx_debugfs sscanf fix + * b43: Fix rfkill radio LED + * iwlwifi: fix rf_kill state inconsistent during suspend and resume + * sata_sil: 
fix spurious IRQ handling + * libata: clear link->eh_info.serror from ata_std_postreset() + * libata: add ST3160023AS / 3.42 to NCQ blacklist + * sata_mv: improve warnings about Highpoint RocketRAID 23xx cards + * libata-acpi: adjust constness in ata_acpi_gtm/stm() parameters + * libata: update ata_*_printk() macros such that level can be a variable + * libata: add more opcodes to ata.h + * libata: ata_dev_disable() should be called from EH context + * libata-acpi: add new hooks ata_acpi_dissociate() and + ata_acpi_on_disable() + * libata-acpi: implement and use ata_acpi_init_gtm() + * libata-acpi: implement dev->gtf_cache and evaluate _GTF right after + _STM during resume + * libata-acpi: improve ACPI disabling + * libata-acpi: improve _GTF execution error handling and reporting + * libata-acpi: implement _GTF command filtering + * libata: update atapi_eh_request_sense() such that lbam/lbah contains + buffer size + * libata: fix ATAPI draining + * fix headers_install + * revert "Hibernation: Use temporary page tables for kernel text mapping + on x86_64" + * uml: stop gdb from deleting breakpoints when running UML + * alpha: strncpy/strncat fixes + * rtc-at32ap700x: fix irq init oops + * parport: "dev->timeslice" is an unsigned long, not an int + * ecryptfs: initialize new auth_tokens before teardown + * Fix lguest documentation + * sparsemem: make SPARSEMEM_VMEMMAP selectable + * fs/Kconfig: grammar fix + * ext3, ext4: avoid divide by zero + * alpha: build fixes + * cpufreq: fix missing unlocks in cpufreq_add_dev error paths. + * mm/sparse.c: check the return value of sparse_index_alloc() + * mm/sparse.c: improve the error handling for sparse_add_one_section() + * pktcdvd: add kobject_put when kobject register fails + * drivers/macintosh/via-pmu.c: Added a missing iounmap + * drivers/cpufreq/cpufreq_stats.c section fix + * apm_event{,info}_t are userspace types + * mm: fix page allocation for larger I/O segments + * ecryptfs: set s_blocksize from lower fs in sb + * I/OAT: fixups from code comments + * I/OAT: fix null device in call to dev_err() + * fix bloat-o-meter for ppc64 + * ecryptfs: fix fsx data corruption problems + * Documentation: update hugetlb information + * Fix compilation warning in dquot.c + * SLUB: remove useless masking of GFP_ZERO + * quicklist: Set tlb->need_flush if pages are remaining in quicklist 0 + * sysctl: fix ax25 checks + * [XFS] Don't wait for pending I/Os when purging blocks beyond eof. + * [XFS] Put the correct offset in dirent d_off + * block: use jiffies conversion functions in scsi_ioctl.c + * as-iosched: fix incorrect comments + * as-iosched: fix write batch start point + * block: let elv_register() return void + * Cleanup umem driver: fix most checkpatch warnings, conform to kernel + * sched: fix crash on ia64, introduce task_current() + * sched: mark rwsem functions as __sched for wchan/profiling + * sched: sysctl, proc_dointvec_minmax() expects int values for + * sched: touch softlockup watchdog after idling + * sched: do not hurt SCHED_BATCH on wakeup + * oprofile: op_model_athlon.c support for AMD family 10h barcelona + performance counters + * clockevents: fix reprogramming decision in oneshot broadcast + * genirq: add unlocked version of set_irq_handler() + * timer: kernel/timer.c section fixes + * x86: jprobe bugfix + * x86: kprobes bugfix + * x86: also define AT_VECTOR_SIZE_ARCH + * genirq: revert lazy irq disable for simple irqs + * x86: fix "Kernel panic - not syncing: IO-APIC + timer doesn't work!" 
+ * [SCSI] sym53c8xx: fix free_irq() regression + * [SCSI] dpt_i2o: driver is only 32 bit so don't set 64 bit DMA mask + * [SCSI] sym53c8xx: fix "irq X: nobody cared" regression + * [SCSI] initio: fix conflict when loading driver + * [SCSI] st: fix kernel BUG at include/linux/scatterlist.h:59! + * [SCSI] initio: bugfix for accessors patch + * IA64: Slim down __clear_bit_unlock + * [IA64] signal: remove redundant code in setup_sigcontext() + * [IA64] ia32 nopage + * [IA64] Avoid unnecessary TLB flushes when allocating memory + * [IA64] Two trivial spelling fixes + * [IA64] print kernel release in OOPS to make kerneloops.org happy + * [IA64] set_thread_area fails in IA32 chroot + * [IA64] Remove compiler warinings about uninitialized variable in + irq_ia64.c + * [IA64] Remove assembler warnings on head.S + * [IA64] Fix Altix BTE error return status + * [IA64] Guard elfcorehdr_addr with #if CONFIG_PROC_FS + * [IA64] make flush_tlb_kernel_range() an inline function + * [IA64] Adjust CMCI mask on CPU hotplug + * Do dirty page accounting when removing a page from the page cache + * x86 apic_32.c section fix + * x86 smpboot_32.c section fixes + * x86_32: select_idle_routine() must be __cpuinit + * x86_32: disable_pse must be __cpuinitdata + * x86: fix show cpuinfo cpu number always zero + * ps3fb: Update for firmware 2.10 + * ps3fb: Fix ps3fb free_irq() dev_id + * pata_hpt37x: Fix HPT374 detection + * mac80211: Drop out of associated state if link is lost + * mac80211: fix header ops + * NET: mac80211: fix inappropriate memory freeing + * [TG3]: Endianness annotations. + * [TG3]: Endianness bugfix. + * rtl8187: Add USB ID for Sitecom WL-168 v1 001 + * p54: add Kconfig description + * iwlwifi: fix possible priv->mutex deadlock during suspend + * ipw2200: prevent alloc of unspecified size on stack + * [IPV4] ARP: Remove not used code + * [IPSEC]: Avoid undefined shift operation when testing algorithm ID + * [XFRM]: Audit function arguments misordered + * [IPV4] ip_gre: set mac_header correctly in receive path + * [NET]: Correct two mistaken skb_reset_mac_header() conversions. 
+ * [SPARC64]: Fix OOPS in dma_sync_*_for_device() + * sched: rt: account the cpu time during the tick + * debug: add end-of-oops marker + * mm: fix exit_mmap BUG() on a.out binary exit + * dm: table detect io beyond device + * dm mpath: hp requires scsi + * dm crypt: fix write endio + * dm: trigger change uevent on rename + * dm: merge max_hw_sector + * dm crypt: use bio_add_page + * [SPARC64]: Spelling fixes + * [SPARC32]: Spelling fixes + * [NET] include/net/: Spelling fixes + * [DCCP]: Spelling fixes + * [IRDA]: Spelling fixes + * [IPV6]: Spelling fixes + * [NET] net/core/: Spelling fixes + * [PKT_SCHED]: Spelling fixes + * [NETLABEL]: Spelling fixes + * [SCTP]: Spelling fixes + * [NETFILTER]: Spelling fixes + * [NETFILTER] ipv4: Spelling fixes + * [ATM]: Spelling fixes + * [NET]: Fix function put_cmsg() which may cause usr application memory + overflow + * x86: fix die() to not be preemptible + * x86: intel_cacheinfo.c: cpu cache info entry for Intel Tolapai + * [XFS] Fix mknod regression + * [XFS] Initialise current offset in xfs_file_readdir correctly + * Linux 2.6.24-rc6 + * [IPV4]: OOPS with NETLINK_FIB_LOOKUP netlink socket + * SLUB: Improve hackbench speed + * typhoon: endianness bug in tx/rx byte counters + * typhoon: missing le32_to_cpu() in get_drvinfo + * typhoon: set_settings broken on big-endian + * typhoon: missed rx overruns on big-endian + * typhoon: memory corruptor on big-endian if TSO is enabled + * typhoon: trivial endianness annotations + * cycx: annotations and fixes (.24 fodder?) + * asix fixes + * yellowfin: annotations and fixes (.24 fodder?) + * dl2k endianness fixes (.24 fodder?) + * r8169 endianness + * rrunner: use offsetof() instead of homegrown insanity + * 3c574 and 3c589 endianness fixes (.24?) + * fec_mpc52xx: write in C... + * 3c359 endianness annotations and fixes + * MACB: clear transmit buffers properly on transmit underrun + * UIO: Add a MAINTAINERS entry for Userspace I/O + * Modules: fix memory leak of module names + * USB: Unbreak fsl_usb2_udc + * USB: VID/PID update for sierra + * USB: New device ID for the CP2101 driver + * quicklists: do not release off node pages early + * ecryptfs: fix string overflow on long cipher names + * Fix computation of SKB size for quota messages + * Don't send quota messages repeatedly when hardlimit reached + * ecryptfs: fix unlocking in error paths + * ecryptfs: redo dget,mntget on dentry_open failure + * MAINTAINERS: mailing list archives are web links + * ps3: vuart: fix error path locking + * lib: proportion: fix underflow in prop_norm_percpu() + * pcmcia: remove pxa2xx_lubbock build warning + * kconfig: obey KCONFIG_ALLCONFIG choices with randconfig. 
+ * tty: fix logic change introduced by wait_event_interruptible_timeout() + * uml: user of helper_wait() got missed when it got extra arguments + * V4L/DVB (6871): Kconfig: VIDEO_CX23885 must select DVB_LGDT330X + * V4L/DVB (6876): ivtv: mspx4xx needs a longer i2c udelay + * drivers/ide/: Spelling fixes + * ide-cd: fix SAMSUNG CD-ROM SCR-3231 quirk + * ide-cd: fix ACER/AOpen 24X CDROM speed reporting on big-endian machines + * ide-cd: use ide_cd_release() in ide_cd_probe() + * ide-cd: fix error messages in cdrom_{read,write}_check_ireason() + * ide-cd: add missing 'ireason' masking to cdrom_write_intr() + * ide-cd: fix error messages in cdrom_write_intr() + * ide-cd: add error message for DMA error to cdrom_read_intr() + * ide-cd: fix error message in cdrom_pc_intr() + * ide-cd: fix 'ireason' reporting in cdrom_pc_intr() + * MAINTAINERS: update ide-cd entry + * [SPARC64]: Implement pci_resource_to_user() + * mac80211: round station cleanup timer + * mac80211: warn when receiving frames with unaligned data + * [NETFILTER]: nf_conntrack_ipv4: fix module parameter compatibility + * [TUNTAP]: Fix wrong debug message. + * [NET] tc_nat: header install + * [VETH]: move veth.h to include/linux + * [IPV4]: Fix ip command line processing. + * Revert quicklist need->flush fix + * [CRYPTO] padlock: Fix spurious ECB page fault + * [POWERPC] Oprofile: Remove dependency on spufs module + * [POWERPC] PS3: Fix printing of os-area magic numbers + * [PCI] Do not enable CRS Software Visibility by default + * [IPV4] Fix ip=dhcp regression + * [SERIAL]: Fix section mismatches in Sun serial console drivers. + * [TCP]: use non-delayed ACK for congestion control RTT + * [BLUETOOTH]: put_device before device_del fix + + -- Tim Gardner Sat, 22 Dec 2007 15:16:11 -0700 + +linux (2.6.24-2.4) hardy; urgency=low + + [Alessio Igor Bogani] + + * rt: First import for Hardy + + [Amit Kucheria] + + * LPIA: Fix FTBFS for hda + * LPIA: Trim configs including disabling stock DRM + + [Tim Gardner] + + * SAUCE: Increase CONFIG_IDE_MAX_HWIFS to 8 (from 4) + - LP: #157909 + Then reverted since it causes an ABI bump. Will pick it up + again when next the ABI changes. + * Expose apm for applications. + + -- Tim Gardner Wed, 19 Dec 2007 13:17:31 -0700 + +linux (2.6.24-2.3) hardy; urgency=low + + [Amit Kucheria] + + * LPIA: Add thermal framework from Intel + * LPIA: Poulsbo-specific patches + * LPIA: Add thermal framework from Intel + + [Tim Gardner] + + * SAUCE: hdaps module does not load on Thinkpad T61P + - LP: #133636 + + [Upstream Kernel Changes] + + * Rebased against 2.6.24-rc5 + + -- Tim Gardner Wed, 12 Dec 2007 13:58:52 -0700 + +linux (2.6.24-1.2) hardy; urgency=low + + [Ben Collins] + + * cell: Remove cell custom flavour, merged upstream + * apparmor: Added module from SVN repo + * ubuntu: Update configs to enable apparmor + * ubuntu/configs: Disable vga type framebuffers on hppa32. Fixes FTBFS + + [Tim Gardner] + + * Add support for PPA builds. + + [Upstream Kernel Changes] + + * [SPARC64] Export symbols for sunvnet and sunvdc to be built modular + + -- Ben Collins Fri, 07 Dec 2007 15:18:32 -0500 + +linux (2.6.24-1.1) hardy; urgency=low + + [Ben Collins] + + * ubuntu: Disable custom binary flavours for now + * ubuntu: Remove cruft in headers-postinst + * ubuntu: Set skipabi/skipmodule to true if prev_revions == 0.0 + * ubuntu: Do not fail on missing module lists when skipmodule is set + * ubuntu: capability.ko is built-in now, no need to place in initrd. 
+ * ubuntu: Change to "linux" instead of "linux-source-2.6.x" + * d-i: cdrom-modules disappeared, and sha256/aes modules renamed. + * ubuntu-build: Add asm_link= to arch rules, and use them + * config: Re-enable snd-hda-intel + + -- Ben Collins Wed, 28 Nov 2007 12:58:37 -0500 + +linux-source-2.6.22 (2.6.22-14.46) gutsy; urgency=low + + [Upstream Kernel Changes] + + * [SPARC64]: Fix bugs in SYSV IPC handling in 64-bit processes. + + -- Kyle McMartin Sun, 14 Oct 2007 20:30:09 +0000 + +linux-source-2.6.22 (2.6.22-14.45) gutsy; urgency=low + + [Upstream Kernel Changes] + + * [SPARC64]: Fix register usage in xor_raid_4(). + + -- Kyle McMartin Sun, 14 Oct 2007 12:34:44 -0400 + +linux-source-2.6.22 (2.6.22-14.44) gutsy; urgency=low + + [Kyle McMartin] + + * Revert "sparc wants ehci built in" + + [Upstream Kernel Changes] + + * Revert "[PATCH]: Gutsy OHCI hang workaround for Huron" + * [USB]: Serialize EHCI CF initialization. + + -- Kyle McMartin Sun, 14 Oct 2007 16:25:51 +0000 + +linux-source-2.6.22 (2.6.22-14.43) gutsy; urgency=low + + [Kyle McMartin] + + * sparc wants ehci built in + + -- Kyle McMartin Tue, 09 Oct 2007 20:07:58 +0000 + +linux-source-2.6.22 (2.6.22-14.42) gutsy; urgency=low + + [Kyle McMartin] + + * fix up module-check to bail early if asked to ignore modules + * disable kernel DRM on lpia (we provide one in lum) + - LP: #145168 + * add ignore for ia64 abi too + + [Upstream Kernel Changes] + + * [NIU]: Use netif_msg_*(). + * [NIU]: Use pr_info(). + * [NIU]: Remove redundant BUILD_BUG_ON() in __niu_wait_bits_clear(). + * [NIU]: Remove BUG_ON() NULL pointer checks. + * [NIU]: Use dev_err(). + * [NIU]: Fix x86_64 build failure. + * [NIU]: Use linux/io.h instead of asm/io.h + * [NIU]: Fix some checkpatch caught coding style issues. + * [NIU]: Fix shadowed local variables. + * [NIU]: Fix locking errors in link_status_10g(). + * [NIU]: Document a few magic constants using comments. + * [NIU]: MII phy handling fixes. + * [NIU]: Make sure link_up status is set to something in + link_status_{1,10}g(). + * [PATCH]: Gutsy OHCI hang workaround for Huron + + -- Kyle McMartin Tue, 09 Oct 2007 17:25:06 +0000 + +linux-source-2.6.22 (2.6.22-14.41) gutsy; urgency=low + + [Ben Collins] + + * ubuntu/d-i: Add niu to nic-modules + + [Kyle McMartin] + + * vesafb is not for ia64 + * remove CONFIG_NIU from places it shouldn't be + * fix orinoco_cs oops + - LP: #149997 + + [Upstream Kernel Changes] + + * [SPARC64]: Allow userspace to get at the machine description. + * [SPARC64]: Niagara-2 optimized copies. + * [SPARC64]: Do not touch %tick_cmpr on sun4v cpus. + * [SPARC64]: SMP trampoline needs to avoid %tick_cmpr on sun4v too. + * [SPARC64]: Create a HWCAP_SPARC_N2 and report it to userspace on + Niagara-2. + * [MATH-EMU]: Fix underflow exception reporting. + * [SPARC64]: Need to clobber global reg vars in switch_to(). + * [MATH]: Fix typo in FP_TRAPPING_EXCEPTIONS default setting. + * [SUNVDC]: Use slice 0xff on VD_DISK_TYPE_DISK. + * [SPARC64]: Fix type and constant sizes wrt. sun4u IMAP/ICLR handling. + * [SPARC64]: Enable MSI on sun4u Fire PCI-E controllers. + * [SPARC64]: Fix several bugs in MSI handling. + * [SPARC64]: Fix booting on V100 systems. + * [SPARC64]: Fix lockdep, particularly on SMP. + * [SPARC64]: Warn user if cpu is ignored. + * [SUNSAB]: Fix several bugs. + * [SUNSAB]: Fix broken SYSRQ. + * [SPARC64]: Fix missing load-twin usage in Niagara-1 memcpy. + * [SPARC64]: Don't use in/local regs for ldx/stx data in N1 memcpy. + * [SPARC64]: Fix domain-services port probing. 
+  * [SPARC64]: VIO device addition log message level is too high.
+  * [SPARC64]: check fork_idle() error
+  * [SPARC64]: Fix 'niu' complex IRQ probing.
+  * [NIU]: Add Sun Neptune ethernet driver.
+
+ -- Kyle McMartin Tue, 09 Oct 2007 00:38:16 +0000
+
+linux-source-2.6.22 (2.6.22-13.40) gutsy; urgency=low
+
+  [Amit Kucheria]
+
+  * Enable CONFIG_VM86 for LPIA
+    - LP: #146311
+  * Update configuration files
+  * Disable MSI by default
+  * Add mmconf documentation
+  * Update configuration files
+
+  [Bartlomiej Zolnierkiewicz]
+
+  * ide-disk: workaround for buggy HPA support on ST340823A (take 3)
+    - LP: #26119
+
+  [Ben Collins]
+
+  * ubuntu/cell: Fixup ps3 related modules for d-i, enable RTAS console
+  * ubuntu/cell: Enable CELLEB and related modules (pata_scc)
+  * ubuntu/cell: Move ps3rom to storage-core. Also use spidernet, not
+    spider_net.
+  * ubuntu/cell: Set PS3_MANAGER=y
+  * ubuntu: Set NR_CPUS=256 for sparc64-smp
+
+  [Chuck Short]
+
+  * [USB] Support for MediaTek MT6227 in cdc-acm.
+    - LP: #134123
+  * [XEN] Fix xen vif create with more than 14 guests.
+    - LP: #14486
+
+  [Jorge Juan Chico]
+
+  * ide: ST320413A has the same problem as ST340823A
+    - LP: #26119
+
+  [Kyle McMartin]
+
+  * fix -rt build
+  * fix ia32entry-xen.S for CVE-2007-4573
+  * fix build when CONFIG_PCI_MSI is not set
+
+  [Matthew Garrett]
+
+  * hostap: send events on data interface as well as master interface
+    - LP: #57146
+  * A malformed _GTF object should not prevent ATA device recovery
+    - LP: #139079
+  * hostap: send events on data interface as well as master interface
+    - LP: #57146
+  * A malformed _GTF object should not prevent ATA device recovery
+    - LP: #139079
+  * Don't lose appletouch button release events
+  * Fix build with appletouch change
+  * Disable Thinkpad backlight support on machines with ACPI video
+    - LP: #148055
+  * Don't attempt to register a callback if there is no CMOS object
+    - LP: #145857
+  * Update ACPI bay hotswap code to support locking
+    - LP: #148219
+  * Update ACPI bay hotswap code to support locking
+    - LP: #148219
+  * Don't attempt to register a callback if there is no CMOS object
+    - LP: #145857
+  * Disable Thinkpad backlight support on machines with ACPI video
+    - LP: #148055
+
+  [Steffen Klassert]
+
+  * 3c59x: fix duplex configuration
+    - LP: #94186
+
+  [Thomas Gleixner]
+
+  * clockevents: remove the suspend/resume workaround^Wthinko
+
+  [Tim Gardner]
+
+  * orinoco_cs.ko missing
+    - LP: #125832
+  * Marvell Technology ethernet card not recognized and not operational
+    - LP: #135316
+  * Marvell Technology ethernet card not recognized and not operational
+    - LP: #135316
+  * acpi_scan_rsdp() breaks some PCs by not honouring ACPI specification
+    - LP: #144336
+  * VIA southbridge Intel id missing
+    - LP: #128289
+  * Add T-Sinus 111card to hostap_cs driver to be able to upload firmware
+    - LP: #132466
+  * RTL8111 PCI Express Gigabit driver r8169 big files produce slow file
+    transfer
+    - LP: #114171
+  * Guest OS does not recognize a lun with non zero target id on Vmware ESX
+    Server
+    - LP: #140761
+  * Modularize vesafb
+    - LP: #139505
+  * Nikon cameras need support in unusual_devs.h
+    - LP: #134477
+  * agp for i830m broken in gutsy
+    - LP: #139767
+  * hdaps: Added support for Thinkpad T61
+    - LP: #147383
+  * xen: Update config for i386
+    - LP: #139047
+  * xen: resync for amd64
+    - LP: #139047
+  * ide-disk: workaround for buggy HPA support on ST340823A (take 4)
+    - LP: #26119
+
+  [Upstream Kernel Changes]
+
+  * Convert snd-page-alloc proc file to use seq_file (CVE-2007-4571)
+  * Linux
2.6.22.8 + * ACPI: disable lower idle C-states across suspend/resume + * V4L: ivtv: fix VIDIOC_S_FBUF: new OSD values were never set + * DVB: get_dvb_firmware: update script for new location of sp8870 + firmware + * DVB: get_dvb_firmware: update script for new location of tda10046 + firmware + * DVB: b2c2-flexcop: fix Airstar HD5000 tuning regression + * setpgid(child) fails if the child was forked by sub-thread + * sigqueue_free: fix the race with collect_signal() + * kconfig: oldconfig shall not set symbols if it does not need to + * MTD: Makefile fix for mtdsuper + * USB: fix linked list insertion bugfix for usb core + * ACPI: Validate XSDT, use RSDT if XSDT fails + * POWERPC: Flush registers to proper task context + * 3w-9xxx: Fix dma mask setting + * MTD: Initialise s_flags in get_sb_mtd_aux() + * JFFS2: fix write deadlock regression + * V4L: cx88: Avoid a NULL pointer dereference during mpeg_open() + * hwmon: End of I/O region off-by-one + * Fix debug regression in video/pwc + * splice: fix direct splice error handling + * rpc: fix garbage in printk in svc_tcp_accept() + * disable sys_timerfd() + * afs: mntput called before dput + * Fix DAC960 driver on machines which don't support 64-bit DMA + * Fix "Fix DAC960 driver on machines which don't support 64-bit DMA" + * firewire: fw-ohci: ignore failure of pci_set_power_state (fix suspend + regression) + * futex_compat: fix list traversal bugs + * Leases can be hidden by flocks + * ext34: ensure do_split leaves enough free space in both blocks + * nfs: fix oops re sysctls and V4 support + * dir_index: error out instead of BUG on corrupt dx dirs + * ieee1394: ohci1394: fix initialization if built non-modular + * Correctly close old nfsd/lockd sockets. + * Fix race with shared tag queue maps + * crypto: blkcipher_get_spot() handling of buffer at end of page + * fix realtek phy id in forcedeth + * Fix decnet device address listing. + * Fix device address listing for ipv4. + * Fix inet_diag OOPS. + * Fix IPV6 append OOPS. + * Fix IPSEC AH4 options handling + * Fix ipv6 double-sock-release with MSG_CONFIRM + * Fix IPV6 DAD handling + * Fix ipv6 source address handling. + * Fix oops in vlan and bridging code + * Fix tc_ematch kbuild + * Handle snd_una in tcp_cwnd_down() + * Fix TCP DSACK cwnd handling + * Fix datagram recvmsg NULL iov handling regression. + * Fix pktgen src_mac handling. + * Fix sparc64 v100 platform booting. + * bcm43xx: Fix cancellation of work queue crashes + * Linux 2.6.22.9 + * usb: serial/pl2303: support for BenQ Siemens Mobile Phone EF81 + * pata_it821x: fix lost interrupt with atapi devices + * i915: make vbl interrupts work properly on i965g/gm hw. + + -- Kyle McMartin Thu, 04 Oct 2007 13:57:53 +0000 + +linux-source-2.6.22 (2.6.22-12.39) gutsy; urgency=low + + [Ben Collins] + + * ubuntu: Re-order deps so that binary-custom is done before + binary-udebs. Fixes ppc build + + [Upstream Kernel Changes] + + * x86_64: Zero extend all registers after ptrace in 32bit entry path. 
+ * Linux 2.6.22.7 + + -- Ben Collins Sun, 23 Sep 2007 11:05:32 -0400 + +linux-source-2.6.22 (2.6.22-12.38) gutsy; urgency=low + + [Kyle McMartin] + + * add -12 abi files + * update getabis for new flavours + + -- Kyle McMartin Fri, 21 Sep 2007 13:35:49 -0400 + +linux-source-2.6.22 (2.6.22-12.37) gutsy; urgency=low + + [Kyle McMartin] + + * enable d-i for cell flavour + * ignore ABI check on all hppa flavours + + -- Kyle McMartin Fri, 21 Sep 2007 11:28:34 -0400 + +linux-source-2.6.22 (2.6.22-12.36) gutsy; urgency=low + + [Ben Collins] + + * ABI bump due to LED support being enabled. + + [Kyle McMartin] + + * fix memory leak in psparse.c + - Bug introduced in previous commit to acpi + + [Upstream Kernel Changes] + + * Ubuntu: Allocate acpi_devices structure rather than leaving it on the + stack. + * ipw2100: Fix `iwpriv set_power` error + * Fix ipw2200 set wrong power parameter causing firmware error + * [SCSI] Fix async scanning double-add problems + - LP: #110997 + + -- Ben Collins Thu, 20 Sep 2007 11:34:52 -0400 + +linux-source-2.6.22 (2.6.22-11.34) gutsy; urgency=low + + [Alan Stern] + + * USB: disable autosuspend by default for non-hubs + - LP: #85488 + + [Ben Collins] + + * ubuntu: Enable LEDS_TRIGGERS and related options + - Needed for iwlwifi + * ubuntu: Add real ABI files for virtual flavour + * ubuntu: Re-enable missing CONFIG_SERPENT for hppa64 + - Noticed by Lamont + * ubuntu: Add linux-headers postinst to handle hooks + - LP: #125816 + * ubuntu: Add support for /etc/kernel/headers_postinst.d/ to + headers-postinst + - LP: #120049 + * cell: Add binary-custom flavour "cell" to support ps3 + + [Mattia Dongili] + + * sony-laptop: restore the last user requested brightness level on + resume. + - LP: #117331 + + [Tejun Heo] + + * ata_piix: fix suspend/resume for some TOSHIBA laptops + - LP: #139045 + * PCI: export __pci_reenable_device() + - needed for ata_piix change + + [Tim Gardner] + + * Enable Sierra Wireless MC8775 0x6813 + - LP: #131167 + + [Zhang Rui] + + * ACPI: work around duplicate name "VID" problem on T61 + - Noted by mjg59 + + -- Ben Collins Sun, 16 Sep 2007 22:31:47 -0400 + +linux-source-2.6.22 (2.6.22-11.33) gutsy; urgency=low + + [Alessio Igor Bogani] + + * rt: Update to rt9 + * rt: Update configuration files + + [Ben Collins] + + * ubuntu: Enable A100 driver + - LP: #138632 + * libata: Default to hpa being overridden + + [Chuck Short] + + * [HDAPS] Add support for Thinkpad R61. + * [LIBATA] Add more hard drives to blacklist. + * [USB] Added support for Sprint Pantech PX-500. + * [XEN] No really enable amd64. + * [XEN] Fix amd64 yet again. 
+ + [Matthew Garrett] + + * alter default behaviour of ACPI video module + * Add infrastructure for notification on ACPI method execution + * Get thinkpad_acpi to send notifications on CMOS updates + * Add support to libata-acpi for acpi-based bay hotplug + + [Phillip Lougher] + + * Add kernel flavour optimised for virtualised environments + * Change abi-check script to check for $flavour.ignore in previous abi + * Disable abi and module check for virtual flavour + + [Richard Hughes] + + * Refresh laptop lid status on resume + + [Upstream Kernel Changes] + + * [pata_marvell]: Add more identifiers + + -- Ben Collins Sun, 16 Sep 2007 22:13:08 -0400 + +linux-source-2.6.22 (2.6.22-11.32) gutsy; urgency=low + + [Amit Kucheria] + + * Build system: Allow custom builds to comprise multiple patches + * Move UME to a Custom build and add first setup of thermal framework + + [Ben Collins] + + * ubuntu: Enable CONFIG_BLK_DEV_IO_TRACE + * bcm203x: Fix firmware loading + - LP: #85247 + * ubuntu: mtd changes caused module renaming. Ignore + * rt: Do not patch top level Makefile for SUBLEVEL. Will always end up + breaking + + [Chuck Short] + + * [USB] Unusual Device support for Gold MP3 Player Energy + - LP: #125250 + * [SIERRA] Adds support for Onda H600 ZTE MF330 + - LP: #129433 + * [HDAPS] Add Thinkpad T61P to whitelist. + - LP: #133636 + * [USB] Add support for Toshiba (Novatel Wireless) HSDPA for M400. + - LP: #133650 + + [Kyle McMartin] + + * apparmor 10.3 hooks + * unionfs 2.1 hooks + * nuke UNION_FS stuff from fs/{Kconfig,Makefile} + + [Tim Gardner] + + * Paravirt-ops I/O hypercalls + * Fix lazy vmalloc bug for Gutsy + * bluetooth headset patch + - LP: #130870 + * Add the PCI ID of this ICH4 in list of laptops that use short cables. + * v2.6.22.5 merge + * Update Xen config options. + - LP: #132726 + * Remove mtd modules from ABI + * Support parallel= in DEB_BUILD_OPTIONS + - LP: #136426 + + [Upstream Kernel Changes] + + * hwmon: fix w83781d temp sensor type setting + * hwmon: (smsc47m1) restore missing name attribute + * sky2: restore workarounds for lost interrupts + * sky2: carrier management + * sky2: check for more work before leaving NAPI + * sky2: check drop truncated packets + * revert "x86, serial: convert legacy COM ports to platform devices" + * ACPICA: Fixed possible corruption of global GPE list + * ACPICA: Clear reserved fields for incoming ACPI 1.0 FADTs + * AVR32: Fix atomic_add_unless() and atomic_sub_unless() + * r8169: avoid needless NAPI poll scheduling + * forcedeth: fix random hang in forcedeth driver when using netconsole + * libata: add ATI SB700 device IDs to AHCI driver + * Hibernation: do not try to mark invalid PFNs as nosave + * i386: allow debuggers to access the vsyscall page with compat vDSO + * x86_64: Check for .cfi_rel_offset in CFI probe + * x86_64: Change PMDS invocation to single macro + * i386: Handle P6s without performance counters in nmi watchdog + * i386: Fix double fault handler + * JFFS2 locking regression fix. 
+ * [Input]: appletouch - improve powersaving for Geyser3 devices + * [Input]: add driver for Fujitsu serial touchscreens + * [sdhci]: add support to ENE-CB714 + * v2.6.22.5 + * [MTD] Makefile fix for mtdsuper + * ocfs2: Fix bad source start calculation during kernel writes + * NET: Share correct feature code between bridging and bonding + * sky2: don't clear phy power bits + * uml: fix previous request size limit fix + * i386: fix lazy mode vmalloc synchronization for paravirt + * signalfd: fix interaction with posix-timers + * signalfd: make it group-wide, fix posix-timers scheduling + * DCCP: Fix DCCP GFP_KERNEL allocation in atomic context + * IPV6: Fix kernel panic while send SCTP data with IP fragments + * IPv6: Invalid semicolon after if statement + * Fix soft-fp underflow handling. + * Netfilter: Missing Kbuild entry for netfilter + * SNAP: Fix SNAP protocol header accesses. + * NET: Fix missing rcu unlock in __sock_create() + * SPARC64: Fix sparc64 task stack traces. + * SPARC64: Fix sparc64 PCI config accesses on sun4u + * TCP: Do not autobind ports for TCP sockets + * TCP: Fix TCP rate-halving on bidirectional flows. + * TCP: Fix TCP handling of SACK in bidirectional flows. + * PPP: Fix PPP buffer sizing. + * PCI: lets kill the 'PCI hidden behind bridge' message + * PCI: disable MSI on RS690 + * PCI: disable MSI on RD580 + * PCI: disable MSI on RX790 + * USB: allow retry on descriptor fetch errors + * USB: fix DoS in pwc USB video driver + * usb: add PRODUCT, TYPE to usb-interface events + * Linux 2.6.22.6 + * V4L/DVB (6042): b2c2-flexcop: fix Airstar HD5000 tuning regression + * V4L/DVB (5967): ivtv: fix VIDIOC_S_FBUF:new OSD values where never set + * Re-add _GTM and _STM support + + -- Ben Collins Fri, 31 Aug 2007 16:26:56 -0400 + +linux-source-2.6.22 (2.6.22-10.30) gutsy; urgency=low + + * URGENT upload to fix FTBFS with xen-{i386,amd64} configs, + lpia d-i ftbfs, xen ftbfs. + * URGENT fix module-check to actually ignore things + * URGENT ignore ume modules + + [Alek Du] + + * Add Intel Poulsbo chipset Libata support + + [Amit Kucheria] + + * Update configuration files + * Enable stylus on Lenovo X60/X61 thinkpads + + [Ben Collins] + + * ubuntu: Disable snd-hda-intel, in favor of lum updated version + + [Kyle McMartin] + + * apparmor 10.3 hooks + * add lpia d-i udeb generation + * fix bits of rt/diff for -rt8 + * fix rt/diff for 2.6.22.3 changes + * fix up rt/diff for stable 2.6.22.4 + + [LaMont Jones] + + * Update configuration files + + [Phillip Lougher] + + * WriteSupportForNTFS: make fuse module available to d-i + + [Tim Gardner] + + * Gutsy Tribe 3 CD don't load on Dell Inspiron 1501 + - LP: #121111 + * Update configuration files + * Update configuration files + * Update configuration files + + [Upstream Kernel Changes] + + * [SPARC64]: Fix handling of multiple vdc-port nodes. + * [SPARC64]: Tweak assertions in sun4v_build_virq(). + * [SPARC64]: Fix log message type in vio_create_one(). + * [SPARC64]: Fix two year old bug in early bootup asm. + * [SPARC64]: Improve VIO device naming further. + * [SPARC64]: Handle multiple domain-services-port nodes properly. + * [SPARC64]: Add proper multicast support to VNET driver. + * [SPARC64]: Do not flood log with failed DS messages. + * [SPARC64]: Use KERN_ERR in IRQ manipulation error printks. + * [SPARC64]: Fix virq decomposition. + * [SPARC]: Fix serial console device detection. 
+ * [SPARC64]: fix section mismatch warning in pci_sunv4 + * [SPARC64]: fix section mismatch warning in mdesc.c + * [SPARC64] viohs: extern on function definition + * [SPARC64]: Fix sun4u PCI config space accesses on sun4u. + * [SPARC64]: Fix show_stack() when stack argument is NULL. + * [SUNLANCE]: Fix sparc32 crashes by using of_*() interfaces. + * [SPARC]: Centralize find_in_proplist() instead of duplicating N times. + * [SPARC64]: Fix hard-coding of cpu type output in /proc/cpuinfo on + sun4v. + * [SPARC64]: Do not assume sun4v chips have load-twin/store-init support. + * [SPARC64]: Fix memory leak when cpu hotplugging. + * USB: cdc-acm: fix sysfs attribute registration bug + * TCP FRTO retransmit bug fix + * Fix TC deadlock. + * Fix IPCOMP crashes. + * Fix console write locking in sparc drivers. + * Add a PCI ID for santa rosa's PATA controller. + * Missing header include in ipt_iprange.h + * SCTP scope_id handling fix + * Fix rfkill IRQ flags. + * gen estimator timer unload race + * gen estimator deadlock fix + * Fix error queue socket lookup in ipv6 + * Fix ipv6 link down handling. + * Netpoll leak + * Sparc64 bootup assembler bug + * Fix ipv6 tunnel endianness bug. + * Fix sparc32 memset() + * Fix sparc32 udelay() rounding errors. + * Fix TCP IPV6 MD5 bug. + * KVM: SVM: Reliably detect if SVM was disabled by BIOS + * USB: fix warning caused by autosuspend counter going negative + * usb-serial: Fix edgeport regression on non-EPiC devices + * Fix reported task file values in sense data + * aacraid: fix security hole + * firewire: fw-sbp2: set correct maximum payload (fixes CardBus adapters) + * make timerfd return a u64 and fix the __put_user + * V4L: Add check for valid control ID to v4l2_ctrl_next + * V4L: ivtv: fix broken VBI output support + * V4L: ivtv: fix DMA timeout when capturing VBI + another stream + * V4L: ivtv: Add locking to ensure stream setup is atomic + * V4L: wm8775/wm8739: Fix memory leak when unloading module + * Input: lifebook - fix an oops on Panasonic CF-18 + * splice: fix double page unlock + * drm/i915: Fix i965 secured batchbuffer usage (CVE-2007-3851) + * Fix leak on /proc/lockdep_stats + * CPU online file permission + * Fix user struct leakage with locked IPC shem segment + * md: handle writes to broken raid10 arrays gracefully + * md: raid10: fix use-after-free of bio + * pcmcia: give socket time to power down + * Fix leaks on /proc/{*/sched, sched_debug, timer_list, timer_stats} + * futex: pass nr_wake2 to futex_wake_op + * "ext4_ext_put_in_cache" uses __u32 to receive physical block number + * Include serial_reg.h with userspace headers + * dm io: fix panic on large request + * i386: HPET, check if the counter works + * fw-ohci: fix "scheduling while atomic" + * firewire: fix memory leak of fw_request instances + * softmac: Fix ESSID problem + * eCryptfs: ecryptfs_setattr() bugfix + * nfsd: fix possible read-ahead cache and export table corruption + * readahead: MIN_RA_PAGES/MAX_RA_PAGES macros + * fs: 9p/conv.c error path fix + * forcedeth bug fix: cicada phy + * forcedeth bug fix: vitesse phy + * forcedeth bug fix: realtek phy + * acpi-cpufreq: Proper ReadModifyWrite of PERF_CTL MSR + * jbd commit: fix transaction dropping + * jbd2 commit: fix transaction dropping + * hugetlb: fix race in alloc_fresh_huge_page() + * do not limit locked memory when RLIMIT_MEMLOCK is RLIM_INFINITY + * uml: limit request size on COWed devices + * sony-laptop: fix bug in event handling + * destroy_workqueue() can livelock + * drivers/video/macmodes.c:mac_find_mode() 
mustn't be __devinit + * cfq-iosched: fix async queue behaviour + * libata: add FUJITSU MHV2080BH to NCQ blacklist + * ieee1394: revert "sbp2: enforce 32bit DMA mapping" + * nfsd: fix possible oops on re-insertion of rpcsec_gss modules + * dm raid1: fix status + * dm io: fix another panic on large request + * dm snapshot: permit invalid activation + * dm: disable barriers + * cr_backlight_probe() allocates too little storage for struct cr_panel + * ACPI: dock: fix opps after dock driver fails to initialize + * Hangup TTY before releasing rfcomm_dev + * Keep rfcomm_dev on the list until it is freed + * nf_conntrack: don't track locally generated special ICMP error + * IPV6: /proc/net/anycast6 unbalanced inet6_dev refcnt + * sysfs: release mutex when kmalloc() failed in sysfs_open_file(). + * Netfilter: Fix logging regression + * USB: fix for ftdi_sio quirk handling + * sx: switch subven and subid values + * UML: exports for hostfs + * Linux 2.6.22.2 + * fix oops in __audit_signal_info() + * random: fix bound check ordering (CVE-2007-3105) + * softmac: Fix deadlock of wx_set_essid with assoc work + * ata_piix: update map 10b for ich8m + * PPC: Revert "[POWERPC] Don't complain if size-cells == 0 in + prom_parse()" + * PPC: Revert "[POWERPC] Add 'mdio' to bus scan id list for platforms + with QE UEC" + * powerpc: Fix size check for hugetlbfs + * direct-io: fix error-path crashes + * stifb: detect cards in double buffer mode more reliably + * pata_atiixp: add SB700 PCI ID + * CPUFREQ: ondemand: fix tickless accounting and software coordination + bug + * CPUFREQ: ondemand: add a check to avoid negative load calculation + * Linux 2.6.22.3 + * intel_agp: really fix 945/965GME + * Reset current->pdeath_signal on SUID binary execution (CVE-2007-3848) + * MSS(mmc/sd/sdio) driver patch + + -- Kyle McMartin Thu, 16 Aug 2007 12:17:27 -0400 + +linux-source-2.6.22 (2.6.22-9.25) gutsy; urgency=low + + [Kyle McMartin] + + * ubuntu: Fix FTBFS -- forgot to bump debian/abi + + -- Kyle McMartin Thu, 02 Aug 2007 22:13:28 +0000 + +linux-source-2.6.22 (2.6.22-9.24) gutsy; urgency=low + + [Colin Watson] + + * provide Provides for fs-*-modules udebs + + [Matthias Klose] + + * test $dilist before using it + + [Lamont Jones] + + * hppa: Update abi files + + -- Kyle McMartin Thu, 02 Aug 2007 18:26:34 +0000 + +linux-source-2.6.22 (2.6.22-9.23) gutsy; urgency=low + + [Ben Collins] + + * ubuntu: Add missing newline to module-check script + * ubuntu: Add lpia to linux-libc-dev. Should finally build now. + + -- Ben Collins Thu, 02 Aug 2007 13:10:23 -0400 + +linux-source-2.6.22 (2.6.22-9.22) gutsy; urgency=low + + [Ben Collins] + + * ubuntu: Use DEB_HOST_ARCH, not DEB_HOST_ARCH_CPU + + -- Ben Collins Thu, 02 Aug 2007 08:44:09 -0400 + +linux-source-2.6.22 (2.6.22-9.21) gutsy; urgency=low + + [Ben Collins] + + * lpia: Add build stuff for lpia architecture + + [LaMont Jones] + + * abi files for hppa + * UBUNTU-HPPA: configs that seem to work + * hppa: abi files for 9.20 + + -- Ben Collins Wed, 01 Aug 2007 11:12:59 -0400 + +linux-source-2.6.22 (2.6.22-9.20) gutsy; urgency=low + + [Ben Collins] + + * tulip: Fix for Uli5261 chipsets. + * tulip: Define ULI PCI ID's + * tulip: Let dmfe handle davicom on non-sparc + * input: Allow root to inject unknown scan codes. + * irda: Default to dongle type 9 on IBM hardware + * input/mouse/alps: Do not call psmouse_reset() for alps + * pcmcia: Do not insert pcmcia cards on resume + * ide-cd: Disable verbose errors. 
+ * block: Make CDROMEJECT more robust + * pm: Config option to disable handling of console during suspend/resume. + * version: Implement version_signature proc file. + * update toshiba_acpi to 0.19a-dev + * xpad: Update to latest version from xbox-linux. + * ubuntu: Enable setting of CONFIG_VERSION_SIGNATURE at build time + * toshiba_acpi: Don't use init_MUTEX_LOCKED + + [Chuck Short] + + * [USB]: add ASUS LCM to the blacklist + * [NET]: Add mcp73 to forcedeth. + * [USB]: Added support for Sanwa PC5000 multimeter usb cable (KB-USB2). + * [ATA] Add support for Sb700 AHCI nor-raid5 and raid5 + + [Fabio M. Di Nitto] + + * drivers/char/vt.c: make promcon driver init a boot option. + + [Kyle McMartin] + + * Disable MMCONFIG by default + + [Phillip Lougher] + + * fix NFS mounting regression from Edgy->Feisty + * r8169: disable TSO by default for RTL8111/8168B chipsets. + + [Tim Gardner] + + * Catch nonsense keycodes and silently ignore + * Cause SoftMac to emit an association event when setting ESSID. + + -- Ben Collins Mon, 30 Jul 2007 12:01:43 -0400 + +linux-source-2.6.22 (2.6.22-9.19) gutsy; urgency=low + + [Amit Kucheria] + + * Fix for FTBFS bug 123178 + * Fix for FTBFS bug 123178 + * Add devices to USB quirks to prevent USB autosuspend + * More devices added to USB quirks + - LP: #85488 + * Support for ENE CB-712/4 SD card reader + * Reorder quirk list based on Vendor/Product ID + + [Ben Collins] + + * ubuntu: Enable HOTPLUG_CPU in sparc64-smp config. + * ubuntu: Add xen to amd64 custom builds + * ubuntu: Update real-time kernel to -rt4 + * rt: Patch from Alessio Igor Bogani for RT-8 + + [Chuck Short] + + * IDE: add MHV2080BH to NCQ blacklist + * XEN: update to 2.6.22 final and amd64 support. + * NET: Add more pci-ids to zd1211rw + * IDE: add new PCI ID + * USB: fix oops in ftdi_sio + + [Eric Piel] + + * ACPI: Allow custom DSDT tables to be loaded from initramfs + + [Ryan Lortie] + + * Macbook calibration loop fix + - LP: #54621 + + [Upstream Kernel Changes] + + * NETFILTER: {ip, nf}_conntrack_sctp: fix remotely triggerable NULL ptr + dereference (CVE-2007-2876) + * Linux 2.6.22.1 + * [SPARC64]: Use KERN_ERR in sun4v IRQ printk()'s. + * [SPARC64]: Add LDOM virtual channel driver and VIO device layer. + * [SPARC64]: Add Sun LDOM virtual network driver. + * [SPARC64]: Add Sun LDOM virtual disk driver. + * [SPARC64]: Create proper obppath sysfs files for VIO bus devices. + * [SPARC64] LDC: Do limited polled retry on setting RX queue head. + * [SUNVNET]: Validate RX descriptor size field. + * [SPARC64]: Add missing symbol exports for LDOM infrastructure. + * [SPARC64]: Temporary workaround for LDC INO double-delivery. + * [SPARC64]: Create 'devspec' nodes for vio devices. + * [SPARC64]: vdev->type can be NULL, handle this in devspec_show(). + * [SPARC64]: Assorted LDC bug cures. + * [SPARC64]: Add domain-services nodes to VIO device tree. + * [SPARC64]: Export powerd facilities for external entities. + * [SPARC64]: Initial domain-services driver. + * [SPARC64]: Use more mearningful names for IRQ registry. + * [SPARC64]: Abstract out mdesc accesses for better MD update handling. + * [SPARC64]: Fix MD property lifetime bugs. + * [SPARC64]: Fix setting of variables in LDOM guest. + * [SPARC64]: Initial LDOM cpu hotplug support. + * [SPARC64]: Unconditionally register vio_bus_type. + * [SPARC64]: Fix build regressions added by dr-cpu changes. + * [SPARC64]: mdesc.c needs linux/mm.h + * [SPARC64]: SMP build fixes. + * [SPARC64]: More sensible udelay implementation. 
+ * [SPARC64]: Process dr-cpu events in a kthread instead of workqueue. + * [SPARC64]: Add ->set_affinity IRQ handlers. + * [SPARC64]: Fix leak when DR added cpu does not bootup. + * [SPARC64]: Clear cpu_{core,sibling}_map[] in + smp_fill_in_sib_core_maps() + * [SPARC64]: Give more accurate errors in dr_cpu_configure(). + * [SERIAL]: Fix console write locking in sparc drivers. + * [TIMER]: Fix clockevent notifications on 64-bit. + * [SPARC64]: dr-cpu unconfigure support. + * [SPARC64]: Fix UP build. + * [SPARC64]: SMP build fix. + * [SPARC64]: Fix race between MD update and dr-cpu add. + * [SERIAL] SUNHV: Fix jerky console on LDOM guests. + * [SPARC64]: Kill explicit %gl register reference. + * [SPARC64]: Add basic infrastructure for MD add/remove notification. + * [SPARC64]: Simplify VDC device probing. + * [SPARC64]: Simplify VNET probing. + * [SPARC64]: Massively simplify VIO device layer and support hot + add/remove. + * [SPARC64]: Handle LDC resets properly in domain-services driver. + * [SPARC64]: Handle reset events in vio_link_state_change(). + * [SPARC64]: Fix reset handling in VNET driver. + * [SPARC64]: Set vio->desc_buf to NULL after freeing. + * [SPARC64]: Fix MODULE_DEVICE_TABLE() specification in VDC and VNET. + * [SPARC64]: Fix device type matching in VIO's devspec_show(). + * Add empty + * Add dummy isa_(bus|virt)_to_(virt|bus) inlines + * Clean up sti_flush + * Do not allow STI_CONSOLE to be modular + * Use compat_sys_getdents + + -- Ben Collins Sat, 28 Jul 2007 12:30:53 -0400 + +linux-source-2.6.22 (2.6.22-8.18) gutsy; urgency=low + + [Ben Collins] + + * ubuntu: *sigh* update xen config to fix FTBFS + + -- Ben Collins Thu, 12 Jul 2007 14:23:20 +0100 + +linux-source-2.6.22 (2.6.22-8.17) gutsy; urgency=low + + [Ben Collins] + + * ubuntu: Actually enable the -xen build. + + -- Ben Collins Thu, 12 Jul 2007 09:51:01 +0100 + +linux-source-2.6.22 (2.6.22-8.16) gutsy; urgency=low + + * Removed CONFIG_BLINK from all configs and added to modules.ignore + * This fixes a build failure for 8.15 + + [Alexey Starikovskiy] + + * Fix ACPI battery detection on Asus + + [Amit Kucheria] + + * Export symbols required to build GFS1 in LUM + * Update configuration files + * 2.6.22-7.14 ABI + * Remove old ABI + * Update d-i modules to support Sparc LDOM + * Introducing the UME kernel flavour + + [Jacob Pan] + + * Poulsbo SMBus Controller + * Intel Poulsbo SCH IDE Controller + * Intel Poulsbo HD audio controller + + [Phillip Lougher] + + * xen: Update custom binary flavour (Xen 3.1 for 2.6.22-rc5) + * xen: Update xen/config.i386 to enable PAE + + [Upstream Kernel Changes] + + * [SCSI] fusion: fix for BZ 8426 - massive slowdown on SCSI CD/DVD drive + * [XFS] Update the MAINTAINERS file entry for XFS. + * IB/mlx4: Fix handling of wq->tail for send completions + * IB/mlx4: Fix warning in rounding up queue sizes + * [SCSI] ESP: Don't forget to clear ESP_FLAG_RESETTING. + * firewire: fix hang after card ejection + * ieee1394: fix to ether1394_tx in ether1394.c + * [ARM] Add support for pause_on_oops and display preempt/smp options + * sh: Fix restartable syscall arg5 clobbering. + * ACPI: gracefully print null trip-point device + * ACPICA: fix error path in new external package objects as method + arguments + * sh: oops_enter()/oops_exit() in die(). 
+ * [ARM] Update show_regs/oops register format + * IB/mlx4: Handle new FW requirement for send request prefetching + * IB/mlx4: Get rid of max_inline_data calculation + * IB/mlx4: Handle buffer wraparound in __mlx4_ib_cq_clean() + * IB/mlx4: Handle FW command interface rev 3 + * Fix signalfd interaction with thread-private signals + * sched: fix SysRq-N (normalize RT tasks) + * Fix possible runqueue lock starvation in wait_task_inactive() + * sh: Handle -ERESTART_RESTARTBLOCK for restartable syscalls. + * sh64: Handle -ERESTART_RESTARTBLOCK for restartable syscalls. + * [POWERPC] Fix snd-powermac refcounting bugs + * [XFS] s/memclear_highpage_flush/zero_user_page/ + * [XFS] Update the MAINTAINERS file entry for XFS - change git repo name. + * [XFRM]: Fix MTU calculation for non-ESP SAs + * [IPVS]: Fix state variable on failure to start ipvs threads + * [AF_RXRPC]: Return the number of bytes buffered in rxrpc_send_data() + * [S390] Missing blank when appending cio_ignore kernel parameter + * [S390] Fix zfcpdump header + * [S390] Fix yet another two section mismatches. + * [S390] Print list of modules on die(). + * [S390] Add oops_enter()/oops_exit() calls to die(). + * [S390] Move psw_set_key. + * [POWERPC] rheap - eliminates internal fragments caused by alignment + * [POWERPC] PowerPC: Prevent data exception in kernel space (32-bit) + * [POWERPC] Fix powermac late initcall to only run on powermac + * [MIPS] Don't drag a platform specific header into generic arch code. + * x86_64: Fix readahead/sync_file_range/fadvise64 compat calls + * x86_64: Fix eventd/timerfd syscalls + * x86: Disable DAC on VIA bridges + * x86_64: Quieten Atari keyboard warnings in Kconfig + * x86: Only make Macintosh drivers default on Macs + * x86: Disable KPROBES with DEBUG_RODATA for now + * x86: change_page_attr bandaids + * x86_64: fix link warning between for .text and .init.text + * Fix up CREDIT entry ordering + * firewire: Only set client->iso_context if allocation was successful. + * spidernet: null out skb pointer after its been used. + * spidernet: Cure RX ram full bug + * spidernet: Don't terminate the RX ring + * spidernet: silence the ramfull messages + * spidernet: turn off descriptor chain end interrupt. 
+ * spidernet: checksum and ethtool + * bonding: Fix use after free in unregister path + * bonding: Fix 802.3ad no carrier on "no partner found" instance + * s390: print correct level for HiperSockets devices + * s390: qeth driver does not recover + * s390: avoid inconsistent lock state in qeth + * s390: qeth: wrong packet length in qdio header + * s390: Use ccw_device_get_id() in qeth/claw drivers + * s390: don't call iucv_path_connect from tasklet context + * s390: netiucv spinlock initializer cleanup + * s390: netiucv inlining cleanup + * forcedeth: use unicast receive mode for WoL + * natsemi irq flags + * cxgb3 - fix skb->dev dereference + * cxgb3 - fix netpoll hanlder + * cxgb3 - Fix direct XAUI support + * cxgb3 - Stop mac RX when changing MTU + * cxgb3 - MAC watchdog update + * PATA: Add the MCP73/77 support to PATA driver + * pata_it821x: (partially) fix DMA in RAID mode + * libata: more NONCQ devices + * kerneldoc fix in libata + * ahci: fix PORTS_IMPL override + * fix module_param mistake in it821x + * Blackfin arch: update ANOMALY handling + * Blackfin arch: update printk to use KERN_EMERG and reformat crash + output + * Blackfin arch: add missing braces around array bfin serial init + * Blackfin arch: match kernel startup messaage with new linker script + * Blackfin arch: move cond_syscall() behind __KERNEL__ like all other + architectures + * Blackfin arch: Add definition of dma_mapping_error + * Blackfin arch: add proper const volatile to addr argument to the read + functions + * [AGPGART] intel_agp: don't load if no IGD and AGP port + * IB/umem: Fix possible hang on process exit + * IPoIB/cm: Initialize RX before moving QP to RTR + * IPoIB/cm: Fix interoperability when MTU doesn't match + * IPoIB/cm: Remove dead definition of struct ipoib_cm_id + * IB/mlx4: Correct max_srq_wr returned from mlx4_ib_query_device() + * [PARISC] stop lcd driver from stripping initial whitespace + * [PARISC] Handle wrapping in expand_upwards() + * [PARISC] Fix unwinder on 64-bit kernels + * [PARISC] unwinder improvements + * page_mapping must avoid slub pages + * posix-timers: Prevent softirq starvation by small intervals and SIG_IGN + * Allow DEBUG_RODATA and KPROBES to co-exist + * [NETFILTER]: nf_conntrack_sip: add missing message types containing RTP + info + * [NETFILTER]: nfctnetlink: Don't allow to change helper + * [IPV6] NDISC: Fix thinko to control Router Preference support. + * [IPV4]: include sysctl.h from inetdevice.h + * i386: Make CMPXCHG64 only dependent on PAE + * x86_64: Fix only make Macintosh drivers default on Macs + * x86_64: Ignore compat mode SYSCALL when IA32_EMULATION is not defined + * [AVR32] Fix bug in invalidate_dcache_region() + * [AVR32] NGW100, Remove relics of the old USART mapping scheme + * [AVR32] Initialize dma_mask and dma_coherent_mask + * [AVR32] Update defconfigs + * ACPI: fix 2.6.20 SMP boot regression + * [SKBUFF]: Fix incorrect config #ifdef around skb_copy_secmark + * [TIPC]: Fix infinite loop in netlink handler + * [PPP]: Revert 606f585e363527da9feaed79465132c0c661fd9e + * [PPP]: Fix osize too small errors when decoding mppe. + * [TCP] tcp_read_sock: Allow recv_actor() return return negative error + value. 
+ * [NET]: Re-enable irqs before pushing pending DMA requests + * [NET]: Make skb_seq_read unmap the last fragment + * hwmon/coretemp: fix a broken error path + * fix refcounting of nsproxy object when unshared + * console UTF-8 fixes (fix) + * SM501: suspend support + * SM501: initialise SDRAM clock before bus clocks + * SM501: Fix sm501_init_reg() mask/set order + * SM501: Clock updates and checks + * SM501: Add Documentation/SM501.txt + * SM501: Check SM501 ID register on initialisation + * SLUB: fix behavior if the text output of list_locations overflows + PAGE_SIZE + * sched: fix next_interval determination in idle_balance() + * update checkpatch.pl to version 0.05 + * alpha: fix alignment problem in csum_ipv6_magic() + * Char: stallion, fix oops during init with ISA cards + * uml: use generic BUG + * uml: add asm/paravirt.h + * "volatile considered harmful" + * document nlink function + * slab allocators: MAX_ORDER one off fix + * update checkpatch.pl to version 0.06 + * x86_64: fix misplaced `continue' in mce.c + * ext2: disallow setting xip on remount + * audit: fix oops removing watch if audit disabled + * ext3: lost brelse in ext3_read_inode() + * ext4: lost brelse in ext4_read_inode() + * ACPI: preserve the ebx value in acpi_copy_wakeup_routine + * FUTEX: Restore the dropped ERSCH fix + * Linus 2.6.22-rc6 + * [ARM] 4452/1: Force the literal pool dump before reloc_end + * [ARM] 4449/1: more entries in arch/arm/boot/.gitignore + * fix nmi_watchdog=2 bootup hang + * [POWERPC] Update g5_defconfig + * [POWERPC] Update defconfigs + * [POWERPC] Fix VDSO gettimeofday() when called with NULL struct timeval + * [POWERPC] Fix subtle FP state corruption bug in signal return on SMP + * USB: g_file_storage: call allow_signal() + * USB: ti serial driver sleeps with spinlock held + * USB: memory leak in iowarrior.c + * USB: usblcd doesn't limit memory consumption during write + * USB: fix race leading to use after free in io_edgeport + * USB: add new device id to option driver + * USB: ftdio_sio: New IPlus device ID + * [MIPS] __ucmpdi2 arguments are unsigned long long. + * [MIPS] add io_map_base to pci_controller on Cobalt + * [MIPS] remove "support for" from system type entry + * [MIPS] Alchemy: Fix wrong cast + * [MIPS] Fix pb1500 reg B access + * [MIPS] AP/SP requires shadow registers, auto enable support. + * [MIPS] 20K: Handle WAIT related bugs according to errata information + * [MIPS] use compat_siginfo in rt_sigframe_n32 + * [MIPS] Remove a duplicated local variable in test_and_clear_bit() + * [MIPS] EMMA2RH: Disable GEN_RTC, it can't possibly work. + * [MIPS] SMTC and non-SMTC kernel and modules are incompatible + * [MIPS] Count timer interrupts correctly. + * x86_64: set the irq_chip name for lapic + * x86_64 irq: use mask/unmask and proper locking in fixup_irqs() + * [SPARC64]: Add irqs to mdesc_node. + * [SPARC64]: Fix VIRQ enabling. + * [SPARC64]: Need to set state to IDLE during sun4v IRQ enable. + * [SPARC64]: Add LDOM virtual channel driver and VIO device layer. + * [SPARC64]: Add Sun LDOM virtual network driver. + * [SPARC64]: Add Sun LDOM virtual disk driver. + * [SPARC64]: Create proper obppath sysfs files for VIO bus devices. + * [SPARC64] LDC: Do limited polled retry on setting RX queue head. 
+ * [GFS2] Fix gfs2_block_truncate_page err return + * [DLM] Telnet to port 21064 can stop all lockspaces + * [GFS2] inode size inconsistency + * [GFS2] remounting w/o acl option leaves acls enabled + * [GFS2] System won't suspend with GFS2 file system mounted + * [GFS2] git-gfs2-nmw-build-fix + * [GFS2] Obtaining no_formal_ino from directory entry + * [GFS2] Remove i_mode passing from NFS File Handle + * [SUNVNET]: Validate RX descriptor size field. + * [SPARC64]: Add missing symbol exports for LDOM infrastructure. + * [SPARC64]: Temporary workaround for LDC INO double-delivery. + * [SPARC64]: Create 'devspec' nodes for vio devices. + * [SPARC64]: vdev->type can be NULL, handle this in devspec_show(). + + -- Amit Kucheria Mon, 09 Jul 2007 12:55:56 +0300 + +linux-source-2.6.22 (2.6.22-7.14) gutsy; urgency=low + + [Ben Collins] + + * build/vars: Provide ivtv-modules + * Bump ABI + * ubuntu/config: Enable Intermediate Functional Block device + * coredump: Fix typo in patch merge + * ubuntu/scripts: Make sure to symlink *.lds for ia64 builds + * ubuntu/config: Enable NO_HZ for server and sparc64 targets. + * ubuntu/config: Remove bigiron target, see if anyone complains + * ubuntu: Ok, really remove bigiron + * ubuntu/control-scripts: Fo sho, remove the debconf stuff from controls + scripts + * AppArmor: Enable exports and changes for AppArmor usage + * ubuntu: Add feisty changelog for historical purposes. + + [Colin Watson] + + * Move isofs to storage-core-modules udeb from fs-core-modules. + + [Upstream Kernel Changes] + + * [MTD] [MAPS] don't force uclinux mtd map to be root dev + * [MTD] generalise the handling of MTD-specific superblocks + * [SCSI] zfcp: avoid clutter in erp_dbf + * [SCSI] zfcp: IO stall after deleting and path checker changes after + reenabling zfcp devices + * [SCSI] ipr: Proper return codes for eh_dev_reset for SATA devices + * [SCSI] stex: fix id mapping issue + * [SCSI] stex: extend hard reset wait time + * [SCSI] stex: fix reset recovery for console device + * [SCSI] stex: minor cleanup and version update + * [SCSI] MegaRAID: Update MAINTAINERS email-id + * [SCSI] tgt: fix a rdma indirect transfer error bug + * [SCSI] NCR53C9x: correct spelling mistake in deprecation notice + * [SCSI] aacraid: Correct sa platform support. (Was: [Bug 8469] Bad EIP + value on pentium3 SMP kernel-2.6.21.1) + * [SCSI] aacraid: fix panic on short Inquiry + * [WATCHDOG] ks8695_wdt.c - new KS8695 watchdog driver + * [JFFS2] Fix BUG() caused by failing to discard xattrs on deleted files. + * [JFFS2] Fix potential memory leak of dead xattrs on unmount. + * [SCSI] sd: fix refcounting regression in suspend/resume routines + * [SCSI] aacraid: apply commit config for reset_devices flag + * [SCSI] aic7xxx: fix aicasm build failure with gcc-3.4.6 + * [SCSI] aic94xx: asd_clear_nexus should fail if the cleared task does + not complete + * [SCSI] fusion: Fix |/|| confusion + * parisc: make command_line[] static + * parisc: sync compat getdents + * [PARISC] Move #undef to end of syscall table + * [PARISC] Wire up kexec_load syscall + * parisc: convert /proc/gsc/pcxl_dma to seq_file + * [PARISC] Let PA-8900 processors boot + * [PARISC] Disable LWS debugging + * [PARISC] spelling fixes: arch/parisc/ + * sh: section mismatch fixes for system timer. + * [PARISC] ROUND_UP macro cleanup in arch/parisc + * [PARISC] ROUNDUP macro cleanup in drivers/parisc + * [PPC] Fix COMMON symbol warnings + * [PPC] Remove duplicate export of __div64_32. + * [POWERPC] 52xx: unbreak lite5200 dts (_pic vs. 
-pic) + * [POWERPC] QE: fix Kconfig 'select' warning with UCC_FAST + * [POWERPC] Fix Section mismatch warnings + * [POWERPC] Fix modpost warning + * [PPC] Fix modpost warning + * [CIFS] Fix oops on failed cifs mount (in kthread_stop) + * [POWERPC] Fix Kconfig warning + * [CIFS] typo in previous patch + * [SCSI] megaraid_sas: intercept cmd timeout and throttle io + * [WATCHDOG] clean-up watchdog documentation + * drm: Spinlock initializer cleanup + * drm/radeon: add more IGP chipset pci ids + * drm: make sure the drawable code doesn't call malloc(0). + * [PARISC] kobject is embedded in subsys, not kset + * [PARISC] Build fixes for power.c + * [ARM] 4401/1: S3C2443: Add definitions for port GPIOJ + * [ARM] 4402/1: S3C2443: Add physical address of HSMMC controller + * [ARM] 4403/1: Make the PXA-I2C driver work with lockdep validator + * [ARM] 4404/1: Trivial IXP42x Kconfig cleanup + * [ARM] 4405/1: NSLU2, DSM-G600 frequency fixup code + * [ARM] 4406/1: Trivial NSLU2 / NAS-100D header & setup code cleanup + * [ARM] remove unused header file: arch/arm/mach-s3c2410/bast.h + * [PARISC] fix lasi_82596 build + * [PARISC] fix section mismatch in parport_gsc + * [PARISC] fix section mismatch in parisc STI video drivers + * [PARISC] fix section mismatch in ccio-dma + * [PARISC] fix section mismatches in arch/parisc/kernel + * [PARISC] fix section mismatch in parisc eisa driver + * [PARISC] fix section mismatch in superio serial drivers + * [PARISC] Wire up utimensat/signalfd/timerfd/eventfd syscalls + * hwmon/ds1621: Fix swapped temperature limits + * hwmon/coretemp: Add more safety checks + * hwmon/w83627hf: Be quiet when no chip is found + * hwmon-vid: Don't spam the logs when VRM version is missing + * hwmon/applesmc: Simplify dependencies + * hwmon/applesmc: Handle name file creation error and deletion + * ieee1394: sbp2: include workqueue.h + * ieee1394: eth1394: remove bogus netif_wake_queue + * ieee1394: eth1394: handle tlabel exhaustion + * ieee1394: eth1394: bring back a parent device + * ieee1394: raw1394: Fix async send + * firewire: Add missing byteswapping for receive DMA programs. + * firewire: prefix modules with firewire- instead of fw- + * firewire: fix return code + * [libata] Add drive to NCQ blacklist + * [ARM] enable arbitary speed tty ioctls and split input/output speed + * Input: db9 - do not ignore dev2 module parameter + * Input: logips2pp - fix typo in Kconfig + * [XFS] Write at EOF may not update filesize correctly. + * [SCSI] pluto: Use wait_for_completion_timeout. + * [SPARC64]: Kill unused DIE_PAGE_FAULT enum value. + * [SPARC64]: Don't be picky about virtual-dma values on sun4v. + * [SPARC32]: Removes mismatch section warnigs in sparc time.c file + * [SERIAL] sunzilog: section mismatch fix + * [SPARC64]: PCI device scan is way too verbose by default. + * [SCSI] jazz_esp: Converted to use esp_core. + * [SCSI] ESP: Kill SCSI_ESP_CORE and link directly just like jazz_esp + * [SPARC64]: Fix typo in sun4v_hvapi_register error handling. + * [SPARC64]: Report proper system soft state to the hypervisor. + * [SPARC64]: Negotiate hypervisor API for PCI services. + * [SPARC64]: Use machine description and OBP properly for cpu probing. + * [SPARC64]: Eliminate NR_CPUS limitations. + * [SPARC64]: arch/sparc64/time.c doesn't compile on Ultra 1 (no PCI) + * [SPARC]: Linux always started with 9600 8N1 + * [SPARC64]: Fix _PAGE_EXEC_4U check in sun4u I-TLB miss handler. + * [SPARC]: Emulate cmpxchg like parisc + * [SPARC]: Mark as emulating cmpxchg, add appropriate depends for DRM. 
+ * [SPARC64]: Fix two bugs wrt. kernel 4MB TSB. + * [SPARC64]: Fill holes in hypervisor APIs and fix KTSB registry. + * mac80211: fail back to use associate from reassociate + * mac80211: fix memory leak when defrag fragments + * mac80211: always set carrier status on open + * mac80211: avoid null ptr deref in ieee80211_ibss_add_sta + * prism54: fix monitor mode oops + * ieee80211: fix incomplete error message + * softmac: alloc_ieee80211() NULL check + * hostap: Allocate enough tailroom for TKIP + * sparc64: fix alignment bug in linker definition script + * USB: replace flush_workqueue with cancel_sync_work + * ACPICA: allow Load(OEMx) tables + * ACPI: thermal: Replace pointer with name in trip_points + * ACPI: extend "acpi_osi=" boot option + * IB/mthca: Fix handling of send CQE with error for QPs connected to SRQ + * IPoIB/cm: Fix performance regression on Mellanox + * IB/cm: Fix stale connection detection + * IB/mlx4: Fix last allocated object tracking in bitmap allocator + * NOHZ: prevent multiplication overflow - stop timer for huge timeouts + * random: fix error in entropy extraction + * random: fix seeding with zero entropy + * ACPI: Make _OSI(Linux) a special case + * ACPI: add __init to acpi_initialize_subsystem() + * [PARISC] fix "ENTRY" macro redefinition + * [PARISC] fix section mismatch in smp.c + * [PARISC] remove remnants of parisc-specific softirq code + * [PARISC] fix trivial spelling nit in asm/linkage.h + * [PARISC] fix null ptr deref in unwind.c + * [PARISC] fix "reduce size of task_struct on 64-bit machines" fallout + * [PARISC] be more defensive in process.c::get_wchan + * [ARM] use __used attribute + * [ARM] Fix stacktrace FP range checking + * [ARM] oprofile: avoid lockdep warnings on mpcore oprofile init + * [ARM] 4411/1: KS8695: Another serial driver fix + * [ARM] 4412/1: S3C2412: reset errata fix + * [ARM] 4414/1: S3C2443: sparse fix for clock.c + * [ARM] 4415/1: AML5900: fix sparse warnings from map_io + * [ARM] 4416/1: NWFPE: fix undeclared symbols + * [ARM] 4410/1: Remove extern declarations in coyote/ixdpg425-pci.c + * [ARM] 4394/1: ARMv7: Add the TLB range operations + * [ARM] 4417/1: Serial: Fix AMBA drivers locking + * sky2: dont set bogus bit in PHY register + * sky2: checksum offload plus vlan bug + * sky2: program proper register for fiber PHY + * defxx: Fix the handling of ioremap() failures + * e1000: restore netif_poll_enable call but make sure IRQs are off + * sky2: enable IRQ on duplex renegotiation + * ehea: Fixed multi queue RX bug + * [SCSI] fix CONFIG_SCSI_WAIT_SCAN=m + * [SCSI] qla2xxx: fix timeout in qla2x00_down_timeout + * [ARM] Fix some section mismatch warnings + * alpha: cleanup in bitops.h + * alpha: support new syscalls + * fix possible null ptr deref in kallsyms_lookup + * NFS: Fix a refcount leakage in O_DIRECT + * a bug in ramfs_nommu_resize function, passing old size to vmtruncate + * sh: Fix pcrel too far for in_nmi label. + * sh: Trivial fix for dma-api compile failure. + * sh: Fix vsyscall build failure. + * sh: trivial build cleanups. 
+ * sh: support older gcc's + * [ALSA] HDA: Add support for Gateway NX860 + * [ALSA] HDA: Add more systems to Sigmatel codec + * [ALSA] HDA: Fix headphone mute issue on non-eapd Conexant systems + * [ALSA] hda-codec - Add support for ASUS A8J modem + * [ALSA] ali5451 - Fix possible NULL dereference + * [ALSA] hda-intel: fix ASUS M2V detection + * [ALSA] Fix ASoC s3c24xx-pcm spinlock bug + * [ALSA] hda-codec - Add quirk for MSI S420 + * [ALSA] hda-codec - Add quirk for Supermicro PDSBA to alc883_cfg_tbl[] + * [ALSA] hda-codec - Add support for MSI K9N Ultra + * [ALSA] hda-codec - Fix pin configs for Gateway MX6453 + * [ALSA] hda-codec - Fix input with STAC92xx + * [ALSA] hda-codec - Fix STAC922x capture boost level + * [CRYPTO] cryptd: Fix problem with cryptd and the freezer + * [CASSINI]: Fix printk message typo. + * [XFRM]: Allow XFRM_ACQ_EXPIRES to be tunable via sysctl. + * [XFRM]: xfrm_larval_drop sysctl should be __read_mostly. + * [IPSEC]: Fix IPv6 AH calculation in outbound + * [IPV6] ROUTE: No longer handle ::/0 specially. + * [NET]: parse ip:port strings correctly in in4_pton + * [IPSEC]: Fix panic when using inter address familiy IPsec on loopback. + * [IPV4]: Kill references to bogus non-existent CONFIG_IP_NOSIOCRT + * [AF_PACKET]: Kill bogus CONFIG_PACKET_MULTICAST + * [IPV6]: Fix build warning. + * [AF_PACKET]: Kill CONFIG_PACKET_SOCKET. + * [SOCK]: Shrink struct sock by 8 bytes on 64-bit. + * [TCP]: Consolidate checking for tcp orphan count being too big. + * [NET] napi: Call __netif_rx_complete in netif_rx_complete + * [IPV6] ADDRCONF: Fix conflicts in DEVCONF_xxx constant. + * [TCP] tcp_probe: a trivial fix for mismatched number of printl + arguments. + * [TCP] tcp_probe: use GCC printf attribute + * [BRIDGE]: Reduce frequency of forwarding cleanup timer in bridge. + * [BRIDGE]: Round off STP perodic timers. + * [IPSEC]: Add xfrm_sysctl.txt. + * [SPARC64]: Add missing NCS and SVC hypervisor interfaces. + * [SPARC32]: Build fix. + * [SPARC]: Missing #include in drivers/sbus/char/flash.c + * [ALSA] version 1.0.14 + * neofb: Fix pseudo_palette array overrun in neofb_setcolreg + * smpboot: fix cachesize comparison in smp_tune_scheduling() + * at91: fix enable/disable_irq_wake symmetry in pcmcia driver + * SLUB: More documentation + * pci-quirks: fix MSI disabling on RS400-200 and RS480 + * ntfs_init_locked_inode(): fix array indexing + * m68k: runtime patching infrastructure + * SLUB: Fix NUMA / SYSFS bootstrap issue + * afs: needs sched.h + * m68k: discontinuous memory support + * [S390] Add exception handler for diagnose 224 + * [S390] dasd_eer: use mutex instead of semaphore + * [S390] arch/s390/kernel/debug.c: use mutex instead of semaphore + * [S390] raw3270: use mutex instead of semaphore + * [S390] Fix section annotations. + * [S390] cio: Use device_schedule_callback() for removing disconnected + devices. + * [S390] cio: deregister ccw device when pgid disband failed + * ACPI: thinkpad-acpi: do not use named sysfs groups + * ieee1394: fix calculation of sysfs attribute "address" + * ieee1394: sbp2: offer SAM-conforming target port ID in sysfs + * firewire: fw-sbp2: implement sysfs ieee1394_id + * firewire: add to MAINTAINERS + * firewire: Implement suspend/resume PCI driver hooks. + * firewire: Change struct fw_cdev_iso_packet to not use bitfields. + * firewire: Install firewire-constants.h and firewire-cdev.h for + userspace. + * EXT4: Fix whitespace + * Remove unnecessary exported symbols. 
+ * ext4: Extent overlap bugfix + * When ext4_ext_insert_extent() fails to insert new blocks + * Define/reserve new ext4 superblock fields + * msi: fix ARM compile + * PCI: disable MSI by default on systems with Serverworks HT1000 chips + * PCI: Fix pci_find_present + * PCI: i386: fixup for Siemens Nixdorf AG FSC Multiprocessor Interrupt + Controllers + * PCI: quirk disable MSI on via vt3351 + * [XTENSA] fix bit operations in bitops.h + * [XTENSA] Spelling fixes in arch/xtensa + * [XTENSA] fix sources using deprecated assembler directive + * [XTENSA] Remove multi-exported symbols from xtensa_ksyms.c + * [XTENSA] Use generic 64-bit division + * [XTENSA] clean-up header files + * [XTENSA] Move common sections into bss sections + * [XTENSA] Remove non-rt signal handling + * Xtensa: use asm-generic/fcntl.h + * [JFFS2] Fix buffer length calculations in jffs2_get_inode_nodes() + * Fix vmi.c compilation + * x86_64: allocate sparsemem memmap above 4G + * Add select PHYLIB to the UCC_GETH Kconfig option + * Fix possible UDF data corruption + * m68k: parenthesis balance + * msi: fix the ordering of msix irqs + * msi: mask the msix vector before we unmap it + * potential parse error in ifdef + * parse errors in ifdefs + * pci_ids: update patch for Intel ICH9M + * x86: fix oprofile double free + * Work around Dell E520 BIOS reboot bug + * fix compat futex code for private futexes + * skeletonfb: fix of xxxfb_setup ifdef + * vt8623fb: arkfb: null pointer dereference fix + * cfag12864bfb: Use sys_ instead of cfb_ framebuffer accessors + * fbdev: Move declaration of fb_class to + * misc/tifm_7xx1: replace deprecated irq flag + * add a trivial patch style checker + * Documentation: How to use GDB to decode OOPSes + * RTC: use fallback IRQ if PNP tables don't provide one + * memory hotplug: fix unnecessary calling of init_currenty_empty_zone() + * tty: fix leakage of -ERESTARTSYS to userland + * ISDN4Linux: fix maturity label + * Fix broken CLIR in isdn driver + * prism54: MAINTAINERS update + * atmel_spi dma address bugfix + * h8300 trival patches + * ALPHA: support graphics on non-zero PCI domains + * ALPHA: correct low-level I/O routines for sable-lynx + * ALPHA: misc fixes + * Better documentation for ERESTARTSYS + * serial_core.h: include + * SPI: Freescale iMX SPI controller driver fixes + * SLUB: fix locking for hotplug callbacks + * pm3fb: switching between X and fb fix + * microcode: fix section mismatch warning + * isdn: fix section mismatch warnings + * acpi: fix section mismatch warning in asus + toshiba + * kvm: fix section mismatch warning in kvm-intel.o + * net/hp100: fix section mismatch warning + * timer statistics: fix race + * timer stats: speedups + * [SCSI] aacraid: fix shutdown handler to also disable interrupts. + * [MTD] Fix error checking after get_mtd_device() in get_sb_mtd functions + * [JFFS2] Fix obsoletion of metadata nodes in jffs2_add_tn_to_tree() + * ACPI: Section mismatch ... 
acpi_map_pxm_to_node + * ACPICA: Support for external package objects as method arguments + * Pull now into release branch + * Pull osi-now into release branch + * [POWERPC] Update documentation for of_find_node_by_type() + * [POWERPC] Fix ppc32 single-stepping out of syscalls + * [POWERPC] Fix compiler/assembler flags for Ebony platform boot files + * [POWERPC] Fix possible access to free pages + * [POWERPC] ps3/interrupt.c uses get_hard_smp_processor_id + * [POWERPC] pasemi idle uses hard_smp_processor_id + * [POWERPC] Create a zImage for legacy iSeries + * [POWERPC] Don't use HOSTCFLAGS in BOOTCFLAGS + * [POWERPC] Fix compile warning in pseries xics code + * [POWERPC] Fix return from pte_alloc_one() in out-of-memory case + * [POWERPC] Compare irq numbers with NO_IRQ not IRQ_NONE + * [POWERPC] Don't allow PMAC_APM_EMU for 64-bit + * [POWERPC] Fix compile breakage for IBM/AMCC 4xx arch/ppc platforms + * [POWERPC] Fix zImage.coff generation for 32-bit pmac + * [ARM] 4392/2: Do not corrupt the SP register in compressed/head.S + * [ARM] 4418/1: AT91: Number of programmable clocks differs + * [ARM] 4419/1: AT91: SAM9 USB clocks check for suspending + * [ARM] 4422/1: Fix default value handling in gpio_direction_output (PXA) + * [ARM] Solve buggy smp_processor_id() usage + * qla3xxx: device doesnt do hardware checksumming. + * VLAN: kill_vid is only useful for VLAN filtering devices + * sky2: Fix VLAN unregistration + * 8139cp: fix VLAN unregistration + * atl1: eliminate unneeded kill_vid code + * network drivers: eliminate unneeded kill_vid code + * e1000: disable polling before registering netdevice + * smc91x: sh solution engine fixes. + * Update tulip maintainer email address + * NetXen: Removal of extra free_irq call + * myri10ge: report link up/down in standard ethtool way + * NET: add MAINTAINERS entry for ucc_geth driver + * [ARM] 4421/1: AT91: Value of _KEY fields. + * [PARISC] Fix bug when syscall nr is __NR_Linux_syscalls + * [AF_UNIX]: Make socket locking much less confusing. + * [TG3]: Fix link problem on Dell's onboard 5906. + * [AF_UNIX]: Fix datagram connect race causing an OOPS. + * [TCP]: Use default 32768-61000 outgoing port range in all cases. + * [ATM]: Fix warning. + * [NET]: Make net watchdog timers 1 sec jiffy aligned. + * [NET]: Fix comparisons of unsigned < 0. + * [TCP]: Fix GSO ignorance of pkts_acked arg (cong.cntrl modules) + * [NET] gso: Fix GSO feature mask in sk_setup_caps + * [IPV4]: Fix "ipOutNoRoutes" counter error for TCP and UDP + * [ICMP]: Fix icmp_errors_use_inbound_ifaddr sysctl + * [VIDEO]: XVR500 and XVR2500 require FB=y + * [ATA]: Don't allow to enable this for SPARC64 without PCI. + * sh: Fix in_nmi symbol build error. + * sh: microdev: Fix compile warnings. + * sh: Fix SH4-202 clock fwk set_rate() mismatch. + * sh: voyagergx: Fix build warnings. + * sh: ioremap() through PMB needs asm/mmu.h. + * sh: Fix se73180 platform device registration. + * Input: ucb1x00 - do not access input_dev->private directly + * Input: reduce raciness when input handlers disconnect + * [PARISC] Fix kernel panic in check_ivt + * [SCSI] atari_NCR5380: update_timeout removal + * [SCSI] JAZZ ESP and SUN ESP need SPI_ATTRS + * [CIFS] fix mempool destroy done in wrong order in cifs error path + * SPI dynamic busid generation bugfix + * mtrr atomicity fix + * vanishing ioctl handler debugging + * libata: always use polling SETXFER + * Linux 2.6.22-rc4 + * [SPARC64]: Move topology init code into new file, sysfs.c + * [SPARC64]: Export basic cpu properties via sysfs. 
+ * [SPARC64]: Fix service channel hypervisor function names. + * [SPARC64]: Provide mmu statistics via sysfs. + * [SPARC64]: Proper multi-core scheduling support. + * [SPARC64]: Make core and sibling groups equal on UltraSPARC-IV. + * [SPARC64]: Fix {mc,smt}_capable(). + * [SPARC64]: Fill in gaps in non-PCI dma_*() NOP implementation. + * [ATA]: Back out bogus (SPARC64 && !PCI) Kconfig depends. + * [VIDEO]: Fix section mismatch warning in promcon. + * [CIFS] whitespace cleanup + * [ARM] Fix 4417/1: Serial: Fix AMBA drivers locking + * [VIDEO] ffb: The pseudo_palette is only 16 elements long + * [ARM] pxa: fix pxa27x keyboard driver + * [VIDEO] sunxvr2500fb: Fix pseudo_palette array size + * [VIDEO] sunxvr500fb: Fix pseudo_palette array size + * [CIFS] whitespace cleanup part 2 + * [CIFS] Missing flag on negprot needed for some servers to force packet + signing + * [MIPS] Atlas, Malta, SEAD: Remove scroll from interrupt handler. + * [MIPS] Remove duplicate fpu enable hazard code. + * [MIPS] EMMA2RH: remove dead KGDB code + * [MIPS] RM300: Fix MMIO problems by marking the PCI INT ACK region busy + * [MIPS] Fix VGA corruption on RM300C + * [MIPS] Drop __ARCH_WANT_SYS_FADVISE64 + * [MIPS] Make dma_map_sg handle sg elements which are longer than one + page + * [MIPS] Fix some system calls with long long arguments + * [MIPS] Remove prototype for deleted function qemu_handle_int + * [MIPS] Fix some minor typoes in arch/mips/Kconfig. + * [MIPS] Fix warning by moving do_default_vi into CONFIG_CPU_MIPSR2_SRS + * [AGPGART] intel_agp: cleanup intel private data + * [AGPGART] intel_agp: use table for device probe + * [AGPGART] intel_agp: add support for 965GME/GLE + * [AGPGART] intel_agp: add support for 945GME + * [AGPGART] intel_agp: Add support for G33, Q33 and Q35 chipsets + * ocfs2: Fix masklog breakage + * ocfs2: Fix invalid assertion during write on 64k pages + * [POWERPC] pasemi: Fix iommu + 64K PAGE_SIZE bug + * [POWERPC] spufs: Refuse to load the module when not running on cell + * [POWERPC] spufs: Hook up spufs_release_mem + * [POWERPC] spufs: Fix gang destroy leaks + * [POWERPC] spufs: Free mm if spufs_fill_dir() failed + * [POWERPC] spufs: Synchronize pte invalidation vs ps close + * [POWERPC] spufs scheduler: Fix wakeup races + * [POWERPC] Fix pci_setup_phb_io_dynamic for pci_iomap + * [POWERPC] cbe_cpufreq: Limit frequency via cpufreq notifier chain + * [POWERPC] scc_sio: Fix link failure + * [POWERPC] Fix typo in booting-without-of-txt section numbering + * [POWERPC] spufs: Don't yield nosched context + * [POWERPC] Add table of contents to booting-without-of.txt + * [POWERPC] spufs: Fix error handling in spufs_fill_dir() + * mmc-atmel: remove linux/mmc/protocol.h dependencies + * au1xmmc: Replace C code with call to ARRAY_SIZE() macro. + * mmc: fix broken if clause + * mmc: don't call switch on old cards + * [POWERPC] Fix building of COFF zImages + * checkpatch.pl: should be executable + * Restrict clearing TIF_SIGPENDING + * mlx4_core: Fix CQ context layout + * mlx4_core: Initialize ctx_list and ctx_lock earlier + * mlx4_core: Free catastrophic error MSI-X interrupt with correct dev_id + * IB/mthca, mlx4_core: Fix typo in comment + * [BNX2]: Fix netdev watchdog on 5708. + * [BNX2]: Add missing wait in bnx2_init_5709_context(). + * [BNX2]: Enable DMA on 5709. + * [BNX2]: Fix occasional counter corruption on 5708. + * [BNX2]: Update version and reldate. 
+ * [TCP]: Honour sk_bound_dev_if in tcp_v4_send_ack + * [IPV4]: Only panic if inetdev_init fails for loopback + * [IPV4]: Convert IPv4 devconf to an array + * [IPV4]: Add default config support after inetdev_init + * [IPV4]: Restore old behaviour of default config values + * [RFKILL]: Make rfkill->name const + * [TCP]: Use LIMIT_NETDEBUG in tcp_retransmit_timer(). + * [TCP] tcp_probe: Attach printf attribute properly to printl(). + * [NETLINK]: Mark netlink policies const + * [RTNETLINK]: ifindex 0 does not exist + * [NETFILTER]: nf_conntrack: fix helper module unload races + * [NETFILTER]: ip_tables: fix compat related crash + * [NETFILTER]: nf_conntrack_amanda: fix textsearch_prepare() error check + * [AF_UNIX]: Fix stream recvmsg() race. + * [UDP]: Revert 2-pass hashing changes. + * [NET]: Avoid duplicate netlink notification when changing link state + * [NET_SCHED]: Fix filter double free + * xfrm: Add security check before flushing SAD/SPD + * [SPARC64]: Fix 2 bugs in PCI Sabre bus scanning. + * [SPARC64]: Fix SBUS IRQ regression caused by PCI-E driver. + * frv: build fix + * enable interrupts in user path of page fault. + * RAMFS NOMMU: missed POSIX UID/GID inode attribute checking + * [SPARC64]: Include instead of . + * [SPARC64]: Handle PCI bridges without 'ranges' property. + * mlx4_core: Check firmware command interface revision + * mlx4_core: Don't set MTT address in dMPT entries with PA set + * IB/mlx4: Fix zeroing of rnr_retry value in ib_modify_qp() + * RDMA/cma: Fix initialization of next_port + * IB/mlx4: Make sure RQ allocation is always valid + * splice: move inode size check into generic_file_splice_read() + * splice: remove do_splice_direct() symbol export + * pipe: move pipe_inode_info structure decleration up before it's used + * splice: move balance_dirty_pages_ratelimited() outside of splice actor + * splice: __generic_file_splice_read: fix i_size_read() length checks + * splice: __generic_file_splice_read: fix read/truncate race + * V4L/DVB (5702): Fix Kconfig items to avoid linkedition errors + * V4L/DVB (5700): Saa7111: fix picture settings cache bug + * V4L/DVB (5699): Cinergyt2: fix file release handler + * V4L/DVB (5675): Move big PIO accesses from the interrupt handler to a + workhandler + * V4L/DVB (5716): Tda10086,tda826x: fix tuning, STR/SNR values + * V4L/DVB (5720): Usbvision: fix urb allocation and submits + * V4L/DVB (5730): Remove unused V4L2_CAP_VIDEO_OUTPUT_POS + * V4L/DVB (5732): Add ivtv CROPCAP support and fix ivtv S_CROP for video + output. + * V4L/DVB (5736): Add V4L2_FBUF_CAP/FLAG_LOCAL/GLOBAL_INV_ALPHA + * V4L/DVB (5673): Fix audio stuttering for saa711x/ivtv when in radio + mode. + * V4L/DVB (5761): Fix broken b2c2 dependency on non x86 architectures + * V4L/DVB (5751): Ivtv: fix ia64 printk format warnings. + * serverworks: remove crappy code + * serverworks: fix CSB6 tuning logic + * it821x: RAID mode fixes + * ide: HPA detect from resume + * ide: generic IDE PCI driver, add another device exception + * hpt366: disallow Ultra133 for HPT374 + * Add the PATA controller device ID to pci_ids.h for MCP73/MCP77. 
+ * ide: Add the MCP73/77 support to PATA driver + * [CIFS] CIFS should honour umask + * update Documentation/driver-model/platform.txt + * Driver core: keep PHYSDEV for old struct class_device + * Driver core: kill unused code + * kobject: use the proper printk level for kobject error + * firmware: remove orphaned Email + * [IPV4]: Do not remove idev when addresses are cleared + * [NetLabel]: consolidate the struct socket/sock handling to just struct + sock + * [CIPSO]: Fix several unaligned kernel accesses in the CIPSO engine. + * USB: set default y for CONFIG_USB_DEVICE_CLASS + * usblp: Don't let suspend to kill ->used + * USB: usb gadgets avoid le{16,32}_to_cpup() + * USB: UNUSUAL_DEV: Sync up some reported devices from Ubuntu + * USB: cxacru: add Documentation file + * USB: cxacru: create sysfs attributes in atm_start instead of bind + * USB: cxacru: ignore error trying to start ADSL in atm_start + * USB: Fix up bogus bInterval values in endpoint descriptors + * OHCI: Fix machine check in ohci_hub_status_data + * update checkpatch.pl to version 0.03 + * m68knommu: fix ColdFire timer off by 1 + * nommu: report correct errno in message + * loop: preallocate eight loop devices + * document Acked-by: + * update feature-removal-schedule.txt to include deprecated functions + * mount -t tmpfs -o mpol=: check nodes online + * slab: fix alien cache handling + * potential parse error in ifdef part 3 + * SLUB: return ZERO_SIZE_PTR for kmalloc(0) + * uml: fix kernel stack size on x86_64 + * Documentation/atomic_ops.txt typo fix + * Move three functions that are only needed for CONFIG_MEMORY_HOTPLUG + * Char: stallion, don't fail with less than max panels + * Char: stallion, alloc tty before pci devices init + * Char: stallion, proper fail return values + * uml: get declaration of simple_strtoul + * isdn/diva: fix section mismatch + * sata_promise: use TF interface for polling NODATA commands + * rt-mutex: fix stale return value + * rt-mutex: fix chain walk early wakeup bug + * pi-futex: fix exit races and locking problems + * fix sysrq-m oops + * x86_64: oops_begin() fix + * reiserfs: mailing list has moved + * checkpatch: produce fewer lines of output + * MAINTAINERS: corrections + * hexdump: more output formatting + * update checkpatch.pl to version 0.04 + * Protect from multiple inclusion + * [IrDA]: Fix Rx/Tx path race. + * [IrDA]: f-timer reloading when sending rejected frames. 
+ * ibmveth: Fix h_free_logical_lan error on pool resize + * ibmveth: Automatically enable larger rx buffer pools for larger mtu + * typo in via-velocity.c + * NetXen: Fix ping issue after reboot on Blades with 3.4.19 firmware + * NetXen: Fix compile failure seen on PPC architecture + * ehea: Fixed possible kernel panic on VLAN packet recv + * phylib: add RGMII-ID mode to the Marvell m88e1111 PHY to fix broken + ucc_geth + * net: fix typo in drivers/net/usb/Kconfig + * remove unused variable in pata_isapnp + * libata: disable NCQ for HITACHI HTS541680J9SA00/SB21C7EP + * libata: fix probe time irq printouts + * libata: print device model and firmware revision for ATAPI devices + * libata: fix hw_sata_spd_limit initialization + * ahci: Add MCP73/MCP77 support to AHCI driver + * libata-core/sff: Fix multiple assumptions about DMA + * libata: Correct abuse of language + * libata passthru: update protocol numbers + * libata passthru: support PIO multi commands + * libata passthru: map UDMA protocols + * libata passthru: always enforce correct DEV bit + * libata passthru: update cached device paramters + * i915: add new pciids for 945GME, 965GME/GLE + * drm/i915: Add support for the G33, Q33, and Q35 chipsets. + * drm: fix radeon setparam on 32/64 bit systems. + * [ARM] VFP: fix section mismatch error + * libata: force PIO on IOMEGA ZIP 250 ATAPI + * libata: limit post SRST nsect/lbal wait to ~100ms + * Blackfin arch: remove defconfig file + * Blackfin arch: DMA code minor naming convention fix + * Blackfin arch: spelling fixes + * Blackfin arch: fix bug ad1836 fails to build properly for BF533-EZKIT + * Blackfin arch: all symbols were offset by 4k, since we didn't have the + __text label. + * Blackfin arch: mark our memory init functions with __init so they get + freed after init + * Blackfin arch: implement a basic /proc/sram file for L1 allocation + visibility + * Blackfin arch: fixup Blackfin MAINTIANERS team member list + * Blackfin arch: scrub old console defines + * Blackfin arch: update defconfigs + * Blackfin arch: unify differences between our diff head.S files -- no + functional changes + * Blackfin arch: move more of our startup code to .init so it can be + freed once we are up and running + * Blackfin arch: add proper ENDPROC() + * Blackfin arch: try to split up functions like this into smaller units + according to LKML review + * Blackfin arch: fix spelling typo in output + * Blackfin arch: As Mike pointed out range goes form m..MAX_BLACKFIN_GPIO + -1 + * Blackfin arch: add missing gpio.h header to fix compiling in some pm + configurations + * Blackfin arch: add support for Alon Bar-Lev's dynamic kernel + command-line + * Blackfin arch: fix bug can not wakeup from sleep via push buttons + * Blackfin arch: make sure we initialize our L1 Data B section properly + based on the linked kernel + * Blackfin arch: redo our linker script a bit + * Blackfin arch: move HI/LO macros into blackfin.h and punt the rest of + macros.h as it includes VDSP macros we never use + * Blackfin serial driver: hook up our UARTs STP bit with userspaces + CMSPAR + * Blackfin serial driver: ignore framing and parity errors + * Blackfin serial driver: actually implement the break_ctl() function + * Blackfin serial driver: decouple PARODD and CMSPAR checking from PARENB + * Blackfin RTC drivers: update MAINTAINERS information + * Blackfin SPI driver: tweak spi cleanup function to match newer kernel + changes + * [ARM] 4442/1: OSIRIS: Fix CPLD register definitions + * [ARM] 4443/1: OSIRIS: Add watchdog device to 
machine devices + * [ARM] 4444/2: OSIRIS: CPLD suspend fix + * [ARM] 4445/1: ANUBIS: Fix CPLD registers + * Blackfin SPI driver: fix bug SPI DMA incomplete transmission + * Blackfin SMC91X ethernet supporting driver: SMC91C111 LEDs are note + drived in the kernel like in uboot + * [MIPS] Fix KMODE for the R3000 + * [MIPS] SMTC: Don't set and restore irqregs ptr from self_ipi. + * [MIPS] Always install the DSP exception handler. + * [MIPS] Atlas: Fix build. + * [MIPS] Wire up utimensat, signalfd, timerfd, eventfd + * [MIPS] SMTC: Fix warning. + * [MIPS] SMTC: Don't continue in set_vi_srs_handler on detected bad + arguments. + * [MIPS] SMTC: The MT ASE requires to initialize c0_pagemask and + c0_wired. + * [MIPS] SMTC: Fix build error caused by nonsense code. + * [MIPS] Fix modpost warnings by making start_secondary __cpuinit + * [MIPS] Fix IP27 build + * [MIPS] Fix smp barriers in test_and_{change,clear,set}_bit + * libertas: scan two channels per scan command + * libertas: rename wlan_association_worker + * libertas: a debug output was missing a newline + * libertas: fix removal of all debugfs files + * libertas: remove deprecated pm_register and associated code + * libertas: remove __FILE__ from debug output + * libertas: remove unused/superfluous definitions of DEV_NAME_LEN + * libertas: move vendor & product id's into if_usb.c + * libertas: make libertas_wlan_data_rates static + * libertas: fix scanning from associate path + * libertas: exclude non-used code when PROC_DEBUG is not set + * libertas: make debug configurable + * libertas: tune debug code + * libertas: single out mesh code + * libertas: change debug output of libertas_interrupt() + * libertas: get rid of libertas_sbi_get_priv() + * libertas: fix SSID output + * libertas: changed some occurences of kmalloc() + memset(&a,0,sz) to + kzalloc() + * libertas: move reset_device() code main.c to if_usb.c + * libertas: split wlan_add_card() + * libertas: fixed transmission flow control on the mesh interface + * libertas: fix error handling of card initialization + * libertas: added transmission failures to mesh statistics + * libertas: wakeup both mesh and normal wakeup when getting out of scan + * libertas: indirect all hardware access via hw_XXXX functions + * libertas: move contents of fw.h to decl.h + * libertas: split module into two (libertas.ko and usb8xxx.ko) + * libertas: fix RESET logic at unload time + * libertas: let DRV_NAME be overridable + * libertas: remove unused variables in wlan_dev_t + * libertas: fixed incorrect assigment of fcs errors to frag errors + * libertas: add URB debug info + * libertas: fixed kernel oops on module/card removal + * libertas: call SET_NETDEV_DEV from common code + * libertas: replace 'macaddress' with 'bssid' + * libertas: correctly unregister mesh netdev on error + * libertas: don't tear down netdev in libertas_activate_card + * libertas: version bump (321p0) and cmds update for new fw (5.220.10.p0) + * libertas: updated mesh commands for 5.220.9.p11 + * libertas: make scan result handling more flexible + * libertas: fix 'keep previous scan' behavior + * libertas: cleanup of fwt_list_route processing + * libertas: fix oops on rmmod + * libertas: move channel changing into association framework + * libertas: make association paths consistent + * libertas: use MAC_FMT and MAC_ARG where appropriate + * libertas: use compare_ether_addr() rather than memcmp() where + appropriate + * libertas: fix debug enter/leave prints for + libertas_execute_next_command + * libertas: correctly balance 
locking in libertas_process_rx_command + * libertas: correct error report paths for wlan_fwt_list_ioctl + * libertas: fix deadlock SIOCGIWSCAN handler + * libertas: fix default adhoc channel + * libertas: honor specific channel requests during association + * libertas: send SIOCGIWSCAN event after partial scans too + * libertas: debug print spacing fixes in assoc.c + * libertas: add more verbose debugging to libertas_cmd_80211_authenticate + * libertas: Make WPA work through supplicant handshake + * libertas: updated readme file + * libertas: make mac address configuration work with mesh interface too + * libertas: split wext for eth and msh + * libertas: support for mesh autostart on firmware 5.220.11 + * libertas: fix character set in README + * libertas: sparse fixes + * libertas: first pass at fixing up endianness issues + * libertas: More endianness fixes. + * libertas: more endianness fixes, in tx.c this time + * libertas: don't byte-swap firmware version number. It's a byte array. + * libertas: fix big-endian associate command. + * libertas: tweak association debug output + * libertas: remove structure WLAN_802_11_SSID and libertas_escape_essid + * libertas: remove WPA_SUPPLICANT structure + * libertas: reduce SSID and BSSID mixed-case abuse + * kbuild: fix sh64 section mismatch problems + * cfg80211: fix signed macaddress in sysfs + * mac80211: fix debugfs tx power reduction output + * mac80211: Don't stop tx queue on master device while scanning. + * Input: usbtouchscreen - fix fallout caused by move from drivers/usb + * Input: i8042 - add ASUS P65UP5 to the noloop list + * Input: i8042 - add ULI EV4873 to noloop list + * [PARISC] remove global_ack_eiem + * libertas: pull current channel from firmware on mesh autostart + * libertas: deauthenticate from AP in channel switch + * libertas: actually send mesh frames to mesh netdev + * libertas: convert libertas_mpp into anycast_mask + * [PPP_MPPE]: Fix "osize too small" check. + * NetXen: Fix link status messages + * myri10ge: limit the number of recoveries + * myri10ge: report when the link partner is running in Myrinet mode + * myri10ge: update driver version + * sysfs: store sysfs inode nrs in s_ino to avoid readdir oopses + * sysfs: fix condition check in sysfs_drop_dentry() + * sysfs: fix race condition around sd->s_dentry, take#2 + * [TCP]: Fix left_out setting during FRTO + * Input: move input-polldev to drivers/input + * [SPARC64]: Wire up cookie based sun4v interrupt registry. + * [SPARC64]: Fix IO/MEM space sizing for PCI. + * [SPARC64]: Really fix parport. + * [SPARC64]: Fix args to sun4v_ldc_revoke(). + * [TCP]: Set initial_ssthresh default to zero in Cubic and BIC. + * mmc-omap: fix sd response type 6 vs. 
1 + * mmc: get back read-only switch function + * [SCTP]: Correctly set daddr for IPv6 sockets during peeloff + * [SCTP]: Allow unspecified port in sctp_bindx() + * [SCTP] Fix leak in sctp_getsockopt_local_addrs when copy_to_user fails + * [SCTP] Update pmtu handling to be similar to tcp + * [SCTP] Flag a pmtu change request + * [SCTP] Don't disable PMTU discovery when mtu is small + * [POWERPC] Fix per-cpu allocation on oldworld SMP powermacs + * [POWERPC] Fix console output getting dropped on platforms without + udbg_putc + * [AVR32] ratelimit segfault reporting rate + * [AVR32] gpio_*_cansleep() fix + * [AVR32] STK1000: Set SPI_MODE_3 in the ltv350qv board info + * [AVR32] Define ARCH_KMALLOC_MINALIGN to L1_CACHE_BYTES + * [MIPS] Malta: Fix for SOCitSC based Maltas + * [MIPS] Separate performance counter interrupts + * [MIPS] Fix builds where MSC01E_xxx is undefined. + * [TCP]: Add missing break to TCP option parsing code + * [IPV6] addrconf: Fix IPv6 on tuntap tunnels + * [AGPGART] intel_agp: fix device probe + * KVM: Prevent guest fpu state from leaking into the host + * splice: adjust balance_dirty_pages_ratelimited() call + * splice: fix leak of pages on short splice to pipe + * splice: only check do_wakeup in splice_to_pipe() for a real pipe + * [TCP]: Congestion control API RTT sampling fix + * [TCP]: Fix logic breakage due to DSACK separation + * [RXRPC] net/rxrpc/ar-connection.c: fix NULL dereference + * block: always requeue !fs requests at the front + * mm: Fix memory/cpu hotplug section mismatch and oops. + * Resume from RAM on HPC nx6325 broken + * ide-scsi: fix OOPS in idescsi_expiry() + * fix radeon setparam on 32/64 systems, harder. + * tty: restore locked ioctl file op + * i386: fix NMI watchdog not reserving its MSRs + * i386: use the right wrapper to disable the NMI watchdog + * SLUB slab validation: Alloc while interrupts are disabled must use + GFP_ATOMIC + * Restore shmid as inode# to fix /proc/pid/maps ABI breakage + * i386 mm: use pte_update() in ptep_test_and_clear_dirty() + * cpuset: zero malloc - fix for old cpusets + * toshiba_acpi: fix section mismatch in allyesconfig + * swsusp: Fix userland interface + * perfctr-watchdog: fix interchanged parameters to + release_{evntsel,perfctr}_nmi + * fuse: ->fs_flags fixlet + * md: fix two raid10 bugs + * md: fix bug in error handling during raid1 repair + * spi doc updates + * uml: remove PAGE_SIZE from libc code + * uml: kill x86_64 STACK_TOP_MAX + * random: fix output buffer folding + * Rework ptep_set_access_flags and fix sun4c + * SLUB: minimum alignment fixes + * udf: fix possible leakage of blocks + * hugetlb: fix get_policy for stacked shared memory files + * shm: fix the filename of hugetlb sysv shared memory + * Linux 2.6.22-rc5 + * [GFS2] flush the glock completely in inode_go_sync + * [DLM] fix a couple of races + * [GFS2] kernel changes to support new gfs2_grow command + * [GFS2] Kernel changes to support new gfs2_grow command (part 2) + * [GFS2] use zero_user_page + * [GFS2] Addendum patch 2 for gfs2_grow + * [GFS2] Reduce size of struct gdlm_lock + * [GFS2] Clean up inode number handling + * [GFS2] Quotas non-functional - fix bug + * [DLM] keep dlm from panicing when traversing rsb list in debugfs + * [DLM] block scand during recovery [1/6] + * [DLM] add lock timeouts and warnings [2/6] + * [DLM] dlm_device interface changes [3/6] + * [DLM] cancel in conversion deadlock [4/6] + * [DLM] fix new_lockspace error exit [5/6] + * [DLM] wait for config check during join [6/6] + * [DLM] fix compile breakage + * 
[GFS2] latest gfs2-nmw headers break userland build + * [DLM] Compile fix + * [DLM] timeout fixes + * [DLM] canceling deadlocked lock + * [DLM] dumping master locks + * [DLM] show default protocol + * [GFS2] Quotas non-functional - fix another bug + * [GFS2] Make the log reserved blocks depend on block size + * [DLM] fix socket shutdown + * [GFS2] fix jdata issues + * [GFS2] Fix sign problem in quota/statfs and cleanup _host structures + * [GFS2] Add nanosecond timestamp feature + * [DLM] fix reference counting + * [DLM] variable allocation + * [GFS2] Fix typo in rename of directories + * [GFS2] Fix bug in error path of inode + * [GFS2] Can't mount GFS2 file system on AoE device + * [GFS2] Recovery for lost unlinked inodes + * [GFS2] gfs2_lookupi() uninitialised var fix + * [GFS2] set plock owner in GETLK info + * [GFS2] return conflicts for GETLK + * [GFS2] Fix deallocation issues + * [DLM] don't require FS flag on all nodes + * [GFS2] Journaled file write/unstuff bug + * [GFS2] Remove bogus '\0' in rgrp.c + * [GFS2] Use zero_user_page() in stuffed_readpage() + * [GFS2] assertion failure after writing to journaled file, umount + * [GFS2] Simplify multiple glock aquisition + * [GFS2] Addendum to the journaled file/unmount patch + + -- Ben Collins Fri, 01 Jun 2007 12:15:58 -0400 + +linux-source-2.6.22 (2.6.22-6.13) gutsy; urgency=low + + [Ben Collins] + + * Bump ABI + * build/scripts: Remove all remnants of debconf from control scripts + * build/config: Re-enable paravirt/vmi + * build/config: Build ide-core as a module + * i386/x86_64: Allow disabling the putstr's from compressed boot wrapper + * PM: Do not require dev spew to get PM_DEBUG + * RTC: Ratelimit "lost interrupts" message + * UNUSUAL_DEV: Sync up some reported devices from Ubuntu + * build/d-i: Include ide-core in storage-core udeb, not that it's modular + * build/d-i: Make ide-modules depend on storage-code-modules + * build/config: Enable CONFIG_TIMER_STATS on x86_64. + * build/config: Disable CONFIG_RTC_DRV_CMOS + * build/config: Enable TIMER_STATS everywhere. + * build/config: Enable SND_AC97_POWER_SAVE + - LP: #116679 + * kmod: Improve call_usermodehelper_pipe to handle data close + * coredump: Convert to new call_usermodehelper_pipe symantics + * PPC: Only set hwif stuff when ide-core is non-modular + * PPC/MEDIABAY: Export some functions for modular ide-core/ppc-ide + + [Colin Watson] + + * Move isofs to storage-core-modules udeb from fs-core-modules. + + [Upstream Kernel Changes] + + * Input: logips2pp - add type 72 (PS/2 TrackMan Marble) + * Input: adbhid - do not access input_dev->private directly + * sh: Shut up compiler warnings in __do_page_fault(). + * sh: Fix up psw build rules for r7780rp. + * sh: Kill off pmb slab cache destructor. + * sh: landisk: rtc-rs5c313 support. + * sh: landisk: Header cleanups. + * input: hp680_ts compile fixes. 
+ * [ARM] 4375/1: sharpsl_pm: Fix compile warnings + * [ARM] 4376/1: Selects GENERIC_GPIO for ARCH_IXP4XX in Kconfig + * [ARM] 4378/1: KS8695: Serial driver fix + * [ARM] Remove Integrator/CP SMP platform support + * [ARM] 4382/1: iop13xx: fix msi support + * [ARM] 4383/1: iop: fix usage of '__init' and 'inline' in iop files + * [ARM] 4384/1: S3C2412/13 SPI registers offset correction + * [ARM] Update ARM syscalls + * [ARM] Silence OMAP kernel configuration warning + * [ARM] gic: Fix gic cascade irq handling + * [ARM] integrator: fix pci_v3 compile error with DEBUG_LL + * [ARM] ARMv6: add CPU_HAS_ASID configuration + * [CRYPTO] padlock: Make CRYPTO_DEV_PADLOCK a tristate again + * [CRYPTO] tcrypt: Add missing error check + * eventfd use waitqueue lock ... + * timerfd use waitqueue lock ... + * [IA64] Fix bogus messages about system calls not implemented. + * [IA64] Yet another section mismatch warning + * Fix roundup_pow_of_two(1) + * Further update of the i386 boot documentation + * cciss: Fix pci_driver.shutdown while device is still active + * Linux v2.6.22-rc2 + * [CRYPTO] api: Read module pointer before freeing algorithm + * powerpc: Fix the MODALIAS generation in modpost for of devices + * kbuild: include limits.h in sumversion.c for PATH_MAX + * kconfig: search harder for curses library in check-lxdialog.sh + * kbuild: make modpost section warnings clearer + * kbuild: make better section mismatch reports on i386, arm and mips + * kbuild: add "Section mismatch" warning whitelist for powerpc + * all-archs: consolidate .text section definition in asm-generic + * all-archs: consolidate .data section definition in asm-generic + * kbuild: introduce __init_refok/__initdata_refok to supress section + mismatch warnings + * init/main: use __init_refok to fix section mismatch + * mm: fix section mismatch warnings + * mm/slab: fix section mismatch warning + * IB/core: Free umem when mm is already gone + * IB/ipath: Fix potential deadlock with multicast spinlocks + * IB/core: Add helpers for uncached GID and P_Key searches + * IB/core: Use start_port() and end_port() + * IPoIB: Handle P_Key table reordering + * IB/ehca: Return proper error code if register_mr fails + * IB/mthca: Fix use-after-free on device restart + * IB/mlx4: Fix check of max_qp_dest_rdma in modify QP + * IB/mthca: Set GRH:HopLimit when building MLX headers + * IB/mlx4: Set GRH:HopLimit when sending globally routed MADs + * IB/mthca: Fix RESET to ERROR transition + * IB/mlx4: Fix RESET to RESET and RESET to ERROR transitions + * mlx4_core: Fix array overrun in dump_dev_cap_flags() + * IB/mlx4: Fix check of opcode in mlx4_ib_post_send() + * [IPV6]: Add ip6_tunnel.h to headers_install + * [RFKILL]: Fix check for correct rfkill allocation + * [NET]: Fix net/core/skbuff.c gcc-3.2.3 compilation error + * [TCP] FRTO: Add missing ECN CWR sending to one of the responses + * [TCP] FRTO: Prevent state inconsistency in corner cases + * [IPSEC] pfkey: Load specific algorithm in pfkey_add rather than all + * [NETFILTER]: nf_conntrack: fix use-after-free in helper destroy + callback invocation + * [NETFILTER]: nf_conntrack_ipv4: fix incorrect #ifdef config name + * [IPV4]: icmp: fix crash with sysctl_icmp_errors_use_inbound_ifaddr + * [NET]: Fix race condition about network device name allocation. 
+ * IB/mlx4: Pass send queue sizes from userspace to kernel + * [ARM] 4387/1: fix /proc/cpuinfo formatting for pre-ARM7 parts + * [ARM] 4388/1: no need for arm/mm mmap range checks for non-mmu + * [ARM] 4395/1: S3C24XX: add include of to relevant + machines + * [ARM] 4396/1: S3C2443: Add missing HCLK clocks + * [ARM] 4397/1: S3C2443: remove SDI0/1 IRQ ambiguity + * [ARM] 4398/1: S3C2443: Fix watchdog IRQ number + * [ARM] 4399/2: S3C2443: Fix SMDK2443 nand timings + * [ARM] 4400/1: S3C24XX: Add high-speed MMC device definition + * [ARM] at91_adc parenthesis balance + * [ARM] spelling fixes + * IB/mlx4: Check if SRQ is full when posting receive + * spelling fixes: arch/sh/ + * sh: revert addition of page fault notifiers + * sh: Wire up signalfd/timerfd/eventfd syscalls. + * sh: Fix up various compile warnings for SE boards. + * sh: Fix page size alignment in __copy_user_page(). + * sh: Disable psw support for R7785RP. + * fs: Kill sh dependency for binfmt_flat. + * sh: disable genrtc support. + * sh: sr.bl toggling around idle sleep. + * sh: Wire up kdump crash kernel exec in die(). + * sh: Fix clock multiplier on SH7722. + * sh: Fix dreamcast build for IRQ changes. + * [S390] cio: Update documentation. + * [S390] Wire up sys_utimensat. + * [S390] Wire up signald, timerfd and eventfd syscalls. + * [S390] Make use of kretprobe_assert. + * [S390] More verbose show_mem() like other architectures. + * Fix "fs: convert core functions to zero_user_page" + * Detach sched.h from mm.h + * Blackfin arch: Add Workaround for ANOMALY 05000257 + * Blackfin arch: add SPI MMC driver support on bf533-stamp, tested on + STAMP-BF533 + * Blackfin arch: ISP1761 doesn't work for USB flash disk + * Blackfin arch: fix a few random warnings + * Blackfin arch: Add configuration data for ISP176x on BF561 + * Blackfin arch: mark a bunch of local functions as static + * Blackfin arch: Fix reserved map after we changed PORT_H definition + * Blackfin arch: Move write to VR_CTL closer to IDLE + * Blackfin arch: DMA operation cleanup + * Blackfin arch: GPIO fix some defines + * Blackfin arch: fix trace output for FLAT binaries + * Blackfin arch: Fix bug using usb keyboard crashes kernel + * Blackfin arch: initial tepla-bf561 board support + * Blackfin arch: make sure we declare the revid functions as pure (since + they are) + * Blackfin arch: dont clear status register bits in SWRST so we can + actually use it + * Blackfin arch: finish removing p* volatile defines for MMRs + * Blackfin arch: move board specific setup out of common init code and + into the board specific init code + * Blackfin arch: issue reset via SWRST so we dont clobber the watchdog + state + * Blackfin arch: document why we have to touch the UART peripheral in our + boot up code + * Blackfin arch: dma_memcpy borken for > 64K + * Blackfin arch: dont clear the bit that tells coreb to start booting + * Blackfin arch: make sure we use local labels + * Blackfin arch: update blackfin header files to latest one in VDSP. + * Blackfin arch: cache SWRST value at bootup so other things like + watchdog can non-destructively query it + * Blackfin arch: fix signal handling bug + * Blackfin arch: Change NO_ACCESS_CHECK to ACCESS_CHECK + * Blackfin arch: add board default configs to blackfin arch + * Blackfin arch: update defconfig files + * Blackfin arch: update pm.c according to power management API change. 
+ * Blackfin serial driver: fix overhead issue + * Blackfin serial driver: implement support for ignoring parity/break + errors + * Blackfin SPI: cleanup according to David Brownell's review + * x86_64: Update defconfig + * i386: Update defconfig + * x86_64: Support x86_64 in make buildtar + * i386: Fix K8/core2 oprofile on multiple CPUs + * x86_64: Support gcc 5 properly + * i386: Clear MCE flag on AMD K6 + * i386: Fix wrong CPU error message in early boot path + * i386: Enable CX8/PGE CPUID bits early on VIA C3 + * x86_64: early_print kernel console should send CRLF not LFCR + * x86_64: vsyscall time() fix + * i386: fix PGE mask + * LDM: Fix for Windows Vista dynamic disks + * IB/ipoib: Fix typos in error messages + * IPoIB/cm: Fix SRQ WR leak + * IB/cm: Improve local id allocation + * e1000: Don't enable polling in open() (was: e1000: assertion hit in + e1000_clean(), kernel 2.6.21.1) + * declance: Remove a dangling spin_unlock_irq() thingy + * Add constant for FCS/CRC length (frame check sequence) + * ahci: disable 64bit dma on sb600 + * libata: Add Seagate STT20000A to DMA blacklist. + * pata_hpt366: Enable bits are unreliable so don't use them + * ata_piix: clean up + * libata: Kiss post_set_mode goodbye + * libata: Trim trailing whitespace + * partitions/LDM: build fix + * Make 'headerscheck' stop immediately on an error + * Fix headers check fallout + * [POWERPC] Fix smp_call_function to be preempt-safe + * [POWERPC] Add missing pmc_type fields in cpu_table + * [POWERPC] Fix typo: MMCR0_PMA0 != MMCR0_PMAO + * [POWERPC] Fix powerpc vmlinux.lds.S + * [POWERPC] Fix warning in 32-bit builds with CONFIG_HIGHMEM + * libertas: skb dereferenced after netif_rx + * drivers/net/wireless/libertas/fw.c: fix use-before-check + * drivers/net/wireless/libertas/rx.c: fix use-after-free + * [IA64] Improve unwind checking. + * [IA64] Only unwind non-running tasks. 
+ * [IA64] fix kmalloc(0) in arch/ia64/pci/pci.c + * i2c: Legacy i2c drivers shouldn't issue uevents + * i2c-tiny-usb: Fix truncated adapter name + * i2c-s3c2410: Fix build warning + * V4L/DVB (5639): Fix Kconfig dependencies for ivtv + * V4L/DVB (5640): Fix: em28xx shouldn't be selecting VIDEO_BUF + * V4L/DVB (5670): Adding new fields to v4l2_pix_format broke the ABI, + reverted that change + * V4L/DVB (5639a): Fix dst usage count + * V4L/DVB (5630): Dvb-core: Handle failures to create devices + * V4L/DVB (5680): Tuner-simple.c fix suport for SECAM with FI1216MF + * V4L/DVB (5690): Cafe_ccic: Properly power down the sensor + * V4L/DVB (5691): Ov7670: reset clkrc in rgb565 mode + * [IPSEC]: Fix warnings with casting int to pointer + * [AF_RXRPC]: AF_RXRPC depends on IPv4 + * [AF_RXRPC]: Make call state names available if CONFIG_PROC_FS=n + * [RTNETLINK]: Allow changing of subsets of netdevice flags in + rtnl_setlink + * [RTNETLINK]: Remove remains of wireless extensions over rtnetlink + * Input: iforce - fix force feedback not working + * Input: iforce - minor clean-ups + * Input: ALPS - force stream mode + * Input: ucb1400_ts - use sched_setscheduler() + * Input: ucb1x00-ts - remove commented out code + * Input: input-polldev - add module info + * Input: ads7846 - document that it handles tsc2046 too + * Input: ads7846 - SPI_CPHA mode bugfix + * USB: fix omninet memory leak found by coverity + * USB: remove useless check in mos7840 found by coverity + * usb-storage: ignore Sitecom WL-117 USB-WLAN + * USB: fix more ftdi-elan/u132-hcd #include lossage + * USB: handle more rndis_host oddities + * USB: remove usb DocBook warnings + * USB: address FIXME in usbnet w.r.t drivers claiming multiple interfaces + * EHCI: fix problem with BIOS handoff + * USB: more autosuspend timer stuff + * USB: remove unneeded WARN_ON + * USB: New device PID for ftdi_sio driver + * USB: set the correct Interrupt interval in usb_bulk_msg + * USB: fsl_usb2_udc: Fix UMTI_WIDE support and a compile warning + * USB: auerswald: fix file release handler + * USB: Remove duplicate IDs from option card driver + * USB: Deref URB after usbmon is done with it + * USB: remove short initial timeout for device descriptor fetch + * USB: don't try to kzalloc 0 bytes + * USB: Onetouch - switch to using input_dev->dev.parent + * USB: Fix debug output of ark3116 + * USB: usblp: Use correct DMA address in case of probe error + * USB: Fix USB OHCI Subvendor for Toshiba Portege 4000 + * USB: make the autosuspend workqueue thread freezable + * USB: handle errors in power/level attribute + * USB: fix ratelimit call semantics + * USB: ftdi_sio: Add USB Product Id for OpenDCC + * USB: ldusb bugfix + * USB: Add support for Sierra Wireless Aircard 595U + * USB: Add support for Olimex arm-usb-ocd JTAG interface serial port + * IB/mlx4: Don't allocate RQ doorbell if using SRQ + * [IA64] start_secondary() and smp_callin() should be __cpuinit + * add the IDE device ID for ATI SB700 + * ide/pci/serverworks.c: Fix corruption/timeouts with MegaIDE + * Add two missing chipsets to drivers/ide/ide-proc.c + * Match DMA blacklist entries between ide-dma.c and libata-core.c + * ide serverworks warning fixes + * freezer: close potential race between refrigerator and thaw_tasks + * freezer: fix vfork problem + * freezer: take kernel_execve into consideration + * freezer: fix kthread_create vs freezer theoretical race + * freezer: fix PF_NOFREEZE vs freezeable race + * freezer: move frozen_process() to kernel/power/process.c + * Ignore bogus ACPI info for 
offline CPUs + * SLUB Debug: Fix object size calculation + * fuse: fix mknod of regular file + * mpc52xx_psc_spi: fix it for CONFIG_PPC_MERGE + * spi doc update: describe clock mode bits + * NOHZ: Rate limit the local softirq pending warning output + * genhd: expose AN to user space + * genhd: send async notification on media change + * capability.h warning fix + * spi/spidev: check message size before copying + * uml: improve PTRACE_SYSEMU checking + * prohibit rcutorture from being compiled into the kernel + * Documentation: fix the explanation of Kconfig files + * Avoid zero size allocation in cache_k8_northbridges() + * recalc_sigpending_tsk fixes + * optimize compat_core_sys_select() by a using stack space for small fd + sets + * spi: potential memleak in spidev_ioctl + * fbdev: cleanup of sparc FB options + * pm2fb: RDAC_WR barriers clean up + * pm3fb: various fixes + * w100fb: fix compile warnings + * ps3fb: use FB_SYS_* instead of FB_CFB_* + * imxfb: remove ifdefs + * imxfb: fix memory hole + * Missing 'const' from reiserfs MIN_KEY declaration. + * uselib: add missing MNT_NOEXEC check + * fuse: generic_write_checks() for direct_io + * fuse: delete inode on drop + * fix unused setup_nr_node_ids + * SLUB Debug: fix check for super sized slabs (>512k 64bit, >256k 32bit) + * Char: cyclades, fix deadlock + * simplify cleanup_workqueue_thread() + * phantom: move to unlocked_ioctl + * Misc: phantom, take care of pci posting + * power: Fix sizeof(PAGE_SIZE) typo + * update dontdiff file + * signalfd: retrieve multiple signals with one read() call + * i2o: destroy event queue only when drv->event is set + * i2o: fix notifiers when max_drivers is configured + * i2o: eliminate a peculiar constraint on i2o_max_drivers + * i386, x86-64: show that CONFIG_HOTPLUG_CPU is required for suspend on + SMP + * md: avoid overflow in raid0 calculation with large components + * md: don't write more than is required of the last page of a bitmap + * md: fix bug with linear hot-add and elsewhere + * documentation: Documentation/initrd.txt + * HiSax: fix error checking for hisax_register()] + * applesmc - sensors patch missing from 2.6.22-rc2 + * Off by one in floppy.c + * eCryptfs: delay writing 0's after llseek until write + * document clocksources + * ehci-fsl: fix cache coherency problem on system with large memory + * Prevent going idle with softirq pending + * i386: fix early usage of atomic_add_return and local_add_return on real + i386 + * Documentation/memory-barriers.txt: various fixes + * omap_uwire: SPI_CPHA mode bugfix + * capifunc warning fixes + * drivers/isdn/hardware/eicon/message.c warning fixes + * i386 bigsmp: section mismatch fixes + * boot documentation: clarifications + * mmc: clean up unused parts of block driver + * mmc: mark unmaintained drivers + * mmc: Add maintainers for TI OMAP MMC interface + * mmc: add maintainer for iMX MMC interface + * mmc: add maintainer for ARM Primecell controller + * [CRYPTO] geode: Fix in-place operations and set key + * [Bluetooth] Always send HCI_Reset for Broadcom devices + * [Bluetooth] Fix L2CAP configuration parameter handling + * NFS: Avoid a deadlock situation on write + * NFS: Fix handful of compiler warnings in direct.c + * NFS: Fix nfs_direct_dirty_pages() + * Don't call a warnign a bug. It's a warning. 
+ * [IA64] Fix using uninitialized data in _PDC setup + * [IA64] Cleanup acpi header to reuse the generic _PDC defines + * Documentation: Fix up docs still talking about i_sem + * [IA64] acpi_get_sysname() should be __init + * IB/mlx4: Initialize send queue entry ownership bits + * IB/ehca: Fix number of send WRs reported for new QP + * IPoIB/cm: Fix timeout check in ipoib_cm_dev_stop() + * IPoIB/cm: Drain cq in ipoib_cm_dev_stop() + * ucc_geth: Fix MODULE_DEVICE_TABLE() duplication + * ucc_geth:trivial fix + * asix.c - Add Belkin F5D5055 ids + * fix compiler warning in fixed.c + * remove unnecessary dependency on VIA velocity config + * meth driver renovation + * spidernet: skb used after netif_receive_skb + * chelsio parenthesis fix + * forcedeth: fix cpu irq mask + * [NET_SCHED]: Fix qdisc_restart return value when dequeue is empty + * [IPV6]: Ignore ipv6 events on non-IPV6 capable devices. + * [ATM]: Use mutex instead of binary semaphore in idt77252 driver. + * [DCCP]: Use menuconfig objects. + * [IPVS]: Use menuconfig objects. + * [SCTP]: Use menuconfig objects. + * [TIPC]: Use menuconfig objects. + * [ARCNET]: Use menuconfig objects. + * [TR]: Use menuconfig objects. + * [RTNETLINK]: Fix sending netlink message when replace route. + * [TIPC]: Fixed erroneous introduction of for_each_netdev + * [DCCP]: Fix build warning when debugging is disabled. + * [NET_SCHED]: sch_htb: fix event cache time calculation + * [NETFILTER]: nf_conntrack_ftp: fix newline sequence number update + * [NETFILTER]: nf_conntrack_ftp: fix newline sequence number calculation + * [NETFILTER]: nf_conntrack_h323: fix ASN.1 types + * [NETFILTER]: nf_conntrack_h323: fix get_h225_addr() for IPv6 address + access + * [NETFILTER]: nf_conntrack_h323: remove unnecessary process of + Information signal + * [NETFILTER]: nf_conntrack_h323: add missing T.120 address in OLCA + * [NETFILTER]: nf_nat_h323: call set_h225_addr instead of + set_h225_addr_hook + * [NET]: "wrong timeout value" in sk_wait_data() v2 + * hpt3x2n: Correct revision boundary + * pata_sis: Fix and clean up some timing setups + * ata_piix: add short 40c quirk for Acer Aspire 2030, take #2 + * libata: don't consider 0xff as port empty if SStatus is available + * libata: -ENODEV during prereset isn't an error + * pata_via: Handle laptops via DMI + * [CASSINI]: Check pci_set_mwi() return value. + * [XFRM]: Allow packet drops during larval state resolution. + * [libata] sata_promise: fix flags typo + * [libata] sata_mv: add TODO list + * Fix build failure for drivers/ata/pata_scc.c + * libata: sata_sis fixes + * [libata] Fix decoding of 6-byte commands + * [libata] sata_via, pata_via: Add PCI IDs. 
+ * ocfs2: trylock in ocfs2_readpage() + * ocfs2: unmap_mapping_range() in ocfs2_truncate() + * ocfs2: use zero_user_page + * ocfs2: fix inode leak + * ocfs2: use generic_segment_checks + * pata: Trivia + * pata_hpt37x: Further improvements based on the IDE updates and vendor + drivers + * fix compat console unimap regression + * Linux 2.6.22-rc3 + + -- Ben Collins Thu, 31 May 2007 12:35:44 -0400 + +linux-source-2.6.22 (2.6.22-5.11) gutsy; urgency=low + + [Ben Collins] + + * build/headers/ppc: Correct asm-ppc -> asm for arch symlink + * build/headers/ia64: Fix find command line to correctly pull in *.lds + files + * Bump ABI + + [Upstream Kernel Changes] + + * [IA64] spelling fixes: arch/ia64/ + * [AVR32] Remove bogus comment in arch/avr32/kernel/irq.c + * [AVR32] optimize pagefault path + * [AVR32] Wire up signalfd, timerfd and eventfd + * [IA64] wire up {signal,timer,event}fd syscalls + * [IA64] kdump on INIT needs multi-nodes sync-up (v.2) + * [IA64] s/scalibility/scalability/ + * [AVR32] Implement platform hooks for atmel_lcdfb driver + * [IA64] Fix section conflict of ia64_mlogbuf_finish + * [SPARC64]: Add hypervisor API negotiation and fix console bugs. + * pata_scc had been missed by ata_std_prereset() switch + * libata: separate out ata_dev_reread_id() + * libata: during revalidation, check n_sectors after device is configured + * libata-acpi: add ATA_FLAG_ACPI_SATA port flag + * libata: fix shutdown warning message printing + * libata: track spindown status and skip spindown_compat if possible + * [ALSA] usb-audio: another Logitech QuickCam ID + * [ALSA] hda-codec - Make the mixer capability check more robust + * [ALSA] ASoC AC97 static GPL symbol fix + * [ALSA] ASoC AC97 device reg bugfix + * [ALSA] hda-codec - Fix ALC882/861VD codec support on some laptops + * [ALSA] version 1.0.14rc4 + * [ALSA] Fix probe of non-PnP ISA devices + * [ALSA] Include quirks from Ubuntu Dapper/Edgy/Feisty + * [ALSA] usbaudio - Coping with short replies in usbmixer + * [IA64] optimize pagefaults a little + * Fix ACPI suspend / device suspend ordering problem + * AFS: write back dirty data on unmount + * SLUB: It is legit to allocate a slab of the maximum permitted size + * slub: don't confuse ctor and dtor + * AFS: Fix afs_prepare_write() + * spi: fix spidev for >sizeof(long)/32 devices + * parport_pc needs dma-mapping.h + * Fix: find_or_create_page skips cpuset memory spreading. + * slob: implement RCU freeing + * Slab allocators: Drop support for destructors + * SLUB: Remove depends on EXPERIMENTAL and !ARCH_USES_SLAB_PAGE_STRUCT + * SLAB: Move two remaining SLAB specific definitions to slab_def.h + * SLUB: Define functions for cpu slab handling instead of using + PageActive + * slab: warn on zero-length allocations + * slub: fix handling of oversized slabs + * SLUB: slabinfo fixes + * SLUB: Do our own flags based on PG_active and PG_error + * Remove SLAB_CTOR_CONSTRUCTOR + * SLUB: Simplify debug code + * Slab allocators: define common size limitations + * acpi: fix potential call to a freed memory section. 
+ * i386/x86-64: fix section mismatch + * Make __vunmap static + * simplify compat_sys_timerfd + * Let smp_call_function_single return -EBUSY on UP + * Refine SCREEN_INFO sanity check for vgacon initialization + * make freezeable workqueues singlethread + * parport: mailing list is subscribers-only + * docbook: make kernel-locking table readable + * gpio interface loosens call restrictions + * rtc-omap build fix + * rtc kconfig clarification + * icom: add new sub-device-id to support new adapter + * make sysctl/kernel/core_pattern and fs/exec.c agree on maximum core + filename size + * ecryptfs: use zero_user_page + * i386: don't check_pgt_cache in flush_tlb_mm + * circular locking dependency found in QUOTA OFF + * swsusp: fix sysfs interface + * Fix page allocation flags in grow_dev_page() + * mm: more rmap checking + * NS16550A: Restore HS settings in EXCR2 on resume + * Fix incorrect prototype for ipxrtr_route_packet() + * sky2: remove Gigabyte 88e8056 restriction + * sky2: PHY register settings + * sky2: keep track of receive alloc failures + * sky2: MIB counter overflow handling + * sky2: remove dual port workaround + * sky2: memory barriers change + * small netdevices.txt fix + * ibm_emac: fix section mismatch warnings + * ibm_emac: improved PHY support + * ibm_emac: fix link speed detection change + * gianfar: Add I/O barriers when touching buffer descriptor ownership. + * spidernet: node-aware skbuff allocation + * NetXen: Fix NetXen driver ping on system-p + * ixgb: don't print error if pci_enable_msi() fails, cleanup minor leak + * e1000: Fix msi enable leak on error, don't print error message, cleanup + * drivers/ata: remove the wildcard from sata_nv driver + * sata_nv: fix fallout of devres conversion + * libata: remove libata.spindown_compat + * sata_via: pcim_iomap_regions() conversion missed BAR5 + + -- Ben Collins Thu, 17 May 2007 14:54:16 -0400 + +linux-source-2.6.22 (2.6.22-4.10) gutsy; urgency=low + + [Ben Collins] + + * Bump ABI + * build/config: Disable obsolete tsdev driver. + * build: Add tsdev to list of modules intentionally removed. + * build/headers: Include *.lds files (fixes ia64 headers). + * build/headers: Add arch/powerpc/include/asm symlink to get all headers. + * build/module-check: Fix logic for printed messages. + * build/maintainer: Use linux instead of upstream-linux for local diffs + * build/config: Enable SLUB slab allocator (vs. SLAB). + * build/config: Disable orinoco_nortel, use prefered hostap_plx + * build/config: Disable ir-usb in favor of irda-usb + * build/config: Disable sis5513(ide) in favor of pata_sis(libata) + * build/config: Disable piix(ide) in favour of pata_oldpiix, ata_piix and + pata_mpiix (libata) + * build/config: Disable zaurus driver in favour of the cdc_ether driver + * build/abi: Note a few modules intentionally removed. + * build/config: Disable mxb and dpc7146 driver in favour of hexium_orion + * build/config: Disable usbtest driver (for development only) + * build/config: Disable keyspan driver in favour of keyspan_pda + * build/abi: Add mxb and usbtest to list of removed modules. + + [Upstream Kernel Changes] + + * net: Trivial MLX4_DEBUG dependency fix. + * mlx4_core: Remove unused doorbell_lock + * [CPUFREQ] Support rev H AMD64s in powernow-k8 + * [CPUFREQ] powernow-k7: fix MHz rounding issue with perflib + * [AGPGART] Fix wrong ID in via-agp.c + * sh64: ROUND_UP macro cleanup in arch/sh64/kernel/pci_sh5.c + * spelling fixes: arch/sh64/ + * sh64: Wire up many new syscalls. + * sh64: Fixups for the irq_regs changes. 
+ * sh64: dma-mapping updates. + * sh64: ppoll/pselect6() and restartable syscalls. + * sh64: Fixup sh-sci build. + * sh64: Update cayman defconfig. + * sh64: generic quicklist support. + * sh64: Add .gitignore entry for syscalltab. + * IB/mlx4: Fix uninitialized spinlock for 32-bit archs + * IB/ipath: Shadow the gpio_mask register + * IB/ehca: Serialize hypervisor calls in ehca_register_mr() + * IB/ehca: Correctly set GRH mask bit in ehca_modify_qp() + * IB/ehca: Fix AQP0/1 QP number + * IB/ehca: Remove _irqsave, move #ifdef + * IB/ehca: Beautify sysfs attribute code and fix compiler warnings + * IB/ehca: Disable scaling code by default, bump version number + * RDMA/cma: Simplify device removal handling code + * RDMA/cma: Fix synchronization with device removal in cma_iw_handler + * RDMA/cma: Add check to validate that cm_id is bound to a device + * IB/mthca: Fix posting >255 recv WRs for Tavor + * IB/mthca: Set cleaned CQEs back to HW ownership when cleaning CQ + * IPoIB/cm: Optimize stale connection detection + * [CPUFREQ] Correct revision mask for powernow-k8 + * fix epoll single pass code and add wait-exclusive flag + * epoll locks changes and cleanups + * epoll: fix some comments + * epoll: move kfree inside ep_free + * nommu: add ioremap_page_range() + * h8300 atomic.h update + * alpha: fix hard_smp_processor_id compile error + * m68k: implement __clear_user() + * Remove cpu hotplug defines for __INIT & __INITDATA + * i386: move common parts of smp into their own file + * i386: fix voyager build + * SLUB: CONFIG_LARGE_ALLOCS must consider MAX_ORDER limit + * ll_rw_blk: fix gcc 4.2 warning on current_io_context() + * pasemi_mac: Fix register defines + * pasemi_mac: Interrupt ack fixes + * pasemi_mac: Terminate PCI ID list + * pasemi_mac: Fix local-mac-address parsing + * smc911x: fix compilation breakage + * ucc_geth: eliminate max-speed, change interface-type to + phy-connection-type + * pdc202xx_old: rewrite mode programming code (v2) + * serverworks: PIO mode setup fixes + * sis5513: PIO mode setup fixes + * alim15x3: use ide_tune_dma() + * pdc202xx_new: use ide_tune_dma() + * ide: always disable DMA before tuning it + * cs5530/sc1200: add ->udma_filter methods + * ide: use ide_tune_dma() part #2 + * cs5530/sc1200: DMA support cleanup + * cs5530/sc1200: add ->speedproc support + * sl82c105: add speedproc() method and MWDMA0/1 support + * ide: remove ide_dma_enable() + * ide: add missing validity checks for identify words 62 and 63 + * ide: remove ide_use_dma() + * sl82c105: Switch to ref counting API + * Use menuconfig objects: IDE + * x86: Fix discontigmem + non-HIGHMEM compile + * missing mm.h in fw-ohci + * missing dependencies for USB drivers in input + * missing includes in mlx4 + * em28xx and ivtv should depend on PCI + * rpadlpar breakage - fallout of struct subsystem removal + * m32r: __xchg() should be always_inline + * audit_match_signal() and friends are used only if CONFIG_AUDITSYSCALL + is set + * fix uml-x86_64 + * arm: walk_stacktrace() needs to be exported + + -- Ben Collins Tue, 15 May 2007 10:13:23 -0400 + +linux-source-2.6.22 (2.6.22-3.9) gutsy; urgency=low + + * Fixup firmware-modules -> efi-modules in exclude files. + + [Ben Collins] + + * build/config: Enable CONFIG_TIMER_STATS + * build/config: Disable CONFIG_IRQBALANCE, handled in userspace now + * build: Update modules that have been deprecated + * sparc64: Get some drivers compiling, till patches get upstream. + * powerpc: Add 64-bit cmp op for 32-bit. 
+ * build/config: Disable apm_emu, pasemi_mac and cbe_cpufreq on ppc64 + * build/d-i(cjwatson): Rename firmware-modules to efi-modules + + -- Ben Collins Fri, 11 May 2007 09:38:50 +0200 + +linux-source-2.6.22 (2.6.22-2.7) gutsy; urgency=low + + [Changes for 2.7] + + * Added some more modules going missing to ignore. + * Disable ADB_PMU_LED on powerpc64. FTBFS. + + [Ben Collins] + + * XXX: Well, xen and rt got disabled in this upload. Hopefully things will + get working again soon. + + * build: Add check for nrcpus on buildd's for CONCURRENCY_LEVEL + * build: No longer provide ndiswrapper or ivtv modules (l-u-m does). + * build/d-i: Remove firmware lists, since we no longer supply those udebs + * build: Remove more firmware stuff + * build/control: Build-dep on coreutils + * Update configuration files + * build/custom: Updated xen/rt patches and configs. + * build: Make sure to use /bin/bash for headers_install + * build: Add SHELL=/bin/bash to headers_install + * Update configuration files + * Bump ABI + * Update module lists to match module name changes and merges. + * build/rt: Trimmed down real-time patch from Alessio Igor Bogani. + * Update configuration files + * Update configuration files + * build/rt: Fix typo in diff + * Update configuration files + * build: make explicit binary-headers target + * Update configuration files + * build/control-scripts: Remove debconf from pre-rm script + * build/ia64: Compress and use vmlinuz for target install + * build/config: Diable OSS i810_audio driver (Alsa driver prefered) + * build/config: Disable OSS cs4232 driver (Alsa prefered) + * build/config: Disable OSS via82xx driver (Alsa prefered) + * build/config: Disable OSS trident driver (Alsa prefered) + * build/config: Disable OSS Sound Blaster driver (Alsa prefered) + * build/config: Disable IDE generic, ata_generic prefered + * build/config: Disable siimage, pata_sil680 prefered + * build/module-check: More robust module checking + * build: Call module-check with perl, not $SHELL + * Update configuration files + * build: Fixup calling conventions of module-check + * build: Add modules.ignore from 1.3 revision + * build/config: Disable obsolete MOXA_SMARTIO in favor of new driver. + * build/config: Disable orinoco_cs in favor of hostap_cs + * build/config: Disable orinoco_pci in favor of hostap_pci + * build/config: Disable orinoco_{plx,tmd} in favor of hostap_plx + * build/config: Disable sk98lin in favor of skge + * build: Add more modules intentionally removed since 1.3 + + -- Ben Collins Fri, 27 Apr 2007 09:04:29 -0400 + +linux-source-2.6.22 (2.6.22-1.3) gutsy; urgency=low + + [Ben Collins] + + * build: Only use bzip2 for linux-image, and pre-depend on proper dpkg + + [2.6.22-1.2] + + [Ben Collins] + + * build: Add build-arch target. FTBFS + + [2.6.22-1.1] + + [Ben Collins] + + * debian: New build system, from scratch + * debian: Rename place holder so debian/stamps/ sticks around + * debian: Create stamp-flavours at start of build (for build scripts) + * debian/abi: Add revision 0.0 bootstrap module list. + * debian: Fix backwards logic in module/abi checkers. + * debian: Add arch= to vars.* files + * Update configuration files + * build: Added control scripts for images + * build/config: Disable CONFIG_PARAVIRT for now + * build/config: Enable CONFIG_FB_VESA + * build: Take CONCURRENCY_LEVEL from env if it exists. 
+ * build: Do not print SHAs by default for changelog + * build/config(i386): Disable NO_HZ on all but generic + * build: Implement udeb rules + * build/d-i: Remove speakup-modules udeb + * build/udebs: Fix a couple trivial errors in the build. + * build/config: Disable CONFIG_FB_IMSTT on powerpc64-smp (no NVRAM) + * build/config: Disable some modules for ppc64 that don't use DMA API + * build/config: Yet another module to disable on ppc64 + * build/tests: New test infrastructure + * build: Special kernel build infrastructure + * build: Fix typo from last commit + * build/custom: Move custom files for each flavour into subdir. + * build/config: Disable some drivers on sparc that don't support DMA API + * build/sparc: Add new compress_file config, and use it for sparc + * build: Fix typo in compress_file commit. + * build/schedcfs: Update to v6 of the patch. + * build: Fix control file generation for custom images + * build: Correct message in link-headers + * build: 2.6.21 is released, force our SUBLEVEL to .22 + * build/vars: kvm API is at 4, provide that. + * build/custom: Allow custom builds to override things like build_image + * build/custom: Fix type causing custom rules not to be included. + * build/custom: Hello Xen 3.0.5 + * build/custom: Remove sched-cfs. Superseded in use by rt. + * build/custom: Add 2.6.21-rt1 patch for -rt custom flavour + * build/link-headers: Make sure to copy new files for custom + + -- Ben Collins Fri, 27 Apr 2007 08:29:00 -0400 --- linux-rt-2.6.29.5.orig/debian/control.stub.in +++ linux-rt-2.6.29.5/debian/control.stub.in @@ -0,0 +1,18 @@ +Source: linux-rt +Section: devel +Priority: optional +Maintainer: Alessio Igor Bogani +Standards-Version: 3.6.1 +Build-Depends: debhelper (>= 3), module-init-tools, kernel-wedge (>= 2.24ubuntu1), makedumpfile [!armel], quilt +Build-Depends-Indep: xmlto, docbook-utils, gs, transfig, bzip2, sharutils + +Package: linux-rt-headers-PKGVER-ABINUM +Architecture: all +Section: devel +Priority: optional +Depends: coreutils | fileutils (>= 4.0) +Provides: linux-rt-headers, linux-rt-headers-2.6 +Description: Header files related to Linux kernel version PKGVER + This package provides kernel header files for version PKGVER, for sites + that want the latest kernel headers. Please read + /usr/share/doc/linux-headers-PKGVER-ABINUM/debian.README.gz for details --- linux-rt-2.6.29.5.orig/debian/changelog +++ linux-rt-2.6.29.5/debian/changelog @@ -0,0 +1,429 @@ +linux-rt (2.6.29.5-1.2) karmic; urgency=low + + * Temporarily hard-code the .5 point release for the kernel, since without + this, needed files cannot be copied into the correct places, causing an + FTBFs. + + -- Luke Yelavich Sun, 28 Jun 2009 11:18:17 +1000 + +linux-rt (2.6.29.5-1.1) karmic; urgency=low + + * New 2.6.29 kernel upstream release + * New realtime patch upstream release + * Bump ABI + * Use main ubuntu kernel configs as a base for the rt configs. + + -- Luke Yelavich Sat, 27 Jun 2009 21:25:03 +1000 + +linux-rt (2.6.29-1.1) karmic; urgency=low + + * New upstream release + + -- Luke Yelavich Sun, 14 Jun 2009 15:07:25 +1000 + +linux-rt (2.6.28-3.12) jaunty; urgency=low + + * Build depend on linux-source-2.6.28 >= 2.6.28-11.42. 
+ + Rebase on jaunty mainline 2.6.28-11.41: + + [ Tim Gardner ] + + * Enabled LPIA CONFIG_PACKET=y + - LP: #362071 + + [ Upstream Kernel Changes ] + + * ext4: fix bb_prealloc_list corruption due to wrong group locking + - LP: #348836 + + -- Luke Yelavich Fri, 17 Apr 2009 15:06:04 +1000 + +linux-rt (2.6.28-3.11) jaunty; urgency=low + + Rebase on jaunty mainline 2.6.28-11.41: + + [ Amit Kucheria ] + + * ixp4xx: Enabled TCP SYN_COOKIES + - LP: #346378 + + [ Brad Figg ] + + * Change LPIA configuration to compile with CONFIG_NETFILTER_XT_MATCH_RECENT + - LP: #355291 + + [ Kay Sievers ] + + * SAUCE: driver core: allow non-root users to listen to uevents + - LP: #357124 + + [ Manoj Iyer ] + + * SAUCE: Added quirk to recognize GE0301 3G modem as an interface. + - LP: #348861 + + [ Tim Gardner ] + + * Revert "SAUCE: [i915] allocate MCHBAR space & enable if necessary" + Appears to cause hard locks in some cases. + - LP: #349314 + + [ Trond Myklebust ] + + * SAUCE: NFS: Fix the notifications when renaming onto an existing file + - LP: #224642 + + [ Upstream Kernel Changes ] + + * USB: option: add QUANTA HSDPA Data Card device ids + - LP: #353321 + * hwmon: (abituguru3) Match partial DMI board name strings + - LP: #298798 + * zd1211rw: adding Sitecom WL-603 (0df6:0036) to the USB id list + - LP: #339631 + * USB: unusual dev for Option N.V. ZeroCD modems + - LP: #348861 + + -- Luke Yelavich Thu, 09 Apr 2009 02:34:08 +1000 + +linux-rt (2.6.28-3.10) jaunty; urgency=low + + * Disable DEVKMEM for all archs on Jaunty + + Rebase on jaunty mainline 2.6.28-11.40: + + [ Amit Kucheria ] + + * Disable DEVKMEM for all archs on Jaunty + - LP: #354221 + + [ Andy Whitcroft ] + + * SAUCE: md: wait for possible pending deletes after stopping an array + - LP: #334994 + + [ Brad Figg ] + + * ARM: Setting the bootloader for imx51 flavour. + - LP: #348382 + * ARM: Add bootloader package Recomendation to iop32x and ixp4xx flavours + - LP: #348382 + + [ Tim Gardner ] + + * SAUCE: [i915] allocate MCHBAR space & enable if necessary + - LP: #349314 + + [ Upstream Kernel Changes ] + + * hpilo: open/close fix + - LP: #353496 + + [ Alan Tull ] + + * SAUCE: mx51: fix to1.1 in mxc_iomux_set_input + - LP: #348333 + + [ Andy Whitcroft ] + + * SAUCE: acer: rfkill disable quirk for ACER Aspire One + - LP: #319825 + + [ Brad Figg ] + + * ARM: Increase CONFIG_BLK_DEV_RAM_SIZE for imx51 flavour. 
+ - LP: #349842 + * ARM: Enable rtl8187 for imx51 + - LP: #349526 + * ARM: Unset CONFIG_USB_STORAGE_DEBUG for imx51 + - LP: #348504 + + [ Bryan Wu ] + + * build CRAMFS into kernel to support mounting CRAMFS initrd on iop32x + machine + - LP: #349104 + + [ Michael Casadevall ] + + * [lpia] Change ATA, SCSI, SD, ext2-4 modules into compiled-in components + - LP: #347458 + + [ Rob Herring ] + + * SAUCE: imx51: fec: fix cache operations for receive + - LP: #348333 + + [ Sam Yang ] + + * SAUCE: Revert ENGR00103870 FEC reopening causes network wdog timeout + - LP: #348333 + * SAUCE: imx51: fec cache flush functions are incorrect + - LP: #348333 + + [ Upstream Kernel Changes ] + + * Bluetooth: Add fine grained mem_flags usage to btusb driver + - LP: #268502 + * Bluetooth: Handle bulk URBs in btusb driver from notify callback + - LP: #268502 + * Bluetooth: Submit bulk URBs along with interrupt URBs + - LP: #268502 + + -- Luke Yelavich Tue, 07 Apr 2009 09:23:10 +1000 + +linux-rt (2.6.28-3.9) jaunty; urgency=low + + * link-headers: only link directories which do not already exist + + Rebase on jaunty mainline 2.6.28-11.38: + + [ Brad Figg ] + + * When AppArmor is configured, securityfs must be as well. + - LP: #344370 + * ARM: Enable AA with SECURITYFS for imx51 + - LP: #344370 + + [ Bryan Wu ] + + * Add 3 missing files to prerm remove file list + - LP: #345623 + + [ Daniel T Chen ] + + * SAUCE: (drop after 2.6.28) Don't trust hw-ptr blindly + - LP: #330814 + * SAUCE: (drop after 2.6.28) Apply further pcm_lib updates for hw_ptr + - LP: #330814 + + [ Ike Panhc ] + + * Copy header files for various kernel media driver + - LP: #322732 + + [ Tim Gardner ] + + * Revert "Fix the VFP handling on the Feroceon CPU" + Only applied to mv78xx0 ARM flavour. + * Enabled drivers/staging/at76_usb + - LP: #152626 + + [ ubuntu@tjworld.net ] + + * SAUCE: ipw2200: Enable LED by default + - LP: #21367 + * SAUCE: wistron_btns: support Prestigio Wifi RF kill button over suspend + - LP: #346586 + + [ Upstream Kernel Changes ] + + * Build fix for __early_pfn_to_nid() undefined link error + * Fix misreporting of #cores as #hyperthreads for Q9550 + * eventfd: remove fput() call from possible IRQ context + * S390: __div64_31 broken for CONFIG_MARCH_G5 + * ALSA: Fix vunmap and free order in snd_free_sgbuf_pages() + * ALSA: mixart, fix lock imbalance + * ALSA: pcm_oss, fix locking typo + * ALSA: hda - Fix DMA mask for ATI controllers + * ALSA: hda - Workaround for buggy DMA position on ATI controllers + * ALSA: opl3sa2 - Fix NULL dereference when suspending snd_opl3sa2 + * nfsd: nfsd should drop CAP_MKNOD for non-root + * NFSD: provide encode routine for OP_OPENATTR + * dm ioctl: validate name length when renaming + * dm io: respect BIO_MAX_PAGES limit + * dm crypt: fix kcryptd_async_done parameter + * dm crypt: wait for endio to complete before destruction + * ata_piix: add workaround for Samsung DB-P70 + * V4L/DVB (10218): cx23885: Fix Oops for mixed install of analog and + digital only cards + * thinkpad-acpi: fix module autoloading for older models + * Add '-fwrapv' to gcc CFLAGS + * Move cc-option to below arch-specific setup + * USB: storage: Unusual USB device Prolific 2507 variation added + * USB: Add Vendor/Product ID for new CDMA U727 to option driver + * USB: option.c: add ZTE 622 modem device + * USB: Add device id for Option GTM380 to option driver + * USB: Option: let cdc-acm handle Sony Ericsson F3507g / Dell 5530 + * USB: Updated unusual-devs entry for USB mass storage on Nokia 6233 + * USB: unusual_devs: Add 
support for GI 0431 SD-Card interface + * USB: serial: add FTDI USB/Serial converter devices + * USB: serial: ftdi: enable UART detection on gnICE JTAG adaptors + blacklist interface0 + * USB: serial: new cp2101 device id + * USB: usbtmc: fix stupid bug in open() + * USB: usbtmc: add protocol 1 support + * USB: usbfs: keep async URBs until the device file is closed + * USB: EHCI: expedite unlinks when the root hub is suspended + * USB: EHCI: Fix isochronous URB leak + * powerpc: Remove extra semicolon in fsl_soc.c + * menu: fix embedded menu snafu + * Linux 2.6.28.9 + * Add '-fwrapv' to gcc CFLAGS + - LP: #348015 + * Move cc-option to below arch-specific setup + - LP: #348015 + * Revert Staging: at76_usb: update drivers/staging/at76_usb w/ mac80211 + port + - LP: #152626 + * Staging: at76_usb: fix bugs introduced by "Staging: at76_usb: cleanup + dma on stack issues" + - LP: #152626 + * Staging: at76_usb: Add support for OQO Model 01+ + - LP: #152626 + + [ Alex Deucher ] + + * SAUCE: radeon: add some new pci ids + - LP: #334101 + + [ Amit Kucheria ] + + * Updating configs - rip out orion5x and mv78xx0 flavours + + [ Andy Whitcroft ] + + * SAUCE: tone down the synaptics warning to avoid triggering kerneloops + - LP: #330606 + + [ Upstream Kernel Changes ] + + * ext4: fix header check in ext4_ext_search_right() for deep extent + trees. + - LP: #346194 + * eCryptfs: NULL crypt_stat dereference during lookup + - LP: #345766 + * eCryptfs: Allocate a variable number of pages for file headers + (CVE-2009-0787) + - LP: #345544 + + -- Luke Yelavich Tue, 24 Mar 2009 10:40:43 +1100 + +linux-rt (2.6.28-3.8) jaunty; urgency=low + + * Rebase on jaunty mainline 2.6.28-11.36. + * Adjust CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR as per jaunty mainline. + * Adjust CONFIG_UEVENT_HELPER_PATH as per jaunty mainline. + * Adjust dependency on linux-source-2.6.28. + + -- Luke Yelavich Sun, 22 Mar 2009 10:18:54 +1100 + +linux-rt (2.6.28-3.7) jaunty; urgency=low + + * Rebase on jaunty mainline 2.6.28-11.34. + * Update config files as per jaunty mainline. + * Add hard dependency on linux-source-2.6.28 = 2.6.28-11.34 + + -- Luke Yelavich Wed, 18 Mar 2009 17:56:45 +1100 + +linux-rt (2.6.28-2.6) jaunty; urgency=low + + * Rebase on top of Jaunty 2.6.28-9.29. + * Add code to copy modules.order into the kernel packages. + * Add fuse to modules.ignore since fuse is built into the kernel. + + -- Luke Yelavich Tue, 10 Mar 2009 09:45:01 +1100 + +linux-rt (2.6.28-1.5) jaunty; urgency=low + + [ Alessio Igor Bogani ] + * Resync with ubuntu jaunty kernel v2.6.28-8.26 + + [ Luke Yelavich ] + * re-enable ABI checking + * debian/scripts/misc/getabis: tweak for fetching the rt abi files + * build our own arch independant headers package as well + * rebase against jaunty 2.6.28-8.27 + * update config files. + + -- Luke Yelavich Fri, 06 Mar 2009 07:58:51 +1100 + +linux-rt (2.6.28-1.4) jaunty; urgency=low + + * Resync with ubuntu jaunty kernel v2.6.28-8.24 + * Merge all patches into only one + * Update config files + + -- Alessio Igor Bogani Fri, 20 Feb 2009 19:54:53 +0100 + +linux-rt (2.6.28-1.3) jaunty; urgency=low + + * Update config files, and remove unneeded armel files/references. 
+ + -- Luke Yelavich Sat, 14 Feb 2009 22:18:58 +1100 + +linux-rt (2.6.28-1.2) jaunty; urgency=low + + * Resync with the ubuntu jaunty kernel, v2.6.28-8.21, and use the work done + so far by Alessio Igor Bogani for the realtime patch + * Partially remove udeb generation code, as the udebs are not ever used + for installation + * Add a hack to ensure that the linux-headers package depends on jaunty's + main linux headers package + + -- Luke Yelavich Sat, 14 Feb 2009 16:24:24 +1100 + +linux-rt (2.6.28-1.1) jaunty; urgency=low + + * Initial version based on linux-source-2.6.28 2.6.28-6.16 + * CONFIG_AUFS and CONFIG_CGROUP_MEM_RES_CTLR are still disabled + + -- Alessio Igor Bogani Thu, 29 Jan 2009 08:46:29 +0100 + +linux-rt (2.6.27-3.8) intrepid; urgency=low + + * Blacklisted XFS to prevent an ABI bump + - This is based off git commit 21c69ea60244403e503f148cd29d89df85eb0908 + from ubuntu-intrepid.git (LP: #289683) + + -- Michael Casadevall Sun, 26 Oct 2008 20:46:33 -0400 + +linux-rt (2.6.27-3.7) intrepid; urgency=low + + * Synced against linux-source-2.6.27 2.6.27-7.12 + * Added ext4-nrpages.patch and jbd2-nrpages.patch + * Synced configuration + + -- Alessio Igor Bogani Sat, 18 Oct 2008 18:08:55 +0200 + +linux-rt (2.6.27-3.6) intrepid; urgency=low + + * Synced against linux-source-2.6.27 2.6.27-7.11 + + -- Alessio Igor Bogani Wed, 15 Oct 2008 15:36:52 +0200 + +linux-rt (2.6.27-3.5) intrepid; urgency=low + + * Synced against linux-source-2.6.27 2.6.27-7.10 + * Bump ABI to -3 + + -- Alessio Igor Bogani Mon, 13 Oct 2008 15:46:02 +0200 + +linux-rt (2.6.27-2.4) intrepid; urgency=low + + * Mistake in packaging + + -- Alessio Igor Bogani Fri, 10 Oct 2008 14:07:41 +0200 + +linux-rt (2.6.27-2.3) intrepid; urgency=low + + * Fix a FTBS + + -- Alessio Igor Bogani Fri, 10 Oct 2008 11:46:25 +0200 + +linux-rt (2.6.27-2.2) intrepid; urgency=low + + * Synced against linux-source-2.6.27 2.6.27-6.9 + * Disable CONFIG_AUFS, CONFIG_CGROUP_MEM_RES_CTLR, CONFIG_FTRACE, + CONFIG_LATENCYTOP, CONFIG_CONTEXT_SWITCH_TRACER + * Set timer frequency to 1000 HZ + * Bump ABI to -2 + + -- Alessio Igor Bogani Thu, 09 Oct 2008 15:27:27 +0200 + +linux-rt (2.6.27-1.1) intrepid; urgency=low + + * Initial version based on linux-source-2.6.27 2.6.27-4.6 (LP: #281276) + + -- Alessio Igor Bogani Fri, 03 Oct 2008 09:15:26 +0200 --- linux-rt-2.6.29.5.orig/debian/changelog.jaunty +++ linux-rt-2.6.29.5/debian/changelog.jaunty @@ -0,0 +1,4737 @@ +linux (2.6.28-11.42) jaunty; urgency=low + + [ Tim Gardner ] + + * Enabled LPIA CONFIG_PACKET=y + - LP: #362071 + + [ Upstream Kernel Changes ] + + * ext4: fix bb_prealloc_list corruption due to wrong group locking + - LP: #348836 + + -- Stefan Bader Thu, 16 Apr 2009 08:10:55 +0200 + +linux (2.6.28-11.41) jaunty; urgency=low + + [ Amit Kucheria ] + + * ixp4xx: Enabled TCP SYN_COOKIES + - LP: #346378 + + [ Brad Figg ] + + * Change LPIA configuration to compile with CONFIG_NETFILTER_XT_MATCH_RECENT + - LP: #355291 + + [ Kay Sievers ] + + * SAUCE: driver core: allow non-root users to listen to uevents + - LP: #357124 + + [ Manoj Iyer ] + + * SAUCE: Added quirk to recognize GE0301 3G modem as an interface. + - LP: #348861 + + [ Tim Gardner ] + + * Revert "SAUCE: [i915] allocate MCHBAR space & enable if necessary" + Appears to cause hard locks in some cases. 
+ - LP: #349314 + + [ Trond Myklebust ] + + * SAUCE: NFS: Fix the notifications when renaming onto an existing file + - LP: #224642 + + [ Upstream Kernel Changes ] + + * USB: option: add QUANTA HSDPA Data Card device ids + - LP: #353321 + * hwmon: (abituguru3) Match partial DMI board name strings + - LP: #298798 + * zd1211rw: adding Sitecom WL-603 (0df6:0036) to the USB id list + - LP: #339631 + * USB: unusual dev for Option N.V. ZeroCD modems + - LP: #348861 + + -- Tim Gardner Sat, 04 Apr 2009 08:42:14 -0600 + +linux (2.6.28-11.40) jaunty; urgency=low + + [ Amit Kucheria ] + + * Disable DEVKMEM for all archs on Jaunty + - LP: #354221 + + [ Andy Whitcroft ] + + * SAUCE: md: wait for possible pending deletes after stopping an array + - LP: #334994 + + [ Brad Figg ] + + * ARM: Setting the bootloader for imx51 flavour. + - LP: #348382 + * ARM: Add bootloader package Recomendation to iop32x and ixp4xx flavours + - LP: #348382 + + [ Tim Gardner ] + + * SAUCE: [i915] allocate MCHBAR space & enable if necessary + - LP: #349314 + + [ Upstream Kernel Changes ] + + * hpilo: open/close fix + - LP: #353496 + + -- Amit Kucheria Thu, 02 Apr 2009 11:26:22 -0400 + +linux (2.6.28-11.39) jaunty; urgency=low + + [ Alan Tull ] + + * SAUCE: mx51: fix to1.1 in mxc_iomux_set_input + - LP: #348333 + + [ Andy Whitcroft ] + + * SAUCE: acer: rfkill disable quirk for ACER Aspire One + - LP: #319825 + + [ Brad Figg ] + + * ARM: Increase CONFIG_BLK_DEV_RAM_SIZE for imx51 flavour. + - LP: #349842 + * ARM: Enable rtl8187 for imx51 + - LP: #349526 + * ARM: Unset CONFIG_USB_STORAGE_DEBUG for imx51 + - LP: #348504 + + [ Bryan Wu ] + + * build CRAMFS into kernel to support mounting CRAMFS initrd on iop32x + machine + - LP: #349104 + + [ Michael Casadevall ] + + * [lpia] Change ATA, SCSI, SD, ext2-4 modules into compiled-in components + - LP: #347458 + + [ Rob Herring ] + + * SAUCE: imx51: fec: fix cache operations for receive + - LP: #348333 + + [ Sam Yang ] + + * SAUCE: Revert ENGR00103870 FEC reopening causes network wdog timeout + - LP: #348333 + * SAUCE: imx51: fec cache flush functions are incorrect + - LP: #348333 + + [ Upstream Kernel Changes ] + + * Bluetooth: Add fine grained mem_flags usage to btusb driver + - LP: #268502 + * Bluetooth: Handle bulk URBs in btusb driver from notify callback + - LP: #268502 + * Bluetooth: Submit bulk URBs along with interrupt URBs + - LP: #268502 + + -- Tim Gardner Wed, 01 Apr 2009 17:37:32 -0600 + +linux (2.6.28-11.38) jaunty; urgency=low + + [ Brad Figg ] + + * When AppArmor is configured, securityfs must be as well. + - LP: #344370 + * ARM: Enable AA with SECURITYFS for imx51 + - LP: #344370 + + [ Bryan Wu ] + + * Add 3 missing files to prerm remove file list + - LP: #345623 + + [ Daniel T Chen ] + + * SAUCE: (drop after 2.6.28) Don't trust hw-ptr blindly + - LP: #330814 + * SAUCE: (drop after 2.6.28) Apply further pcm_lib updates for hw_ptr + - LP: #330814 + + [ Ike Panhc ] + + * Copy header files for various kernel media driver + - LP: #322732 + + [ Tim Gardner ] + + * Revert "Fix the VFP handling on the Feroceon CPU" + Only applied to mv78xx0 ARM flavour. 
+ * Enabled drivers/staging/at76_usb + - LP: #152626 + + [ ubuntu@tjworld.net ] + + * SAUCE: ipw2200: Enable LED by default + - LP: #21367 + * SAUCE: wistron_btns: support Prestigio Wifi RF kill button over suspend + - LP: #346586 + + [ Upstream Kernel Changes ] + + * Build fix for __early_pfn_to_nid() undefined link error + * Fix misreporting of #cores as #hyperthreads for Q9550 + * eventfd: remove fput() call from possible IRQ context + * S390: __div64_31 broken for CONFIG_MARCH_G5 + * ALSA: Fix vunmap and free order in snd_free_sgbuf_pages() + * ALSA: mixart, fix lock imbalance + * ALSA: pcm_oss, fix locking typo + * ALSA: hda - Fix DMA mask for ATI controllers + * ALSA: hda - Workaround for buggy DMA position on ATI controllers + * ALSA: opl3sa2 - Fix NULL dereference when suspending snd_opl3sa2 + * nfsd: nfsd should drop CAP_MKNOD for non-root + * NFSD: provide encode routine for OP_OPENATTR + * dm ioctl: validate name length when renaming + * dm io: respect BIO_MAX_PAGES limit + * dm crypt: fix kcryptd_async_done parameter + * dm crypt: wait for endio to complete before destruction + * ata_piix: add workaround for Samsung DB-P70 + * V4L/DVB (10218): cx23885: Fix Oops for mixed install of analog and + digital only cards + * thinkpad-acpi: fix module autoloading for older models + * Add '-fwrapv' to gcc CFLAGS + * Move cc-option to below arch-specific setup + * USB: storage: Unusual USB device Prolific 2507 variation added + * USB: Add Vendor/Product ID for new CDMA U727 to option driver + * USB: option.c: add ZTE 622 modem device + * USB: Add device id for Option GTM380 to option driver + * USB: Option: let cdc-acm handle Sony Ericsson F3507g / Dell 5530 + * USB: Updated unusual-devs entry for USB mass storage on Nokia 6233 + * USB: unusual_devs: Add support for GI 0431 SD-Card interface + * USB: serial: add FTDI USB/Serial converter devices + * USB: serial: ftdi: enable UART detection on gnICE JTAG adaptors + blacklist interface0 + * USB: serial: new cp2101 device id + * USB: usbtmc: fix stupid bug in open() + * USB: usbtmc: add protocol 1 support + * USB: usbfs: keep async URBs until the device file is closed + * USB: EHCI: expedite unlinks when the root hub is suspended + * USB: EHCI: Fix isochronous URB leak + * powerpc: Remove extra semicolon in fsl_soc.c + * menu: fix embedded menu snafu + * Linux 2.6.28.9 + * Add '-fwrapv' to gcc CFLAGS + - LP: #348015 + * Move cc-option to below arch-specific setup + - LP: #348015 + * Revert Staging: at76_usb: update drivers/staging/at76_usb w/ mac80211 + port + - LP: #152626 + * Staging: at76_usb: fix bugs introduced by "Staging: at76_usb: cleanup + dma on stack issues" + - LP: #152626 + * Staging: at76_usb: Add support for OQO Model 01+ + - LP: #152626 + + -- Tim Gardner Mon, 23 Mar 2009 19:20:08 -0600 + +linux (2.6.28-11.37) jaunty; urgency=low + + [ Alex Deucher ] + + * SAUCE: radeon: add some new pci ids + - LP: #334101 + + [ Amit Kucheria ] + + * Updating configs - rip out orion5x and mv78xx0 flavours + + [ Andy Whitcroft ] + + * SAUCE: tone down the synaptics warning to avoid triggering kerneloops + - LP: #330606 + + [ Upstream Kernel Changes ] + + * ext4: fix header check in ext4_ext_search_right() for deep extent + trees. 
+ - LP: #346194 + * eCryptfs: NULL crypt_stat dereference during lookup + - LP: #345766 + * eCryptfs: Allocate a variable number of pages for file headers + (CVE-2009-0787) + - LP: #345544 + + -- Tim Gardner Mon, 23 Mar 2009 09:24:32 -0600 + +linux (2.6.28-11.36) jaunty; urgency=low + + [ Amit Kucheria ] + + * Updating imx51 configs one more time + * Disable CONFIG_UEVENT_HELPER_PATH + + [ Anton Veretenenko ] + + * SAUCE: sony-laptop: add support for Sony Vaio FW series function/media + keys + - LP: #307592 + + [ Brad Figg ] + + * Have AUFS use current VFS APIs so it can build with or without + AppArmor. + + [ Bryan Wu ] + + * Build-in "Ram block device support" to boot up with initramfs + - LP: #329098 + * Remove brd module from iop32x modules list + - LP: #329098 + * Increase the CONFIG_BLK_DEV_RAM_SIZE to 8192 on i.MX51 + + [ Ike Panhc ] + + * SAUCE: Fixing symbol name in HECI module + - LP: #336549 + + [ Manoj Iyer ] + + * SAUCE: Added quirk for Ralink rt2870 802.11n USB driver + - LP: #326621 + + [ Upstream Kernel Changes ] + + * udf:SAUCE (drop after 2.6.30): Fix oops when invalid character in + filename occurs + - LP: #321606 + + -- Stefan Bader Fri, 20 Mar 2009 16:52:08 +0100 + +linux (2.6.28-11.35) jaunty; urgency=low + + [ Amit Kucheria ] + + * Updating imx51 configs + + [ Andy Whitcroft ] + + * SAUCE: hotkey quirks for various Zeptro Znote and Fujitsu Amilo laptops + - LP: #330259 + + [ Tim Gardner ] + + * Revert "SAUCE: (drop after 2.6.28) eCryptfs: Don't encrypt file key + with filename key". Use upstream commit. + * CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR set to upstream defaults. + 64K for x86'en, 32K for ARM + - LP: #344955 + + [ Upstream Kernel Changes ] + + * eCryptfs: don't encrypt file key with filename key + * libata: set NODEV_HINT for 0x7f status + - LP: #293218 + * USB: cdc-acm: Add another conexant modem to the quirks + - LP: #323829 + * Input: elantech - touchpad driver miss-recognising logitech mice + - LP: #318722 + + -- Tim Gardner Wed, 18 Mar 2009 08:52:46 -0600 + +linux (2.6.28-11.34) jaunty; urgency=low + + [ Alex Deucher ] + + * SAUCE: (drop after 2.6.28) radeon: add support for RS600, R6xx, and + R7xx GPUs + - LP: #334101 + + [ Aristeu Sergio Rozanski Filho ] + + * SAUCE: (drop after 2.6.28) ALSA: hda: add quirk for Lenovo X200 laptop + dock + + [ Shane Huang ] + + * SAUCE: (drop after 2.6.28) i2c-piix4: Add support to SB800 SMBus + changes + - LP: #338108 + + [ Upstream Kernel Changes ] + + * net: amend the fix for SO_BSDCOMPAT gsopt infoleak + * net: Kill skb_truesize_check(), it only catches false-positives. + * sparc64: Fix crashes in jbusmc_print_dimm() + * sparc64: Fix DAX handling via userspace access from kernel. + * vfs: separate FMODE_PREAD/FMODE_PWRITE into separate flags + * seq_file: properly cope with pread + * vt: Declare PIO_CMAP/GIO_CMAP as compatbile ioctls. 
+ * timerfd: add flags check + * aoe: ignore vendor extension AoE responses + * mm: clean up for early_pfn_to_nid() + * mm: fix memmap init for handling memory hole + * Fix oops in cifs_strfromUCS_le mounting to servers which do not specify + their OS + * mm: fix lazy vmap purging (use-after-free error) + * mm: vmap fix overflow + * PCI quirk: enable MSI on 8132 + * SCSI: hptiop: Add new PCI device ID + * JFFS2: fix mount crash caused by removed nodes + * SCSI: sd: revive sd_index_lock + * USB: usb_get_string should check the descriptor type + * USB: usb-storage: add IGNORE_RESIDUE flag for Genesys Logic adapters + * USB: cdc-acm: add usb id for motomagx phones + * rtl8187: New USB ID's for RTL8187L + * WATCHDOG: ks8695_wdt.c: 'CLOCK_TICK_RATE' undeclared + * WATCHDOG: rc32434_wdt: fix watchdog driver + * WATCHDOG: rc32434_wdt: fix sections + * RDMA/nes: Don't allow userspace QPs to use STag zero + * USB: option: add BenQ 3g modem information + * md: avoid races when stopping resync. + * md/raid10: Don't call bitmap_cond_end_sync when we are doing recovery. + * md/raid10: Don't skip more than 1 bitmap-chunk at a time during + recovery. + * sound: virtuoso: revert "do not overwrite EEPROM on Xonar D2/D2X" + * ALSA: usb-audio - Fix non-continuous rate detection + * ALSA: usb-audio - Workaround for misdetected sample rate with CM6207 + * sound: usb-audio: fix uninitialized variable with M-Audio MIDI + interfaces + * ALSA: fix excessive background noise introduced by OSS emulation rate + shrink + * ALSA: hda - Fix digital mic on dell-m4-1 and dell-m4-3 + * ALSA: aw2: do not grab every saa7146 based device + * acer-wmi: fix regression in backlight detection + * vmalloc: call flush_cache_vunmap() from unmap_kernel_range() + * Fix fixpoint divide exception in acct_update_integrals + * 8250: fix boot hang with serial console when using with Serial Over Lan + port + * x86, vmi: TSC going backwards check in vmi clocksource + * HID: fix bus endianity in file2alias + * inotify: fix GFP_KERNEL related deadlock + * sdhci: fix led naming + * x86: oprofile: don't set counter width from cpuid on Core2 + * intel-agp: fix a panic with 1M of shared memory, no GTT entries + * mtd_dataflash: fix probing of AT45DB321C chips. + * proc: fix kflags to uflags copying in /proc/kpageflags + * fs: new inode i_state corruption fix + * PCIe: portdrv: call pci_disable_device during remove + * PCI: Enable PCIe AER only after checking firmware support + * jsm: additional device support + * libata: Don't trust current capacity values in identify words 57-58 + * mmc: fix data timeout for SEND_EXT_CSD + * s3cmci: Fix hangup in do_pio_write() + * mmc: s3cmci: fix s3c2410_dma_config() arguments. 
+ * MMC: fix bug - SDHC card capacity not correct + * mmc_test: fix basic read test + * x86: tone down mtrr_trim_uncached_memory() warning + * selinux: Fix a panic in selinux_netlbl_inode_permission() + * selinux: Fix the NetLabel glue code for setsockopt() + * hpilo: new pci device + * x86-64: seccomp: fix 32/64 syscall hole + * x86-64: syscall-audit: fix 32/64 syscall hole + * xen: disable interrupts early, as start_kernel expects + * xen/blkfront: use blk_rq_map_sg to generate ring entries + * asix: new device ids + * cdc_ether: add usb id for Ericsson F3507g + * zaurus: add usb id for motomagx phones + * fore200: fix oops on failed firmware load + * PCI: Add PCI quirk to disable L0s ASPM state for 82575 and 82598 + * copy_process: fix CLONE_PARENT && parent_exec_id interaction + * proc: fix PG_locked reporting in /proc/kpageflags + * powerpc: Fix load/store float double alignment handler + * sdhci: Add quirk for controllers with no end-of-busy IRQ + * sdhci: Add NO_BUSY_IRQ quirk for Marvell CAFE host chip + * pipe_rdwr_fasync: fix the error handling to prevent the leak/crash + * DVB: s5h1409: Perform s5h1409 soft reset after tuning + * V4L: tda8290: fix TDA8290 + TDA18271 initialization + * V4L: ivtv: fix decoder crash regression + * x86/paravirt: make arch_flush_lazy_mmu/cpu disable preemption + * x86, hpet: fix for LS21 + HPET = boot hang + * x86: math_emu info cleanup + * x86: fix math_emu register frame access + * ide-iops: fix odd-length ATAPI PIO transfers + * HID: move tmff and zpff devices from ignore_list to blacklist + * ARM: Add i2c_board_info for RiscPC PCF8583 + * i2c: Timeouts reach -1 + * i2c: Fix misplaced parentheses + * ACPI: fix broken usage of name.ascii + * ACPI: fix broken usage of acpi_ut_get_node_name() + * crypto: api - Fix algorithm test race that broke aead initialisation + * hwmon: (f71882fg) Hide misleading error message + * MIPS: compat: Implement is_compat_task. + * hwmon: (it87) Properly decode -128 degrees C temperature + * Linux 2.6.28.8 + + -- Tim Gardner Tue, 17 Mar 2009 07:07:33 -0600 + +linux (2.6.28-10.33) jaunty; urgency=low + + [ Scott James Remnant ] + + * SAUCE: nbd: Change default partitions per device to 15 + - LP: #342563 + + [ Tejun Heo ] + + * SAUCE: libata: make sure port is thawed when skipping resets + - LP: #269652 + + [ Tim Gardner ] + + * Revert "SAUCE: Auto-load esp module when device opened." + This driver performs unsafe ISA probes (according to Alan Cox). + * Enable CONFIG_USB_GADGET_DUMMY_HCD + This facilitates gadget slave endpoints in virtual environments. 
+ * Build ehci, uhci, and ohci into the i386/amd64 kernels + - LP: #296710 + + [ Upstream Kernel Changes ] + + * Add "thumbee" to the hwcap_str array + - LP: #343602 + * Add HWCAP_NEON to the ARM hwcap.h file + - LP: #343602 + * x86: mtrr: don't modify RdDram/WrDram bits of fixed MTRRs + - LP: #292619 + + -- Tim Gardner Mon, 16 Mar 2009 08:19:53 -0600 + +linux (2.6.28-10.32) jaunty; urgency=low + + [ Amit Kucheria ] + + * Delete prepare-ppa-source script + + [ Andy Isaacson ] + + * SAUCE: FSAM7400: select CHECK_SIGNATURE + * SAUCE: LIRC_PVR150: depends on VIDEO_IVTV + - LP: #341477 + + [ Ayaz Abdulla ] + + * SAUCE: forcedeth: msi interrupt fix + - LP: #288281 + + [ Brad Figg ] + + * Updating armel configs to remove PREEMPT + + [ Catalin Marinas ] + + * Fix the VFP handling on the Feroceon CPU + + [ Huaxu Wan ] + + * SAUCE: (drop after 2.6.28) [Jaunty] iwlagn: fix iwlagn DMA mapping + direction + + [ Ike Panhc ] + + * squashfs: correct misspelling + - LP: #322306 + + [ Theodore Ts'o ] + + * SAUCE: (drop after 2.6.28) ext4: add EXT4_IOC_ALLOC_DA_BLKS ioctl + * SAUCE: (drop after 2.6.28) ext4: Automatically allocate delay allocated + blocks on close + * SAUCE: (drop after 2.6.28) ext4: Automatically allocate delay allocated + blocks on rename + - LP: #317781 + + [ Tyler Hicks ] + + * SAUCE: (drop after 2.6.28) eCryptfs: Don't encrypt file key with + filename key + - LP: #342128 + + [ Upstream Kernel Changes ] + + * ALS: hda - Add support of iMac 24 Aluminium + * USB: fix broken OTG makefile reference + * ALSA: hda - add another MacBook Pro 3,1 SSID + * ALSA: hda - Add model entry for HP dv4 + * x86-64: fix int $0x80 -ENOSYS return + - LP: #339743 + + -- Tim Gardner Thu, 12 Mar 2009 19:16:07 -0600 + +linux (2.6.28-9.31) jaunty; urgency=low + + [ Andy Whitcroft ] + + * SAUCE: cpufreq-nforce2: probe failures are not errors + - LP: #332170 + * SAUCE: mmc: add MODALIAS linkage for MMC/SD devices + - LP: #30335 + * remove test-suspend script + - LP: #333856 + + [ Kees Cook ] + + * handle relative paths in modules.dep + Fixes 2.6.28-9.30 FTBS. 
+ + [ Upstream Kernel Changes ] + + * ricoh_mmc: Handle newer models of Ricoh controllers + + -- Tim Gardner Wed, 11 Mar 2009 08:19:24 -0600 + +linux (2.6.28-9.30) jaunty; urgency=low + + [ Amit Kucheria ] + + * ARM:mx51 Add SoC and board support for mx51 platforms + * ARM:mx51 Add CONFIG_ARCH_MXC_CANONICAL to disable parts of Freescale's + code + * MMC: Add support for 8-bit cards + * Add ARM:MX51 SoC support to the build system + * ARM: Make ARM arch aware of ubuntu/ drivers + * ARM: Add imx51 configuration + * Disable d-i modules for imx51 and mv78xx0 + * Disable Apparmor on boot for ARM + * Updating imx51 config + + [ Jason Liu ] + + * Do not use OOB with MLC NAND + + [ Richard Zhu ] + + * Support the eMMC4.3 card + + [ Rob Herring ] + + * ARM: Add more cache memory types macros + + [ Tim Gardner ] + + * Set CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y for i386/amd64/lpia + + [ Manoj Iyer ] + + * Enable CONFIG_RTL8187SE=m + + [ Upstream Kernel Changes ] + + * USB: EHCI: slow down ITD reuse + - LP: #329437 + + -- Tim Gardner Sun, 08 Mar 2009 14:14:15 -0600 + +linux (2.6.28-9.29) jaunty; urgency=low + + [ Andy Whitcroft ] + + * link-headers -- only link directories which do not already exist + - LP: #315252 + + [ Daniel Marjamäki ] + + * SAUCE: (drop after 2.6.28) netxen: fix memory leak in + drivers/net/netxen_nic_init.c + - LP: #330813 + + [ Dhananjay Phadke ] + + * SAUCE: (drop after 2.6.28) netxen: fix endianness in firmware commands + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: fix ipv6 offload and tx cleanup + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: fix link speed reporting for some + boards + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: firmware init fix + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: cleanup mac list on driver unload + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: hold tx lock while sending firmware + commands + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: handle dma mapping failures + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: avoid invalid iounmap + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: include ipv6.h (fixes build failure) + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: fix vlan tso/checksum offload + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: reduce memory footprint + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: revert jumbo ringsize + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: fix msi-x interrupt handling + - LP: #330813 + * SAUCE: (drop after 2.6.28) netxen: remove pcie workaround + - LP: #330813 + + [ Hannes Eder ] + + * SAUCE: (drop after 2.6.28) drivers/net/netxen: fix sparse warnings: use + NULL pointer instead of plain integer + - LP: #330813 + + [ Huaxu Wan ] + + * SAUCE: report rfkill changes event if interface is down + - LP: #193970 + + [ Tim Gardner ] + + * MV78XX0 must specify a target in the vars definition. 
+ + [ Upstream Kernel Changes ] + + * Revert "ext4: wait on all pending commits in ext4_sync_fs()" + * jbd2: Fix return value of jbd2_journal_start_commit() + * jbd2: Avoid possible NULL dereference in + jbd2_journal_begin_ordered_truncate() + * ext4: Fix to read empty directory blocks correctly in 64k + * ext4: Fix lockdep warning + * ext4: Initialize preallocation list_head's properly + * ext4: Implement range_cyclic in ext4_da_writepages instead of + write_cache_pages + * ext4: Fix NULL dereference in ext4_ext_migrate()'s error handling + * ext4: Add fallback for find_group_flex + * ext4: Fix deadlock in ext4_write_begin() and ext4_da_write_begin() + * Added mv78xx0 flavor + + -- Tim Gardner Fri, 06 Mar 2009 06:13:31 -0700 + +linux (2.6.28-8.28) jaunty; urgency=low + + [ Alexey Starikovskiy ] + + * SAUCE: ACPI: EC: Limit workaround for ASUS notebooks even more + - LP: #288385 + + [ Scott James Remnant ] + + * SAUCE: Auto-load esp module when device opened. + * SAUCE: Auto-load bridge module when socket opened. + * SAUCE: Auto-load af_netlink module when socket opened. + * SAUCE: Auto-load wanrouter module when socket opened. + * SAUCE: Auto-load ip_queue module when socket opened. + * SAUCE: Auto-load ip6_queue module when socket opened. + * SAUCE: Auto-load cn module when socket opened. + * SAUCE: Auto-load scsi_transport_iscsi module when socket opened. + * SAUCE: Auto-load ftl module when device opened. + * SAUCE: Auto-load pcd module when device opened. + * SAUCE: Auto-load pf module when device opened. + * SAUCE: Auto-load nftl module when device opened. + * SAUCE: Auto-load mousedev module when psaux device opened. + * SAUCE: Auto-load mousedev module when /dev/input/mice opened. + * SAUCE: Auto-load rng-core module when device opened. + * SAUCE: Auto-load openprom module when device opened. + * SAUCE: Auto-load applicom module when device opened. + * SAUCE: Auto-load toshiba module when device opened. + * SAUCE: Auto-load cyclades module when device opened. + * SAUCE: Auto-load riscom8 module when device opened. + * SAUCE: Auto-load specialix module when device opened. + * SAUCE: Auto-load videodev module when device opened. + * SAUCE: Auto-load i2c_dev module when device opened. + * SAUCE: Auto-load mtdchar module when device opened. + * SAUCE: Auto-load pt module when device opened. + * SAUCE: Auto-load pg module when device opened. + * SAUCE: Auto-load cdc_acm module when device opened. + * SAUCE: Auto-load msr module when device opened. + * SAUCE: Auto-load cpuid module when device opened. + * SAUCE: quickcam: Enable double-buffering by default + * SAUCE: libata: Ignore HPA by default. + * SAUCE: hostap: Change initial operation mode to managed (infra) + * SAUCE: floppy: Provide a PnP device table in the module. + - LP: #255651 + * SAUCE: Auto-load mwave module when device opened. + * Build CONFIG_FUSE_FS into kernel, not as module. 
+ + [ Stefan Bader ] + + * Enable build of ext4 as a module on LPIA + - LP: #331848 + + [ Tim Gardner ] + + * Update configs to fix LPIA FTBS + + -- Tim Gardner Thu, 05 Mar 2009 10:43:24 -0700 + +linux (2.6.28-8.27) jaunty; urgency=low + + [ Amit Kucheria ] + + * Updating configs (arm:ixp4xx) + + [ Andy Whitcroft ] + + * SAUCE: enable Intel HDMI output + + [ Manoj Iyer ] + + * SAUCE: Added quirk for Linksys WUSB600N USB wifi-n networking adapter + - LP: #323473 + + [ Steve Beattie ] + + * fix apparmor memory leak on unlinked file ops + - LP: #329489 + + [ Tim Gardner ] + + * SAUCE: Dell XPS710 reboot quirk + - LP: #323592 + * SAUCE: (drop after 2.6.28) ieee80211: Add infrastructure to obsolete + scan results + - LP: #336055 + * Add modules.order to the linux-image package. + + [ Upstream Kernel Changes ] + + * iwlwifi: fix time interval misuse in iwl_poll_{direct_}bit + * x86: only scan the root bus in early PCI quirks + - LP: #267295 + * ALSA: hda - Intel HDMI audio support + * ALSA: hda - Fix unused function in patch_intelhdmi.c + * ALSA: handle SiI1392 HDMI codec in patch_intelhdmi.c + * ALSA: hda-intel: reorder HDMI audio enabling sequence + * ALSA: introduce snd_print_pcm_rates() + * ALSA: create hda_eld.c for ELD routines and proc interface + * ALSA: ELD proc interface for HDMI sinks + * ALSA: hda: make standalone hdmi_fill_audio_infoframe() + * ALSA: hda: make global snd_print_channel_allocation() + * ALSA: hda: HDMI channel allocations for audio infoframe + * ALSA: hda: HDMI channel mapping cleanups + * ALSA: hda: minor code cleanups + * ALSA: hda: rename sink_eld to hdmi_eld + * ALSA: hda - Release ELD proc file + * ALSA: hda - minor HDMI code cleanups + * ALSA: hda - report selected CA index for Audio InfoFrame + * ALSA: hda - Add Intel vendor id string + + -- Tim Gardner Wed, 25 Feb 2009 14:23:46 -0700 + +linux (2.6.28-8.26) jaunty; urgency=low + + [ Amit Kucheria ] + + * Updating configs (armel:ixp4xx) + - LP: #331510 + + [ Tim Gardner ] + + * Add more missing modules + + -- Tim Gardner Tue, 24 Feb 2009 06:58:53 -0700 + +linux (2.6.28-8.25) jaunty; urgency=low + + [ Scott James Remnant ] + + * SAUCE: Prefer powernow-k8 to acpi-cpufreq + * Change CONFIG_X86_P4_CLOCKMOD to be a module again. 
+ + [ Tim Gardner ] + + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Initialize the new + group descriptor when resizing the filesystem" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Add sanity check + to make_indexed_dir" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: only use + i_size_high for regular files" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Add sanity checks + for the superblock before mounting the filesystem" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Fix + s_dirty_blocks_counter if block allocation failed with nodelalloc" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Init the complete + page while building buddy cache" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Don't allow new + groups to be added during block allocation" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: mark the + blocks/inode bitmap beyond end of group as used" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Use new + buffer_head flag to check uninit group bitmaps initialization" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Fix the race + between read_inode_bitmap() and ext4_new_inode()" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Fix race between + read_block_bitmap() and mark_diskspace_used()" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: don't use blocks + freed but not yet committed in buddy cache init" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: cleanup mballoc + header files" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Use + EXT4_GROUP_INFO_NEED_INIT_BIT during resize" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Add blocks added + during resize to bitmap" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Don't overwrite + allocation_context ac_status" + * Revert "SAUCE: (revert before 2.6.28.y update) jbd2: Add barrier not + supported test to journal_wait_on_commit_record" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Widen type of + ext4_sb_info.s_mb_maxs[]" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: avoid ext4_error + when mounting a fs with a single bg" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Fix the delalloc + writepages to allocate blocks at the right offset." 
+ * Revert "SAUCE: (revert before 2.6.28.y update) ext4: tone down + ext4_da_writepages warnings" + * Revert "SAUCE: (revert before 2.6.28.y update) ext4: Add support for + non-native signed/unsigned htree hash algorithms" + * Enabled X86_ACPI_CPUFREQ=y + + [ Upstream Kernel Changes ] + + * ath9k: quiet harmless ForceXPAon messages + - LP: #321474 + * [WATCHDOG] iTCO_wdt: fix SMI_EN regression 2 + - LP: #314050 + * pid: implement ns_of_pid + * mqueue: fix si_pid value in mqueue do_notify() + * powerpc/vsx: Fix VSX alignment handler for regs 32-63 + * sata_nv: give up hardreset on nf2 + * Fix Intel IOMMU write-buffer flushing + * SCSI: libiscsi: fix iscsi pool leak + * x86/cpa: make sure cpa is safe to call in lazy mmu mode + * sched: SCHED_OTHER vs SCHED_IDLE isolation + * x86, vm86: fix preemption bug + * Add support for VT6415 PCIE PATA IDE Host Controller + * ext2/xip: refuse to change xip flag during remount with busy inodes + * 3c505: do not set pcb->data.raw beyond its size + * Bluetooth: Fix TX error path in btsdio driver + * ext4: Add support for non-native signed/unsigned htree hash algorithms + * ext4: tone down ext4_da_writepages warnings + * ext4: Fix the delalloc writepages to allocate blocks at the right + offset. + * ext4: avoid ext4_error when mounting a fs with a single bg + * ext4: Widen type of ext4_sb_info.s_mb_maxs[] + * jbd2: Add barrier not supported test to journal_wait_on_commit_record + * ext4: Don't overwrite allocation_context ac_status + * ext4: Add blocks added during resize to bitmap + * ext4: Use EXT4_GROUP_INFO_NEED_INIT_BIT during resize + * ext4: cleanup mballoc header files + * ext4: don't use blocks freed but not yet committed in buddy cache init + * ext4: Fix race between read_block_bitmap() and mark_diskspace_used() + * ext4: Fix the race between read_inode_bitmap() and ext4_new_inode() + * ext4: Use new buffer_head flag to check uninit group bitmaps + initialization + * ext4: mark the blocks/inode bitmap beyond end of group as used + * ext4: Don't allow new groups to be added during block allocation + * ext4: Init the complete page while building buddy cache + * ext4: Fix s_dirty_blocks_counter if block allocation failed with + nodelalloc + * ext4: Add sanity checks for the superblock before mounting the + filesystem + * ext4: only use i_size_high for regular files + * ext4: Add sanity check to make_indexed_dir + * ext4: Initialize the new group descriptor when resizing the filesystem + * Fix longstanding "error: storage size of '__mod_dmi_device_table' isn't + known" + * Linux 2.6.28.7 + + -- Tim Gardner Thu, 19 Feb 2009 06:45:55 -0700 + +linux (2.6.28-8.24) jaunty; urgency=low + + [ Scott James Remnant ] + + * Change CPU_FREQ_DEFAULT_GOV_ONDEMAND to y + * SAUCE: Link acpi-cpufreq.o first + + [ Tim Gardner ] + + * Build in CPU Frequency scaling drivers + + -- Tim Gardner Wed, 18 Feb 2009 06:12:24 -0700 + +linux (2.6.28-8.23) jaunty; urgency=low + + [ Andy Whitcroft ] + + * include the kernel configuration in the sub-flavour images + - LP: #328859 + + [ Tim Gardner ] + + * Revert "SAUCE: (drop after 2.6.28) [eCryptfs] Regression in unencrypted + filename symlinks" in favor of upstream commit. 
+ * Fix compile issues with qc-usb + * SAUCE: (remove after 2.6.28) V4L/DVB (10216): saa7127: fix broken + S-Video with saa7129 + - LP: #329267 + + [ Upstream Kernel Changes ] + + * Subject:SAUCE: LP#193970 iwlagn: fix hw-rfkill while the interface is + down + - LP: #193970 + * x86, vmi: put a missing paravirt_release_pmd in pgd_dtor + * nbd: fix I/O hang on disconnected nbds + * mac80211: restrict to AP in outgoing interface heuristic + * w1: w1 temp calculation overflow fix + * zd1211rw: adding 0ace:0xa211 as a ZD1211 device + * zd1211rw: treat MAXIM_NEW_RF(0x08) as UW2453_RF(0x09) for TP-Link + WN322/422G + * parport: parport_serial, don't bind netmos ibm 0299 + * syscall define: fix uml compile bug + * kernel-doc: fix syscall wrapper processing + * Fix page writeback thinko, causing Berkeley DB slowdown + * write-back: fix nr_to_write counter + * writeback: fix break condition + * mm: rearrange exit_mmap() to unlock before arch_exit_mmap + * powerpc/fsl-booke: Fix mapping functions to use phys_addr_t + * lockd: fix regression in lockd's handling of blocked locks + * sctp: Fix crc32c calculations on big-endian arhes. + * sctp: Correctly start rtx timer on new packet transmissions. + * sctp: Properly timestamp outgoing data chunks for rtx purposes + * net: Fix frag_list handling in skb_seq_read + * net: Fix OOPS in skb_seq_read(). + * drivers/net/skfp: if !capable(CAP_NET_ADMIN): inverted logic + * ipv4: fix infinite retry loop in IP-Config + * net: Fix userland breakage wrt. linux/if_tunnel.h + * net: packet socket packet_lookup_frame fix + * packet: Avoid lock_sock in mmap handler + * sungem: Soft lockup in sungem on Netra AC200 when switching interface + up + * udp: Fix UDP short packet false positive + * udp: increments sk_drops in __udp_queue_rcv_skb() + * ipv6: Disallow rediculious flowlabel option sizes. + * ipv6: Copy cork options in ip6_append_data + * net: 4 bytes kernel memory disclosure in SO_BSDCOMPAT gsopt try #2 + * sky2: fix hard hang with netconsoling and iface going up + * tun: Add some missing TUN compat ioctl translations. + * tun: Fix unicast filter overflow + * virtio_net: Fix MAX_PACKET_LEN to support 802.1Q VLANs + * tcp: splice as many packets as possible at once + * tcp: Fix length tcp_splice_data_recv passes to skb_splice_bits. + * sparc: Enable syscall wrappers for 64-bit (CVE-2009-0029) + * sparc64: Annotate sparc64 specific syscalls with SYSCALL_DEFINEx() + * ALSA: hda - Add missing terminator in slave dig-out array + * ALSA: mtpav - Fix initial value for input hwport + * HID: adjust report descriptor fixup for MS 1028 receiver + * ide/libata: fix ata_id_is_cfa() (take 4) + * libata: fix EH device failure handling + * netfilter: fix tuple inversion for Node information request + * netfilter: xt_sctp: sctp chunk mapping doesn't work + * x86: microcode_amd: fix wrong handling of equivalent CPU id + * ide-cd: fix DMA for non bio-backed requests + * net: Fix data corruption when splicing from sockets. 
+ * Linux 2.6.28.6 + * eCryptfs: Regression in unencrypted filename symlinks + + -- Tim Gardner Mon, 16 Feb 2009 06:43:51 -0700 + +linux (2.6.28-8.22) jaunty; urgency=low + + [ Amit Kucheria ] + + * Remove perm-blacklist + + [ Andy Whitcroft ] + + * SAUCE: psmouse/synaptics: ensure we reset the device on resume + - LP: #317270 + + [ Tim Gardner ] + + * Add lpia to getabi script + * SAUCE: tracer for sreadahead + + -- Amit Kucheria Fri, 13 Feb 2009 15:23:21 +0200 + +linux (2.6.28-8.21) jaunty; urgency=low + + [ Andy Whitcroft ] + + * SAUCE: switch the Asus Pundit P1-AH2 to old acpi sleep ordering + - LP: #327267 + + [ Tim Gardner ] + + * Added LPIA arch support + * Added libdrm-dev as a 'Replaces' to linux-libc-dev + * SAUCE: LPIA support for 9202 HDA Sigmatel codec + * SAUCE: Add an X86_LPIA Kconfig option + * SAUCE: UHCI USB quirk for resume + * SAUCE: LPIA Reboot fix for Intel Crownbeach development boards + * SAUCE: LPIA Logical reset of USB port on resume + * Set CONFIG_WIRELESS_OLD_REGULATORY=n, added wireless-crda + as an install dependency. + + [ Upstream Kernel Changes ] + + * Revert "Revert "x86, early_ioremap: fix fencepost error"" + - LP: #312554 + * drm/i915: capture last_vblank count at IRQ uninstall time too + - LP: #320813 + * drm/i915: add get_vblank_counter function for GM45 + - LP: #320813 + * Staging: comedi: fix Kbuild + * Staging: meilhaus: fix Kbuild + * Staging: android: binder: fix arm build errors + * Staging: android: timed_gpio: Fix build to build on kernels after + 2.6.25. + * Staging: android: fix build error on 64bit boxes + * Staging: android: Add lowmemorykiller documentation. + * Staging: android: task_get_unused_fd_flags: fix the wrong usage of + tsk->signal + * staging: agnx: drivers/staging/agnx/agnx.h needs + * Staging: usbip: usbip_start_threads(): handle kernel_thread failure + * Staging: poch: fix verification of memory area + * Documentation: move DMA-mapping.txt to Doc/PCI/ + * sgi-xp: fix writing past the end of kzalloc()'d space + * do_wp_page: fix regression with execute in place + * wait: prevent exclusive waiter starvation + * shm: fix shmctl(SHM_INFO) lockup with !CONFIG_SHMEM + * revert "rlimit: permit setting RLIMIT_NOFILE to RLIM_INFINITY" + * prevent kprobes from catching spurious page faults + * sound: usb-audio: handle wMaxPacketSize for FIXED_ENDPOINT devices + * md: Ensure an md array never has too many devices. + * md: Fix a bug in linear.c causing which_dev() to return the wrong + device. + * ACPI: Enable bit 11 in _PDC to advertise hw coord + * ACPI: dock: Don't eval _STA on every show_docked sysfs read + * ieee1394: ohci1394: increase AT req. retries, fix ack_busy_X from + Panasonic camcorders and others + * firewire: ohci: increase AT req. retries, fix ack_busy_X from Panasonic + camcorders and others + * firewire: sbp2: fix DMA mapping leak on the failure path + * firewire: sbp2: add workarounds for 2nd and 3rd generation iPods + * ieee1394: sbp2: add workarounds for 2nd and 3rd generation iPods + * module: remove over-zealous check in __module_get() + * x86: APIC: enable workaround on AMD Fam10h CPUs + * eeepc-laptop: fix oops when changing backlight brightness during + eeepc-laptop init + * eeepc-laptop: Add support for extended hotkeys + * e1000: fix bug with shared interrupt during reset + * e1000: Fix PCI enable to honor the need_ioport flag + * agp/intel: Fix broken ® symbol in device name. 
+ * ALSA: hda - Add quirk for FSC Amilo Xi2550 + * ALSA: hda - Add missing COEF initialization for ALC887 + * ALSA: hda - Add missing initialization for ALC272 + * asus_acpi: Add R1F support + * panasonic-laptop: fix X[ ARRAY_SIZE(X) ] + * ACPI: Skip the first two elements in the _BCL package + * ACPI: proc_dir_entry 'video/VGA' already registered + * ACPI: disable ACPI cleanly when bad RSDP found + * ACPICA: Fix table entry truncation calculation + * PCI: properly clean up ASPM link state on device remove + * PCI: return error on failure to read PCI ROMs + * seq_file: move traverse so it can be used from seq_read + * seq_file: fix big-enough lseek() + read() + * serial: set correct baud_base for Oxford Semiconductor Ltd EXSYS + EX-41092 Dual 16950 Serial adapter + * Add support for '8-port RS-232 MIC-3620 from advantech' + * mm: fix error case in mlock downgrade reversion + * elf core dump: fix get_user use + * ACPI: video: Fix reversed brightness behavior on ThinkPad SL series + * ipw2200: fix scanning while associated + * XFS: set b_error from bio error in xfs_buf_bio_end_io + * Revert USB: option: add Pantech cards + * USB: option: New mobile broadband modems to be supported + * USB: new id for ti_usb_3410_5052 driver + * USB: two more usb ids for ti_usb_3410_5052 + * USB: usb-storage: add Pentax to the bad-vendor list + * sata_via: Add VT8261 support + * nbd: do not allow two clients at the same time + * sctp: Fix another socket race during accept/peeloff + * Linux 2.6.28.5 + + -- Tim Gardner Mon, 09 Feb 2009 16:11:28 -0700 + +linux (2.6.28-7.20) jaunty; urgency=low + + [ Tim Gardner ] + + * SAUCE: Input: atkbd - Samsung NC10 key repeat fix + + [ Upstream Kernel Changes ] + + * Manually revert "mlock: downgrade mmap sem while populating mlocked + regions" + * xen: make sysfs files behave as their names suggest + * sata_mv: fix 8-port timeouts on 508x/6081 chips + * m68knommu: set NO_DMA + * PCI/MSI: bugfix/utilize for msi_capability_init() + * x86: use early clobbers in usercopy*.c + * netfilter: ctnetlink: fix scheduling while atomic + * orinoco: move kmalloc(..., GFP_KERNEL) outside spinlock in + orinoco_ioctl_set_genie + * fbdev/atyfb: Fix DSP config on some PowerMacs & PowerBooks + * kmalloc: return NULL instead of link failure + * sata_nv: rename nv_nf2_hardreset() + * sata_nv: fix MCP5x reset + * sata_nv: ck804 has borked hardreset too + * Fix memory corruption in console selection + * Add enable_ms to jsm driver + * nfsd: only set file_lock.fl_lmops in nfsd4_lockt if a stateowner is + found + * nfsd: Ensure nfsv4 calls the underlying filesystem on LOCKT + * iwlwifi: fix rs_get_rate WARN_ON() + * p54: fix lm87 checksum endianness + * p54: fix p54_read_eeprom to cope with tx_hdr_len + * p54usb: rewriting rx/tx routines to make use of usb_anchor's facilities + * minstrel: fix warning if lowest supported rate index is not 0 + * PCI: irq and pci_ids patch for Intel Tigerpoint DeviceIDs + * cpuidle: Add decaying history logic to menu idle predictor + * ACPI: Avoid array address overflow when _CST MWAIT hint bits are set + * video: always update the brightness when poking "brightness" + * Newly inserted battery might differ from one just removed, so update of + battery info fields is required. 
+ * ACPI: Do not modify SCI_EN directly + * dlm: initialize file_lock struct in GETLK before copying conflicting + lock + * sata_mv: Fix chip type for Hightpoint RocketRaid 1740/1742 + * ACPICA: Allow multiple backslash prefix in namepaths + * Linux 2.6.28.4 + + -- Tim Gardner Sat, 07 Feb 2009 18:53:42 -0700 + +linux (2.6.28-7.19) jaunty; urgency=low + + * Fix missing modules FTBS + + -- Tim Gardner Thu, 05 Feb 2009 15:28:15 -0700 + +linux (2.6.28-7.18) jaunty; urgency=low + + [ Alok Kataria ] + + * SAUCE: (drop after 2.6.29) x86: add a synthetic TSC_RELIABLE feature + bit + - LP: #319945 + * SAUCE: (drop after 2.6.29) x86: add X86_FEATURE_HYPERVISOR feature bit + - LP: #319945 + * SAUCE: (drop after 2.6.29) x86: Hypervisor detection and get tsc_freq + from hypervisor + - LP: #319945 + * SAUCE: (drop after 2.6.29) x86: Add a synthetic TSC_RELIABLE feature + bit. + - LP: #319945 + * SAUCE: (drop after 2.6.29) x86: Skip verification by the watchdog for + TSC clocksource. + - LP: #319945 + * SAUCE: (drop after 2.6.29) x86: VMware: Fix vmware_get_tsc code + - LP: #319945 + * SAUCE: (drop after 2.6.29) x86: vmware: look for DMI string in the + product serial key + - LP: #319945 + + [ Andy Whitcroft ] + + * SAUCE: toshiba_acpi -- pull in current -dev version of driver + - LP: #269831 + * SAUCE: toshiba_acpi -- add acpi hotkey kernel thread + - LP: #269831 + * move toshiba laptops back from tlsup to toshiba_acpi + - LP: #269831 + + [ Aneesh Kumar K.V ] + + * SAUCE: (revert before 2.6.28.y update) ext4: Fix the delalloc + writepages to allocate blocks at the right offset. + * SAUCE: (revert before 2.6.28.y update) ext4: avoid ext4_error when + mounting a fs with a single bg + * SAUCE: (revert before 2.6.28.y update) ext4: Don't overwrite + allocation_context ac_status + * SAUCE: (revert before 2.6.28.y update) ext4: Add blocks added during + resize to bitmap + * SAUCE: (revert before 2.6.28.y update) ext4: Use + EXT4_GROUP_INFO_NEED_INIT_BIT during resize + * SAUCE: (revert before 2.6.28.y update) ext4: cleanup mballoc header + files + * SAUCE: (revert before 2.6.28.y update) ext4: don't use blocks freed but + not yet committed in buddy cache init + * SAUCE: (revert before 2.6.28.y update) ext4: Fix race between + read_block_bitmap() and mark_diskspace_used() + * SAUCE: (revert before 2.6.28.y update) ext4: Fix the race between + read_inode_bitmap() and ext4_new_inode() + * SAUCE: (revert before 2.6.28.y update) ext4: Use new buffer_head flag + to check uninit group bitmaps initialization + * SAUCE: (revert before 2.6.28.y update) ext4: mark the blocks/inode + bitmap beyond end of group as used + * SAUCE: (revert before 2.6.28.y update) ext4: Don't allow new groups to + be added during block allocation + * SAUCE: (revert before 2.6.28.y update) ext4: Init the complete page + while building buddy cache + * SAUCE: (revert before 2.6.28.y update) ext4: Fix s_dirty_blocks_counter + if block allocation failed with nodelalloc + + [ Hannes Eder ] + + * SAUCE: (drop after 2.6.29) x86: vmware - fix sparse warnings + - LP: #319945 + + [ Luke Yelavich ] + + * hid modules have hyphens instead of underscores in their names + + [ Mark Fasheh ] + + * SAUCE: (revert before 2.6.28.y update) jbd2: Add BH_JBDPrivateStart + + [ Theodore Ts'o ] + + * SAUCE: (revert before 2.6.28.y update) ext4: Add support for non-native + signed/unsigned htree hash algorithms + * SAUCE: (revert before 2.6.28.y update) ext4: tone down + ext4_da_writepages warnings + * SAUCE: (revert before 2.6.28.y update) jbd2: Add barrier not 
supported + test to journal_wait_on_commit_record + * SAUCE: (revert before 2.6.28.y update) ext4: Add sanity checks for the + superblock before mounting the filesystem + * SAUCE: (revert before 2.6.28.y update) ext4: only use i_size_high for + regular files + * SAUCE: (revert before 2.6.28.y update) ext4: Add sanity check to + make_indexed_dir + * SAUCE: (revert before 2.6.28.y update) jbd2: On a __journal_expect() + assertion failure printk "JBD2", not "EXT3-fs" + * SAUCE: (revert before 2.6.28.y update) ext4: Initialize the new group + descriptor when resizing the filesystem + + [ Tyler Hicks ] + + * SAUCE: (drop after 2.6.28) [eCryptfs] Regression in unencrypted + filename symlinks + - LP: #322532 + + [ Upstream Kernel Changes ] + + * Input: atkbd - broaden the Dell DMI signatures + - LP: #261721 + * ti_usb_3410_5052: support alternate firmware + * ath5k: fix mesh point operation + * mac80211: decrement ref count to netdev after launching mesh discovery + * inotify: clean up inotify_read and fix locking problems + * fuse: destroy bdi on umount + * fuse: fix missing fput on error + * fuse: fix NULL deref in fuse_file_alloc() + * x86, mm: fix pte_free() + * klist.c: bit 0 in pointer can't be used as flag + * sysfs: fix problems with binary files + * x86: fix page attribute corruption with cpa() + * USB: fix toggle mismatch in disable_endpoint paths + * sound: virtuoso: enable UART on Xonar HDAV1.3 + * USB: usbmon: Implement compat_ioctl + * USB: fix char-device disconnect handling + * USB: storage: add unusual devs entry + * alpha: nautilus - fix compile failure with gcc-4.3 + * alpha: fix vmalloc breakage + * resources: skip sanity check of busy resources + * rtl8187: Add termination packet to prevent stall + * it821x: Add ultra_mask quirk for Vortex86SX + * libata: pata_via: support VX855, future chips whose IDE controller use + 0x0571 + * serial_8250: support for Sealevel Systems Model 7803 COMM+8 + * drm: stash AGP include under the do-we-have-AGP ifdef + * Fix OOPS in mmap_region() when merging adjacent VM_LOCKED file segments + * bnx2x: Block nvram access when the device is inactive + * ext3: Add sanity check to make_indexed_dir + * rtl8187: Fix error in setting OFDM power settings for RTL8187L + * epoll: drop max_user_instances and rely only on max_user_watches + * gpiolib: fix request related issue + * sgi-xpc: Remove NULL pointer dereference. 
+ * sgi-xpc: ensure flags are updated before bte_copy + * include/linux: Add bsg.h to the Kernel exported headers + * ALSA: hda - Fix PCM reference NID for STAC/IDT analog outputs + * ALSA: hda - add another MacBook Pro 4, 1 subsystem ID + * ALSA: hda - Add quirk for HP DV6700 laptop + * crypto: authenc - Fix zero-length IV crash + * crypto: ccm - Fix handling of null assoc data + * x86, pat: fix reserve_memtype() for legacy 1MB range + * x86, pat: fix PTE corruption issue while mapping RAM using /dev/mem + * PCI hotplug: fix lock imbalance in pciehp + * dmaengine: fix dependency chaining + * NET: net_namespace, fix lock imbalance + * relay: fix lock imbalance in relay_late_setup_files + * Linux 2.6.28.3 + * ALSA: Enable SPDIF output on ALC655 + * ALSA: hda - Add ASUS V1Sn support + * ALSA: hda - support detecting HD Audio devices with PCI class code + * ALSA: hda: alc883 model for ASUS P5Q-EM boards + * ALSA: hda - Add quirk for MSI 7260 mobo + * ALSA: hda - Add quirk for Sony VAIO VGN-SR19XN + * ALSA: oxygen: add Claro halo support + * ALSA: hda - Add a new function to seek for a codec ID + * ALSA: patch_sigmatel: Add missing Gateway entries and autodetection + * ALSA: hda - More fixes on Gateway entries + * ALSA: hda - Add MCP67 HDMI support + * ALSA: hda - fix name for ALC1200 + * LSA: hda - Add HP Acacia detection + * ALSA: hda - Add quirk for HP 2230s + * ALSA: hda - Add quirk for Dell Inspiron Mini9 + * ALSA: hda - add support for Intel DX58SO board + * ALSA: hda - Fix silent headphone output on Panasonic CF-74 + * ALSA: USB quirk for Logitech Quickcam Pro 9000 name + * ALSA: hda - add quirks for some 82801H variants to use ALC883_MITAC + + [ Yasunori Goto ] + + * SAUCE: (revert before 2.6.28.y update) ext4: Widen type of + ext4_sb_info.s_mb_maxs[] + + -- Tim Gardner Mon, 02 Feb 2009 23:07:13 -0700 + +linux (2.6.28-6.17) jaunty; urgency=low + + [ Amit Kucheria ] + + * Updating configs: ARMEL/versatile + + -- Amit Kucheria Fri, 30 Jan 2009 13:36:59 +0200 + +linux (2.6.28-6.16) jaunty; urgency=low + + [ Luke Yelavich ] + + * Add hid quirks to input-modules udeb + + [ Tim Gardner ] + + * Revert "[arm] Fix kexec on ARM by properly calling the relocation + function". This patch was deemed 'bogus' by Russell King on the + ARM mailing list. + + [ Upstream Kernel Changes ] + + * PCI: keep ASPM link state consistent throughout PCIe hierarchy + * security: introduce missing kfree + * rt2x00: add USB ID for the Linksys WUSB200. 
+ * p54usb: Add USB ID for Thomson Speedtouch 121g + * lib/idr.c: use kmem_cache_zalloc() for the idr_layer cache + * sgi-xp: eliminate false detection of no heartbeat + * sched: fix update_min_vruntime + * IA64: Turn on CONFIG_HAVE_UNSTABLE_CLOCK + * sound: virtuoso: do not overwrite EEPROM on Xonar D2/D2X + * ALSA: hda - Add quirk for another HP dv5 + * ALSA: hda - Fix HP dv5 mic input + * ALSA: hda - Don't reset HP pinctl in patch_sigmatel.c + * ALSA: hda - make laptop-eapd model back for AD1986A + * drivers/net/irda/irda-usb.c: fix buffer overflow + * usb-storage: add last-sector hacks + * usb-storage: set CAPACITY_HEURISTICS flag for bad vendors + * pkt_sched: sch_htb: Fix deadlock in hrtimers triggered by HTB + * ipv6: Fix fib6_dump_table walker leak + * sctp: Avoid memory overflow while FWD-TSN chunk is received with bad + stream ID + * pkt_sched: cls_u32: Fix locking in u32_change() + * r6040: fix wrong logic in mdio code + * r6040: save and restore MIER correctly in the interrupt routine + * r6040: bump release number to 0.19 + * tcp: don't mask EOF and socket errors on nonblocking splice receive + * p54usb: fix traffic stalls / packet drop + * netfilter: x_tables: fix match/target revision lookup + * netfilter: ebtables: fix inversion in match code + * netfilter: nf_conntrack: fix ICMP/ICMPv6 timeout sysctls on big-endian + * dell_rbu: use scnprintf() instead of less secure sprintf() + * powerpc: is_hugepage_only_range() must account for both 4kB and 64kB + slices + * hwmon: (abituguru3) Fix CONFIG_DMI=n fallback to probe + * mm: write_cache_pages cyclic fix + * mm: write_cache_pages early loop termination + * mm: write_cache_pages writepage error fix + * mm: write_cache_pages integrity fix + * mm: write_cache_pages cleanups + * mm: write_cache_pages optimise page cleaning + * mm: write_cache_pages terminate quickly + * mm: write_cache_pages more terminate quickly + * mm: do_sync_mapping_range integrity fix + * mm: direct IO starvation improvement + * fs: remove WB_SYNC_HOLD + * fs: sync_sb_inodes fix + * fs: sys_sync fix + * Linux 2.6.28.2 + + -- Tim Gardner Sun, 25 Jan 2009 13:36:16 -0700 + +linux (2.6.28-5.15) jaunty; urgency=low + + [ Tim Gardner ] + + * Revert "Enabled CONFIG_PID_NS=y for i386/amd64" + Somehow this commit also reverted the 7 prior commits (which is bad). + * Enabled CONFIG_PID_NS=y for i386/amd64 (version 2) + + -- Tim Gardner Thu, 22 Jan 2009 13:48:34 -0700 + +linux (2.6.28-5.14) jaunty; urgency=low + + [ Ben Collins ] + + * lirc_gpio: Forward ported to current kernel (jaunty) + * configs: Enable LIRC_GPIO on 64-bit/32-bit x86 + - LP: #298791 + + [ Jeff Layton ] + + * SAUCE: cifs: make sure we allocate enough storage for socket address + - LP: #318565 + + [ Tim Gardner ] + + * check-abi: Return success when ABI skip is requested and no ABI files exist. + This ought to fix the armel FTBS. + + -- Tim Gardner Thu, 22 Jan 2009 06:42:49 -0700 + +linux (2.6.28-5.13) jaunty; urgency=low + + [ Andy Whitcroft ] + + * Revert "SAUCE: don't use buggy _BCL/_BCM/_BQC for backlight control" + + [ Tim Gardner ] + + * Fix udeb generation breakage caused by the previous armel versatile + flavour config update. 
+ + -- Tim Gardner Wed, 21 Jan 2009 12:38:35 -0700 + +linux (2.6.28-5.12) jaunty; urgency=low + + [ Ante ] + + * Update drbd to 8.3.0 + + [ Dave Airlie ] + + * i915/drm: provide compat defines for userspace for certain struct + + [ Eric Anholt ] + + * drm/i915: Don't double-unpin buffers if we take a signal in + * drm/i915: Don't complain when interrupted while pinning in execbuffers. + * drm/i915: Don't allow objects to get bound while VT switched. + + [ Jani Monoses ] + + * Fix webcam having USB ID 0ac8:303b + - LP: #292086 + + [ Jesse Barnes ] + + * drm/i915: set vblank enabled flag correctly across IRQ + * drm/i915: don't enable vblanks on disabled pipes + + [ Michael Casadevall ] + + * [arm] Fix kexec on ARM by properly calling the relocation function + + [ Tim Gardner ] + + * Enabled CONFIG_PID_NS=y for i386/amd64 + * SAUCE: Increase ATA_TMOUT_PMP_SRST_WAIT to 5 seconds. + - LP: #318978 + * Update armel versatile config + - LP: #314789 + * Enabled CONFIG_RT2860=m for i386/amd64 + * Enabled CONFIG_RT2870=m for i386/amd64 + + [ Upstream Kernel Changes ] + + * Input: atkbd - add keyboard quirk for HP Pavilion ZV6100 laptop + - LP: #291878 + * ALSA: hda - Add quirk for another HP dv7 + * ALSA: hda - Add quirk for HP6730B laptop + * ALSA: caiaq - Fix Oops with MIDI + * ALSA: hda - Fix typos for AD1882 codecs + * x86: fix intel x86_64 llc_shared_map/cpu_llc_id anomolies + * x86: default to SWIOTLB=y on x86_64 + * CIFS: make sure that DFS pathnames are properly formed + * ring-buffer: prevent false positive warning + * ring-buffer: fix dangling commit race + * iwlwifi: use GFP_KERNEL to allocate Rx SKB memory + * tx493[89]ide: Fix length for __ide_flush_dcache_range + * tx4939ide: Do not use zero count PRD entry + * SCSI: eata: fix the data buffer accessors conversion regression + * USB: emi26: fix oops on load + * x86, UV: remove erroneous BAU initialization + * x86: fix incorrect __read_mostly on _boot_cpu_pda + * vmalloc.c: fix flushing in vmap_page_range() + * fs: symlink write_begin allocation context fix + * cgroups: fix a race between cgroup_clone and umount + * dm raid1: fix error count + * dm log: fix dm_io_client leak on error paths + * minix: fix add link's wrong position calculation + * md: fix bitmap-on-external-file bug. 
+ * sched_clock: prevent scd->clock from moving backwards, take #2 + * devices cgroup: allow mkfifo + * SCSI: aha152x_cs: Fix regression that keeps driver from using shared + interrupts + * ioat: fix self test for multi-channel case + * USB: isp1760: use a specific PLX bridge instead of any bdridge + * USB: isp1760: Fix probe in PCI glue code + * USB: unusual_devs.h additions for Pentax K10D + * inotify: fix type errors in interfaces + * Move compat system call declarations to compat header file + * Convert all system calls to return a long + * Rename old_readdir to sys_old_readdir + * Remove __attribute__((weak)) from sys_pipe/sys_pipe2 + * Make sys_pselect7 static + * Make sys_syslog a conditional system call + * System call wrapper infrastructure + * powerpc: Enable syscall wrappers for 64-bit + * s390: enable system call wrappers + * System call wrapper special cases + * System call wrappers part 01 + * System call wrappers part 02 + * System call wrappers part 03 + * System call wrappers part 04 + * System call wrappers part 05 + * System call wrappers part 06 + * System call wrappers part 07 + * System call wrappers part 08 + * System call wrappers part 09 + * System call wrappers part 10 + * System call wrappers part 11 + * System call wrappers part 12 + * System call wrappers part 13 + * System call wrappers part 14 + * System call wrappers part 15 + * System call wrappers part 16 + * System call wrappers part 17 + * System call wrappers part 18 + * System call wrappers part 19 + * System call wrappers part 20 + * System call wrappers part 21 + * System call wrappers part 22 + * System call wrappers part 23 + * System call wrappers part 24 + * System call wrappers part 25 + * System call wrappers part 26 + * System call wrappers part 27 + * System call wrappers part 28 + * System call wrappers part 29 + * System call wrappers part 30 + * System call wrappers part 31 + * System call wrappers part 32 + * System call wrappers part 33 + * s390 specific system call wrappers + * x86: fix RIP printout in early_idt_handler + * Fix timeouts in sys_pselect7 + * USB: another unusual_devs entry for another bad Argosy storage device + * USB: storage: extend unusual range for 067b:3507 + * USB: storage: recognizing and enabling Nokia 5200 cell phoes + * HID: fix error condition propagation in hid-sony driver + * fix switch_names() breakage in short-to-short case + * nfs: remove redundant tests on reading new pages + * eCryptfs: check readlink result was not an error before using it + * mvsas: increase port type detection delay to suit Seagate's 10k6 drive ST3450856SS 0003 + * x86: avoid theoretical vmalloc fault loop + * ath9k: enable RXing of beacons on STA/IBSS + * mm lockless pagecache barrier fix + * powerpc: Disable Collaborative Memory Manager for kdump + * ibmvfc: Delay NPIV login retry and add retries + * ibmvfc: Improve async event handling + * getrusage: RUSAGE_THREAD should return ru_utime and ru_stime + * ath5k: ignore the return value of ath5k_hw_noise_floor_calibration + * mm: fix assertion + * XFS: truncate readdir offsets to signed 32 bit values + * Linux 2.6.28.1 + * eCryptfs: Filename Encryption: Tag 70 packets + * eCryptfs: Filename Encryption: Header updates + * eCryptfs: Filename Encryption: Encoding and encryption functions + * eCryptfs: Filename Encryption: filldir, lookup, and readlink + * eCryptfs: Filename Encryption: mount option + * eCryptfs: Replace %Z with %z + * eCryptfs: Fix data types (int/size_t) + * eCryptfs: kerneldoc for ecryptfs_parse_tag_70_packet() + * 
eCryptfs: Clean up ecryptfs_decode_from_filename() + * fs/ecryptfs/inode.c: cleanup kerneldoc + * staging-p80211: Kill directly reference of netdev->priv + * staging-slicoss: Kill directly reference of netdev->priv + * staging-winbond: Kill directly reference of netdev->priv + * Staging: go7007: fixes due to video_usercopy api change + * Staging: go7007: fixes due v4l2_file_operations api change + * staging: correct dubious use of !x & y + * Staging: w35und: make wb35_probe() and wb35_disconnect() funtions static + * Staging: w35und: remove unused wb35_open() and wb35_close() functions + * Staging: w35und: use msleep() and udelay() + * Staging: w35und: remove the no-op pa_stall_execution macro + * Staging: w35und: purb typedef removal + * Staging: w35und: reg queue struct typedef removal + * Staging: w35und: wb35reg struct typedef removal + * Staging: w35und: padapter struct typedef removal + * Staging: w35und: merge wblinux struct to adapter + * Staging: w35und: wb35_probe() cleanup + * Staging: w35und: remove usb_submit_urb wrapper function + * Staging: w35und: remove usb_alloc_urb wrapper function + * w35und: remove dead code from wbusb_f.h + * Staging: w35und: remove true/false boolean macros + * Staging: w35und: OS_MEMORY_ALLOC wrapper removal + * Staging: w35und: usb_put_dev() is missing from wb35_disconnect() + * Staging: w35und: remove macro magic from MLME_GetNextPacket() + * Staging: w35und: plug memory leak in wbsoft_tx() + * Staging: w35und: move supported band initialization out of wb35_probe() + * Staging: w35und: remove timer wrappers + * Staging: w35und: remove atomic op wrappers + * Staging: w35und: remove memcpy/memcmp wrappers + * Staging: w35und: remove abs() and BIT() macros + * Staging: w35und: remove unused macros from common.h + * Staging: w35und: remove unused link status code + * Staging: w35und: #include cleanup + * Staging: w35und: remove some dead code + * Staging: w35und: move source files to one directory + * Staging: w35und: move struct wbsoft_priv to core.h and use it + * Staging: w35und: remove ->adapter from struct _HW_DATA_T + * Staging: w35und: clean up adapter.h a bit + * Staging: w35und: merge struct wb35_adapter to struct wbsoft_priv + * Staging: w35und: remove global struct ieee80211_hw + * Staging: w35und: inline DRIVER_AUTHOR and DRIVER_DESC macros + * Staging: w35und: clean up wblinux.c a bit + * Staging: w35und: remove unused ->ShutDowned member from struct + LOCAL_PARA + * Staging: w35und: move global wbsoft_enabled to struct wbsoft_priv + * Staging: w35und: move packet_came() to wb35rx.c + * Staging: w35und: remove ->skb_array from struct wbsoft_priv + * Staging: w35und: remove ->shutdown from struct wbsoft_priv + * Staging: w35und: make functions local to mds.c static + * Staging: w35und: make functions local to mlmetxrx.c static + * Staging: w35und: remove dead code from mto.c + * Staging: w35und: make functions local to wb35rx.c static + * Staging: w35und: make functions local to wb35tx.c static + * Staging: w35und: remove dead code from wbhal.c + * Staging: w35und: remove rxisr.c as dead code + * Staging: w35und: fix Kconfig + * Staging: w35und: fix config build warnings + * Staging: wlan-ng: Remove PCI/PLX/PCMCIA files. + * Staging: wlan-ng: Update Help text to mention prism3 devices. + * Staging: wlan-ng: Delete PCI/PLX/PCMCIA-specific code. + * Staging: wlan-ng: Make wlan-ng use WEXT mode by default. + * Staging: wlan-ng: Eliminate more <2.6 kernel support. + * Staging: wlan-ng: Eliminate all backwards-compatibility for <2.6.13 kernels. 
+ * Staging: wlan-ng: Eliminate a boatload of tertiaryAP-only code. + * Staging: wlan-ng: Remove AP-only code from MLME functions. + * Staging: wlan-ng: Get rid of the MTU tests in the rx conversion path. + * Staging: wlan-ng: Eliminate one more rx mtu test. + * Staging: wlan-ng: Eliminate local 'version.h' + * Staging: wlan-ng: Eliminate usage of procfs. + * Staging: wlan-ng: Use standard kernel integer (u32/s32/etc) types. + * Staging: wlan-ng: Eliminate all backwards-compatible kernel code. + * Staging: wlan-ng: Wireless Extension support is mandatory. + * Staging: wlan-ng: use WIRELESS_EXT, not CONFIG_WIRELESS_EXT + * Staging: wlan-ng: Delete a large pile of now-unused code. + * Staging: wlan-ng: Delete a pile of unused mibs. And fix WEXT SET_TXPOWER. + * Staging: wlan-ng: Consolidate wlan-ng into a single module. + * Staging: wlan-ng: Purge all MIBs not used internally. + * Staging: wlan-ng: p80211netdev.c fix netdev alloc to prevent oops on device start + * Staging: wlan-ng: prism2_usb.c always enable the card in probe_usb + * Staging: wlan-ng: hfa384x_usb.c use newest version of 384x_drvr_start + * Staging: wlan-ng: p80211wext.c add latest changes & remove extra nulls from wext_handlers + * Staging: wlan-ng: p80211wext don't set default key id twice + * Staging: wlan-ng: hfa384x_usbin_callback: check for hardware removed + * Staging: wlan-ng: p80211conv.c copy code from wlan-ng-devel branch to not drop packets + * Staging: wlan-ng: remove unused #include + * Staging: wlan-ng: p80211wext.c: use ARRAY_SIZE + * Staging: wlan-ng: fix compiler warnings + * Staging: wlan-ng: skb_p80211_to_ether() - payload_length is unsigned, check before subtraction + * Staging: at76_usb: update drivers/staging/at76_usb w/ mac80211 port + * Staging: at76_usb: fix build breakage + * Staging: at76_usb: remove compiler warnings + * Staging: at76_usb: fix up all remaining checkpatch.pl warnings + * Staging: at76_usb: cleanup dma on stack issues + * Staging: poch: Block size bug fix + * Staging: poch: Update TODO list + * Staging: poch: Correct pages from bytes. 
+ * Staging: poch: minor fixes + * Staging: poch: Fix build warnings + * Staging: poch: Rx control register init + * Staging: poch: Fix user space protocol syncing + * Staging: poch: Fine grained locking + * Staging: sxg: remove typedefs + * Staging: sxg: break the build in a cleaner way when !x86 + * Staging: sxg: update README + * staging: struct device - replace bus_id with dev_name(), dev_set_name() + * Staging: echo: remove typedefs + * Staging: echo: Lindent drivers/staging/echo + * Staging: go7007: saa7134 updates + * Staging: go7007: add sensoray 2250/2251 support + * Staging: go7007: Convert driver to use video_ioctl2 + * Staging: go7007: annotate code pointers + * Staging: go7007: fix minor build warnings + * Staging: go7007: small cleanup + * Staging: go7007: add some more v4l2 ioctls + * Staging: et131x: Cleanup et131x_debug.h defines + * Staging: et131x: fix build failure + * Staging: et131x: remove unused variable in et1310_tx.c + * Staging: usbip: cleanup kerneldoc + * Staging: slicoss: use kzalloc + * Staging: slicoss: use correct type for memory allcations + * Staging: slicoss: use request_firmware + * Staging: add agnx wireless driver + * Staging: agnx: fix build errors due to ssid removal + * Staging: agnx: fix build errors due to rate control API changes + * Staging: agnx: fix build warnings + * Staging: add otus Atheros wireless network driver + * Staging: otus: fix netdev->priv usage + * Staging: otus: fix name clash + * Staging: otus: fix urb callback function type + * Staging: otus: remove dependence on kernel version + * Staging: add rt2860 wireless driver + * Staging: rt2860: disable root hack for reading files + * Staging: rt2860: fix up netdev->priv usage + * Staging: rt2860: use standard bit-reverse function + * Staging: rt2860: Fix minor compiler warnings + * Staging: rt2860: enable WPA_SUPPLICANT support + * Staging: Add ServerEngines benet 10Gb ethernet driver + * Staging: benet: fix netif api breakage + * Staging: benet: fix up netdev->priv change + * Staging: benet: build is broken unless CONFIG_NETPOLL is enabled + * Staging: benet: patch to remove subdirectories + * Staging: benet: fix build errors when CONFIG_NETPOLL is off + * Staging: benet: fix build error. 
+ * Staging: benet: patch to use offsetof() instead of AMAP_BYTE_OFFSET() + * Staging: benet: fix problems reported by checkpatch + * Staging: benet: cleanup a check while posting rx buffers + * Staging: add comedi core + * Staging: comedi: fix up a lot of checkpatch.pl warnings + * Staging: comedi: fix checkpatch.pl errors in comedi_fops.c + * Staging: comedi: fix build error in comedilib.h + * Staging: comedi: add kcomedilib to the tree + * Staging: comedi: set up infrastructure for individual drivers + * Staging: comedi: add local copy of interrupt.h + * Staging: comedi: add pci and usb wrapper header files + * Staging: comedi: comedi driver common function module + * Staging: comedi: add mite comedi pci driver + * Staging: comedi: add usb usbdux driver + * Staging: comedi: add usb usbduxfast driver + * Staging: comedi: add usb dt9812 driver + * Staging: comedi: add comedi_bond driver + * Staging: comedi: add comedi_test driver + * Staging: comedi: add comedi_parport driver + * Staging: comedi: dt9812: fix up a lot of coding style issues + * Staging: comedi: dt9812: remove dt9812.h + * Staging: comedi: dt9812: remove typedefs + * Staging: comedi: dt9812: fix sparse warnings + * Staging: comedi: usbdux: remove kernel version checks + * Staging: comedi: usbdux: code style cleanups + * Staging: comedi: usbdux: remove // comments + * Staging: comedi: usbdux: fix up printk calls + * Staging: comedi: usbdux: remove checkpatch.pl warnings + * Staging: comedi: usbdux: remove typedef + * Staging: comedi: usbdux: remove comedi usb wrappers + * Staging: comedi: usbduxfast: remove comedi usb wrappers + * Staging: comedi: dt9812: remove #ifdef that is not needed + * Staging: comedi: remove usb wrappers + * Staging: comedi: remove PCI wrappers + * Staging: comedi: add icp_multi driver + * Staging: comedi: add me4000 driver + * Staging: comedi: fix checkpatch.pl issues in comedi_bond.c + * Staging: comedi: fix checkpatch.pl issues in comedi_fc.c + * Staging: comedi: remove typedefs from comedi_bond.c + * Staging: comedi: fix sparse issues in comedi_bond.c + * Staging: comedi: fix checkpatch.pl issues in comedi_test.c + * Staging: comedi: fix sparse issues in comedi_test.c + * Staging: comedi: remove typedefs from comedi_test.c + * Staging: comedi: fix comedi_parport.c checkpatch.pl issues. + * Staging: comedi: fix comedi_fc.h checkpatch.pl issues. + * Staging: comedi: fix comedi_pci.h checkpatch.pl issues. 
+ * Staging: comedi: comedi_pci.h: remove unneeded wrapper + * Staging: comedi: comedi_pci.h: remove comedi_pci_enable_no_regions + * Staging: comedi: comedi_pci.h: remove comedi_pci_disable_no_regions + * Staging: comedi: add s626 driver + * Staging: comedi: add rtd520 driver + * Staging: comedi: add me_daq driver + * Staging: comedi: me_daq: fix checkpatch.pl issues + * Staging: comedi: me_daq: remove typedefs + * Staging: comedi: me_daq: fix sparse issues + * Staging: comedi: fix checkpatch.pl warning in interrupt.h + * Staging: comedi: fix build if CONFIG_PROC_FS is not set + * Staging: add asus_oled driver + * Staging: asus_oled: fix build dependancy + * Staging: Add the Meilhaus ME-IDS driver package + * Staging: meilhaus: fix __symbol_get problems + * Staging: add lcd-panel driver + * Staging: panel: major checkpatch cleanup + * Staging: panel: remove ifdefs and code for pre-2.6 kernels + * Staging: panel: remove support for smartcards + * Staging: add Driver for Altera PCI Express Chaining DMA reference design + * Staging: add rtl8187se driver + * Staging: rtl8187se: remove unneeded files + * Staging: rtl8187se: make the built module be the proper name + * Staging: rtl8187se: remove duplicate pci ids + * Staging: me4000: switch to list_for_each*() + * Staging: usbip: switch to list_for_each_entry() + * Staging: add princeton instruments usb camera driver + * Staging: add mimio xi driver + * Staging: add rt2870 wireless driver + * Staging: rt2870: disable root hack for reading files + * Staging: rt2870: fix up netdev->priv usage + * Staging: add frontier tranzport and alphatrack drivers + * Staging: frontier: remove unused alphatrack_sysfs.c file + * Staging: frontier: fix compiler warnings + * Staging: add epl stack + * Staging: epl: run Lindent on all kernel/*.h files + * Staging: epl: run Lindent on all user/*.h files + * Staging: epl: run Lindent on *.h files + * Staging: epl: run Lindent on *.c files + * Staging: epl: hr timers all run in hard irq context now + * Staging: epl: fix netdev->priv b0rkage + * Staging: add android framework + * Staging: android: add binder driver + * Staging: android: binder: Fix gcc warnings about improper format specifiers for size_t in printk + * staging: android: binder: Fix use of euid + * Staging: android: add logging driver + * Staging: android: add ram_console driver + * Staging: android: add timed_gpio driver + * Staging: android: timed_gpio: Rename android_timed_gpio to timed_gpio + * Staging: android: remove dummy android.c driver + * Staging: android: add lowmemorykiller driver + * Staging: android: binder: fix build errors + * staging: __FUNCTION__ is gcc-specific, use __func__ + * V4L/DVB (10176a): Switch remaining clear_user_page users over to + clear_user_highpage + + [ Zhenyu Wang ] + + * agp/intel: add support for G41 chipset + + -- Tim Gardner Sun, 18 Jan 2009 20:22:54 -0700 + +linux (2.6.28-4.11) jaunty; urgency=low + + [ Mario Limonciello ] + + * SAUCE: Enable HDMI audio codec on Studio XPS 1340 + - LP: #309508 + + [ Tim Gardner ] + + * Fix armel d-i FTBSs + + [ Upstream Kernel Changes ] + + * USB: re-enable interface after driver unbinds + + -- Tim Gardner Tue, 13 Jan 2009 16:33:08 -0700 + +linux (2.6.28-4.10) jaunty; urgency=low + + [ Andy Whitcroft ] + + * update kernel bootloader recommends: to prefer grub + - LP: #314004 + * SAUCE: don't use buggy _BCL/_BCM/_BQC for backlight control + - LP: #311716 + * SAUCE: test-suspend -- add the suspend test scripts + - LP: #316419 + + [ Colin Watson ] + + * Enable udebs for armel + + 
[ Tim Gardner ] + + * SAUCE: Dell laptop digital mic does not work, PCI 1028:0271 + - LP: #309508 + * Enable CIFS_XATTR=y and CONFIG_CIFS_POSIX=y + - LP: #220658 + + -- Tim Gardner Thu, 08 Jan 2009 10:38:22 -0700 + +linux (2.6.28-4.9) jaunty; urgency=low + + [ Tim Gardner ] + + * Restore DM_CRYPT, AES, ECB, and CBC as modules. This fixes + some installer issues with encrypted /home and Private directories. + * Take one more stab at building armel without module or ABI errors. + + -- Tim Gardner Tue, 06 Jan 2009 08:38:23 -0700 + +linux (2.6.28-4.8) jaunty; urgency=low + + * Fix i386/amd64 FTBS by ignoring all module and ABI changes, + not something you would normally do, but I'm sure the ABI + has not changed. This will probably also allow the ARM builds to complete. + + -- Tim Gardner Mon, 05 Jan 2009 14:42:58 -0700 + +linux (2.6.28-4.7) jaunty; urgency=low + + [ Tim Gardner ] + + * Enable CONFIG_ATH5K=m for i386/amd64 + - LP: #306719 + * Build all i386/amd64 AGP/DRM components as modules. + - LP: #312721 + * git commands are now installed outside the default $PATH + Use 'git CMD' instead of 'git-CMD'. + * Build in most PATA/SATA drivers. This should allow most i386/amd64 systems to boot + without an initramfs, though some support work is still required in initramfs-tools + and grub. + - LP: #311730 + + -- Tim Gardner Fri, 02 Jan 2009 07:33:09 -0700 + +linux (2.6.28-4.6) jaunty; urgency=low + + [ Tim Gardner ] + + * Enable CONFIG_X86_E_POWERSAVER=m for i386 generic + - LP: #237405 + * Build i386 AGP drivers as modules + - LP: #312721 + * Build i386 DRM as a module + - LP: #312721 + + [ Upstream Kernel Changes ] + + * drm/i915: Add missing userland definitions for gem init/execbuffer. + - LP: #308387 + + -- Tim Gardner Mon, 29 Dec 2008 09:16:47 -0700 + +linux (2.6.28-4.5) jaunty; urgency=low + + [ Andy Whitcroft ] + + * clean up module dependancy information on package removal/purge + - LP: #300773 + + [ Tim Gardner ] + + * Update iscsitarget to 0.4.17 + * Build in ext{234} + * Build in Crypto modules AES, CBC, ECB + * Build in ACPI AC,BATTERY,BUTTON,FAN,PCI_SLOT,PROCESSOR,SBS,THERMAL,WMI + * Build in AGP intel,via,sis,ali,amd,amd64,efficeon,nvidia,sworks + * Build in ata,dev_dm,dev_loop,dev_md,dev_sd,dev_sr + * Build in BT l2cap,rfcomm,sco + * Reduce CONFIG_LEGACY_PTY_COUNT to 0 + * Build in CDROM_PKTCDVD and CHR_DEV_SG + * Build in CPU_FREQ + GOV_CONSERVATIVE,GOV_ONDEMAND,GOV_POWERSAVE,GOV_USERSPACE,STAT,TABLE + * Build in DM CRYPT,MIRROR,MULTIPATH,SNAPSHOT + * Build in DRM + * Build in HID + * Build in HOTPLUG PCI,PCIE + * Build in I2C + * Build in IEEE1394 OHCI1394 + * Build in INPUT EVDEV + * Build in IPV6 + * Build in MMC + * Build in PACKET + * Enable both IEEE1394 (Firewire) stacks as modules + - LP: #276463 + * Disable SUNRPC_REGISTER_V4 + - LP: #306016 + * Enable dm-raid4-5 + - LP: #309378 + * Build in PPP + * Build in RFKILL + * Build in USB SERIAL + + [ Upstream Kernel Changes ] + + * Rebased to v2.6.28 + + -- Tim Gardner Thu, 18 Dec 2008 21:18:44 -0700 + +linux (2.6.28-3.4) jaunty; urgency=low + + [ Tim Gardner ] + + * Build ecryptfs into the kernel + - LP: #302870 + * Deprecated gnbd + + [ Upstream Kernel Changes ] + + * Rebased to v2.6.28-rc8 + + -- Tim Gardner Wed, 10 Dec 2008 22:45:13 -0700 + +linux (2.6.28-2.3) jaunty; urgency=low + + [ Andy Whitcroft ] + + * update the templates so that we have spaces following the title line + + [ Tim Gardner ] + + * Add upload number to kernel version signature. 
This has the side effect + of renaming kernel packages back to the original way, e.g., without '-ub' + in the name. + + -- Tim Gardner Thu, 04 Dec 2008 12:18:31 -0700 + +linux (2.6.28-2.2) jaunty; urgency=low + + [ Andy Whitcroft ] + + * Revert "SAUCE: (no-up) version: Implement version_signature proc file." + * SAUCE: (no-up) version: Implement version_signature proc file. + * SAUCE: serial: RS485 ioctl structure uses __u32 include linux/types.h + - LP: #303711 + + [ Tim Gardner ] + + * UBUNTU: Removed CONFIG_DRM_VIA_CHROME9 since it is upstream. + * UBUNTU: Removed ubuntu/via_chrome9 + + [ Upstream Kernel Changes ] + + * Rebased to v2.6.28-rc7 + + -- Tim Gardner Tue, 02 Dec 2008 07:33:32 -0700 + +linux (2.6.28-1.1) jaunty; urgency=low + + [ Amit Kucheria ] + + * SAUCE: make fc transport removal of target configurable + * SAUCE: pm: Config option to disable handling of console during + suspend/resume + * SAUCE: Adds support for COMPAL JHL90 webcam + * Map armel to arm to all editconfigs to work correctly + * Add armel to getabis for completeness sake + * Add -ub to our versioning to allow kerneloops.org to identify us + + [ Andy Whitcroft ] + + * Fix Vcs-Git path for the kernel repository. + - LP: #296915 + + [ Ben Collins ] + + * SAUCE: Lower warning level of some PCI messages + - LP: #159241 + * SAUCE: input/mouse/alps: Do not call psmouse_reset() for alps + * SAUCE: tulip: Let dmfe handle davicom on non-sparc + * SAUCE: tulip: Define ULI PCI ID's + * SAUCE: (no-up) version: Implement version_signature proc file. + * SAUCE: (no-up) connector.h: Add idx/val for drbd + * SAUCE: (no-up) swap: Add notify_swap_entry_free callback for compcache + * SAUCE: drivers: Remove some duplicate device entries in various modules + * SAUCE: (no-up) [AppArmor] merge with upstream subversion r1291 + * SAUCE: (no-up) Enable ubuntu extra subdirectory + * SAUCE: (no-up) ACPI: initramfs DSDT override support + * ubuntu: Add drbd module + * ubuntu: Add iscsitarget module + * ubuntu: Add BOM for iscsitarget + * ubuntu: Add squashfs driver + * SAUCE: (no-up) Check for squashfs superblock in initramfs mounting. + * ubuntu: Add aufs module + * ubuntu: Added atl2 driver + * ubuntu: Added et131x driver + * ubuntu: Add dm-raid4-5 driver + * ubuntu: Add ndiswrapper driver + * ubuntu: Added ram backed compressed swap module (compcache) + * ubuntu: Add misc drivers from hardy lum + * ubuntu: Add heci driver 3.2.0.24 + * ubuntu: Add ov511 and bt-sco drivers + * ubuntu: Add acx, prism2_usb wireless drivers + * ubuntu: Add at76 driver to build + * ubuntu: Add fsam7400 sw kill switch driver + * ubuntu: Added qc-usb driver + * ubuntu: e1000e: Upgraded module to 0.4.1.7 + * ubuntu: Added rfkill drivers + * ubuntu: VIA - Add VIA DRM Chrome9 3D engine + * ubuntu: unionfs: Added v1.4 module from hardy + * ubuntu: Add LIRC driver + * ubuntu: Add GFS driver + * ubuntu: New tlsup driver for toshiba laptops + * SAUCE: (no-up) Export lookup_has for aufs + * SAUCE: (no-up) Modularize vesafb + * ubuntu: Config files + * Disable some modules that need porting to 2.6.28 + * ubuntu: Fixup headers creation to include arch/*/include + * ubuntu/module-check: Ignore comment lines + + [ Chuck Short ] + + * SAUCE: ata: blacklist FUJITSU MHW2160BH PL + + [ cking ] + + * SAUCE: Enable speedstep for sonoma processors. + + [ Colin Ian King ] + + * ubuntu: Add dm-loop + * SAUCE: cx88: Support Leadtek WinFast DTV2000 H version J. 
+ * SAUCE: fix kernel oops in VirtualBox during paravirt patching + * SAUCE: qc-usb: Enable Logitech QuickCam Messenger + * SAUCE: appleir: Enable driver for new MacBook Pro + + [ Colin Watson ] + + * Enable configfs, fuse, jfs, reiserfs, and xfs for armel + * Extend debian/d-i/ modules handling to make armel easier to support + * Create udebs for armel + + [ Fabio M. Di Nitto ] + + * ubuntu: update GFS Cluster File System + + [ Kees Cook ] + + * SAUCE: AppArmor: update to upstream subversion r1302 + + [ Leann Ogasawara ] + + * Add automatic model setting for Samsung Q45 + * Add Dell Dimension 9200 reboot quirk + + [ Mackenzie Morgan ] + + * SAUCE: Add quirk for ASUS Z37E to make sound audible after resume + + [ Matthew Garrett ] + + * SAUCE: hostap: send events on data interface as well as master + interface + + [ Michael Frey (Senior Manager, MID ] + + * SAUCE: Send HCI_RESET for Broadcomm 2046 + + [ Michael Haas ] + + * add proper aufs source tree from 20080922 + * Fix AUFS compilation in vfsub.c + * Add splice-2.6.23.patch from AUFS to export a symbol needed by AUFS + * Add put_filp.patch from AUFS to export a symbol needed by AUFS + * Add deny_write_access.patch from AUFS - export deny_write_access + * Add sec_perm-2.6.24.patch from AUFS - export security_inode_permission + * make sure TMPFS_MAGIC is defined in AUFS Makefile + * SAUCE: Revert aufs changes from AppArmor merge + + [ Mohamed Abbas ] + + * SAUCE: iwlagn -- fix rfkill when on when driver loaded + + [ Phillip Lougher ] + + * SAUCE: r8169: disable TSO by default for RTL8111/8168B chipsets. + + [ Stefan Bader ] + + * SAUCE: (no-up) Export dm_disk function of device-mapper + * SAUCE: Restore VT fonts on switch + * SAUCE: mmc: Increase power_up deleay to fix TI readers + * gfs1: GFS1 can't create more than 4kb file + * uvcvideo: Commit streaming parameters when enabling the video stream. + + [ Tim Gardner ] + + * SAUCE: Add extra headers to linux-libc-dev + * SAUCE: Catch nonsense keycodes and silently ignore + * SAUCE: Added support for HDAPS on various ThinkPads from Lenovo and IBM + * SAUCE: Guest OS does not recognize a lun with non zero target id on + Vmware ESX Server + * SAUCE: (no-up) Take care of orinoco_cs overlap with hostap_cs + * ubuntu: Add GNBD driver + * SAUCE: e1000e: Map NV RAM dynamically only when needed. + * SAUCE: Correctly blacklist Thinkpad r40e in ACPI + * SAUCE: Update Wacom tablet driver to 1.49 + * SAUCE: Fix Wacom tablet 1.49 porting errors + * SAUCE: Enable an e1000e Intel Corporation 82567 Gigabit controller + * SAUCE: Fix Oops in wlan_setup + * SAUCE: ipw2200: change default policy for auto-associate + * Dell Wireless 365 needs BTUSB_RESET quirk. + * ndiswrapper remote buffer overflows on long ESSIDs (CVE 2008-4395) + * Disabled ubuntu/e1000e config + + [ Upstream Kernel Changes ] + + * Revert "[Bluetooth] Eliminate checks for impossible conditions in IRQ + handler" + * Revert "x86, early_ioremap: fix fencepost error" + * mac80211: fix two issues in debugfs + * iwl3945: do not send scan command if channel count zero + + -- Ben Collins Fri, 07 Nov 2008 09:37:42 -0700 + +linux (2.6.27-8.17) intrepid-proposed; urgency=low + + [ John W. Linville ] + + * SAUCE: iwlagn: avoid sleep in softirq context + -LP: #286285 + + [ Tim Gardner ] + + * Dell Wireless 365 needs BTUSB_RESET quirk. 
+ - LP: #293670 + * SAUCE: ALSA: hda: make a STAC_DELL_EQ option (version 2) + - LP: #293271 + + [ Upstream Kernel Changes ] + + * iwlagn: downgrade BUG_ON in interrupt + * Input: atkbd - expand Latitude's force release quirk to other Dells + * fbcon_set_all_vcs: fix kernel crash when switching the rotated consoles + * modules: fix module "notes" kobject leak + * Driver core: Fix cleanup in device_create_vargs(). + * Driver core: Clarify device cleanup. + * ath9k/mac80211: disallow fragmentation in ath9k, report to userspace + * md: Fix rdev_size_store with size == 0 + * xfs: fix remount rw with unrecognized options + * OHCI: Allow broken controllers to auto-stop + * USB: OHCI: fix endless polling behavior + * USB: Fix s3c2410_udc usb speed handling + * USB: EHCI: log a warning if ehci-hcd is not loaded first + * usb gadget: cdc ethernet notification bugfix + * usb: musb_hdrc build fixes + * drm/i915: fix ioremap of a user address for non-root (CVE-2008-3831) + * DVB: au0828: add support for another USB id for Hauppauge HVR950Q + * DVB: sms1xxx: support two new revisions of the Hauppauge WinTV + MiniStick + * security: avoid calling a NULL function pointer in + drivers/video/tvaudio.c + * Linux 2.6.27.3 + -LP: #294152 + + * gpiolib: fix oops in gpio_get_value_cansleep() + * edac cell: fix incorrect edac_mode + * x86 ACPI: fix breakage of resume on 64-bit UP systems with SMP kernel + * sched: fix the wrong mask_len + * USB: cdc-wdm: make module autoload work + * USB: don't rebind drivers after failed resume or reset + * USB: fix memory leak in cdc-acm + * USB: Speedtouch: add pre_reset and post_reset routines + * dm kcopyd: avoid queue shuffle + * dm snapshot: fix primary_pe race + * amd_iommu: fix nasty bug that caused ILLEGAL_DEVICE_TABLE_ENTRY errors + * CIFS: fix saving of resume key before CIFSFindNext + * netfilter: xt_iprange: fix range inversion match + * netfilter: snmp nat leaks memory in case of failure + * netfilter: restore lost ifdef guarding defrag exception + * anon_vma_prepare: properly lock even newly allocated entries + * hvc_console: Fix free_irq in spinlocked section + * ACPI Suspend: Enable ACPI during resume if SCI_EN is not set + * ACPI suspend: Blacklist HP xw4600 Workstation for old code ordering + * ACPI suspend: Always use the 32-bit waking vector + * proc: fix vma display mismatch between /proc/pid/{maps,smaps} + * SCSI: scsi_dh: add Dell product information into rdac device handler + * PCI hotplug: cpqphp: fix kernel NULL pointer dereference + * V4L/DVB (9300): pvrusb2: Fix deadlock problem + * Linux 2.6.27.4 + -LP: #294155 + + -- Tim Gardner Tue, 04 Nov 2008 12:16:07 -0700 + +linux (2.6.27-7.16) intrepid-security; urgency=low + + [ Tim Gardner ] + + * ndiswrapper remote buffer overflows on long ESSIDs (CVE 2008-4395) + - LP: #275860 + + [ Upstream Kernel Changes ] + + * ext[234]: Avoid printk floods in the face of directory corruption + (CVE-2008-3528) + + -- Tim Gardner Mon, 03 Nov 2008 13:34:42 -0700 + +linux (2.6.27-7.15) intrepid-security; urgency=low + + [ Upstream Kernel Changes ] + + * tcp: Restore ordering of TCP options for the sake of inter-operability + - LP: #264019 + + -- Tim Gardner Mon, 27 Oct 2008 19:28:06 -0600 + +linux (2.6.27-7.14) intrepid; urgency=low + + [ Tim Gardner ] + + * Disable ath5k in 2.6.27 + - LP: #288148 + + -- Tim Gardner Thu, 23 Oct 2008 07:40:43 -0600 + +linux (2.6.27-7.13) intrepid; urgency=low + + [ Stefan Bader ] + + * gfs1: GFS1 can't create more than 4kb file + + [ Tim Gardner ] + + * Revert "SAUCE: x86: Reserve 
FIRST_DEVICE_VECTOR in used_vectors + bitmap.". Use upstream commit to avoid future conflicts. + * Revert "STABLE queue: mac80211: fix two issues in debugfs". + Use upstream commit to avoid future conflicts. + * Revert "x86, early_ioremap: fix fencepost error" + Use upstream commit to avoid future conflicts. + + [ Upstream Kernel Changes ] + + * sched_rt.c: resch needed in rt_rq_enqueue() for the root rt_rq + * x86: Reserve FIRST_DEVICE_VECTOR in used_vectors bitmap. + * mac80211: fix two issues in debugfs + * Fix barrier fail detection in XFS + * tty: Termios locking - sort out real_tty confusions and lock reads + * CIFS: make sure we have the right resume info before calling + CIFSFindNext + * rfkill: update LEDs for all state changes + * libertas: clear current command on card removal + * b43legacy: Fix failure in rate-adjustment mechanism + * x86, early_ioremap: fix fencepost error + * x86: SB450: skip IRQ0 override if it is not routed to INT2 of IOAPIC + * x86: improve UP kernel when CPU-hotplug and SMP is enabled + * sky2: Fix WOL regression + * netdrvr: atl1e: Don't take the mdio_lock in atl1e_probe + * Linux 2.6.27.2 + + [ Amit Kucheria ] + + * Ubuntu: agp: Fix stolen memory counting on G4X. + -LP: 285572 + + [ Scott Remnant ] + + * add MODULE_ALIAS to load ipmi_devintf with ipmi_si + + -- Tim Gardner Sun, 19 Oct 2008 10:06:21 -0600 + +linux (2.6.27-7.12) intrepid; urgency=low + + [ Chuck Short ] + + * xen: Add xen modules to virtual flavours. + + [ Mario Limonciello ] + + * SAUCE: Add back in lost commit for Apple BT Wireless Keyboard + - LP: #162083 + + [ Tim Gardner ] + + * Remove depmod created files from packages. + - LP: #250511 + * Changed default TCP congestion algorithm to 'cubic' (again) + - LP: #278801 + * Update configs for 'disable CONFIG_DYNAMIC_FTRACE' + - LP: #263555 + + [ Upstream Kernel Changes ] + + * x86: register a platform RTC device if PNP doesn't describe it + * disable CONFIG_DYNAMIC_FTRACE due to possible memory corruption on + module unload + + -- Tim Gardner Fri, 17 Oct 2008 11:25:39 -0600 + +linux (2.6.27-7.11) intrepid; urgency=low + + [ Amit Kucheria ] + + * STABLE queue: mac80211: fix two issues in debugfs + - LP: #275227 + * SAUCE: Adds support for COMPAL JHL90 webcam + + [ Ben Collins ] + + * SAUCE: (no-up) x86: Quiet "Kernel alive" messages + - LP: #39985 + * SAUCE: (no-up) Modularize vesafb + * build/config: Enable vesafb module + * build: Switch to vesafb as preferred. 
+ + [ Leann Ogasawara ] + + * Add Dell Dimension 9200 reboot quirk + - LP: #271370 + + [ Michael Haas ] + + * SAUCE: Revert aufs changes from AppArmor merge + + [ Tim Gardner ] + + * fix virtio udeb layout + - LP: #257739 + * Enabled CONFIG_EXT4DEV_FS=m + * Changed default TCP congestion algorithm to 'cubic' + - LP: #278801 + * SAUCE: ipw2200: change default policy for auto-associate + - LP: #264104 + + [ Upstream Kernel Changes ] + + * x86, early_ioremap: fix fencepost error + - LP: #263543 + + -- Tim Gardner Sat, 11 Oct 2008 08:07:42 -0600 + +linux (2.6.27-7.10) intrepid; urgency=low + + [ Alexey Starikovskiy ] + + * SAUCE: ACPI: EC: do transaction from interrupt context + - LP: #277802 + + [ Ben Collins ] + + * build/d-i: Change virtio-modules udeb to prio standard + + [ Colin Ian King ] + + * SAUCE: Blacklist IBM 2656 in serio/i8042 + - LP: #21558 + + [ Henrik Rydberg ] + + * Revert "SAUCE: applesmc: Add MacBookAir" + * SAUCE: [PATCH 1/5] hwmon: applesmc: Specified number of bytes to read + should match actual + * SAUCE: [PATCH 2/5] hwmon: applesmc: Fix the 'wait status failed: c != + 8' problem + * SAUCE: [PATCH 3/5] hwmon: applesmc: Prolong status wait + * SAUCE: [PATCH 4/5] hwmon: applesmc: Allow for variable ALV0 and ALV1 + package length + * SAUCE: [PATCH 5/5] hwmon: applesmc: Add support for Macbook Air + * SAUCE: hwmon: applesmc: Add support for Macbook Pro 4 + * SAUCE: hwmon: applesmc: Add support for Macbook Pro 3 + * SAUCE: hwmon: applesmc: Lighter wait mechanism, drastic improvement + + [ Leann Ogasawara ] + + * Add automatic model setting for Samsung Q45 + - LP: #200210 + + [ Tim Gardner ] + + * SAUCE: Correctly blacklist Thinkpad r40e in ACPI + - LP: #278794 + * SAUCE: Update Wacom tablet driver to 1.49 + - LP: #260675 + * SAUCE: ALPS touchpad for Dell Latitude E6500/E6400 + - LP: #270643 + * SAUCE: Fix Wacom tablet 1.49 porting errors + * SAUCE: Enable an e1000e Intel Corporation 82567 Gigabit controller + * SAUCE: Fix Oops in wlan_setup + - LP: #263309 + + [ Upstream Kernel Changes ] + + * ath9k: fix oops on trying to hold the wrong spinlock + * [Bluetooth] Fix double frees on error paths of btusb and bpa10x drivers + * [Bluetooth] Add reset quirk for new Targus and Belkin dongles + * [Bluetooth] Add reset quirk for A-Link BlueUSB21 dongle + * Revert "ax25: Fix std timer socket destroy handling." + * ax25: Quick fix for making sure unaccepted sockets get destroyed. + * netrom: Fix sock_orphan() use in nr_release + * Revert "V4L/DVB (8904): cx88: add missing unlock_kernel" + * SLOB: fix bogus ksize calculation + * net: only invoke dev->change_rx_flags when device is UP + * tcp: Fix possible double-ack w/ user dma + * net: Fix netdev_run_todo dead-lock + * tcp: Fix tcp_hybla zero congestion window growth with small rho and large cwnd. 
+ * [MIPS] Sibyte: Register PIO PATA device only for Swarm and Litte Sur + * eeepc-laptop: Fix hwmon interface + * hwmon: (it87) Prevent power-off on Shuttle SN68PT + * hwmon: Define sysfs interface for energy consumption register + * hwmon: (adt7473) Fix some bogosity in documentation file + * hwmon: (abituguru3) Enable reading from AUX3 fan on Abit AT8 32X + * hwmon: (abituguru3) Enable DMI probing feature on Abit AT8 32X + * [CPUFREQ] correct broken links and email addresses + * SLOB: fix bogus ksize calculation fix + * Don't allow splice() to files opened with O_APPEND + * Linux 2.6.27 + + -- Tim Gardner Wed, 08 Oct 2008 21:19:34 -0600 + +linux (2.6.27-6.9) intrepid; urgency=low + + [ Kees Cook ] + + * SAUCE: AppArmor: update to upstream subversion r1302 + - LP: #269921 + + [ Stefan Bader ] + + * Update configuration files to be compliant to desktop specs + - LP: #279019 + + [ Tim Gardner ] + + * Add support in e1000e for a couple of ICH10 PCI IDs + * Enable CONFIG_INPUT_PCSPKR=m + - LP: #275453 + + [ Upstream Kernel Changes ] + + * V4L/DVB (8559a): Fix a merge conflict at gspca/sonixb + * V4L/DVB (8789): wm8739: remove wrong kfree + * V4L/DVB (8883): w9968cf: Fix order of usb_alloc_urb validation + * V4L/DVB (8884): em28xx-audio: fix memory leak + * V4L/DVB (8885): cpia2_usb: fix memory leak + * V4L/DVB (8886): ov511: fix memory leak + * V4L/DVB (8887): gspca: fix memory leak + * V4L/DVB (8892): pvrusb2: Handle USB ID 2040:2950 same as 2040:2900 + * V4L/DVB (8904): cx88: add missing unlock_kernel + * V4L/DVB (8905): ov511: fix exposure sysfs attribute bug + * V4L/DVB (8909): gspca: PAC 7302 webcam 093a:262a added. + * hrtimer: migrate pending list on cpu offline + * hrtimer: fix migration of CB_IRQSAFE_NO_SOFTIRQ hrtimers + * hrtimer: mark migration state + * hrtimer: prevent migration of per CPU hrtimers + * [IA64] Put the space for cpu0 per-cpu area into .data section + * powerpc: Fix PCI in Holly device tree + * powerpc: Fix failure to shutdown with CPU hotplug + * mfd: Fix Kconfig accroding to the new gpiolib symbols + * mfd: Fix asic3 compilation + * x86: fix typo in enable_mtrr_cleanup early parameter + * ipsec: Fix pskb_expand_head corruption in xfrm_state_check_space + * iucv: Fix mismerge again. 
+ * ALSA: ASoC: Fix cs4270 error path + * ALSA: hda - Fix model for Dell Inspiron 1525 + * sctp: Fix kernel panic while process protocol violation parameter + * x86: Fix broken LDT access in VMI + * x86, vmi: fix broken LDT access + * tcp: Fix NULL dereference in tcp_4_send_ack() + * ipv6: NULL pointer dereferrence in tcp_v6_send_ack + * XFRM,IPv6: initialize ip6_dst_blackhole_ops.kmem_cachep + * af_key: Free dumping state on socket close + * dm: always allow one page in dm_merge_bvec + * dm: cope with access beyond end of device in dm_merge_bvec + * dm mpath: add missing path switching locking + * MN10300: Fix IRQ handling + * pxa2xx_spi: fix build breakage + * e1000e: write protect ICHx NVM to prevent malicious write/erase + * powerpc: Fix boot hang regression on MPC8544DS + * ASoC: Set correct name for WM8753 rec mixer output + * ALSA: snd-powermac: mixers for PowerMac G4 AGP + * ALSA: snd-powermac: HP detection for 1st iMac G3 SL + * fbcon: fix monochrome color value calculation + * inotify: fix lock ordering wrt do_page_fault's mmap_sem + * braille_console: only register notifiers when the braille console is used + * fix error-path NULL deref in alloc_posix_timer() + * memory hotplug: missing zone->lock in test_pages_isolated() + * mm: tiny-shmem nommu fix + * mm: handle initialising compound pages at orders greater than MAX_ORDER + * e1000e: reset swflag after resetting hardware + * e1000e: do not ever sleep in interrupt context + * e1000e: remove phy read from inside spinlock + * e1000e: drop stats lock + * e1000e: debug contention on NVM SWFLAG + * e1000e: update version from k4 to k6 + * Check mapped ranges on sysfs resource files + * e1000e: Fix incorrect debug warning + * [MIPS] Build fix: Fix irq flags type + * [MIPS] SMTC: Build fix: Fix filename in Makefile + * [MIPS] SMTC: Fix holes in SMTC and FPU affinity support. + * [MIPS] SMTC: Close tiny holes in the SMTC IPI replay system. + * [MIPS] SMTC: Fix SMTC dyntick support. + * [S390] nohz: Fix __udelay. + * [S390] qdio: prevent stack clobber + * Fix init/main.c to use regular printk with '%pF' for initcall fn + * x86 setup: correct segfault in generation of 32-bit reloc kernel + * selinux: Fix an uninitialized variable BUG/panic in selinux_secattr_to_sid() + * rtc: fix kernel panic on second use of SIGIO nofitication + * fbdev: fix recursive notifier and locking when fbdev console is blanked + * orion_spi: fix handling of default transfer speed + * include/linux/stacktrace.h: declare struct task_struct + * cpusets: remove pj from cpuset maintainers + * MAINTAINERS: add mailing list for man-pages + * SubmitChecklist: interfaces changes should CC linux-api@ + * Documentation/HOWTO: info about interface changes should CC linux-api@vger + * dw_dmac: fix copy/paste bug in tasklet + * leds-fsg: change order of initialization and deinitialization + * leds-pca955x: add proper error handling and fix bogus memory handling + * ACPI: Make /proc/acpi/wakeup interface handle PCI devices (again) + * clockevents: check broadcast tick device not the clock events device + * V4L/DVB (8919): cx18: Fix tuner audio input for Compro H900 cards + * V4L/DVB (8926): gspca: Bad fix of leak memory (changeset 43d2ead315b1). + * V4L/DVB (8933): gspca: Disable light frquency for zc3xx cs2102 Kokom. 
+ * V4L/DVB (8935): em28xx-cards: Remove duplicate entry (EM2800_BOARD_KWORLD_USB2800) + * V4L/DVB (8955): bttv: Prevent NULL pointer dereference in radio_open + * V4L/DVB (8957): zr36067: Restore the default pixel format + * V4L/DVB (8958): zr36067: Return proper bytes-per-line value + * V4L/DVB (8960): drivers/media/video/cafe_ccic.c needs mm.h + * V4L/DVB (8961): zr36067: Fix RGBR pixel format + * V4L/DVB (8963): s2255drv field count fix + * V4L/DVB (8967): Use correct XC3028L firmware for AMD ATI TV Wonder 600 + * V4L/DVB (8978): sms1xxx: fix product name for Hauppauge WinTV MiniStick + * V4L/DVB (8979): sms1xxx: Add new USB product ID for Hauppauge WinTV MiniStick + * V4L/DVB (9029): Fix deadlock in demux code + * V4L/DVB (9037): Fix support for Hauppauge Nova-S SE + * V4L/DVB (9043): S5H1420: Fix size of shadow-array to avoid overflow + * V4L/DVB (9053): fix buffer overflow in uvc-video + * V4L/DVB (9075): gspca: Bad check of returned status in i2c_read() spca561. + * V4L/DVB (9080): gspca: Add a delay after writing to the sonixj sensors. + * V4L/DVB (9092): gspca: Bad init values for sonixj ov7660. + * V4L/DVB (9099): em28xx: Add detection for K-WORLD DVB-T 310U + * V4L/DVB (9103): em28xx: HVR-900 B3C0 - fix audio clicking issue + * x86: gart iommu have direct mapping when agp is present too + * ide-cd: temporary tray close fix + * ide-dma: fix ide_build_dmatable() for TRM290 + * IDE: Fix platform device registration in Swarm IDE driver (v2) + * ide-cd: Optiarc DVD RW AD-7200A does play audio + * ide: workaround for bogus gcc warning in ide_sysfs_register_port() + * [MIPS] Fix CMP Kconfig configuration and mark as broken. + * [MIPS] IP27: Fix build errors if CONFIG_MAPPED_KERNEL=y + * x86 ACPI: Blacklist two HP machines with buggy BIOSes + * kgdb, x86: Avoid invoking kgdb_nmicallback twice per NMI + * kgdb: call touch_softlockup_watchdog on resume + * atmel-mci: Initialize BLKR before sending data transfer command + * Marker depmod fix core kernel list + * Linux 2.6.27-rc9 + + -- Tim Gardner Sun, 05 Oct 2008 21:27:49 -0600 + +linux (2.6.27-5.8) intrepid; urgency=low + + [ Amit Kucheria ] + + * Update AUFS-related Kconfig + - LP: #264048 + + [ Michael Haas ] + + * add proper aufs source tree from 20080922 + * Fix AUFS compilation in vfsub.c + * Add splice-2.6.23.patch from AUFS to export a symbol needed by AUFS + * Add put_filp.patch from AUFS to export a symbol needed by AUFS + * apply (modified) lhash.patch from AUFS to export __lookup_hash() + * Add deny_write_access.patch from AUFS - export deny_write_access + * Add sec_perm-2.6.24.patch from AUFS - export security_inode_permission + * make sure TMPFS_MAGIC is defined in AUFS Makefile + + [ Tim Gardner ] + + * Enabled CONFIG_IPWIRELESS + - LP: #274748 + * Enabled CONFIG_E1000E, disabled CONFIG_E1000E_NEW + This takes advantage of the upstream NVM protection fix in + commit 4a7703582836f55a1cbad0e2c1c6ebbee3f9b3a7. + + [ Upstream Kernel Changes ] + + * Revert "[Bluetooth] Eliminate checks for impossible conditions in IRQ + handler" + * [SCSI] qla2xxx: Defer enablement of RISC interrupts until ISP + initialization completes. 
+ * PCI: Fix pcie_aspm=force + * PCI: fix compiler warnings in pci_get_subsys() + * UBIFS: create the name of the background thread in every case + * UBIFS: TNC / GC race fixes + * UBIFS: remove incorrect assert + * UBIFS: fix printk format warnings + * AMD IOMMU: set iommu sunc flag after command queuing + * AMD IOMMU: protect completion wait loop with iommu lock + * sparc64: Fix disappearing PCI devices on e3500. + * x86, oprofile: BUG scheduling while atomic + * ALSA: ASoC: Fix at32-pcm build breakage with PM enabled + * ath9k: connectivity is lost after Group rekeying is done + * wireless: zd1211rw: add device ID fix wifi dongle "trust nw-3100" + * [IA64] Ski simulator doesn't need check_sal_cache_flush + * [IA64] kexec fails on systems with blocks of uncached memory + * ath9k: Fix IRQ nobody cared issue with ath9k + * [Bluetooth] Fix I/O errors on MacBooks with Broadcom chips + * [Bluetooth] Fix wrong URB handling of btusb driver + * [Bluetooth] Fix USB disconnect handling of btusb driver + * sparc64: Fix missing devices due to PCI bridge test in + of_create_pci_dev(). + * [WATCHDOG] ibmasr: remove unnecessary spin_unlock() + * [WATCHDOG] wdt285: fix sparse warnings + * [WATCHDOG] unlocked_ioctl changes + * x86: fix 27-rc crash on vsmp due to paravirt during module load + * sched: fix init_hrtick() section mismatch warning + * clockevents: prevent cpu online to interfere with nohz + * x86: prevent stale state of c1e_mask across CPU offline/online + * clockevents: prevent stale tick_next_period for onlining CPUs + * clockevents: check broadcast device not tick device + * clockevents: prevent mode mismatch on cpu online + * x86: prevent C-states hang on AMD C1E enabled machines + * x86: c1e_idle: don't mark TSC unstable if CPU has invariant TSC + * timers: fix build error in !oneshot case + * ALSA: ASoC: maintainers - update email address for Liam Girdwood + * ibmasr: remove unnecessary spin_unlock() + * smb.h: do not include linux/time.h in userspace + * kernel-doc: allow structs whose members are all private + * kexec: fix segmentation fault in kimage_add_entry + * Documentation/DMA-mapping.txt: update for pci_dma_mapping_error() + changes + * sys_paccept: disable paccept() until API design is resolved + * mm: tiny-shmem fix lock ordering: mmap_sem vs i_mutex + * Documentation/sysctl/kernel.txt: fix softlockup_thresh description + * memcg: check under limit at shrink_usage + * atmel_serial: update the powersave handler to match serial core + * [SCSI] Fix hang with split requests + * USB Storage: Sierra: Non-configurable TRU-Install + * USB Serial: Sierra: Device addition & version rev + * USB: ehci: fix some ehci hangs and crashes + * USB: Fix the Nokia 6300 storage-mode. 
+ * USB: Correct Sierra Wireless USB EVDO Modem Device ID + * USB: fix hcd interrupt disabling + * USB: update of Documentation/usb/anchors.txt + * usb gadget: fix omap_udc DMA regression + * USB: Fixing Nokia 3310c in storage mode + * usb: musb: fix include path + * USB: fix EHCI periodic transfers + * usb-serial: Add Siemens EF81 to PL-2303 hack triggers + * USB: SERIAL CP2101 add device IDs + * USB: unusual_devs addition for RockChip MP3 player + * USB: fsl_usb2_udc: fix VDBG() format string + * usb serial: ti_usb_3410_5052 obviously broken by firmware changes + * USB: ftdi_sio: Add 0x5050/0x0900 USB IDs (Papouch Quido USB 4/4) + * USB: serial: add ZTE CDMA Tech id to option driver + * USB Serial: Sierra: Add MC8785 VID/PID + * USB: drivers/usb/musb/: disable it on SuperH + * usb: ftdi_sio: add support for Domintell devices + * usb: unusual devs patch for Nokia 5310 Music Xpress + * USB: revert recovery from transient errors + * [MIPS] au1000: Fix gpio direction + * [MIPS] Fixe the definition of PTRS_PER_PGD + * x86: prevent stale state of c1e_mask across CPU offline/online, fix + * x86: disable apm on the olpc + * i2c-powermac: Fix section for probe and remove functions + * i2c-dev: Return correct error code on class_create() failure + * i2c: Fix mailing lists in two MAINTAINERS entries + * ath9k: disable MIB interrupts to fix interrupt storm + * 9p: implement proper trans module refcounting and unregistration + * 9p-trans_fd: fix trans_fd::p9_conn_destroy() + * 9p-trans_fd: clean up p9_conn_create() + * 9p-trans_fd: don't do fs segment mangling in p9_fd_poll() + * 9p-trans_fd: fix and clean up module init/exit paths + * 9p: introduce missing kfree + * 9p: use an IS_ERR test rather than a NULL test + * 9p: fix put_data error handling + * netfilter: ip6t_{hbh,dst}: Rejects not-strict mode on rule insertion + * MN10300: Move asm-arm/cnt32_to_63.h to include/linux/ + * MN10300: Make sched_clock() report time since boot + * ALSA: fix locking in snd_pcm_open*() and snd_rawmidi_open*() + * ALSA: remove unneeded power_mutex lock in snd_pcm_drop + * IPoIB: Fix crash when path record fails after path flush + * [XFS] Fix extent list corruption in xfs_iext_irec_compact_full(). + * [XFS] Remove xfs_iext_irec_compact_full() + * kgdb: could not write to the last of valid memory with kgdb + * kgdb, x86, arm, mips, powerpc: ignore user space single stepping + * kgdb, x86_64: gdb serial has BX and DX reversed + * kgdb, x86_64: fix PS CS SS registers in gdb serial + * kgdboc,tty: Fix tty polling search to use name correctly + * ARM: Delete ARM's own cnt32_to_63.h + * m32r: remove the unused NOHIGHMEM option + * m32r: don't offer CONFIG_ISA + * m32r: export empty_zero_page + * m32r: export __ndelay + * m32r/kernel/: cleanups + * [MIPS] au1000: Make sure GPIO value is zero or one + * [MIPS] IP27: Switch to dynamic interrupt routing avoding panic on + error. 
+ * [MIPS] BCM47xx: Fix build error due to missing PCI functions + * [SSB] Initialise dma_mask for SSB_BUSTYPE_SSB devices + * Swarm: Fix crash due to missing initialization + * ide-tape: fix vendor strings + * ide: note that IDE generic may prevent other drivers from attaching + * cdrom: update ioctl documentation + * [SCSI] qlogicpti: fix sg list traversal error in continuation entries + * sata_nv: reinstate nv_hardreset() for non generic controllers + * scsi: fix fall out of sg-chaining patch in qlogicpti + * ALSA: make the CS4270 driver a new-style I2C driver + * ALSA: ASoC: Fix another cs4270 error path + * Fix NULL pointer dereference in proc_sys_compare + * kconfig: fix silentoldconfig + * kconfig: readd lost change count + * mm owner: fix race between swapoff and exit + * Linux 2.6.27-rc8 + * e1000e: write protect ICHx NVM to prevent malicious write/erase + + -- Amit Kucheria Tue, 30 Sep 2008 18:22:35 +0300 + +linux (2.6.27-4.7) intrepid; urgency=low + + [ Ben Collins ] + + * build/abi: Add gfs1 to perm blacklist + * build/abi: Ignored changes in gfs2 symbols + + [ Fabio M. Di Nitto ] + + * Revert "SAUCE: Export gfs2 symbols required for gfs1 kernel module" + * ubuntu: update GFS Cluster File System + + [ Stefan Bader ] + + * SAUCE: x86: Reserve FIRST_DEVICE_VECTOR in used_vectors bitmap. + - LP: #276334 + + [ Tim Gardner ] + + * Revert "Disable e1000e until the NVRAM corruption problem is found." + * Add atl1e and atl2 to Debian installer bits + - LP: #273904 + * SAUCE: e1000e: Map NV RAM dynamically only when needed. + - LP: #263555 + + -- Tim Gardner Fri, 26 Sep 2008 20:51:22 -0600 + +linux (2.6.27-4.6) intrepid; urgency=low + + [ Tim Gardner ] + + * Disable e1000e until the NVRAM corruption problem is found. + - LP: #263555 + + [ Upstream Kernel Changes ] + + * Revert "[Bluetooth] Eliminate checks for impossible conditions in IRQ + handler" + + -- Ben Collins Tue, 23 Sep 2008 09:53:57 -0400 + +linux (2.6.27-4.5) intrepid; urgency=low + + [ Upstream Kernel Changes ] + + * Revert "b43/b43legacy: add RFKILL_STATE_HARD_BLOCKED support" + * udf: Fix lock inversion between iprune_mutex and alloc_mutex (v2) + * udf: Fix error paths in udf_new_inode() + * [SCSI] sd: select CRC_T10DIF only when necessary + * [SCSI] zfcp: Fix request queue locking + * [SCSI] zfcp: Correctly query end flag in gpn_ft response + * [SCSI] zfcp: Simplify ccw notify handler + * [SCSI] zfcp: Fix reference counter for remote ports + * [SCSI] zfcp: channel cannot be detached due to refcount imbalance + * [SCSI] zfcp: Remove duplicated unlikely() macros. + * [SCSI] scsi_dh: make check_sense return ADD_TO_MLQUEUE + * [SCSI] make scsi_check_sense HARDWARE_ERROR return ADD_TO_MLQUEUE on + retry + * [SCSI] fix check of PQ and PDT bits for WLUNs + * pcm037: add rts/cts support for serial port + * i.MX serial: fix init failure + * imx serial: set RXD mux bit on i.MX27 and i.MX31 + * imx serial: fix rts handling for non imx1 based hardware + * mlx4_core: Set RAE and init mtt_sz field in FRMR MPT entries + * udf: add llseek method + * PCI/iommu: blacklist DMAR on Intel G31/G33 chipsets + * PCI: Fix printk warnings in probe.c + * PCI: Fix printk warnings in setup-bus.c + * PCI Hotplug: fakephp: fix deadlock... again + * clockevents: remove WARN_ON which was used to gather information + * ocfs2: Fix a bug in direct IO read. 
+ * arch/x86/kernel/kdebugfs.c: introduce missing kfree + * [IA64] fix compile failure with non modular builds + * [IA64] fix up bte.h + * [IA64] arch/ia64/sn/pci/tioca_provider.c: introduce missing kfree + * PCI: fix pciehp_free_irq() + * [IA64] prevent ia64 from invoking irq handlers on offline CPUs + * ide: Fix pointer arithmetic in hpt3xx driver code (3rd try) + * add deprecated ide-scsi to feature-removal-schedule.txt + * swiotlb: fix back-off path when memory allocation fails + * sparc64: Fix interrupt register calculations on Psycho and Sabre. + * VIDEO_SH_MOBILE_CEU should depend on HAS_DMA + * m68k: Update defconfigs for 2.6.27-rc6 + * sparc32: Fix function signature of of_bus_sbus_get_flags(). + * sched: fix 2.6.27-rc5 couldn't boot on tulsa machine randomly + * sched: fix deadlock in setting scheduler parameter to zero + * KVM: SVM: fix random segfaults with NPT enabled + * KVM: SVM: fix guest global tlb flushes with NPT + * KVM: VMX: Always return old for clear_flush_young() when using EPT + * clocksource, acpi_pm.c: fix check for monotonicity + * [ARM] OMAP: Fix MMC device data + * block: disable sysfs parts of the disk command filter + * ath9k: Assign seq# when mac80211 requests this + * sg: disable interrupts inside sg_copy_buffer + * MN10300: Change the fault handler to check in_atomic() not + in_interrupt() + * [Bluetooth] Fix regression from using default link policy + * netlink: fix overrun in attribute iteration + * x86: fix possible x86_64 and EFI regression + * sparc64: Fix PCI error interrupt registry on PSYCHO. + * sparc: Fix user_regset 'n' field values. + * niu: panic on reset + * PCI: re-add debug prints for unmodified BARs + * [ARM] 5245/1: Fix warning about unused return value in drivers/pcmcia + * [ARM] 5246/1: tosa: add proper clock alias for tc6393xb clock + * [ARM] 5247/1: tosa: SW_EAR_IN support + * [ARM] Fix PCI_DMA_BUS_IS_PHYS for ARM + * ata: duplicate variable sparse warning + * sata_inic162x: enable LED blinking + * [libata] LBA28/LBA48 off-by-one bug in ata.h + * proc: more debugging for "already registered" case + * include/linux/ioport.h: add missing macro argument for devm_release_* + family + * cpuset: avoid changing cpuset's cpus when -errno returned + * cpuset: hotplug documentation fix + * coredump_filter: add description of bit 4 + * bfs: fix Lockdep warning + * mm: ifdef Quicklists in /proc/meminfo + * spi_mpc83xx: fix clockrate calculation for low speed + * spi_mpc83xx: reject invalid transfer sizes + * pxa2xx_spi: chipselect bugfixes + * pxa2xx_spi: dma bugfixes + * mm: mark the correct zone as full when scanning zonelists + * Documentation/ABI: /sys/class/gpio + * MAINTAINERS: fix USB VIDEO CLASS mail list address + * ia64: fix panic during `modprobe -r xpc' + * atmel_lcdfb: disable LCD and DMA engines when suspending + * spi_s3c24xx: fix section warning + * rescan_partitions(): make device capacity errors non-fatal + * memstick: fix MSProHG 8-bit interface mode support + * Add Uwe Kleine-König to .mailmap + * xen: fix for xen guest with mem > 3.7G + * x86/paravirt: Remove duplicate paravirt_pagetable_setup_{start, done}() + * crypto: talitos - Avoid consecutive packets going out with same IV + * slub: fixed uninitialized counter in struct kmem_cache_node + * udp: Fix rcv socket locking + * IB/mlx4: Fix up fast register page list format + * [MIPS] VR41xx: unsigned irq cannot be negative + * x86: completely disable NOPL on 32 bits + * [S390] cio: Fix driver_data handling for ccwgroup devices. 
+ * [S390] cio: fix orb initialization in cio_start_key + * sparc64: Fix OOPS in psycho_pcierr_intr_other(). + * sparc64: Fix SMP bootup with CONFIG_STACK_DEBUG or ftrace. + * RDMA/nes: Fix client side QP destroy + * IPoIB: Fix deadlock on RTNL between bcast join comp and ipoib_stop() + * clockevents: make device shutdown robust + * powerpc: Fix interrupt values for DMA2 in MPC8610 HPCD device tree + * hpplus: fix build regression + * Fix PNP build failure, bugzilla #11276 + * warn: Turn the netdev timeout WARN_ON() into a WARN() + * [XFS] Move memory allocations for log tracing out of the critical path + * [XFS] Fix regression introduced by remount fixup + * [XFS] Prevent direct I/O from mapping extents beyond eof + * [XFS] Fix barrier status change detection. + * [XFS] Prevent lockdep false positives when locking two inodes. + * [XFS] Fix use-after-free with buffers + * [XFS] Don't do I/O beyond eof when unreserving space + * powerpc: Holly board needs dtbImage target + * Fix compile failure with non modular builds + * [ARM] 5249/1: davinci: remove redundant check in davinci_psc_config() + * [ARM] omap: back out 'internal_clock' support + * sctp: set the skb->ip_summed correctly when sending over loopback. + * [ARM] 5255/1: Update jornada ssp to remove build errors/warnings + * sctp: do not enable peer features if we can't do them. + * sctp: Fix oops when INIT-ACK indicates that peer doesn't support AUTH + * bnx2: Promote vector field in bnx2_irq structure from u16 to unsigned + int + * forcedeth: call restore mac addr in nv_shutdown path + * e1000: prevent corruption of EEPROM/NVM + * e100: Use pci_pme_active to clear PME_Status and disable PME# + * md: Don't wait UNINTERRUPTIBLE for other resync to finish + * atstk1000: fix build breakage with BOARD_ATSTK100X_SW2_CUSTOM=y + * avr32: add .gitignore files + * avr32: add generic_find_next_le_bit bit function + * avr32: fix sys_sync_file_range() call convention + * avr32: nmi_enter() without nmi_exit() + * KVM: ia64: 'struct fdesc' build fix + * hwmon: (atxp1) Fix device detection logic + * hwmon: (it87) Fix fan tachometer reading in IT8712F rev 0x7 (I) + * hwmon: (ad7414) Make ad7414_update_device() static + * tmio_mmc: fix compilation with debug enabled + * atmel-mci: debugfs: enable clock before dumping regs + * atmel-mci: Fix memory leak in atmci_regs_show + * atmel-mci: Fix bogus debugfs file size + * atmel-mci: Set MMC_CAP_NEEDS_POLL if no detect_pin + * mmc_block: handle error from mmc_register_driver() + * mmc_test: initialize mmc_test_lock statically + * [MIPS] Fix 64-bit IP checksum code + * [MIPS] SMTC: Clear TIF_FPUBOUND on clone / fork. + * [MIPS] Fix potential latency problem due to non-atomic cpu_wait. 
+ * [MIPS] vmlinux.lds.S: handle .text.* + * MAINTAINERS: Trivial whitespace cleanups + * MAINTAINERS: Various fixes + * Linux 2.6.27-rc7 + + -- Tim Gardner Sun, 21 Sep 2008 21:49:28 -0600 + +linux (2.6.27-3.4) intrepid; urgency=low + + [ Colin Ian King ] + + * SAUCE: fix kernel oops in VirtualBox during paravirt patching + - LP: #246067 + * SAUCE: qc-usb: Enable Logitech QuickCam Messenger + - LP: #209901 + * SAUCE: appleir: Enable driver for new MacBook Pro + - LP: #157919 + + [ Tim Gardner ] + + * Enabled CONFIG_DEBUG_RODATA=y + + [ Upstream Kernel Changes ] + + * Revert "ALSA: hda - Added model selection for iMac 24"" + * Revert "x86: fix HPET regression in 2.6.26 versus 2.6.25, check hpet + against BAR, v3" + * Revert "[ARM] use the new byteorder headers" + * Revert "mac80211: Use IWEVASSOCREQIE instead of IWEVCUSTOM" + * Revert "crypto: camellia - Use kernel-provided bitops, unaligned access + helpers" + * svcrdma: Fix race between svc_rdma_recvfrom thread and the dto_tasklet + * sched, cpuset: rework sched domains and CPU hotplug handling (v4) + * ACPI: Fix now signed module parameter. + * ACPI: Change package length error to warning + * ACPI: Fix now signed module parameter. + * ACPI: Fix typo in "Disable MWAIT via DMI on broken Compal board" + * acpi: add checking for NULL early param + * UBIFS: fix zero-length truncations + * Input: bcm5974 - add maintainer entry + * sh64: re-add the __strnlen_user() prototype + * sh: fix ptrace_64.c:user_disable_single_step() + * PNPACPI: ignore the producer/consumer bit for extended IRQ descriptors + * UBIFS: always read hashed-key nodes under TNC mutex + * UBIFS: allow for racing between GC and TNC + * [CIFS] Fix plaintext authentication + * sparc32: Implement smp_call_function_single(). + * sh: crash kernel resource fix + * sh: fix kexec entry point for crash kernels + * sh: fix platform_resource_setup_memory() section mismatch + * sh: update Migo-R defconfig + * sh: update AP325RXA defconfig + * sh: fix semtimedop syscall + * cifs: fix O_APPEND on directio mounts + * [CIFS] update cifs change log + * [CIFS] Turn off Unicode during session establishment for plaintext + authentication + * ACPI: thinkpad-acpi: wan radio control is not experimental + * sparc: Fix resource flags for PCI children in OF device tree. + * remove blk_register_filter and blk_unregister_filter in gendisk + * ALSA: oxygen: fix distorted output on AK4396-based cards + * ipv6: When we droped a packet, we should return NET_RX_DROP instead of + 0 + * pkt_sched: Fix locking of qdisc_root with qdisc_root_sleeping_lock() + * net: Unbreak userspace usage of linux/mroute.h + * Don't trigger softlockup detector on network fs blocked tasks + * Resource handling: add 'insert_resource_expand_to_fit()' function + * sparc64: setup_valid_addr_bitmap_from_pavail() should be __init + * UBIFS: do not update min_idx_lebs in stafs + * UBIFS: push empty flash hack down + * UBIFS: remove incorrect index space check + * UBIFS: improve statfs reporting + * UBIFS: fix assertion + * UBIFS: add forgotten gc_idx_lebs component + * UBIFS: introduce LEB overhead + * UBIFS: improve statfs reporting even more + * UBIFS: fill f_fsid + * drm/radeon: downgrade debug message from info to debug. + * Remove invalidate_partition call from do_md_stop. 
+ * Fix problem with waiting while holding rcu read lock in md/bitmap.c + * ALSA: hda: Distortion fix for dell_m6_core_init + * ALSA: ASoC: fix pxa2xx-i2s clk_get call + * block: restore original behavior of /proc/partition when there's no + partition + * debugobjects: fix lockdep warning + * avr32: Fix lockup after Java stack underflow in user mode + * avr32: pm_standby low-power ram bug fix + * nfsd: fix compound state allocation error handling + * sunrpc: fix possible overrun on read of /proc/sys/sunrpc/transports + * nfsd: fix buffer overrun decoding NFSv4 acl + * audit: Moved variable declaration to beginning of function + * Fix modules_install on RO nfs-exported trees. + * Remove '#include ' from mm/page_isolation.c + * dabusb_fpga_download(): fix a memory leak + * [MTD] mtdchar.c: Fix regression in MEMGETREGIONINFO ioctl() + * ALSA: hda - Fix ALC663 auto-probe + * ALSA: hda - Add mic-boost controls to ALC662/663 auto configuration + * Un-break printk strings in x86 PCI probing code + * kernel/resource.c: fix new kernel-doc warning + * softlockup: minor cleanup, don't check task->state twice + * fix typo in arch/parisc/hpux/fs.c + * m68k: atari_keyb_init operator precedence fix + * ACPI: Fix typo in "Disable MWAIT via DMI on broken Compal board" + * don't diff generated firmware files + * IDE: compile fix for sff_dma_ops + * IDE: palm_bk3710: fix compile warning for unused variable + * ide: fix hwif_to_node() + * palm_bk3710: improve IDE registration + * ide-disk: remove stale init_idedisk_capacity() documentation + * ide/Kconfig: mark ide-scsi as deprecated + * net/wireless/Kconfig: clarify the description for + CONFIG_WIRELESS_EXT_SYSFS + * iwlwifi: do not use GFP_DMA in iwl_tx_queue_init + * iwlwifi: workaround interrupt handling no some platforms + * iwlwifi: fix apm_stop (wrong bit polarity for FLAG_INIT_DONE) + * iwlwifi: fix 64bit platform firmware loading + * orinoco: Multicast to the specified addresses + * wireless/libertas/if_cs.c: fix memory leaks + * mac80211: Fix debugfs union misuse and pointer corruption + * rt2x00: Compiler warning unmasked by fix of BUILD_BUG_ON + * ath9k: Incorrect key used when group and pairwise ciphers are + different. + * ath9: Fix ath_rx_flush_tid() for IRQs disabled kernel warning message. 
+ * net/xfrm: Use an IS_ERR test rather than a NULL test + * ipv: Re-enable IP when MTU > 68 + * NTFS: update homepage + * mm: make setup_zone_migrate_reserve() aware of overlapping nodes + * VFS: fix dio write returning EIO when try_to_release_page fails + * acer-wmi: remove debugfs entries upon unloading + * mm/bootmem: silence section mismatch warning - + contig_page_data/bootmem_node_data + * MAINTAINERS: add a maintainer for the BCM5974 multitouch driver + * 8250: improve workaround for UARTs that don't re-assert THRE correctly + * mmc: at91_mci: don't use coherent dma buffers + * pid_ns: zap_pid_ns_processes: fix the ->child_reaper changing + * pid_ns: (BUG 11391) change ->child_reaper when init->group_leader exits + * cirrusfb: check_par fixes + * devcgroup: fix race against rmdir() + * mm: show quicklist usage in /proc/meminfo + * mm: size of quicklists shouldn't be proportional to the number of CPUs + * ipc: document the new auto_msgmni proc file + * hp-wmi: update to match current rfkill semantics + * hp-wmi: add proper hotkey support + * tdfxfb: fix SDRAM memory size detection + * tdfxfb: fix frame buffer name overrun + * rtc_time_to_tm: fix signed/unsigned arithmetic + * ibft: fix target info parsing in ibft module + * sysfs: document files in /sys/firmware/sgi_uv/ + * rtc-cmos: wake again from S5 + * pm_qos_requirement might sleep + * drivers/char/random.c: fix a race which can lead to a bogus BUG() + * ipsec: Fix deadlock in xfrm_state management. + * [x86] Fix TSC calibration issues + * tipc: Don't use structure names which easily globally conflict. + * sparc64: Fix IPI call locking. + * [ARM] omap: fix gpio.c build error + * sparc64: Prevent sparc64 from invoking irq handlers on offline CPUs + * powerpc: Fix uninitialised variable in VSX alignment code + * powerpc: Only make kernel text pages of linear mapping executable + * powerpc: Make sure _etext is after all kernel text + * powerpc: Work around gcc's -fno-omit-frame-pointer bug + * powerpc: Fix build error with 64K pages and !hugetlbfs + * powerpc: Fix for getting CPU number in power_save_ppc32_restore() + * UBIFS: amend f_fsid + * net/usb/pegasus: avoid hundreds of diagnostics + * ixgbe: initialize interrupt throttle rate + * pcnet-cs, axnet_cs: add new IDs, remove dup ID with less info + * netxen: Remove workaround for chipset quirk + * Split up PIT part of TSC calibration from native_calibrate_tsc + * iwlwifi: W/A for the TSF correction in IBSS + * iwlwifi: fix hidden ssid discovery in passive channels + * iwlwifi: remove false rxon if rx chain changes + * iwlwifi: fix station mimo power save values + * iwlwifi: fix rx_chain computation + * iwlwifi: fix Tx cmd memory allocation failure handling + * iwlwifi: call apm stop on exit + * iwlwifi: fix STATUS_EXIT_PENDING is not set on pci_remove + * ath9k: Fix TX status reporting + * ath9k: Fix TX control flag use for no ACK and RTS/CTS + * V4L/DVB (8555): au8522: add mechanism to configure IF frequency for vsb + and qam + * V4L/DVB (8556): au0828: add support for Hauppauge Woodbury + * V4L/DVB (8598): au8522: clean up function au8522_set_if + * V4L/DVB (8599): au8522: remove if frequency settings from vsb/qam + modulation tables + * V4L/DVB (8600): au0828: explicitly set 6 MHz IF frequency in + hauppauge_hvr950q_config + * V4L/DVB (8629): v4l2-ioctl: do not try to handle private V4L1 ioctls + * V4L/DVB (8633): ivtv: update ivtv version number + * V4L/DVB (8648): ivtv: improve CC support + * V4L/DVB (8660): gspca: Simplify the scan of URB packets in pac7311. 
+ * V4L/DVB (8661): gspca: Bug in the previous changeset about pac7311. + * V4L/DVB (8663): gspca: Webcam 0c45:6128 added in sonixj. + * V4L/DVB (8664): gspca: The bridge/sensor of the webcam 093a:2621 is a + PAC 7302. + * V4L/DVB (8665): gspca: Fix the 640x480 resolution of the webcam + 093a:2621. + * V4L/DVB (8666): gspca: Bad scanning of frames in pac7311. + * V4L/DVB (8667): gspca: Bad probe of Z-Star/Vimicro webcams with pas106 + sensor. + * V4L/DVB (8668): gspca: Conflict GSPCA / ET61X251 for the webcam + 102c:6251. + * V4L/DVB (8669): gspca: Add white balance control for spca561 rev 012A. + * V4L/DVB (8671): gspca: Remove the unused field 'dev_name' of the device + structure. + * V4L/DVB (8672): gspca: Big rewrite of spca561. + * V4L/DVB (8673): gspca: Bad frame scanning again and bad init in + pac7311. + * V4L/DVB (8674): gspca: Webcam 0c45:612e added in sonixj. + * V4L/DVB (8675): gspca: Pixmap PJPG (Pixart 73xx JPEG) added, generated + by pac7311. + * V4L/DVB (8678): Remove the dead CONFIG_RADIO_MIROPCM20{,_RDS} code + * V4L/DVB (8681): v4l2-ioctl.c: fix warning + * V4L/DVB (8682): V4L: fix return value of register video func + * V4L/DVB (8701): cx18: Add missing lock for when the irq handler + manipulates the queues + * V4L/DVB (8703): gspca: Do controls work for spca561 revision 12a. + * V4L/DVB (8705): gspca: Adjust some control limits in spca561. + * V4L/DVB (8706): Make contrast and brightness work for pac7302. + * V4L/DVB (8707): gspca: Colors, hflip and vflip controls added for + pac7302. + * V4L/DVB (8709): gspca: Fix initialization and controls of sn9x110 - + ov7630. + * V4L/DVB (8710): gspca: Bad color control in sonixj. + * V4L/DVB (8711): gspca: Bad controls and quantization table of pac7311. + * V4L/DVB (8712): gspca: Bad start of sonixj webcams since changeset + a8779025e7e8. + * V4L/DVB (8713): gspca: Bad color control again in sonixj. + * V4L/DVB (8714): gspca: Bad start of sn9c110 and sensor om6802. + * V4L/DVB (8715): gspca: Change the name of some webcam in the gspca doc. + * V4L/DVB (8716): gspca: Bad start of sn9c110 and sensor ov7630. + * V4L/DVB (8717): gspca: Frame buffer too small for small resolutions + (sonixj and t613). + * V4L/DVB (8718): gspca: suspend/resume added. + * V4L/DVB (8719): gspca: Have VIDIOC_QUERYCTRL more compliant to the + spec. + * V4L/DVB (8720): gspca: V4L2_CAP_SENSOR_UPSIDE_DOWN added as a cap for + some webcams. + * V4L/DVB (8722): sms1xxx: fix typo in license header + * V4L/DVB (8726): link tuner before saa7134 + * V4L/DVB (8727): V4L1: make PMS not autoprobe when builtin. + * V4L/DVB (8728): 1-make-pms-not-autoprobe-when-builtin update + * V4L/DVB (8749): Fix error code, when camera is not turned on by sonypi + * V4L/DVB (8750): V4L: check inval in video_register_device_index() + * V4L/DVB (8751): vivi: Fix some issues at vivi register routine + * V4L/DVB (8757): v4l-dvb: fix a bunch of sparse warnings + * V4L/DVB (8769): cx18: Simplify queue flush logic to prevent oops in + cx18_flush_queues() + * V4L/DVB (8778): radio: fix incorrect video_register_device result check + * V4L/DVB (8779): v4l: fix more incorrect video_register_device result + checks + * V4L/DVB (8790): saa7115: call i2c_set_clientdata only when state != + NULL + * V4L/DVB (8803): s5h1409: Enable QAM_AUTO mode + * V4L/DVB (8804): s5h1411: Enable QAM_AUTO mode + * V4L/DVB (8805): Steven Toth email address change + * V4L/DVB (8809): gspca: Revert commit + 9a9335776548d01525141c6e8f0c12e86bbde982 + * V4L/DVB (8810): gspca: Compile error when CONFIG_PM not defined. 
+ * V4L/DVB (8812): gspca: Do pac73xx webcams work. + * V4L/DVB (8813): gspca: Adjust SOF detection for pac73xx. + * V4L/DVB (8814): gspca: Set DISABLED the disabled controls at query + control time. + * V4L/DVB (8815): gspca: Fix problems with disabled controls. + * V4L/DVB (8816): gspca: Set disabled ctrls and fix a register pb with + ovxxxx in sonixb. + * V4L/DVB (8817): gspca: LED and proble changes in sonixb. + * V4L/DVB (8818): gspca: Reinitialize the device on resume. + * V4L/DVB (8819): gspca: Initialize the ov519 at open time and source + cleanup. + * V4L/DVB (8820): gspca: Change initialization and gamma of zc3xx - + pas106. + * V4L/DVB (8822): gspca: Change some subdriver functions for + suspend/resume. + * V4L/DVB (8823): gspca: H and V flips work for ov7670 only in ov519. + * V4L/DVB (8824): gspca: Too much code removed in the suspend/resume + changeset. + * V4L/DVB (8825): gspca: More controls for pac73xx and new webcam + 093a:2624. + * V4L/DVB (8826): gspca: Webcam Labtec 2200 (093a:2626) added in pac7311. + * V4L/DVB (8827): gspca: Stop pac7302 autogain oscillation. + * V4L/DVB (8828): gspca: Set the clock at the end of initialization in + sonixj. + * V4L/DVB (8829): gspca: Have a clean kmalloc-ated buffer for USB + exchanges. + * V4L/DVB (8830): gspca: Move some probe code to the new init function. + * V4L/DVB (8831): gspca: Resolve webcam conflicts between some drivers. + * V4L/DVB (8832): gspca: Bad pixelformat of vc0321 webcams. + * V4L/DVB (8833): gspca: Cleanup the sonixb code. + * V4L/DVB (8834): gspca: Have a bigger buffer for sn9c10x compressed + images. + * V4L/DVB (8835): gspca: Same pixfmt as the sn9c102 driver and raw Bayer + added in sonixb. + * V4L/DVB (8837): dvb: fix I2C adapters name size + * V4L/DVB (8839): dib0700: add comment to identify 35th USB id pair + * V4L/DVB (8840): dib0700: add basic support for Hauppauge Nova-TD-500 + (84xxx) + * V4L/DVB (8842): vivi_release(): fix use-after-free + * V4L/DVB (8843): tda10048_firmware_upload(): fix a memory leak + * V4L/DVB (8844): dabusb_fpga_download(): fix a memory leak + * bnx2x: Accessing un-mapped page + * SELinux: memory leak in security_context_to_sid_core + * x86: add io delay quirk for Presario F700 + * mmap: fix petty bug in anonymous shared mmap offset handling + * x86: Change warning message in TSC calibration. + * PCI: fix pbus_size_mem() resource alignment for CardBus controllers + * [ARM] omap: fix build error in ohci-omap.c + * [ARM] remove unused #include + * ACPI: Make Len Brown the ACPI maintainer again + * fujitsu-laptop: fix regression for P8010 in 2.6.27-rc + * ACPI: Avoid bogus timeout about SMbus check + * acer-wmi: remove debugfs entries upon unloading + * forgotten refcount on sysctl root table + * V4L/DVB (8868): gspca: Support for vga modes with sif sensors in + sonixb. + * V4L/DVB (8869): gspca: Move the Sonix webcams with TAS5110C1B from + sn9c102 to gspca. + * V4L/DVB (8870): gspca: Fix dark room problem with sonixb. + * V4L/DVB (8872): gspca: Bad image format and offset with rev072a of + spca561. + * V4L/DVB (8873): gspca: Bad image offset with rev012a of spca561 and + adjust exposure. + * V4L/DVB (8874): gspca: Adjust hstart for sn9c103/ov7630 and update + usb-id's. 
+ * [ARM] omap: fix virtual vs physical address space confusions + * V4L/DVB (8876): budget: udelay changed to mdelay + * V4L/DVB (8877): b2c2 and bt8xx: udelay to mdelay + * V4L/DVB (8880): PATCH: Fix parents on some webcam drivers + * V4L/DVB (8881): gspca: After 'while (retry--) {...}', retry will be -1 + but not 0. + * powerpc/spufs: Fix multiple get_spu_context() + * powerpc/spufs: Fix race for a free SPU + * Input: bcm5974 - small formatting cleanup + * Input: bcm5974 - improve finger tracking and counting + * Input: bcm5974 - add BTN_TOUCH event for mousedev benefit + * Input: i8042 - make Lenovo 3000 N100 blacklist entry more specific + * sh: resume_kernel fix for kernel oops built with CONFIG_BKL_PREEMPT=y. + * sh64: resume_kernel fix for kernel oops built with + CONFIG_BKL_PREEMPT=y. + * i2c: fix i2c-sh_mobile timing issues + * clockevents: prevent clockevent event_handler ending up handler_noop + * clockevents: prevent endless loop in periodic broadcast handler + * clockevents: enforce reprogram in oneshot setup + * clockevents: prevent multiple init/shutdown + * clockevents: prevent endless loop lockup + * HPET: make minimum reprogramming delta useful + * [MTD] [NAND] tmio_nand: fix base address programming + * Fix conditional export of kvh.h and a.out.h to userspace. + * async_tx: fix the bug in async_tx_run_dependencies + * sched_clock: fix NOHZ interaction + * sched: fix process time monotonicity + * UBIFS: fix division by zero + * UBIFS: make minimum fanout 3 + * [MIPS] Fix data bus error recovery + * [MIPS] Fix WARNING: at kernel/smp.c:290 + * [MIPS] TXx9: Fix txx9_pcode initialization + * [MIPS] TX39xx: Add missing local_flush_icache_range initialization + * [MIPS] Probe initrd header only if explicitly specified + * res_counter: fix off-by-one bug in setting limit + * forcedeth: fix kexec regression + * atmel_lcdfb: fix oops in rmmod when framebuffer fails to register + * tracehook: comment pasto fixes + * drivers/mmc/card/block.c: fix refcount leak in mmc_block_open() + * x86: boot: stub out unimplemented CPU feature words + * x86: add NOPL as a synthetic CPU feature bit + * x86: use X86_FEATURE_NOPL in alternatives + * clockevents: broadcast fixup possible waiters + * x86: HPET fix moronic 32/64bit thinko + * x86: HPET: read back compare register before reading counter + * Fix CONFIG_AC97_BUS dependency + * [ARM] 5241/1: provide ioremap_wc() + * ntp: fix calculation of the next jiffie to trigger RTC sync + * clocksource, acpi_pm.c: use proper read function also in errata mode + * clocksource, acpi_pm.c: check for monotonicity + * x86: delay early cpu initialization until cpuid is done + * x86: move mtrr cpu cap setting early in early_init_xxxx + * sched: arch_reinit_sched_domains() must destroy domains to force + rebuild + * x86, xen: Use native_pte_flags instead of native_pte_val for .pte_flags + * x86: pda_init(): fix memory leak when using CPU hotplug + * x86: cpu_init(): fix memory leak when using CPU hotplug + * powerpc/spufs: Fix possible scheduling of a context to multiple SPEs + * netfilter: nf_conntrack_sip: de-static helper pointers + * netfilter: nf_conntrack_gre: more locking around keymap list + * netfilter: nf_conntrack_gre: nf_ct_gre_keymap_flush() fixlet + * netfilter: nf_conntrack_irc: make sure string is terminated before + calling simple_strtoul + * pkt_sched: Fix qdisc state in net_tx_action() + * powerpc: Fix rare boot build breakage + * ahci, pata_marvell: play nicely together + * sata_mv: add RocketRaid 1720 PCI ID to driver + * ahci: disable PMP 
for marvell ahcis + * sata_nv: disable hardreset for generic + * libata-sff: kill spurious WARN_ON() in ata_hsm_move() + * pata_sil680: remove duplicate pcim_enable_device + * ahci: RAID mode SATA patch for Intel Ibex Peak DeviceIDs + * [MIPS] IP22: Fix detection of second HPC3 on Challenge S + * xen: fix 2.6.27-rc5 xen balloon driver warnings + * x86: disable static NOPLs on 32 bits + * netns : fix kernel panic in timewait socket destruction + * bridge: don't allow setting hello time to zero + * NFS: Restore missing hunk in NFS mount option parser + * usb: fix null deferences in low level usb serial + * Fix format of MAINTAINERS + * sparc64: Disable timer interrupts in fixup_irqs(). + * [Bluetooth] Fix reference counting during ACL config stage + * [Bluetooth] Enforce correct authentication requirements + * [Bluetooth] Reject L2CAP connections on an insecure ACL link + * [S390] CVE-2008-1514: prevent ptrace padding area read/write in 31-bit + mode + * [S390] cio: Correct cleanup on error. + * [S390] cio: handle ssch() return codes correctly. + * [S390] cio: allow offline processing for disconnected devices + * ipsec: Restore larval states and socket policies in dump + * update Documentation/filesystems/Locking for 2.6.27 changes + * MAINTAINERS: add Atheros maintainer for atlx + * lib: Correct printk %pF to work on all architectures + * x86: fix memmap=exactmap boot argument + * clockevents: remove WARN_ON which was used to gather information + * ipv6: Fix OOPS in ip6_dst_lookup_tail(). + * Linux 2.6.27-rc6 + + -- Ben Collins Tue, 02 Sep 2008 12:45:56 -0400 + +linux (2.6.27-2.3) intrepid; urgency=low + + [ Ben Collins ] + + * build/retag: Make script save .orig of tags for later use + * ubuntu/lirc: Fix device_create call + * build/firmware: Put in-kernel firmware into version specific subdir + - LP: #262115 + * Rebase on linux-2.6 git. + * ABI bump + + [ Herton Ronaldo Krzesinski ] + + * SAUCE: (no-up) Apparmor warning fixes + + [ John Johansen ] + + * SAUCE: (no-up) Proper AppArmor ptrace updates for newer lsm API + + [ Mackenzie Morgan ] + + * SAUCE: Add quirk for ASUS Z37E to make sound audible after resume + - LP: #25896 + + -- Ben Collins Wed, 27 Aug 2008 14:03:05 -0400 + +linux (2.6.27-1.2) intrepid; urgency=low + + [ Amit Kucheria ] + + * SAUCE: make fc transport removal of target configurable + * SAUCE: pm: Config option to disable handling of console during + suspend/resume + + [ Ben Collins ] + + * SAUCE: Lower warning level of some PCI messages + * SAUCE: input/mouse/alps: Do not call psmouse_reset() for alps + * SAUCE: tulip: Let dmfe handle davicom on non-sparc + * SAUCE: tulip: Define ULI PCI ID's + * SAUCE: (no-up) version: Implement version_signature proc file. + * SAUCE: (no-up) connector.h: Add idx/val for drbd + * SAUCE: (no-up) swap: Add notify_swap_entry_free callback for compcache + * SAUCE: drivers: Remove some duplicate device entries in various modules + * SAUCE: (no-up) [AppArmor] merge with upstream subversion r1291 + * SAUCE: apparmor: Update for changes to ptrace lsm hooks + * SAUCE: (no-up) Enable ubuntu extra subdirectory + * SAUCE: applesmc: Add MacBookAir + * SAUCE: (no-up) ACPI: initramfs DSDT override support + * ubuntu: Add drbd module + * ubuntu: Add iscsitarget module + * ubuntu: Add BOM for iscsitarget + * ubuntu: Add squashfs driver + * SAUCE: (no-up) Check for squashfs superblock in initramfs mounting. 
+ * ubuntu: Add aufs module + * ubuntu: Added atl2 driver + * ubuntu: Added et131x driver + * ubuntu: Add dm-raid4-5 driver + * ubuntu: Add ndiswrapper driver + * ubuntu: Added ram backed compressed swap module (compcache) + * ubuntu: Add misc drivers from hardy lum + * ubuntu: Add heci driver 3.2.0.24 + * ubuntu: Add ov511 and bt-sco drivers + * ubuntu: Add acx, prism2_usb wireless drivers + * ubuntu: Add at76 driver to build + * ubuntu: Add fsam7400 sw kill switch driver + * ubuntu: Added qc-usb driver + * ubuntu: e1000e: Upgraded module to 0.4.1.7 + * ubuntu: Added rfkill drivers + * ubuntu: VIA - Add VIA DRM Chrome9 3D engine + * ubuntu: unionfs: Added v1.4 module from hardy + * ubuntu: Add LIRC driver + * ubuntu: Add GFS driver + * ubuntu: New tlsup driver for toshiba laptops + * Update config files + * build/d-i: Remove obsolete dm modules + + [ Chuck Short ] + + * SAUCE: ata: blacklist FUJITSU MHW2160BH PL + + [ Colin Ian King ] + + * ubuntu: Add dm-loop + * SAUCE: Enable speedstep for sonoma processors. + + [ Dennis Noordsij ] + + * SAUCE: Work around ACPI corruption upon suspend on some Dell machines. + + [ Fabio M. Di Nitto ] + + * SAUCE: Export gfs2 symbols required for gfs1 kernel module + + [ Matthew Garrett ] + + * SAUCE: hostap: send events on data interface as well as master + interface + + [ Michael Frey (Senior Manager, MID ] + + * SAUCE: Send HCI_RESET for Broadcomm 2046 + + [ Phillip Lougher ] + + * SAUCE: r8169: disable TSO by default for RTL8111/8168B chipsets. + + [ Stefan Bader ] + + * SAUCE: (no-up) Export dm_disk function of device-mapper + * SAUCE: Restore VT fonts on switch + * SAUCE: mmc: Increase power_up deleay to fix TI readers + + [ Tim Gardner ] + + * SAUCE: Add extra headers to linux-libc-dev + * SAUCE: Catch nonsense keycodes and silently ignore + * SAUCE: Added support for HDAPS on various ThinkPads from Lenovo and IBM + * SAUCE: Guest OS does not recognize a lun with non zero target id on + Vmware ESX Server + * SAUCE: (no-up) Take care of orinoco_cs overlap with hostap_cs + * ubuntu: Add GNBD driver + + -- Ben Collins Sat, 23 Aug 2008 15:48:35 -0400 + +linux (2.6.27-0.0) intrepid; urgency=low + + * Not uploaded, placeholder for new release + + -- Ben Collins Sat, 23 Aug 2008 15:48:35 -0400 + +linux (2.6.26-5.17) intrepid; urgency=low + + [ Ben Collins ] + + * build/abi: Add tosh_smm symbol to blacklist + + -- Ben Collins Fri, 15 Aug 2008 09:29:34 -0400 + +linux (2.6.26-5.16) intrepid; urgency=low + + [ Ben Collins ] + + * Revert "SAUCE: toshiba_acpi: Rewrote most of the proc entry bits." + * Revert "SAUCE: Update toshiba_acpi.c to version 0.19a" + * build/config: Disable in-kernel toshiba driver(s) + * ubuntu/tlsup: New driver for toshiba laptops + * build/config: Enable TLSUP driver + * SAUCE: e1000e: Fix E1000E_ENABLED logic to check for our E1000E_NEW + driver as well + * ubuntu/e1000e: Remove E1000E_ENABLED option in local config + * build/config: Update configs to have E1000E_ENABLED set + * ubuntu/prism2: Remove duplicate device + + [ Fabio M. 
Di Nitto ] + + * SAUCE: Export gfs2 symbols required for gfs1 kernel module + + [ Stefan Bader ] + + * SAUCE: x86: HPET rework for SB700 + - LP: #255910 + + [ Tim Gardner ] + + * Add GNBD driver + * Enable GNBD driver + * SAUCE: Add GFS driver + * SAUCE: Enable gfs driver configs + * b43: Linksys WMP54G (BCM4306/3) card in a PCI format has an SPROM + coding + + [ Upstream Kernel Changes ] + + * KVM: x86 emulator: emulate clflush + * USB: quirk PLL power down mode + + -- Ben Collins Mon, 11 Aug 2008 13:19:28 -0400 + +linux (2.6.26-5.15) intrepid; urgency=low + + [ Ben Collins ] + + * Revert "SAUCE: Add blacklist support to fix Belkin bluetooth dongle." + - Superceded by upstream changes. + * build/config: New option enabled for uvcvideo + * build/control: Add Vcs-Git meta data to control file + * SAUCE: toshiba_acpi: Rewrote most of the new code + * abi/perm-blacklist: Add emu10k1 driver to blacklist + + [ Upstream Kernel Changes ] + + * pxamci: trivial fix of DMA alignment register bit clearing + * udplite: Protection against coverage value wrap-around + * ipv6: use timer pending + * ipv6: __KERNEL__ ifdef struct ipv6_devconf + * hdlcdrv: Fix CRC calculation. + * quota: fix possible infinite loop in quota code + * isofs: fix minor filesystem corruption + * KVM: VMX: Fix a wrong usage of vmcs_config + * KVM: SVM: fix suspend/resume support + * KVM: mmu_shrink: kvm_mmu_zap_page requires slots_lock to be held + * KVM: VMX: Add ept_sync_context in flush_tlb + * KVM: x86 emulator: Fix HLT instruction + * KVM: MMU: nuke shadowed pgtable pages and ptes on memslot destruction + * KVM: MMU: Fix potential race setting upper shadow ptes on nonpae hosts + * Patch Upstream: x86 ptrace: fix PTRACE_GETFPXREGS error + * rcu: fix rcu_try_flip_waitack_needed() to prevent grace-period stall + * Fix typos from signal_32/64.h merge + * x86 reboot quirks: add Dell Precision WorkStation T5400 + * USB: fix usb serial pm counter decrement for disconnected interfaces + * x86, suspend, acpi: enter Big Real Mode + * markers: fix duplicate modpost entry + * Fix build on COMPAT platforms when CONFIG_EPOLL is disabled + * proc: fix /proc/*/pagemap some more + * cpusets: fix wrong domain attr updates + * x86: fix crash due to missing debugctlmsr on AMD K6-3 + * ide-cd: fix oops when using growisofs + * rtc-at91rm9200: avoid spurious irqs + * vmlinux.lds: move __attribute__((__cold__)) functions back into final + .text section + * ARM: fix fls() for 64-bit arguments + * tcp: Clear probes_out more aggressively in tcp_ack(). + * sparc64: Fix lockdep issues in LDC protocol layer. + * sparc64: Fix cpufreq notifier registry. + * sparc64: Do not define BIO_VMERGE_BOUNDARY. 
+ * iop-adma: fix platform driver hotplug/coldplug + * myri10ge: do not forget to setup the single slice pointers + * myri10ge: do not use mgp->max_intr_slots before loading the firmware + * ALSA: trident - pause s/pdif output + * V4L: cx18: Upgrade to newer firmware & update documentation + * DVB: dib0700: add support for Hauppauge Nova-TD Stick 52009 + * V4L: uvcvideo: Fix a buffer overflow in format descriptor parsing + * V4L: uvcvideo: Use GFP_NOIO when allocating memory during resume + * V4L: uvcvideo: Don't free URB buffers on suspend + * V4L: uvcvideo: Make input device support optional + * V4L: uvcvideo: Add support for Medion Akoya Mini E1210 integrated + webcam + * V4L: saa7134: Copy tuner data earlier to avoid overwriting manual tuner + type + * V4L: cx23885: Bugfix for concurrent use of /dev/video0 and /dev/video1 + * DVB: cx23885: Ensure PAD_CTRL is always reset to a sensible default + * DVB: cx23885: DVB Transport cards using DVB port VIDB/TS1 did not + stream + * DVB: cx23885: Reallocated the sram to avoid concurrent VIDB/C issues + * DVB: cx23885: SRAM changes for the 885 and 887 silicon parts + * x86: fix kernel_physical_mapping_init() for large x86 systems + * eCryptfs: use page_alloc not kmalloc to get a page of memory + * UML - Fix boot crash + * ixgbe: remove device ID for unsupported device + * mpc52xx_psc_spi: fix block transfer + * tmpfs: fix kernel BUG in shmem_delete_inode + * markers: fix markers read barrier for multiple probes + * VFS: increase pseudo-filesystem block size to PAGE_SIZE + * cpufreq acpi: only call _PPC after cpufreq ACPI init funcs got called + already + * b43legacy: Release mutex in error handling code + * ath5k: don't enable MSI, we cannot handle it yet + * Fix off-by-one error in iov_iter_advance() + * Linux 2.6.26.1 + * ftrace: remove unneeded documentation + * romfs_readpage: don't report errors for pages beyond i_size + * netfilter: nf_nat_sip: c= is optional for session + * SCSI: bsg: fix bsg_mutex hang with device removal + * x86: idle process - add checking for NULL early param + * x86: io delay - add checking for NULL early param + * Close race in md_probe + * Kprobe smoke test lockdep warning + * netfilter: xt_time: fix time's time_mt()'s use of do_div() + * linear: correct disk numbering error check + * SCSI: ch: fix ch_remove oops + * NFS: Ensure we zap only the access and acl caches when setting new acls + * jbd: fix race between free buffer and commit transaction + * Input: i8042 - add Intel D845PESV to nopnp list + * Input: i8042 - add Gericom Bellagio to nomux blacklist + * Input: i8042 - add Acer Aspire 1360 to nomux blacklist + * Bluetooth: Signal user-space for HIDP and BNEP socket errors + * Add compat handler for PTRACE_GETSIGINFO + * ALSA: hda - Fix wrong volumes in AD1988 auto-probe mode + * ALSA: hda - Fix DMA position inaccuracy + * ALSA: hda - Add missing Thinkpad Z60m support + * ALSA: emu10k1 - Fix inverted Analog/Digital mixer switch on Audigy2 + * vfs: fix lookup on deleted directory + * Ath5k: fix memory corruption + * Ath5k: kill tasklets on shutdown + * sound: ensure device number is valid in snd_seq_oss_synth_make_info + * Linux 2.6.26.2 + + -- Ben Collins Sun, 03 Aug 2008 13:25:02 -0400 + +linux (2.6.26-5.14) intrepid; urgency=low + + [ Ben Collins ] + + * SAUCE: applesmc: Add MacBookAir + * build: Do not build ddeb unless we are on the buildd + * build: control: Consistency in arch fields. 
+ * SAUCE: Update toshiba_acpi.c to version 0.19a + - LP: #77026 + * build: Added perm blacklist support and per-module support to abi-check + - Blacklist p80211 module from abi checks + * ubuntu/lirc: Get rid of drivers symlink and use real include stuff + + + [ Colin Ian King ] + + * SAUCE: acerhk module - add support for Amilo A1650g keyboard + - LP: #84159 + * SAUCE: rt2x00: Fix OOPS on failed creation of rt2x00lib workqueue + - LP: #249242 + + [ Mario Limonciello ] + + * Add LIRC back in + + [ Tim Gardner ] + + * Makefile race condition can lead to ndiswrapper build failure + - LP: #241547 + * update linux-wlan-ng (prism2_usb) to upstream version 1861 + - LP: #245026 + + [ Upstream Kernel Changes ] + + * Fix typos from signal_32/64.h merge + + -- Ben Collins Fri, 01 Aug 2008 00:05:01 -0400 + +linux (2.6.26-5.13) intrepid; urgency=low + + [ Ben Collins ] + + * build: Make makedumpfile an amd64/i386 only build-dep + * ubuntu/acerhk: Fixup assembly to compile with newer binutils + + -- Ben Collins Sat, 26 Jul 2008 16:41:50 -0400 + +linux (2.6.26-4.12) intrepid; urgency=low + + [ Ben Collins ] + + * e1000e: Upgraded module to 0.4.1.7 upstream. Placed in ubuntu/, + in-kernel driver disabled + * config: Disable e1000e in-kernel, and enable newer driver in ubuntu/ + * rfkill: Update to 1.3 drivers, and move to common location + * ubuntu: Actually link kconfig/kbuild into rfkill subdir + * config: Enable loading dsdt from initramfs + - LP: #246222 + * ubuntu: [compcache] Update to fix crashes in improper BUG() + * build: Create a retag scripts to recover tags from rebases + * build: Updates for dbg pkg + * build: Make sure no empty lines show up in debian/files + * ubuntu: atl1e: Add new driver from 2.6.27-pre-rc1 + - LP: #243894 + * sys_getcwd: Fix some brokeness introduced by AppArmor __d_path + changes + - LP: #251223 + * ubuntu: unionfs: Added v1.4 module from hardy + * build: Add sub-flavour infrastructure, and virtual subflav + + [ Eric Piel ] + + * ACPI: Allow custom DSDT tables to be loaded from initramfs + + [ Kees Cook ] + + * AppArmor: Smack VFS patches + + [ Mario Limonciello ] + + * Work around ACPI corruption upon suspend on some Dell machines. + - LP: #183033 + + [ Tim Gardner ] + + * Export usbhid_modify_dquirk for LBM module bcm5974 + - LP: #250838 + * VIA - Add VIA DRM Chrome9 3D engine + - LP: #251862 + * Define TRUE/FALSE for VIA DRM driver. + + -- Ben Collins Tue, 15 Jul 2008 12:51:39 -0400 + +linux (2.6.26-4.11) intrepid; urgency=low + + [ Ben Collins ] + + * config: Enable bcm5974 driver in all configs + + [ 2.6.26-4.10 ] + + [ Amit Kucheria ] + + * Fix typo in GSPCA Makefile and make it compile + + [ Ben Collins ] + + * ubuntu: Remove UVC driver in favor of in-kernel one (-rc9) + * config: Updates for -rc9 + * ubuntu: Add acx, prism2_usb wireless drivers + * config: Enable prism2_usb and acx drivers. + * ubuntu: Add at76 driver to build + * config: Enable at76_usb driver. + * iscsitarget: Fix prototype for bi_end_io callback. 
+ * acx: Fix section type mismatch warnings + * fsam7400: Add sw kill switch driver + * config: Enable fsam7400 driver + * qc-usb: Added new driver + * config: Enable qc-usb driver + * drbd: Remove built-in connector usage + * drbd: Do not define idx/val for connector here + * connector.h: Add idx/val for drbd + * bcm5974: Added new driver + + [ Kees Cook ] + + * SAUCE: [AppArmor] merge with upstream subversion r1291 + * SAUCE: [AppArmor] fix typo in selinux_inode_link + * SAUCE: [AppArmor] aufs patches + + [ Michael Frey (Senior Manager, MID ] + + * SAUCE: Send HCI_RESET for Broadcomm 2046 + - LP: #241749 + + [ Tim Gardner ] + + * SAUCE: Medion Akoya Mini E1210 + + [ Upstream Kernel Changes ] + + * Revert "BAST: Remove old IDE driver" + * ARM: OMAP: DMA: Don't mark channel active in omap_enable_channel_irq + * ARM: OMAP: Correcting the gpmc prefetch control register address + * debugobjects: fix lockdep warning + * [ARM] 5115/1: pxafb: fix ifdef for command line option handling + * [ARM] 5116/1: pxafb: cleanup and fix order of failure handling + * [ARM] 5109/1: Mark rtc sa1100 driver as wakeup source before + registering it + * [ARM] Export dma_sync_sg_for_device() + * fix cgroup-inflicted breakage in block_dev.c + * [patch for 2.6.26 2/4] vfs: utimensat(): be consistent with utime() for + immutable and append-only files + * [patch for 2.6.26 1/4] vfs: utimensat(): ignore tv_sec if tv_nsec == + UTIME_OMIT or UTIME_NOW + * [patch for 2.6.26 3/4] vfs: utimensat(): fix error checking for + {UTIME_NOW,UTIME_OMIT} case + * [patch for 2.6.26 4/4] vfs: utimensat(): fix write access check for + futimens() + * [patch 1/4] vfs: path_{get,put}() cleanups + * [patch 2/4] fs: make struct file arg to d_path const + * [patch 3/4] vfs: fix ERR_PTR abuse in generic_readlink + * [patch 4/4] flock: remove unused fields from file_lock_operations + * [patch 3/3] vfs: make d_path() consistent across mount operations + * [patch 1/3] vfs: dcache sparse fixes + * [patch 2/3] vfs: dcache cleanups + * udf: Fix regression in UDF anchor block detection + * [SCSI] ses: Fix timeout + * netfilter: ip6table_mangle: don't reroute in LOCAL_IN + * [SCSI] esp: Fix OOPS in esp_reset_cleanup(). + * kernel/audit.c: nlh->nlmsg_type is gotten more than once + * audit: fix kernel-doc parameter notation + * remove useless argument type in audit_filter_user() + * Blackfin arch: fix bug - kernel boot fails when Spinlock and rw-lock + debugging enabled + * Blackfin arch: fix up section mismatch warning + * mac80211: implement EU regulatory domain + * b43: Do not return TX_BUSY from op_tx + * b43legacy: Do not return TX_BUSY from op_tx + * b43: Fix possible MMIO access while device is down + * b43legacy: Fix possible NULL pointer dereference in DMA code + * rt2x00: Fix unbalanced mutex locking + * iwlwifi: improve scanning band selection management + * [SCSI] esp: tidy up target reference counting + * [ARM] 5117/1: pxafb: fix __devinit/exit annotations + * thermal: Create CONFIG_THERMAL_HWMON=n + * ACPI: don't walk tables if ACPI was disabled + * dock: bay: Don't call acpi_walk_namespace() when ACPI is disabled. 
+ * x86: shift bits the right way in native_read_tscp + * x86: section/warning fixes + * V4L/DVB (8004): Fix INPUT dependency at budget-ci + * V4L/DVB (8005): Fix OOPS if frontend is null + * V4L/DVB (8007): cx18/cx25840: the S-Video LUMA input can use all + In1-In8 inputs + * V4L/DVB (8008): cx18: remove duplicate audio and video input enums + * V4L/DVB (8010): em28xx: Properly register extensions for already + attached devices + * V4L/DVB (8011): em28xx: enable DVB for HVR-900 + * V4L/DVB (8012): gl861: sleep a little to avoid I2C errors + * V4L/DVB (8013): gl861: remove useless identify_state + * V4L/DVB (8015): gl861: replace non critical msleep(0) with msleep(1) to + be on the safe side + * V4L/DVB (8017): Ensure em28xx extensions only get run against devs that + support them + * V4L/DVB (8018): Add em2860 chip ID + * V4L/DVB (8020): Fix callbacks functions of saa7134_empress + * V4L/DVB (8022): saa7134: fix race between opening and closing the + device + * V4L/DVB (8026): Avoids an OOPS if dev struct can't be successfully + recovered + * V4L/DVB (8027): saa7134: Avermedia A700: only s-video and composite + input are working + * V4L/DVB (8028): Improve error messages for tda1004x attach + * V4L/DVB (8029): Improve error message at tda1004x_attach + * V4L/DVB (8034): tda18271: fix IF notch frequency handling + * V4L/DVB (8035): tda18271: dont touch EB14 if rf_cal lookup is out of + range + * V4L/DVB (8036): tda18271: toggle rf agc speed mode on TDA18271HD/C2 + only + * V4L/DVB (8037): tda18271: ensure that the thermometer is off during + channel configuration + * V4L/DVB (8039): pxa-camera: fix platform_get_irq() error handling. + * V4L/DVB (8040): soc-camera: remove soc_camera_host_class class + * V4L/DVB (8042): DVB-USB UMT-010 channel scan oops + * V4L/DVB (8043): au0828: add support for additional USB device id's + * V4L/DVB (8044): au8522: tuning optimizations + * V4L/DVB (8048): saa7134: Fix entries for Avermedia A16d and Avermedia + E506 + * V4L/DVB (8061): cx18: only select tuner / frontend modules if + !DVB_FE_CUSTOMISE + * V4L/DVB (8063): cx18: Fix unintended auto configurations in + cx18-av-core + * V4L/DVB (8066): cx18: Fix audio mux input definitions for HVR-1600 Line + In 2 and FM radio + * V4L/DVB (8067): cx18: Fix firmware load for case when digital capture + happens first + * V4L/DVB (8068): cx18: Add I2C slave reset via GPIO upon initialization + * V4L/DVB (8069): cx18: Fix S-Video and Compsite inputs for the Yuan + MPC718 and enable card entry + * V4L/DVB (8071): tda10023: Fix possible kernel oops during + initialisation + * V4L/DVB (8073): av7110: Catch another type of ARM crash + * V4L/DVB (8074): av7110: OSD transfers should not be interrupted + * V4L/DVB (8075): stv0299: Uncorrected block count and bit error rate + fixed + * V4L/DVB (8092): videodev: simplify and fix standard enumeration + * V4L/DVB (8096): au8522: prevent false-positive lock status + * V4L/DVB (8097): xc5000: check device hardware state to determine if + firmware download is needed + * V4L/DVB (8100): V4L/vivi: fix possible memory leak in vivi_fillbuff + * V4L/DVB (8108): Fix open/close race in saa7134 + * s2io: fix documentation about intr_type + * tc35815: Mark carrier-off before starting PHY + * tc35815: Fix receiver hangup on Rx FIFO overflow + * ixgbe: fix EEH recovery during reset on PPC + * igb: fix EEH recovery during reset on PPC + * e1000e: fix EEH recovery during reset on PPC + * pcnet_cs, axnet_cs: clear bogus interrupt before request_irq + * drivers/net/r6040.c: Eliminate double 
sizeof + * ipg: fix jumbo frame compilation + * ipg: use NULL, not zero, for pointers + * [netdrvr] 3c59x: remove irqs_disabled warning from local_bh_enable + * [netdrvr] netxen: fix netxen_pci_tbl[] breakage + * e100: Do pci_dma_sync after skb_alloc for proper operation on ixp4xx + * e1000: only enable TSO6 via ethtool when using correct hardware + * [netdrvr] Fix IOMMU overflow checking in s2io.c + * qla3xxx: Hold RTNL while calling dev_close() + * Hold RTNL while calling dev_close() + * sata_uli: hardreset is broken + * rt2x00: Fix lock dependency errror + * prism: islpci_eth.c endianness fix + * mac80211: fix an oops in several failure paths in key allocation + * firewire: fw-sbp2: fix parsing of logical unit directories + * kbuild: fix a.out.h export to userspace with O= build. + * Ensure interrupted recovery completed properly (v1 metadata plus + bitmap) + * Don't acknowlege that stripe-expand is complete until it really is. + * Fix error paths if md_probe fails. + * hamradio: remove unused variable + * tcp: calculate tcp_mem based on low memory instead of all memory + * tcp: fix for splice receive when used with software LRO + * af_unix: fix 'poll for write'/connected DGRAM sockets + * netdevice: Fix typo of dev_unicast_add() comment + * pkt_sched: ERR_PTR() ususally encodes an negative errno, not positive. + * pkt_sched: Remove CONFIG_NET_SCH_RR + * include/linux/netdevice.h: don't export MAX_HEADER to userspace + * tcp: /proc/net/tcp rto,ato values not scaled properly (v2) + * netlink: Fix some doc comments in net/netlink/attr.c + * CONNECTOR: add a proc entry to list connectors + * inet fragments: fix race between inet_frag_find and + inet_frag_secret_rebuild + * net/inet_lro: remove setting skb->ip_summed when not LRO-able + * netlabel: Fix a problem when dumping the default IPv6 static labels + * ipv6 route: Convert rt6_device_match() to use RT6_LOOKUP_F_xxx flags. + * sched: fix cpu hotplug + * Fix and clean top .gitignore + * x86: fix cpu hotplug crash + * ptrace GET/SET FPXREGS broken + * Input: add KEY_MEDIA_REPEAT definition + * Input: fix locking in force-feedback core + * [ARM] 5131/1: Annotate platform_secondary_init with trace_hardirqs_off + * ide: fix /proc/ide/ide?/mate reporting + * netfilter: nf_conntrack_tcp: fixing to check the lower bound of valid + ACK + * textsearch: fix Boyer-Moore text search bug + * hostap: don't report useless WDS frames by default + * hostap: fix sparse warnings + * mac80211: don't accept WEP keys other than WEP40 and WEP104 + * V4L/DVB (8145a): USB Video Class driver + * [IA64] Bugfix for system with 32 cpus + * [IA64] export account_system_vtime + * sched: fix divide error when trying to configure rt_period to zero + * x86: fix NODES_SHIFT Kconfig range + * block: Fix the starving writes bug in the anticipatory IO scheduler + * Properly notify block layer of sync writes + * rcu: fix hotplug vs rcu race + * I2C: S3C2410: Check ACK on byte transmission + * I2C: S3C2410: Fixup error codes returned rom a transfer. + * I2C: S3C2410: Add MODULE_ALIAS() for s3c2440 device. 
+ * PCI: Restrict VPD read permission to root + * powerpc/bootwrapper: update for initrd with simpleImage + * i2c: Documentation: fix device matching description + * i2c: Fix bad hint about irqs in i2c.h + * powerpc/legacy_serial: Bail if reg-offset/shift properties are present + * powerpc/mpc5200: Fix lite5200b suspend/resume + * ipv4: fix sysctl documentation of time related values + * net-sched: change tcf_destroy_chain() to clear start of filter list + * net-sched: fix filter destruction in atm/hfsc qdisc destruction + * netlink: Unneeded local variable + * net: Tyop of sk_filter() comment + * netdevice: Fix wrong string handle in kernel command line parsing + * net: fib_rules: fix error code for unsupported families + * dm crypt: use cond_resched + * V4L/DVB (8178): uvc: Fix compilation breakage for the other drivers, if + uvc is selected + * PCI: Limit VPD read/write lengths for Broadcom 5706, 5708, 5709 rev. + * PCI: acpiphp: cleanup notify handler on all root bridges + * drivers/input/ff-core.c needs + * DRM/i915: only use tiled blits on 965+ + * tty: Fix inverted logic in send_break + * x86: fix Intel Mac booting with EFI + * arch/x86/mm/init_64.c: early_memtest(): fix types + * 9p: fix O_APPEND in legacy mode + * slub: Do not use 192 byte sized cache if minimum alignment is 128 byte + * Do not overwrite nr_zones on !NUMA when initialising zlcache_ptr + * [MIPS] IP32: Fix unexpected irq 71 + * [MIPS] IP22: Fix crashes due to wrong L1_CACHE_BYTES + * [MIPS] cevt-txx9: Reset timer counter on initialization + * hrtimer: prevent migration for raising softirq + * svcrpc: fix handling of garbage args + * OHCI: Fix problem if SM501 and another platform driver is selected + * USB: fix cdc-acm resume() + * USB: ehci - fix timer regression + * USB: ohci - record data toggle after unlink + * USB: mass storage: new id for US_SC_CYP_ATACB + * sisusbvga: Fix oops on disconnect. + * USB: New device ID for ftdi_sio driver + * USB: fix interrupt disabling for HCDs with shared interrupt handlers + * USB: don't lose disconnections during suspend + * USB: another option device id + * USB: add a pl2303 device id + * USB: fix Oops on loading ipaq module since 2.6.26 + * USB: adding comment for ipaq forcing number of ports + * [MIPS] Fix bug in atomic_sub_if_positive. + * xen: fix address truncation in pte mfn<->pfn conversion + * sata_sil24: add DID for another adaptec flavor + * ahci: always clear all bits in irq_stat + * libata-sff: improve HSM violation reporting + * sata_mv: safer logic for limit_warnings + * Update maintainers for powerpc + * Christoph has moved + * mm: dirty page accounting vs VM_MIXEDMAP + * rtc: rtc_read_alarm() handles wraparound + * firmware: fix the request_firmware() dummy + * serial: fix serial_match_port() for dynamic major tty-device numbers + * get_user_pages(): fix possible page leak on oom + * rtc-x1205: Fix alarm set + * rtc: fix CMOS time error after writing /proc/acpi/alarm + * pci: VT3336 can't do MSI either + * Miguel Ojeda has moved + * ext3: add missing unlock to error path in ext3_quota_write() + * ext4: add missing unlock to an error path in ext4_quota_write() + * reiserfs: add missing unlock to an error path in reiserfs_quota_write() + * ecryptfs: remove unnecessary mux from ecryptfs_init_ecryptfs_miscdev() + * lib: taint kernel in common report_bug() WARN path. 
+ * gpio: pca953x (i2c) handles max7310 too + * fsl_diu_fb: fix build with CONFIG_PM=y, plus fix some warnings + * Update taskstats-struct document for scaled time accounting + * cciss: fix regression that no device nodes are created if no logical + drives are configured. + * delay accounting: maintainer update + * Doc*/kernel-parameters.txt: fix stale references + * hdaps: add support for various newer Lenovo thinkpads + * mn10300: export certain arch symbols required to build allmodconfig + * mn10300: provide __ucmpdi2() for MN10300 + * Introduce rculist.h + * man-pages is supported + * ntfs: update help text + * add kernel-doc for simple_read_from_buffer and memory_read_from_buffer + * w100fb: do not depend on SHARPSL + * w100fb: add 80 MHz modeline + * MFD maintainer + * cgroups: document the effect of attaching PID 0 to a cgroup + * spi: fix the read path in spidev + * doc: doc maintainers + * security: filesystem capabilities: fix fragile setuid fixup code + * security: filesystem capabilities: fix CAP_SETPCAP handling + * Alpha Linux kernel fails with inconsistent kallsyms data + * cpusets: document proc status cpus and mems allowed lists + * MAINTAINERS: update the email address of Andreas Dilger + * cciss: read config to obtain max outstanding commands per controller + * olpc: sdhci: add quirk for the Marvell CaFe's vdd/powerup issue + * olpc: sdhci: add quirk for the Marvell CaFe's interrupt timeout + * cpumask: introduce new APIs + * mm: switch node meminfo Active & Inactive pages to Kbytes + * Update MAINTAINERS file for the TPM device driver + * devcgroup: fix odd behaviour when writing 'a' to devices.allow + * doc: document the relax_domain_level kernel boot argument + * mmc: don't use DMA on newer ENE controllers + * mempolicy: mask off internal flags for userspace API + * x86 ACPI: normalize segment descriptor register on resume + * x86 ACPI: fix resume from suspend to RAM on uniprocessor x86-64 + * softlockup: print a module list on being stuck + * ide: fix hwif->gendev refcounting + * ide: ide_unregister() warm-plug bugfix + * ide: ide_unregister() locking bugfix + * ahci: give another shot at clearing all bits in irq_stat + * Fix clear_refs_write() use of struct mm_walk + * Move _RET_IP_ and _THIS_IP_ to include/linux/kernel.h + * Fix pagemap_read() use of struct mm_walk + * Linux 2.6.26-rc9 + * Revert "USB: don't explicitly reenable root-hub status interrupts" + * Revert "PCI: Correct last two HP entries in the bfsort whitelist" + * iwlwifi: fix incorrect 5GHz rates reported in monitor mode + * iwlwifi: drop skb silently for Tx request in monitor mode + * libertas: support USB persistence on suspend/resume (resend) + * tcp: net/ipv4/tcp.c needs linux/scatterlist.h + * tcp: fix a size_t < 0 comparison in tcp_read_sock + * bridge: fix use-after-free in br_cleanup_bridges() + * Add missing skb->dev assignment in Frame Relay RX code + * forcedeth: fix lockdep warning on ethtool -s + * ehea: fix might sleep problem + * ehea: add MODULE_DEVICE_TABLE + * ehea: fix race condition + * ehea: Access iph->tot_len with correct endianness + * pasemi_mac: Access iph->tot_len with correct endianness + * ibm_newemac: Fixes kernel crashes when speed of cable connected changes + * ibm_newemac: Fixes entry of short packets + * fs_enet: restore promiscuous and multicast settings in restart() + * can: add sanity checks + * x86: KVM guest: Add memory clobber to hypercalls + * KVM: IOAPIC: Fix level-triggered irq injection hang + * [SCSI] erase invalid data returned by device + * pxamci: fix 
byte aligned DMA transfers + * vsprintf: split out '%s' handling logic + * vsprintf: split out '%p' handling logic + * vsprintf: add infrastructure support for extended '%p' specifiers + * vsprintf: add support for '%pS' and '%pF' pointer formats + * powerpc: Fix unterminated of_device_id array in legacy_serial.c + * [UML] fix gcc ICEs and unresolved externs + * ocfs2/dlm: Fixes oops in dlm_new_lockres() + * hostap_cs: correct poor NULL checks in suspend/resume routines + * drivers/net/wireless/iwlwifi/iwl-3945.c Fix type issue on 64bit + * mac80211: move netif_carrier_on to after + ieee80211_bss_info_change_notify + * mac80211: Only flush workqueue when last interface was removed + * zd1211rw: add ID for AirTies WUS-201 + * ssb-pcicore: Fix IRQ-vector init on embedded devices + * mac80211: don't report selected IBSS when not found + * crypto: tcrypt - Fix memory leak in test_cipher + * sctp: Mark the tsn as received after all allocations finish + * [S390] protect _PAGE_SPECIAL bit against mprotect + * irda: via-ircc proper dma freeing + * irda: New device ID for nsc-ircc + * irda: Fix netlink error path return value + * [SCSI] mptspi: fix oops in mptspi_dv_renegotiate_work() + * Correct hash flushing from huge_ptep_set_wrprotect() + * ide: add __ide_default_irq() inline helper + * palm_bk3710: fix IDECLK period calculation + * it8213: fix return value in it8213_init_one() + * [MIPS] Atlas, decstation: Fix section mismatches triggered by + defconfigs + * [MIPS] Fix 32bit kernels on R4k with 128 byte cache line size + * NFS: Fix readdir cache invalidation + * SUNRPC: Fix a double-free in rpcbind + * SUNRPC: Fix an rpcbind breakage for the case of IPv6 lookups + * reiserfs: discard prealloc in reiserfs_delete_inode + * Fix broken fix for fsl-diu-db + * RDMA/cxgb3: Fix regression caused by class_device -> device conversion + * ipv6: fix race between ipv6_del_addr and DAD timer + * sctp: Add documentation for sctp sysctl variable + * kernel/printk.c: Made printk_recursion_bug_msg static. + * powerpc: Add missing reference to coherent_dma_mask + * rc80211_pid: Fix fast_start parameter handling + * rt2x00: Disable synchronization during initialization + * zd1211rw: stop beacons on remove_interface + * libertas: fix memory alignment problems on the blackfin + * netfilter: nf_conntrack_tcp: fix endless loop + * netfilter: nf_nat_snmp_basic: fix a range check in NAT for SNMP + * md: ensure all blocks are uptodate or locked when syncing + * sched: fix cpu hotplug + * x86: fix /dev/mem compatibility under PAT + * crypto: chainiv - Invoke completion function + * ocfs2: Fix flags in ocfs2_file_lock + * kernel/kprobes.c: Made kprobe_blacklist static. + * arch/x86/kernel/.gitignore: Added vmlinux.lds to .gitignore file + because it shouldn't be tracked. 
+ * ftrace: Documentation + * Fix PREEMPT_RCU without HOTPLUG_CPU + * sched: fix cpu hotplug, cleanup + * exec: fix stack excutability without PT_GNU_STACK + * slub: Fix use-after-preempt of per-CPU data structure + * Documentation: clarify tcp_{r,w}mem sysctl docs + * ip: sysctl documentation cleanup + * tcp: correct kcalloc usage + * ipv4: fib_trie: Fix lookup error return + * netlabel: netlink_unicast calls kfree_skb on error path by itself + * ipv6: missed namespace context in ipv6_rthdr_rcv + * xfrm: Add a XFRM_STATE_AF_UNSPEC flag to xfrm_usersa_info + * tun: Persistent devices can get stuck in xoff state + * tpm: add Intel TPM TIS device HID + * rapidio: fix device reference counting + * Fix name of Russell King in various comments + * rtc: fix reported IRQ rate for when HPET is enabled + * libata-acpi: filter out DIPM enable + * Added Targa Visionary 1000 IDE adapter to pata_sis.c + * libata-acpi: don't call sleeping function from invalid context + * Fix reference counting race on log buffers + * [SCSI] ipr: Fix HDIO_GET_IDENTITY oops for SATA devices + * IPMI: return correct value from ipmi_write + * x86: fix ldt limit for 64 bit + * [SCSI] fusion: default MSI to disabled for SPI and FC controllers + * [SCSI] bsg: fix oops on remove + * drivers/char/pcmcia/ipwireless/hardware.c fix resource leak + * drivers/isdn/i4l/isdn_common.c fix small resource leak + * fbdev: bugfix for multiprocess defio + * serial8250: sanity check nr_uarts on all paths. + * ov7670: clean up ov7670_read semantics + * rtc-fm3130: fix chip naming + * rtc-pcf8563: add chip id + * OProfile kernel maintainership changes + * frv: fix irqs_disabled() to return an int, not an unsigned long + * cifs: fix inode leak in cifs_get_inode_info_unix + * cifs: fix wksidarr declaration to be big-endian friendly + * cpusets, hotplug, scheduler: fix scheduler domain breakage + * Documentation/HOWTO: correct wrong kernel bugzilla FAQ URL + * devcgroup: always show positive major/minor num + * devcgroup: fix permission check when adding entry to child cgroup + * Linux 2.6.26 + + -- Ben Collins Mon, 14 Jul 2008 13:41:50 -0400 + +linux (2.6.26-3.9) intrepid; urgency=low + + * abi: Add dca and ioatdma to modules.ignore + + [ 2.6.26-3.8 ] + + [ Ben Collins ] + + * ubuntu: Add heci driver 3.2.0.24 + * ubuntu: Add heci to kconfig/kbuild + * config: Enable heci module on all flavours + * dm-bbr: Update to get it to compile with 2.6.26 + * config: Enable dm-bbr + * ubuntu: Add some media drivers + * config: Enable misc media drivers + * udeb: Switch to uvesafb in fb-modules + * abi: Add more modules to ignore (known) + + [ 2.6.26-3.7 ] + + [Amit Kucheria] + + * SAUCE: make fc transport removal of target configurable + - LP: #163075 + * SAUCE: pm: Config option to disable handling of console during + suspend/resume + + [Ben Collins] + + * SAUCE: input/mouse/alps: Do not call psmouse_reset() for alps + * SAUCE: irda: Default to dongle type 9 on IBM hardware + * SAUCE: tulip: Let dmfe handle davicom on non-sparc + * SAUCE: tulip: Define ULI PCI ID's + * SAUCE: version: Implement version_signature proc file. 
+ * build: Cleanup arches + * build: Remove remnants of unused binary-custom infrastructure + * build: Remove disable_d_i (not needed) and cleanup ppa build stuff + * ubuntu: New modules, acer-acpi + * build: Remove -virtual, and rebuild configs + * ubuntu: Add drbd module + * acer-acpi: Fix makefile + * x86/Kconfig: Fix missing quote for ubuntu Kconfig source + * ubuntu: Add iscsitarget module + * ubuntu: Added Amiga FS driver + * ubuntu: Add squashfs driver + * ubuntu: Remove asfs (Amiga FS). Need to be in linux-ports instead + * squashfs: Move headers to real include directory + * build/configs: The Great Config Consistency Check of 2008 + * ubuntu: Move third-party includes to ubuntu/include + * ubuntu: Add aufs module + * ubuntu: Added atl2 driver + * ubuntu: Add dm-radi4-5 driver + * build: Add CONFIG_DEBUG_SECTION_MISMATCH=y to get old style warnings + from build + * ubuntu/Makefile: Fixup dm-raid4-5 and add kludge for kbuild + * squashfs: Fixes for VFS changes + * ubuntu/dm-raid4-5: Fixups for moved/renamed headers/functions in core + md + * ubuntu: Add ndiswrapper driver + * d-i: Update module listings + * build: Disable xd block device (ancient) + * ndiswrapper: Fixup makefile + * d-i: Remove efi-modules. The only module, efivars, is built-in + * build: Remove install-source, obsolete and caused build failure + * Ubuntu-2.6.26-1.3 + * build: linux-doc rules got broken when disabling html side. Fixed now. + * Ubuntu-2.6.26-1.4 + * x86: Update to -rc6 allows CONFIG_PCI_OLPC to work with PCI_GOANY + * d-i: Make virtio-ring optional (it's built-in on i386) + * Ubuntu-2.6.26-1.4 + * Ubuntu-2.6.26-1.5 + * config: Enable DVB devices + * ubuntu/aufs: Make aufs a bool config, since it needs to be built-in + * config: Build aufs into the kernels + * build: Fix arguments passed to link-headers script + * config: Disable early printk + * d-i: Move isofs to storage-core and kill st (scsi tape) from list + * config: Enable non-promiscuous access to /dev/mem + * x86: Add option to disable decompression info messages + * config: Enable no-bz-chatter config options + * build: Re-add linux-source package + * d-i: Re-add socket-modules. Accidentally removed + - LP: #241295 + * Ubuntu-2.6.26-2.6 + * Use makedumpfile to generate a vmcoreinfo file. + * build: Build-Depend on makedumpfile for vmcoreinfo generation + * build: Remove debug print from git-ubuntu-log + * Updated configs for -rc7 + * build: postinst, do not call depmod with -F + * config: Enable rtc-cmos as a built-in driver. + * control: Provide ndiswrapper-modules-1.9 + * build: Generate vmcoreinfo in image build for crashdumps without debug + image + * config: Disable vesafb, since we'll prefer uvesafb + * build: Copy uvesafb module to initrd mod directory + * abi-check: New, more robust script + * config: Enable heap randomization by default + * abi-check: Cleanup output and call with perl (not $SHELL) + * abi: Ignore missing vesafb (known) + * config: Disable pcspkr (in favor of snd-pcsp) + * swap: Add notify_swap_entry_free callback for compcache + * compcache: Added ram backed compressed swap module + * ubuntu: Enable kbuild and kconfig for compcache + * config: Enable compcache and tlsf allocator as modules + * config: Updated for -rc8. 
Disables XEN on i386 + * config: Switch i386-server to 64G, enable PAE, 64-bit res, and XEN + * ubuntu: Add misc drivers from hardy lum + * ubuntu: Enable build of misc/ subdir + * config: Enable misc drivers + * aufs: Fix warning about single non-string-literal arg to printf style + function + * drivers: Remove some duplicate device entries in various modules + * config: Disable some duplicate drivers + * keyspan: Remove duplicate device ID's + * check-aliases: Cleanup output, and fix rolling checks + * ubuntu: Disable dm-bbr for now + * dm-bbr: First cut at forward portiong. Still needs work. + * ubuntu: Disable dm-bbr in kbuild/kconfig + + [Chuck Short] + + * SAUCE: ata: blacklist FUJITSU MHW2160BH PL + - LP: #175834 + * SAUCE: [USB]: add ASUS LCM to the blacklist + + [Colin Ian King] + + * SAUCE: airprime.c supports more devices + - LP: #208250 + * SAUCE: Enable speedstep for sonoma processors. + - LP: #132271 + * Add dm-loop + * Add dm-loop BOM + + [Kyle McMartin] + + * SAUCE: fix orinoco_cs oops + + [Mario Limonciello] + + * SAUCE: Enable Reset and SCO workaround on Dell 410 BT adapter + + [Matthew Garrett] + + * SAUCE: hostap: send events on data interface as well as master + interface + + [Phillip Lougher] + + * SAUCE: r8169: disable TSO by default for RTL8111/8168B chipsets. + + [Stefan Bader] + + * SAUCE: Export dm_disk function of device-mapper + * SAUCE: Restore VT fonts on switch + * SAUCE: Always use SCO protocol (disable eSCO support) Bug: #39414 + * SAUCE: mmc: Increase power_up deleay to fix TI readers OriginalAuthor: + Pascal Terjan Bug: #137686 + * SAUCE: Add blacklist support to fix Belkin bluetooth dongle. Bug: + #140511 + * SAUCE: Lower warning level of pci resource allocation messages. Bug: + 159241 + * SAUCE: Lower message level for PCI memory and I/O allocation. + - LP: #159241 + * Modify log generation to catch bug numbers when adding with git-am. + + [Tim Gardner] + + * Added the debian directory. Ignore: yes + * Add support for UBUNTUINCLUDE Ignore: yes + * LUM headers go in /usr/src Ignore: yes + * First pass at 2.6.25 configs Ignore: yes + * i386 -generic builds. Ignore: yes + * SAUCE: Increase CONFIG_IDE_MAX_HWIFS to 8 (from 4) + * SAUCE: Add extra headers to linux-libc-dev OriginalAuthor: Soren Hansen + OriginalLocation: + https://lists.ubuntu.com/archives/kernel-team/2007-November/001891.html + * Set CONFIG_DEVKMEM=n Ignore: yes + * Enabled ALSA and CGROUPS for i386 Ignore: yes + * Enabled amd64 configs. Ignore: yes + * CONFIG_STANDALONE=n Ignore: yes + * CONFIG_BLK_DEV_4DRIVES=n for i386 Ignore: yes + * CONFIG: CONFIG_DEFAULT_RELATIME=y for all flavours. Ignore: yes + * Set CONFIG_EDD_OFF=y Ignore: yes + * SAUCE: Blacklist Bluetooth Dell Wireless 370 for SCO MTU + OriginalAuthor: Mario Limonciello Bug: + #209715 + * SAUCE: Catch nonsense keycodes and silently ignore + * SAUCE: frame buffer regression - screen blank except for blinking + cursor after fbcon vtswitch OriginalAuthor: Matthew Garrett + Bug: #201591 + * SAUCE: Added support for HDAPS on various ThinkPads from Lenovo and IBM + OriginalAuthor: Klaus S. Madsen + OriginalAuthor: Chuck Short + * SAUCE: Guest OS does not recognize a lun with non zero target id on + Vmware ESX Server + * SAUCE: orinoco_cs.ko missing + * Set CONFIG_FB_VESA=m for i386/amd64 Ignore: yes + * Set CONFIG_PM_DISABLE_CONSOLE=y for all flavours Ignore: yes + * Thorough review of amd64 -generic config Ignore: yes + * Build PPA packages for Hardy until the Intrepid archive is opened. 
+ * Deleted obsolete flavours Ignore: yes + * Don't build docs for PPA Ignore: yes + * Build all standard packages in PPA. Ignore: yes + * Remove duplicate USB ids + * SAUCE: DVB-USB UMT-010 driver oops on install Bug: #115284 + * Update configs after rebase to 2.6.26-rc1 Ignore: yes + * Update configs after rebase Ignore: yes + * Disable V4L until the build issues get ironed out. Ignore: yes + * Update configs after rebase. Ignore: yes + * Another device enable pass Ignore: yes + * Update configs after merge. Ignore: yes + * SAUCE: fn key doesn't work in hardy with macbook pro fourth generation + (4,1) + - LP: #207127 + * Enabled CONFIG_CIFS_DFS_UPCALL=y and CONFIG_CIFS_UPCALL=y + - LP: #236830 + + [Upstream Kernel Changes] + + * Revert "[WATCHDOG] hpwdt: Add CFLAGS to get driver working" + * mac80211: detect driver tx bugs + * hwmon: (lm85) Fix function RANGE_TO_REG() + * hwmon: (adt7473) Initialize max_duty_at_overheat before use + * hwmon: Update the sysfs interface documentation + * hwmon: (abituguru3) Identify Abit AW8D board as such + * hwmon: (w83791d) new maintainer + * hwmon: (abituguru3) update driver detection + * hwmon: (lm75) sensor reading bugfix + * ipv6: Remove options header when setsockopt's optlen is 0 + * ipv6: Drop packets for loopback address from outside of the box. + * sched: rt: dont stop the period timer when there are tasks wanting to + run + * sched: fix wait_for_completion_timeout() spurious failure under heavy + load + * x86: fix NULL pointer deref in __switch_to + * xen: Use wmb instead of rmb in xen_evtchn_do_upcall(). + * xen: mask unwanted pte bits in __supported_pte_mask + * xen: don't drop NX bit + * sched: refactor wait_for_completion_timeout() + * Ext4: Fix online resize block group descriptor corruption + * [IA64] SN2: security hole in sn2_ptc_proc_write + * alpha: fix module load failures on smp (bug #10926) + * alpha: link failure fix + * alpha: fix compile failures with gcc-4.3 (bug #10438) + * alpha: resurrect Cypress IDE quirk + * pppoe: warning fix + * sctp: Make sure N * sizeof(union sctp_addr) does not overflow. + * netns: Don't receive new packets in a dead network namespace. + * Add return value to reserve_bootmem_node() + * Slab: Fix memory leak in fallback_alloc() + * Fix performance regression on lmbench select benchmark + * ALSA: aw2 - Fix Oops at initialization + * ALSA: sb - Fix wrong assertions + * futexes: fix fault handling in futex_lock_pi + * IB/mthca: Clear ICM pages before handing to FW + * tty_driver: Update required method documentation + * removed unused var real_tty on n_tty_ioctl() + * Fix ZERO_PAGE breakage with vmware + * mm: fix race in COW logic + * NFS: Reduce the NFS mount code stack usage. 
+ * NFS: Fix filehandle size comparisons in the mount code + * NFS: nfs_updatepage(): don't mark page as dirty if an error occurred + * alpha: fix compile error in arch/alpha/mm/init.c + * KVM: Fix race between timer migration and vcpu migration + * KVM: close timer injection race window in __vcpu_run + * KVM: MMU: Fix rmap_write_protect() hugepage iteration bug + * KVM: MMU: large page update_pte issue with non-PAE 32-bit guests + (resend) + * KVM: MMU: Fix oops on guest userspace access to guest pagetable + * KVM: ioapic: fix lost interrupt when changing a device's irq + * KVM: VMX: Fix host msr corruption with preemption enabled + * [GFS2] BUG: unable to handle kernel paging request at ffff81002690e000 + * xen: remove support for non-PAE 32-bit + * kgdb: documentation update - remove kgdboe + * kgdb: sparse fix + * [IA64] Fix boot failure on ia64/sn2 + * [IA64] Handle count==0 in sn2_ptc_proc_write() + * [IA64] Eliminate NULL test after alloc_bootmem in iosapic_alloc_rte() + * [GFS2] fix gfs2 block allocation (cleaned up) + * x86: Add structs and functions for paravirt clocksource + * x86: Make xen use the paravirt clocksource structs and functions + * KVM: Make kvm host use the paravirt clocksource structs + * x86: KVM guest: Use the paravirt clocksource structs and functions + * KVM: Remove now unused structs from kvm_para.h + * enable bus mastering on i915 at resume time + * Linux 2.6.26-rc8 + * # Ubuntu external driver commit. + * # Ubuntu commit template. + + -- Ben Collins Sat, 21 Jun 2008 09:05:15 -0400 + +linux (2.6.26-2.6) intrepid; urgency=low + + [Ben Collins] + + * Revert "SAUCE: Export symbols for aufs (in lum) (not needed) + * config: Enable DVB devices + * ubuntu/aufs: Make aufs a bool config, since it needs to be built-in + * config: Build aufs into the kernels + * build: Fix arguments passed to link-headers script + * config: Disable early printk + * d-i: Move isofs to storage-core and kill st (scsi tape) from list + * config: Enable non-promiscuous access to /dev/mem + * x86: Add option to disable decompression info messages + * config: Enable no-bz-chatter config options + * build: Re-add linux-source package + * d-i: Re-add socket-modules. Accidentally removed + - LP: #241295 + + [Colin Ian King] + + * Add dm-loop + + [Tim Gardner] + + * Revert "SAUCE: USB bluetooth device 0x0e5e:0x6622 floods errors to + syslog (merged upstream) + + -- Ben Collins Mon, 16 Jun 2008 10:56:01 -0400 + +linux (2.6.26-1.5) intrepid; urgency=low + + * d-i: Make virtio-ring optional (it's built-in on i386) + * Rebased on 2.6.26-rc6 + + [Ubuntu-2.6.26-1.4 Changes below] + + * build: linux-doc rules got broken when disabling html side. Fixed now. + + [Ubuntu-2.6.26-1.3 Changes below] + + * build: Remove install-source, obsolete and caused build failure + + [Ubuntu-2.6.26-1.2 Changes below] + + * Remove efi-modules from d-i module list (efivars is built-in). Caused a + build failure. + * Patch to arch/x86/xen/time.c to remove __divdi3 usage (build failure on + i386). 
+ + [Ubuntu-2.6.26-1.1 Changes below] + + [Amit Kucheria] + + * SAUCE: make fc transport removal of target configurable + * SAUCE: Add AGP support for Radeon Mobility 9000 chipset + * SAUCE: pm: Config option to disable handling of console during + suspend/resume + + [Ben Collins] + + * SAUCE: input/mouse/alps: Do not call psmouse_reset() for alps + * SAUCE: irda: Default to dongle type 9 on IBM hardware + * SAUCE: tulip: Let dmfe handle davicom on non-sparc + * SAUCE: tulip: Define ULI PCI ID's + * SAUCE: version: Implement version_signature proc file. + * build: Remove remnants of unused binary-custom infrastructure + * mmc_block: Fix bad allocation on 64-bit (zero len array) + * ubuntu: New modules, acer-acpi + * build: Remove -virtual, and rebuild configs + * ubuntu: Add drbd module + * ubuntu: Add iscsitarget module + * ubuntu: Add squashfs driver + * build/configs: The Great Config Consistency Check of 2008 + * ubuntu: Add aufs module + * ubuntu: Added atl2 driver + * ubuntu: Add dm-radi4-5 driver + * build: Add CONFIG_DEBUG_SECTION_MISMATCH=y to get old style warnings + from build + * squashfs: Fixes for VFS changes + * ubuntu/dm-raid4-5: Fixups for moved/renamed headers/functions in core + md + * ubuntu: Add ndiswrapper driver + * d-i: Update module listings + + [Chuck Short] + + * SAUCE: ata: blacklist FUJITSU MHW2160BH PL + * SAUCE: [USB]: add ASUS LCM to the blacklist + + [Colin Ian King] + + * SAUCE: Enable speedstep for sonoma processors. + * SAUCE: airprime.c supports more devices + + [Kyle McMartin] + + * SAUCE: fix orinoco_cs oops + + [Mario Limonciello] + + * SAUCE: Enable Reset and SCO workaround on Dell 410 BT adapter + + [Matthew Garrett] + + * SAUCE: hostap: send events on data interface as well as master + interface + + [Phillip Lougher] + + * SAUCE: r8169: disable TSO by default for RTL8111/8168B chipsets. + + [Stefan Bader] + + * SAUCE: Export dm_disk function of device-mapper + * SAUCE: Restore VT fonts on switch + * SAUCE: Always use SCO protocol (disable eSCO support) Bug: #39414 + * SAUCE: mmc: Increase power_up deleay to fix TI readers + * SAUCE: Add blacklist support to fix Belkin bluetooth dongle. + * SAUCE: Lower warning level of pci resource allocation messages. + * SAUCE: Lower message level for PCI memory and I/O allocation. + - LP: #159241 + * Modify log generation to catch bug numbers when adding with git-am. + + [Tim Gardner] + + * SAUCE: hdaps module does not load on Thinkpad T61P + * SAUCE: Add extra headers to linux-libc-dev + * SAUCE: Export symbols for aufs (in lum). 
+ * SAUCE: USB bluetooth device 0x0e5e:0x6622 floods errors to syslog + * SAUCE: Blacklist Bluetooth Dell Wireless 370 for SCO MTU + * SAUCE: Catch nonsense keycodes and silently ignore + * SAUCE: frame buffer regression - screen blank except for blinking + cursor after fbcon vtswitch + * SAUCE: Added support for HDAPS on various ThinkPads from Lenovo and IBM + * SAUCE: Guest OS does not recognize a lun with non zero target id on + Vmware ESX Server + * SAUCE: Modualrize vesafb + * SAUCE: DVB-USB UMT-010 driver oops on install + * SAUCE: fn key doesn't work in hardy with macbook pro fourth generation + (4,1) + - LP: #207127 + + -- Ben Collins Wed, 11 Jun 2008 05:28:35 -0400 --- linux-rt-2.6.29.5.orig/debian/control +++ linux-rt-2.6.29.5/debian/control @@ -0,0 +1,72 @@ +Source: linux-rt +Section: devel +Priority: optional +Maintainer: Alessio Igor Bogani +Standards-Version: 3.6.1 +Build-Depends: debhelper (>= 3), module-init-tools, kernel-wedge (>= 2.24ubuntu1), makedumpfile [!armel], quilt +Build-Depends-Indep: xmlto, docbook-utils, gs, transfig, bzip2, sharutils + +Package: linux-rt-headers-2.6.29.5-1 +Architecture: all +Section: devel +Priority: optional +Depends: coreutils | fileutils (>= 4.0) +Provides: linux-rt-headers, linux-rt-headers-2.6 +Description: Header files related to Linux kernel version 2.6.29.5 + This package provides kernel header files for version 2.6.29.5, for sites + that want the latest kernel headers. Please read + /usr/share/doc/linux-headers-2.6.29.5-1/debian.README.gz for details + +Package: linux-image-2.6.29.5-1-rt +Architecture: i386 amd64 +Section: base +Priority: optional +Pre-Depends: dpkg (>= 1.10.24) +Provides: linux-image, linux-image-2.6, fuse-module, kvm-api-4, redhat-cluster-modules, ivtv-modules, ndiswrapper-modules-1.9 +Depends: initramfs-tools (>= 0.36ubuntu6), coreutils | fileutils (>= 4.0), module-init-tools (>= 3.3-pre11-4ubuntu3) +Conflicts: hotplug (<< 0.0.20040105-1) +Recommends: grub | lilo (>= 19.1) +Suggests: fdutils, linux-doc-2.6.29.5 | linux-source-2.6.29.5 +Description: Linux kernel image for version 2.6.29.5 on Ingo Molnar's full real time preemption patch (2.6.28-rt) + This package contains the Linux kernel image for version 2.6.29.5 on + Ingo Molnar's full real time preemption patch (2.6.28-rt). + . + Also includes the corresponding System.map file, the modules built by the + packager, and scripts that try to ensure that the system is not left in an + unbootable state after an update. + . + Supports Generic processors. + . + Geared toward real time systems. + . + You likely do not want to install this package directly. Instead, install + the linux-rt meta-package, which will ensure that upgrades work + correctly, and that supporting packages are also installed. + +Package: linux-headers-2.6.29.5-1-rt +Architecture: i386 amd64 +Section: devel +Priority: optional +Depends: coreutils | fileutils (>= 4.0), linux-rt-headers-2.6.29.5-1, ${shlibs:Depends} +Provides: linux-headers, linux-headers-2.6 +Description: Linux kernel headers for version 2.6.29.5 on Ingo Molnar's full real time preemption patch (2.6.28-rt) + This package provides kernel header files for version 2.6.29.5 on + Ingo Molnar's full real time preemption patch (2.6.28-rt). + . + This is for sites that want the latest kernel headers. Please read + /usr/share/doc/linux-headers-2.6.29.5-1/debian.README.gz for details. 
+ +Package: linux-image-debug-2.6.29.5-1-rt +Architecture: i386 amd64 +Section: devel +Priority: optional +Provides: linux-debug +Description: Linux kernel debug image for version 2.6.29.5 on Ingo Molnar's full real time preemption patch (2.6.28-rt) + This package provides a kernel debug image for version 2.6.29.5 on + Ingo Molnar's full real time preemption patch (2.6.28-rt). + . + This is for sites that wish to debug the kernel. + . + The kernel image contained in this package is NOT meant to boot from. It + is uncompressed, and unstripped. This package also includes the + unstripped modules. --- linux-rt-2.6.29.5.orig/debian/patches/series +++ linux-rt-2.6.29.5/debian/patches/series @@ -0,0 +1 @@ +2.6.29.5-rt22 --- linux-rt-2.6.29.5.orig/debian/patches/2.6.29.5-rt22 +++ linux-rt-2.6.29.5/debian/patches/2.6.29.5-rt22 @@ -0,0 +1,174663 @@ +Index: linux-2.6-tip/Documentation/ABI/testing/debugfs-kmemtrace +=================================================================== +--- /dev/null ++++ linux-2.6-tip/Documentation/ABI/testing/debugfs-kmemtrace +@@ -0,0 +1,71 @@ ++What: /sys/kernel/debug/kmemtrace/ ++Date: July 2008 ++Contact: Eduard - Gabriel Munteanu ++Description: ++ ++In kmemtrace-enabled kernels, the following files are created: ++ ++/sys/kernel/debug/kmemtrace/ ++ cpu (0400) Per-CPU tracing data, see below. (binary) ++ total_overruns (0400) Total number of bytes which were dropped from ++ cpu files because of full buffer condition, ++ non-binary. (text) ++ abi_version (0400) Kernel's kmemtrace ABI version. (text) ++ ++Each per-CPU file should be read according to the relay interface. That is, ++the reader should set affinity to that specific CPU and, as currently done by ++the userspace application (though there are other methods), use poll() with ++an infinite timeout before every read(). Otherwise, erroneous data may be ++read. The binary data has the following _core_ format: ++ ++ Event ID (1 byte) Unsigned integer, one of: ++ 0 - represents an allocation (KMEMTRACE_EVENT_ALLOC) ++ 1 - represents a freeing of previously allocated memory ++ (KMEMTRACE_EVENT_FREE) ++ Type ID (1 byte) Unsigned integer, one of: ++ 0 - this is a kmalloc() / kfree() ++ 1 - this is a kmem_cache_alloc() / kmem_cache_free() ++ 2 - this is a __get_free_pages() et al. ++ Event size (2 bytes) Unsigned integer representing the ++ size of this event. Used to extend ++ kmemtrace. Discard the bytes you ++ don't know about. ++ Sequence number (4 bytes) Signed integer used to reorder data ++ logged on SMP machines. Wraparound ++ must be taken into account, although ++ it is unlikely. ++ Caller address (8 bytes) Return address to the caller. ++ Pointer to mem (8 bytes) Pointer to target memory area. Can be ++ NULL, but not all such calls might be ++ recorded. ++ ++In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow: ++ ++ Requested bytes (8 bytes) Total number of requested bytes, ++ unsigned, must not be zero. ++ Allocated bytes (8 bytes) Total number of actually allocated ++ bytes, unsigned, must not be lower ++ than requested bytes. ++ Requested flags (4 bytes) GFP flags supplied by the caller. ++ Target CPU (4 bytes) Signed integer, valid for event id 1. ++ If equal to -1, target CPU is the same ++ as origin CPU, but the reverse might ++ not be true. ++ ++The data is made available in the same endianness the machine has. ++ ++Other event ids and type ids may be defined and added. Other fields may be ++added by increasing event size, but see below for details. 
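++
++As a purely illustrative sketch, a userspace reader could decode the core
++record above roughly as follows. The struct and the read_one_event() helper
++are hypothetical reconstructions of the table (the kmemtrace-user tool listed
++below has its own format definitions) and assume the packed, native-endian
++layout described:
++
++	#include <stdint.h>
++	#include <unistd.h>
++
++	struct kmemtrace_core {		/* hypothetical, mirrors the table above */
++		uint8_t  event_id;	/* 0 = alloc, 1 = free */
++		uint8_t  type_id;	/* 0 = kmalloc, 1 = kmem_cache, 2 = pages */
++		uint16_t event_size;	/* total size of this event */
++		int32_t  seq;		/* sequence number, may wrap */
++		uint64_t call_site;	/* return address of the caller */
++		uint64_t ptr;		/* pointer to the memory area */
++	} __attribute__((packed));
++
++	/* Read one event from an already-open cpuN file (after poll() said it
++	 * is readable) and discard any trailing payload this reader does not
++	 * understand: ALLOC fields, future features. */
++	static int read_one_event(int fd, struct kmemtrace_core *ev)
++	{
++		char scratch[64];
++		size_t left;
++
++		if (read(fd, ev, sizeof(*ev)) != (ssize_t)sizeof(*ev))
++			return -1;
++		left = ev->event_size > sizeof(*ev) ?
++				ev->event_size - sizeof(*ev) : 0;
++		while (left > 0) {
++			ssize_t n = read(fd, scratch,
++					left < sizeof(scratch) ? left : sizeof(scratch));
++			if (n <= 0)
++				return -1;
++			left -= (size_t)n;
++		}
++		return 0;
++	}
++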
++Every modification to the ABI, including new id definitions, are followed ++by bumping the ABI version by one. ++ ++Adding new data to the packet (features) is done at the end of the mandatory ++data: ++ Feature size (2 byte) ++ Feature ID (1 byte) ++ Feature data (Feature size - 3 bytes) ++ ++ ++Users: ++ kmemtrace-user - git://repo.or.cz/kmemtrace-user.git ++ +Index: linux-2.6-tip/Documentation/DMA-API.txt +=================================================================== +--- linux-2.6-tip.orig/Documentation/DMA-API.txt ++++ linux-2.6-tip/Documentation/DMA-API.txt +@@ -609,3 +609,109 @@ size is the size (and should be a page-s + The return value will be either a pointer to the processor virtual + address of the memory, or an error (via PTR_ERR()) if any part of the + region is occupied. ++ ++Part III - Debug drivers use of the DMA-API ++------------------------------------------- ++ ++The DMA-API as described above as some constraints. DMA addresses must be ++released with the corresponding function with the same size for example. With ++the advent of hardware IOMMUs it becomes more and more important that drivers ++do not violate those constraints. In the worst case such a violation can ++result in data corruption up to destroyed filesystems. ++ ++To debug drivers and find bugs in the usage of the DMA-API checking code can ++be compiled into the kernel which will tell the developer about those ++violations. If your architecture supports it you can select the "Enable ++debugging of DMA-API usage" option in your kernel configuration. Enabling this ++option has a performance impact. Do not enable it in production kernels. ++ ++If you boot the resulting kernel will contain code which does some bookkeeping ++about what DMA memory was allocated for which device. If this code detects an ++error it prints a warning message with some details into your kernel log. An ++example warning message may look like this: ++ ++------------[ cut here ]------------ ++WARNING: at /data2/repos/linux-2.6-iommu/lib/dma-debug.c:448 ++ check_unmap+0x203/0x490() ++Hardware name: ++forcedeth 0000:00:08.0: DMA-API: device driver frees DMA memory with wrong ++ function [device address=0x00000000640444be] [size=66 bytes] [mapped as ++single] [unmapped as page] ++Modules linked in: nfsd exportfs bridge stp llc r8169 ++Pid: 0, comm: swapper Tainted: G W 2.6.28-dmatest-09289-g8bb99c0 #1 ++Call Trace: ++ [] warn_slowpath+0xf2/0x130 ++ [] _spin_unlock+0x10/0x30 ++ [] usb_hcd_link_urb_to_ep+0x75/0xc0 ++ [] _spin_unlock_irqrestore+0x12/0x40 ++ [] ohci_urb_enqueue+0x19f/0x7c0 ++ [] queue_work+0x56/0x60 ++ [] enqueue_task_fair+0x20/0x50 ++ [] usb_hcd_submit_urb+0x379/0xbc0 ++ [] cpumask_next_and+0x23/0x40 ++ [] find_busiest_group+0x207/0x8a0 ++ [] _spin_lock_irqsave+0x1f/0x50 ++ [] check_unmap+0x203/0x490 ++ [] debug_dma_unmap_page+0x49/0x50 ++ [] nv_tx_done_optimized+0xc6/0x2c0 ++ [] nv_nic_irq_optimized+0x73/0x2b0 ++ [] handle_IRQ_event+0x34/0x70 ++ [] handle_edge_irq+0xc9/0x150 ++ [] do_IRQ+0xcb/0x1c0 ++ [] ret_from_intr+0x0/0xa ++ <4>---[ end trace f6435a98e2a38c0e ]--- ++ ++The driver developer can find the driver and the device including a stacktrace ++of the DMA-API call which caused this warning. ++ ++Per default only the first error will result in a warning message. All other ++errors will only silently counted. This limitation exist to prevent the code ++from flooding your kernel log. To support debugging a device driver this can ++be disabled via debugfs. 
See the debugfs interface documentation below for ++details. ++ ++The debugfs directory for the DMA-API debugging code is called dma-api/. In ++this directory the following files can currently be found: ++ ++ dma-api/all_errors This file contains a numeric value. If this ++ value is not equal to zero the debugging code ++ will print a warning for every error it finds ++ into the kernel log. Be carefull with this ++ option. It can easily flood your logs. ++ ++ dma-api/disabled This read-only file contains the character 'Y' ++ if the debugging code is disabled. This can ++ happen when it runs out of memory or if it was ++ disabled at boot time ++ ++ dma-api/error_count This file is read-only and shows the total ++ numbers of errors found. ++ ++ dma-api/num_errors The number in this file shows how many ++ warnings will be printed to the kernel log ++ before it stops. This number is initialized to ++ one at system boot and be set by writing into ++ this file ++ ++ dma-api/min_free_entries ++ This read-only file can be read to get the ++ minimum number of free dma_debug_entries the ++ allocator has ever seen. If this value goes ++ down to zero the code will disable itself ++ because it is not longer reliable. ++ ++ dma-api/num_free_entries ++ The current number of free dma_debug_entries ++ in the allocator. ++ ++If you have this code compiled into your kernel it will be enabled by default. ++If you want to boot without the bookkeeping anyway you can provide ++'dma_debug=off' as a boot parameter. This will disable DMA-API debugging. ++Notice that you can not enable it again at runtime. You have to reboot to do ++so. ++ ++When the code disables itself at runtime this is most likely because it ran ++out of dma_debug_entries. These entries are preallocated at boot. The number ++of preallocated entries is defined per architecture. If it is too low for you ++boot with 'dma_debug_entries=' to overwrite the ++architectural default. +Index: linux-2.6-tip/Documentation/DocBook/genericirq.tmpl +=================================================================== +--- linux-2.6-tip.orig/Documentation/DocBook/genericirq.tmpl ++++ linux-2.6-tip/Documentation/DocBook/genericirq.tmpl +@@ -440,6 +440,7 @@ desc->chip->end(); + used in the generic IRQ layer. + + !Iinclude/linux/irq.h ++!Iinclude/linux/interrupt.h + + + +Index: linux-2.6-tip/Documentation/cputopology.txt +=================================================================== +--- linux-2.6-tip.orig/Documentation/cputopology.txt ++++ linux-2.6-tip/Documentation/cputopology.txt +@@ -18,11 +18,11 @@ For an architecture to support this feat + these macros in include/asm-XXX/topology.h: + #define topology_physical_package_id(cpu) + #define topology_core_id(cpu) +-#define topology_thread_siblings(cpu) +-#define topology_core_siblings(cpu) ++#define topology_thread_cpumask(cpu) ++#define topology_core_cpumask(cpu) + + The type of **_id is int. +-The type of siblings is cpumask_t. ++The type of siblings is (const) struct cpumask *. 
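++
++Schematically, a caller now iterates the returned mask instead of
++copying a cpumask_t by value, using the for_each_cpu() iterator; for
++example (an illustrative fragment, the function name is made up):
++
++  #include <linux/kernel.h>
++  #include <linux/cpumask.h>
++  #include <linux/topology.h>
++
++  static void print_core_siblings(int cpu)
++  {
++          int sibling;
++
++          /* topology_core_cpumask() returns a const struct cpumask * */
++          for_each_cpu(sibling, topology_core_cpumask(cpu))
++                  printk(KERN_INFO "CPU%d shares a package with CPU%d\n",
++                         cpu, sibling);
++  }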
+ + To be consistent on all architectures, include/linux/topology.h + provides default definitions for any of the above macros that are +Index: linux-2.6-tip/Documentation/feature-removal-schedule.txt +=================================================================== +--- linux-2.6-tip.orig/Documentation/feature-removal-schedule.txt ++++ linux-2.6-tip/Documentation/feature-removal-schedule.txt +@@ -344,3 +344,20 @@ Why: See commits 129f8ae9b1b5be94517da76 + Removal is subject to fixing any remaining bugs in ACPI which may + cause the thermal throttling not to happen at the right time. + Who: Dave Jones , Matthew Garrett ++ ++----------------------------- ++ ++What: __do_IRQ all in one fits nothing interrupt handler ++When: 2.6.32 ++Why: __do_IRQ was kept for easy migration to the type flow handlers. ++ More than two years of migration time is enough. ++Who: Thomas Gleixner ++ ++----------------------------- ++ ++What: obsolete generic irq defines and typedefs ++When: 2.6.30 ++Why: The defines and typedefs (hw_interrupt_type, no_irq_type, irq_desc_t) ++ have been kept around for migration reasons. After more than two years ++ it's time to remove them finally ++Who: Thomas Gleixner +Index: linux-2.6-tip/Documentation/ftrace.txt +=================================================================== +--- linux-2.6-tip.orig/Documentation/ftrace.txt ++++ linux-2.6-tip/Documentation/ftrace.txt +@@ -15,31 +15,31 @@ Introduction + + Ftrace is an internal tracer designed to help out developers and + designers of systems to find what is going on inside the kernel. +-It can be used for debugging or analyzing latencies and performance +-issues that take place outside of user-space. ++It can be used for debugging or analyzing latencies and ++performance issues that take place outside of user-space. + + Although ftrace is the function tracer, it also includes an +-infrastructure that allows for other types of tracing. Some of the +-tracers that are currently in ftrace include a tracer to trace +-context switches, the time it takes for a high priority task to +-run after it was woken up, the time interrupts are disabled, and +-more (ftrace allows for tracer plugins, which means that the list of +-tracers can always grow). ++infrastructure that allows for other types of tracing. Some of ++the tracers that are currently in ftrace include a tracer to ++trace context switches, the time it takes for a high priority ++task to run after it was woken up, the time interrupts are ++disabled, and more (ftrace allows for tracer plugins, which ++means that the list of tracers can always grow). + + + The File System + --------------- + +-Ftrace uses the debugfs file system to hold the control files as well +-as the files to display output. ++Ftrace uses the debugfs file system to hold the control files as ++well as the files to display output. + + To mount the debugfs system: + + # mkdir /debug + # mount -t debugfs nodev /debug + +-(Note: it is more common to mount at /sys/kernel/debug, but for simplicity +- this document will use /debug) ++( Note: it is more common to mount at /sys/kernel/debug, but for ++ simplicity this document will use /debug) + + That's it! (assuming that you have ftrace configured into your kernel) + +@@ -50,90 +50,124 @@ of ftrace. Here is a list of some of the + + Note: all time values are in microseconds. + +- current_tracer: This is used to set or display the current tracer +- that is configured. 
++ current_tracer: + +- available_tracers: This holds the different types of tracers that +- have been compiled into the kernel. The tracers +- listed here can be configured by echoing their name +- into current_tracer. +- +- tracing_enabled: This sets or displays whether the current_tracer +- is activated and tracing or not. Echo 0 into this +- file to disable the tracer or 1 to enable it. +- +- trace: This file holds the output of the trace in a human readable +- format (described below). +- +- latency_trace: This file shows the same trace but the information +- is organized more to display possible latencies +- in the system (described below). +- +- trace_pipe: The output is the same as the "trace" file but this +- file is meant to be streamed with live tracing. +- Reads from this file will block until new data +- is retrieved. Unlike the "trace" and "latency_trace" +- files, this file is a consumer. This means reading +- from this file causes sequential reads to display +- more current data. Once data is read from this +- file, it is consumed, and will not be read +- again with a sequential read. The "trace" and +- "latency_trace" files are static, and if the +- tracer is not adding more data, they will display +- the same information every time they are read. +- +- trace_options: This file lets the user control the amount of data +- that is displayed in one of the above output +- files. +- +- trace_max_latency: Some of the tracers record the max latency. +- For example, the time interrupts are disabled. +- This time is saved in this file. The max trace +- will also be stored, and displayed by either +- "trace" or "latency_trace". A new max trace will +- only be recorded if the latency is greater than +- the value in this file. (in microseconds) +- +- buffer_size_kb: This sets or displays the number of kilobytes each CPU +- buffer can hold. The tracer buffers are the same size +- for each CPU. The displayed number is the size of the +- CPU buffer and not total size of all buffers. The +- trace buffers are allocated in pages (blocks of memory +- that the kernel uses for allocation, usually 4 KB in size). +- If the last page allocated has room for more bytes +- than requested, the rest of the page will be used, +- making the actual allocation bigger than requested. +- (Note, the size may not be a multiple of the page size due +- to buffer managment overhead.) +- +- This can only be updated when the current_tracer +- is set to "nop". +- +- tracing_cpumask: This is a mask that lets the user only trace +- on specified CPUS. The format is a hex string +- representing the CPUS. +- +- set_ftrace_filter: When dynamic ftrace is configured in (see the +- section below "dynamic ftrace"), the code is dynamically +- modified (code text rewrite) to disable calling of the +- function profiler (mcount). This lets tracing be configured +- in with practically no overhead in performance. This also +- has a side effect of enabling or disabling specific functions +- to be traced. Echoing names of functions into this file +- will limit the trace to only those functions. +- +- set_ftrace_notrace: This has an effect opposite to that of +- set_ftrace_filter. Any function that is added here will not +- be traced. If a function exists in both set_ftrace_filter +- and set_ftrace_notrace, the function will _not_ be traced. +- +- set_ftrace_pid: Have the function tracer only trace a single thread. +- +- available_filter_functions: This lists the functions that ftrace +- has processed and can trace. 
These are the function +- names that you can pass to "set_ftrace_filter" or +- "set_ftrace_notrace". (See the section "dynamic ftrace" +- below for more details.) ++ This is used to set or display the current tracer ++ that is configured. ++ ++ available_tracers: ++ ++ This holds the different types of tracers that ++ have been compiled into the kernel. The ++ tracers listed here can be configured by ++ echoing their name into current_tracer. ++ ++ tracing_enabled: ++ ++ This sets or displays whether the current_tracer ++ is activated and tracing or not. Echo 0 into this ++ file to disable the tracer or 1 to enable it. ++ ++ trace: ++ ++ This file holds the output of the trace in a human ++ readable format (described below). ++ ++ latency_trace: ++ ++ This file shows the same trace but the information ++ is organized more to display possible latencies ++ in the system (described below). ++ ++ trace_pipe: ++ ++ The output is the same as the "trace" file but this ++ file is meant to be streamed with live tracing. ++ Reads from this file will block until new data ++ is retrieved. Unlike the "trace" and "latency_trace" ++ files, this file is a consumer. This means reading ++ from this file causes sequential reads to display ++ more current data. Once data is read from this ++ file, it is consumed, and will not be read ++ again with a sequential read. The "trace" and ++ "latency_trace" files are static, and if the ++ tracer is not adding more data, they will display ++ the same information every time they are read. ++ ++ trace_options: ++ ++ This file lets the user control the amount of data ++ that is displayed in one of the above output ++ files. ++ ++ tracing_max_latency: ++ ++ Some of the tracers record the max latency. ++ For example, the time interrupts are disabled. ++ This time is saved in this file. The max trace ++ will also be stored, and displayed by either ++ "trace" or "latency_trace". A new max trace will ++ only be recorded if the latency is greater than ++ the value in this file. (in microseconds) ++ ++ buffer_size_kb: ++ ++ This sets or displays the number of kilobytes each CPU ++ buffer can hold. The tracer buffers are the same size ++ for each CPU. The displayed number is the size of the ++ CPU buffer and not total size of all buffers. The ++ trace buffers are allocated in pages (blocks of memory ++ that the kernel uses for allocation, usually 4 KB in size). ++ If the last page allocated has room for more bytes ++ than requested, the rest of the page will be used, ++ making the actual allocation bigger than requested. ++ ( Note, the size may not be a multiple of the page size ++ due to buffer managment overhead. ) ++ ++ This can only be updated when the current_tracer ++ is set to "nop". ++ ++ tracing_cpumask: ++ ++ This is a mask that lets the user only trace ++ on specified CPUS. The format is a hex string ++ representing the CPUS. ++ ++ set_ftrace_filter: ++ ++ When dynamic ftrace is configured in (see the ++ section below "dynamic ftrace"), the code is dynamically ++ modified (code text rewrite) to disable calling of the ++ function profiler (mcount). This lets tracing be configured ++ in with practically no overhead in performance. This also ++ has a side effect of enabling or disabling specific functions ++ to be traced. Echoing names of functions into this file ++ will limit the trace to only those functions. ++ ++ set_ftrace_notrace: ++ ++ This has an effect opposite to that of ++ set_ftrace_filter. Any function that is added here will not ++ be traced. 
If a function exists in both set_ftrace_filter ++ and set_ftrace_notrace, the function will _not_ be traced. ++ ++ set_ftrace_pid: ++ ++ Have the function tracer only trace a single thread. ++ ++ set_graph_function: ++ ++ Set a "trigger" function where tracing should start ++ with the function graph tracer (See the section ++ "dynamic ftrace" for more details). ++ ++ available_filter_functions: ++ ++ This lists the functions that ftrace ++ has processed and can trace. These are the function ++ names that you can pass to "set_ftrace_filter" or ++ "set_ftrace_notrace". (See the section "dynamic ftrace" ++ below for more details.) + + + The Tracers +@@ -141,36 +175,66 @@ The Tracers + + Here is the list of current tracers that may be configured. + +- function - function tracer that uses mcount to trace all functions. ++ "function" ++ ++ Function call tracer to trace all kernel functions. ++ ++ "function_graph_tracer" ++ ++ Similar to the function tracer except that the ++ function tracer probes the functions on their entry ++ whereas the function graph tracer traces on both entry ++ and exit of the functions. It then provides the ability ++ to draw a graph of function calls similar to C code ++ source. ++ ++ "sched_switch" ++ ++ Traces the context switches and wakeups between tasks. ++ ++ "irqsoff" ++ ++ Traces the areas that disable interrupts and saves ++ the trace with the longest max latency. ++ See tracing_max_latency. When a new max is recorded, ++ it replaces the old trace. It is best to view this ++ trace via the latency_trace file. ++ ++ "preemptoff" ++ ++ Similar to irqsoff but traces and records the amount of ++ time for which preemption is disabled. + +- sched_switch - traces the context switches between tasks. ++ "preemptirqsoff" + +- irqsoff - traces the areas that disable interrupts and saves +- the trace with the longest max latency. +- See tracing_max_latency. When a new max is recorded, +- it replaces the old trace. It is best to view this +- trace via the latency_trace file. ++ Similar to irqsoff and preemptoff, but traces and ++ records the largest time for which irqs and/or preemption ++ is disabled. + +- preemptoff - Similar to irqsoff but traces and records the amount of +- time for which preemption is disabled. ++ "wakeup" + +- preemptirqsoff - Similar to irqsoff and preemptoff, but traces and +- records the largest time for which irqs and/or preemption +- is disabled. ++ Traces and records the max latency that it takes for ++ the highest priority task to get scheduled after ++ it has been woken up. + +- wakeup - Traces and records the max latency that it takes for +- the highest priority task to get scheduled after +- it has been woken up. ++ "hw-branch-tracer" + +- nop - This is not a tracer. To remove all tracers from tracing +- simply echo "nop" into current_tracer. ++ Uses the BTS CPU feature on x86 CPUs to traces all ++ branches executed. ++ ++ "nop" ++ ++ This is the "trace nothing" tracer. To remove all ++ tracers from tracing simply echo "nop" into ++ current_tracer. + + + Examples of using the tracer + ---------------------------- + +-Here are typical examples of using the tracers when controlling them only +-with the debugfs interface (without using any user-land utilities). ++Here are typical examples of using the tracers when controlling ++them only with the debugfs interface (without using any ++user-land utilities). 
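++
++The same control is available from a small program: every step shown
++below with echo is just a write to the corresponding debugfs file. A
++minimal sketch (the helper name is made up, and the /debug mount point
++is assumed as elsewhere in this document):
++
++  #include <fcntl.h>
++  #include <string.h>
++  #include <unistd.h>
++
++  /* e.g. select_tracer("function") or select_tracer("nop") */
++  static int select_tracer(const char *name)
++  {
++          int fd = open("/debug/tracing/current_tracer", O_WRONLY);
++
++          if (fd < 0)
++                  return -1;
++          if (write(fd, name, strlen(name)) < 0) {
++                  close(fd);
++                  return -1;
++          }
++          return close(fd);
++  }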
+ + Output format: + -------------- +@@ -187,16 +251,16 @@ Here is an example of the output format + bash-4251 [01] 10152.583855: _atomic_dec_and_lock <-dput + -------- + +-A header is printed with the tracer name that is represented by the trace. +-In this case the tracer is "function". Then a header showing the format. Task +-name "bash", the task PID "4251", the CPU that it was running on +-"01", the timestamp in . format, the function name that was +-traced "path_put" and the parent function that called this function +-"path_walk". The timestamp is the time at which the function was +-entered. ++A header is printed with the tracer name that is represented by ++the trace. In this case the tracer is "function". Then a header ++showing the format. Task name "bash", the task PID "4251", the ++CPU that it was running on "01", the timestamp in . ++format, the function name that was traced "path_put" and the ++parent function that called this function "path_walk". The ++timestamp is the time at which the function was entered. + +-The sched_switch tracer also includes tracing of task wakeups and +-context switches. ++The sched_switch tracer also includes tracing of task wakeups ++and context switches. + + ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 2916:115:S + ksoftirqd/1-7 [01] 1453.070013: 7:115:R + 10:115:S +@@ -205,8 +269,8 @@ context switches. + kondemand/1-2916 [01] 1453.070013: 2916:115:S ==> 7:115:R + ksoftirqd/1-7 [01] 1453.070013: 7:115:S ==> 0:140:R + +-Wake ups are represented by a "+" and the context switches are shown as +-"==>". The format is: ++Wake ups are represented by a "+" and the context switches are ++shown as "==>". The format is: + + Context switches: + +@@ -220,19 +284,20 @@ Wake ups are represented by a "+" and th + + :: + :: + +-The prio is the internal kernel priority, which is the inverse of the +-priority that is usually displayed by user-space tools. Zero represents +-the highest priority (99). Prio 100 starts the "nice" priorities with +-100 being equal to nice -20 and 139 being nice 19. The prio "140" is +-reserved for the idle task which is the lowest priority thread (pid 0). ++The prio is the internal kernel priority, which is the inverse ++of the priority that is usually displayed by user-space tools. ++Zero represents the highest priority (99). Prio 100 starts the ++"nice" priorities with 100 being equal to nice -20 and 139 being ++nice 19. The prio "140" is reserved for the idle task which is ++the lowest priority thread (pid 0). + + + Latency trace format + -------------------- + +-For traces that display latency times, the latency_trace file gives +-somewhat more information to see why a latency happened. Here is a typical +-trace. ++For traces that display latency times, the latency_trace file ++gives somewhat more information to see why a latency happened. ++Here is a typical trace. + + # tracer: irqsoff + # +@@ -259,20 +324,20 @@ irqsoff latency trace v1.1.5 on 2.6.26-r + -0 0d.s1 98us : trace_hardirqs_on (do_softirq) + + ++This shows that the current tracer is "irqsoff" tracing the time ++for which interrupts were disabled. It gives the trace version ++and the version of the kernel upon which this was executed on ++(2.6.26-rc8). Then it displays the max latency in microsecs (97 ++us). The number of trace entries displayed and the total number ++recorded (both are three: #3/3). The type of preemption that was ++used (PREEMPT). VP, KP, SP, and HP are always zero and are ++reserved for later use. #P is the number of online CPUS (#P:2). 
+ +-This shows that the current tracer is "irqsoff" tracing the time for which +-interrupts were disabled. It gives the trace version and the version +-of the kernel upon which this was executed on (2.6.26-rc8). Then it displays +-the max latency in microsecs (97 us). The number of trace entries displayed +-and the total number recorded (both are three: #3/3). The type of +-preemption that was used (PREEMPT). VP, KP, SP, and HP are always zero +-and are reserved for later use. #P is the number of online CPUS (#P:2). +- +-The task is the process that was running when the latency occurred. +-(swapper pid: 0). ++The task is the process that was running when the latency ++occurred. (swapper pid: 0). + +-The start and stop (the functions in which the interrupts were disabled and +-enabled respectively) that caused the latencies: ++The start and stop (the functions in which the interrupts were ++disabled and enabled respectively) that caused the latencies: + + apic_timer_interrupt is where the interrupts were disabled. + do_softirq is where they were enabled again. +@@ -308,12 +373,12 @@ The above is mostly meaningful for kerne + latency_trace file is relative to the start of the trace. + + delay: This is just to help catch your eye a bit better. And +- needs to be fixed to be only relative to the same CPU. +- The marks are determined by the difference between this +- current trace and the next trace. +- '!' - greater than preempt_mark_thresh (default 100) +- '+' - greater than 1 microsecond +- ' ' - less than or equal to 1 microsecond. ++ needs to be fixed to be only relative to the same CPU. ++ The marks are determined by the difference between this ++ current trace and the next trace. ++ '!' - greater than preempt_mark_thresh (default 100) ++ '+' - greater than 1 microsecond ++ ' ' - less than or equal to 1 microsecond. + + The rest is the same as the 'trace' file. + +@@ -321,14 +386,15 @@ The above is mostly meaningful for kerne + trace_options + ------------- + +-The trace_options file is used to control what gets printed in the trace +-output. To see what is available, simply cat the file: ++The trace_options file is used to control what gets printed in ++the trace output. To see what is available, simply cat the file: + + cat /debug/tracing/trace_options + print-parent nosym-offset nosym-addr noverbose noraw nohex nobin \ +- noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj ++ noblock nostacktrace nosched-tree nouserstacktrace nosym-userobj + +-To disable one of the options, echo in the option prepended with "no". ++To disable one of the options, echo in the option prepended with ++"no". + + echo noprint-parent > /debug/tracing/trace_options + +@@ -338,8 +404,8 @@ To enable an option, leave off the "no". + + Here are the available options: + +- print-parent - On function traces, display the calling function +- as well as the function being traced. ++ print-parent - On function traces, display the calling (parent) ++ function as well as the function being traced. + + print-parent: + bash-4000 [01] 1477.606694: simple_strtoul <-strict_strtoul +@@ -348,15 +414,16 @@ Here are the available options: + bash-4000 [01] 1477.606694: simple_strtoul + + +- sym-offset - Display not only the function name, but also the offset +- in the function. For example, instead of seeing just +- "ktime_get", you will see "ktime_get+0xb/0x20". ++ sym-offset - Display not only the function name, but also the ++ offset in the function. 
For example, instead of ++ seeing just "ktime_get", you will see ++ "ktime_get+0xb/0x20". + + sym-offset: + bash-4000 [01] 1477.606694: simple_strtoul+0x6/0xa0 + +- sym-addr - this will also display the function address as well as +- the function name. ++ sym-addr - this will also display the function address as well ++ as the function name. + + sym-addr: + bash-4000 [01] 1477.606694: simple_strtoul +@@ -366,35 +433,41 @@ Here are the available options: + bash 4000 1 0 00000000 00010a95 [58127d26] 1720.415ms \ + (+0.000ms): simple_strtoul (strict_strtoul) + +- raw - This will display raw numbers. This option is best for use with +- user applications that can translate the raw numbers better than +- having it done in the kernel. ++ raw - This will display raw numbers. This option is best for ++ use with user applications that can translate the raw ++ numbers better than having it done in the kernel. + +- hex - Similar to raw, but the numbers will be in a hexadecimal format. ++ hex - Similar to raw, but the numbers will be in a hexadecimal ++ format. + + bin - This will print out the formats in raw binary. + + block - TBD (needs update) + +- stacktrace - This is one of the options that changes the trace itself. +- When a trace is recorded, so is the stack of functions. +- This allows for back traces of trace sites. +- +- userstacktrace - This option changes the trace. +- It records a stacktrace of the current userspace thread. +- +- sym-userobj - when user stacktrace are enabled, look up which object the +- address belongs to, and print a relative address +- This is especially useful when ASLR is on, otherwise you don't +- get a chance to resolve the address to object/file/line after the app is no +- longer running ++ stacktrace - This is one of the options that changes the trace ++ itself. When a trace is recorded, so is the stack ++ of functions. This allows for back traces of ++ trace sites. ++ ++ userstacktrace - This option changes the trace. It records a ++ stacktrace of the current userspace thread. ++ ++ sym-userobj - when user stacktrace are enabled, look up which ++ object the address belongs to, and print a ++ relative address. This is especially useful when ++ ASLR is on, otherwise you don't get a chance to ++ resolve the address to object/file/line after ++ the app is no longer running + +- The lookup is performed when you read trace,trace_pipe,latency_trace. Example: ++ The lookup is performed when you read ++ trace,trace_pipe,latency_trace. Example: + + a.out-1623 [000] 40874.465068: /root/a.out[+0x480] <-/root/a.out[+0 + x494] <- /root/a.out[+0x4a8] <- /lib/libc-2.7.so[+0x1e1a6] + +- sched-tree - TBD (any users??) ++ sched-tree - trace all tasks that are on the runqueue, at ++ every scheduling event. Will add overhead if ++ there's a lot of tasks running at once. + + + sched_switch +@@ -431,18 +504,19 @@ of how to use it. + [...] + + +-As we have discussed previously about this format, the header shows +-the name of the trace and points to the options. The "FUNCTION" +-is a misnomer since here it represents the wake ups and context +-switches. +- +-The sched_switch file only lists the wake ups (represented with '+') +-and context switches ('==>') with the previous task or current task +-first followed by the next task or task waking up. The format for both +-of these is PID:KERNEL-PRIO:TASK-STATE. Remember that the KERNEL-PRIO +-is the inverse of the actual priority with zero (0) being the highest +-priority and the nice values starting at 100 (nice -20). 
Below is +-a quick chart to map the kernel priority to user land priorities. ++As we have discussed previously about this format, the header ++shows the name of the trace and points to the options. The ++"FUNCTION" is a misnomer since here it represents the wake ups ++and context switches. ++ ++The sched_switch file only lists the wake ups (represented with ++'+') and context switches ('==>') with the previous task or ++current task first followed by the next task or task waking up. ++The format for both of these is PID:KERNEL-PRIO:TASK-STATE. ++Remember that the KERNEL-PRIO is the inverse of the actual ++priority with zero (0) being the highest priority and the nice ++values starting at 100 (nice -20). Below is a quick chart to map ++the kernel priority to user land priorities. + + Kernel priority: 0 to 99 ==> user RT priority 99 to 0 + Kernel priority: 100 to 139 ==> user nice -20 to 19 +@@ -463,10 +537,10 @@ The task states are: + ftrace_enabled + -------------- + +-The following tracers (listed below) give different output depending +-on whether or not the sysctl ftrace_enabled is set. To set ftrace_enabled, +-one can either use the sysctl function or set it via the proc +-file system interface. ++The following tracers (listed below) give different output ++depending on whether or not the sysctl ftrace_enabled is set. To ++set ftrace_enabled, one can either use the sysctl function or ++set it via the proc file system interface. + + sysctl kernel.ftrace_enabled=1 + +@@ -474,12 +548,12 @@ file system interface. + + echo 1 > /proc/sys/kernel/ftrace_enabled + +-To disable ftrace_enabled simply replace the '1' with '0' in +-the above commands. ++To disable ftrace_enabled simply replace the '1' with '0' in the ++above commands. + +-When ftrace_enabled is set the tracers will also record the functions +-that are within the trace. The descriptions of the tracers +-will also show an example with ftrace enabled. ++When ftrace_enabled is set the tracers will also record the ++functions that are within the trace. The descriptions of the ++tracers will also show an example with ftrace enabled. + + + irqsoff +@@ -487,17 +561,18 @@ irqsoff + + When interrupts are disabled, the CPU can not react to any other + external event (besides NMIs and SMIs). This prevents the timer +-interrupt from triggering or the mouse interrupt from letting the +-kernel know of a new mouse event. The result is a latency with the +-reaction time. +- +-The irqsoff tracer tracks the time for which interrupts are disabled. +-When a new maximum latency is hit, the tracer saves the trace leading up +-to that latency point so that every time a new maximum is reached, the old +-saved trace is discarded and the new trace is saved. ++interrupt from triggering or the mouse interrupt from letting ++the kernel know of a new mouse event. The result is a latency ++with the reaction time. ++ ++The irqsoff tracer tracks the time for which interrupts are ++disabled. When a new maximum latency is hit, the tracer saves ++the trace leading up to that latency point so that every time a ++new maximum is reached, the old saved trace is discarded and the ++new trace is saved. + +-To reset the maximum, echo 0 into tracing_max_latency. Here is an +-example: ++To reset the maximum, echo 0 into tracing_max_latency. 
Here is ++an example: + + # echo irqsoff > /debug/tracing/current_tracer + # echo 0 > /debug/tracing/tracing_max_latency +@@ -532,10 +607,11 @@ irqsoff latency trace v1.1.5 on 2.6.26 + + + Here we see that that we had a latency of 12 microsecs (which is +-very good). The _write_lock_irq in sys_setpgid disabled interrupts. +-The difference between the 12 and the displayed timestamp 14us occurred +-because the clock was incremented between the time of recording the max +-latency and the time of recording the function that had that latency. ++very good). The _write_lock_irq in sys_setpgid disabled ++interrupts. The difference between the 12 and the displayed ++timestamp 14us occurred because the clock was incremented ++between the time of recording the max latency and the time of ++recording the function that had that latency. + + Note the above example had ftrace_enabled not set. If we set the + ftrace_enabled, we get a much larger output: +@@ -586,24 +662,24 @@ irqsoff latency trace v1.1.5 on 2.6.26-r + + + Here we traced a 50 microsecond latency. But we also see all the +-functions that were called during that time. Note that by enabling +-function tracing, we incur an added overhead. This overhead may +-extend the latency times. But nevertheless, this trace has provided +-some very helpful debugging information. ++functions that were called during that time. Note that by ++enabling function tracing, we incur an added overhead. This ++overhead may extend the latency times. But nevertheless, this ++trace has provided some very helpful debugging information. + + + preemptoff + ---------- + +-When preemption is disabled, we may be able to receive interrupts but +-the task cannot be preempted and a higher priority task must wait +-for preemption to be enabled again before it can preempt a lower +-priority task. ++When preemption is disabled, we may be able to receive ++interrupts but the task cannot be preempted and a higher ++priority task must wait for preemption to be enabled again ++before it can preempt a lower priority task. + + The preemptoff tracer traces the places that disable preemption. +-Like the irqsoff tracer, it records the maximum latency for which preemption +-was disabled. The control of preemptoff tracer is much like the irqsoff +-tracer. ++Like the irqsoff tracer, it records the maximum latency for ++which preemption was disabled. The control of preemptoff tracer ++is much like the irqsoff tracer. + + # echo preemptoff > /debug/tracing/current_tracer + # echo 0 > /debug/tracing/tracing_max_latency +@@ -637,11 +713,12 @@ preemptoff latency trace v1.1.5 on 2.6.2 + sshd-4261 0d.s1 30us : trace_preempt_on (__do_softirq) + + +-This has some more changes. Preemption was disabled when an interrupt +-came in (notice the 'h'), and was enabled while doing a softirq. +-(notice the 's'). But we also see that interrupts have been disabled +-when entering the preempt off section and leaving it (the 'd'). +-We do not know if interrupts were enabled in the mean time. ++This has some more changes. Preemption was disabled when an ++interrupt came in (notice the 'h'), and was enabled while doing ++a softirq. (notice the 's'). But we also see that interrupts ++have been disabled when entering the preempt off section and ++leaving it (the 'd'). We do not know if interrupts were enabled ++in the mean time. 
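++
++For reference, the kind of section this tracer times is an ordinary
++preempt-disabled region in kernel code, roughly as follows (a
++schematic example, the function is made up):
++
++  #include <linux/preempt.h>
++
++  static void some_code_path(void)
++  {
++          preempt_disable();  /* outermost disable: timing starts    */
++          /* ... work that must not be preempted ... */
++          preempt_enable();   /* count returns to zero: timing stops
++                                 and is compared against
++                                 tracing_max_latency */
++  }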
+ + # tracer: preemptoff + # +@@ -700,28 +777,30 @@ preemptoff latency trace v1.1.5 on 2.6.2 + sshd-4261 0d.s1 64us : trace_preempt_on (__do_softirq) + + +-The above is an example of the preemptoff trace with ftrace_enabled +-set. Here we see that interrupts were disabled the entire time. +-The irq_enter code lets us know that we entered an interrupt 'h'. +-Before that, the functions being traced still show that it is not +-in an interrupt, but we can see from the functions themselves that +-this is not the case. +- +-Notice that __do_softirq when called does not have a preempt_count. +-It may seem that we missed a preempt enabling. What really happened +-is that the preempt count is held on the thread's stack and we +-switched to the softirq stack (4K stacks in effect). The code +-does not copy the preempt count, but because interrupts are disabled, +-we do not need to worry about it. Having a tracer like this is good +-for letting people know what really happens inside the kernel. ++The above is an example of the preemptoff trace with ++ftrace_enabled set. Here we see that interrupts were disabled ++the entire time. The irq_enter code lets us know that we entered ++an interrupt 'h'. Before that, the functions being traced still ++show that it is not in an interrupt, but we can see from the ++functions themselves that this is not the case. ++ ++Notice that __do_softirq when called does not have a ++preempt_count. It may seem that we missed a preempt enabling. ++What really happened is that the preempt count is held on the ++thread's stack and we switched to the softirq stack (4K stacks ++in effect). The code does not copy the preempt count, but ++because interrupts are disabled, we do not need to worry about ++it. Having a tracer like this is good for letting people know ++what really happens inside the kernel. + + + preemptirqsoff + -------------- + +-Knowing the locations that have interrupts disabled or preemption +-disabled for the longest times is helpful. But sometimes we would +-like to know when either preemption and/or interrupts are disabled. ++Knowing the locations that have interrupts disabled or ++preemption disabled for the longest times is helpful. But ++sometimes we would like to know when either preemption and/or ++interrupts are disabled. + + Consider the following code: + +@@ -741,11 +820,13 @@ The preemptoff tracer will record the to + call_function_with_irqs_and_preemption_off() and + call_function_with_preemption_off(). + +-But neither will trace the time that interrupts and/or preemption +-is disabled. This total time is the time that we can not schedule. +-To record this time, use the preemptirqsoff tracer. ++But neither will trace the time that interrupts and/or ++preemption is disabled. This total time is the time that we can ++not schedule. To record this time, use the preemptirqsoff ++tracer. + +-Again, using this trace is much like the irqsoff and preemptoff tracers. ++Again, using this trace is much like the irqsoff and preemptoff ++tracers. + + # echo preemptirqsoff > /debug/tracing/current_tracer + # echo 0 > /debug/tracing/tracing_max_latency +@@ -781,9 +862,10 @@ preemptirqsoff latency trace v1.1.5 on 2 + + + The trace_hardirqs_off_thunk is called from assembly on x86 when +-interrupts are disabled in the assembly code. Without the function +-tracing, we do not know if interrupts were enabled within the preemption +-points. We do see that it started with preemption enabled. ++interrupts are disabled in the assembly code. 
Without the ++function tracing, we do not know if interrupts were enabled ++within the preemption points. We do see that it started with ++preemption enabled. + + Here is a trace with ftrace_enabled set: + +@@ -871,40 +953,42 @@ preemptirqsoff latency trace v1.1.5 on 2 + sshd-4261 0d.s1 105us : trace_preempt_on (__do_softirq) + + +-This is a very interesting trace. It started with the preemption of +-the ls task. We see that the task had the "need_resched" bit set +-via the 'N' in the trace. Interrupts were disabled before the spin_lock +-at the beginning of the trace. We see that a schedule took place to run +-sshd. When the interrupts were enabled, we took an interrupt. +-On return from the interrupt handler, the softirq ran. We took another +-interrupt while running the softirq as we see from the capital 'H'. ++This is a very interesting trace. It started with the preemption ++of the ls task. We see that the task had the "need_resched" bit ++set via the 'N' in the trace. Interrupts were disabled before ++the spin_lock at the beginning of the trace. We see that a ++schedule took place to run sshd. When the interrupts were ++enabled, we took an interrupt. On return from the interrupt ++handler, the softirq ran. We took another interrupt while ++running the softirq as we see from the capital 'H'. + + + wakeup + ------ + +-In a Real-Time environment it is very important to know the wakeup +-time it takes for the highest priority task that is woken up to the +-time that it executes. This is also known as "schedule latency". +-I stress the point that this is about RT tasks. It is also important +-to know the scheduling latency of non-RT tasks, but the average +-schedule latency is better for non-RT tasks. Tools like +-LatencyTop are more appropriate for such measurements. ++In a Real-Time environment it is very important to know the ++wakeup time it takes for the highest priority task that is woken ++up to the time that it executes. This is also known as "schedule ++latency". I stress the point that this is about RT tasks. It is ++also important to know the scheduling latency of non-RT tasks, ++but the average schedule latency is better for non-RT tasks. ++Tools like LatencyTop are more appropriate for such ++measurements. + + Real-Time environments are interested in the worst case latency. +-That is the longest latency it takes for something to happen, and +-not the average. We can have a very fast scheduler that may only +-have a large latency once in a while, but that would not work well +-with Real-Time tasks. The wakeup tracer was designed to record +-the worst case wakeups of RT tasks. Non-RT tasks are not recorded +-because the tracer only records one worst case and tracing non-RT +-tasks that are unpredictable will overwrite the worst case latency +-of RT tasks. +- +-Since this tracer only deals with RT tasks, we will run this slightly +-differently than we did with the previous tracers. Instead of performing +-an 'ls', we will run 'sleep 1' under 'chrt' which changes the +-priority of the task. ++That is the longest latency it takes for something to happen, ++and not the average. We can have a very fast scheduler that may ++only have a large latency once in a while, but that would not ++work well with Real-Time tasks. The wakeup tracer was designed ++to record the worst case wakeups of RT tasks. Non-RT tasks are ++not recorded because the tracer only records one worst case and ++tracing non-RT tasks that are unpredictable will overwrite the ++worst case latency of RT tasks. 
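++
++Here an RT task simply means one running under a real-time scheduling
++policy (SCHED_FIFO or SCHED_RR). Roughly, this is what chrt, used
++below, arranges through sched_setscheduler(); a minimal user-space
++sketch:
++
++  #include <sched.h>
++  #include <stdio.h>
++
++  int main(void)
++  {
++          struct sched_param sp = { .sched_priority = 5 };
++
++          /* only SCHED_FIFO/SCHED_RR tasks are recorded by this tracer */
++          if (sched_setscheduler(0, SCHED_FIFO, &sp) < 0) {
++                  perror("sched_setscheduler");
++                  return 1;
++          }
++          /* ... wakeups of this task are now candidates for the trace ... */
++          return 0;
++  }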
++ ++Since this tracer only deals with RT tasks, we will run this ++slightly differently than we did with the previous tracers. ++Instead of performing an 'ls', we will run 'sleep 1' under ++'chrt' which changes the priority of the task. + + # echo wakeup > /debug/tracing/current_tracer + # echo 0 > /debug/tracing/tracing_max_latency +@@ -934,17 +1018,16 @@ wakeup latency trace v1.1.5 on 2.6.26-rc + -0 1d..4 4us : schedule (cpu_idle) + + +- +-Running this on an idle system, we see that it only took 4 microseconds +-to perform the task switch. Note, since the trace marker in the +-schedule is before the actual "switch", we stop the tracing when +-the recorded task is about to schedule in. This may change if +-we add a new marker at the end of the scheduler. +- +-Notice that the recorded task is 'sleep' with the PID of 4901 and it +-has an rt_prio of 5. This priority is user-space priority and not +-the internal kernel priority. The policy is 1 for SCHED_FIFO and 2 +-for SCHED_RR. ++Running this on an idle system, we see that it only took 4 ++microseconds to perform the task switch. Note, since the trace ++marker in the schedule is before the actual "switch", we stop ++the tracing when the recorded task is about to schedule in. This ++may change if we add a new marker at the end of the scheduler. ++ ++Notice that the recorded task is 'sleep' with the PID of 4901 ++and it has an rt_prio of 5. This priority is user-space priority ++and not the internal kernel priority. The policy is 1 for ++SCHED_FIFO and 2 for SCHED_RR. + + Doing the same with chrt -r 5 and ftrace_enabled set. + +@@ -1001,24 +1084,25 @@ ksoftirq-7 1d..6 49us : _spin_unlo + ksoftirq-7 1d..6 49us : sub_preempt_count (_spin_unlock) + ksoftirq-7 1d..4 50us : schedule (__cond_resched) + +-The interrupt went off while running ksoftirqd. This task runs at +-SCHED_OTHER. Why did not we see the 'N' set early? This may be +-a harmless bug with x86_32 and 4K stacks. On x86_32 with 4K stacks +-configured, the interrupt and softirq run with their own stack. +-Some information is held on the top of the task's stack (need_resched +-and preempt_count are both stored there). The setting of the NEED_RESCHED +-bit is done directly to the task's stack, but the reading of the +-NEED_RESCHED is done by looking at the current stack, which in this case +-is the stack for the hard interrupt. This hides the fact that NEED_RESCHED +-has been set. We do not see the 'N' until we switch back to the task's ++The interrupt went off while running ksoftirqd. This task runs ++at SCHED_OTHER. Why did not we see the 'N' set early? This may ++be a harmless bug with x86_32 and 4K stacks. On x86_32 with 4K ++stacks configured, the interrupt and softirq run with their own ++stack. Some information is held on the top of the task's stack ++(need_resched and preempt_count are both stored there). The ++setting of the NEED_RESCHED bit is done directly to the task's ++stack, but the reading of the NEED_RESCHED is done by looking at ++the current stack, which in this case is the stack for the hard ++interrupt. This hides the fact that NEED_RESCHED has been set. ++We do not see the 'N' until we switch back to the task's + assigned stack. + + function + -------- + + This tracer is the function tracer. Enabling the function tracer +-can be done from the debug file system. Make sure the ftrace_enabled is +-set; otherwise this tracer is a nop. ++can be done from the debug file system. Make sure the ++ftrace_enabled is set; otherwise this tracer is a nop. 
+ + # sysctl kernel.ftrace_enabled=1 + # echo function > /debug/tracing/current_tracer +@@ -1048,14 +1132,15 @@ set; otherwise this tracer is a nop. + [...] + + +-Note: function tracer uses ring buffers to store the above entries. +-The newest data may overwrite the oldest data. Sometimes using echo to +-stop the trace is not sufficient because the tracing could have overwritten +-the data that you wanted to record. For this reason, it is sometimes better to +-disable tracing directly from a program. This allows you to stop the +-tracing at the point that you hit the part that you are interested in. +-To disable the tracing directly from a C program, something like following +-code snippet can be used: ++Note: function tracer uses ring buffers to store the above ++entries. The newest data may overwrite the oldest data. ++Sometimes using echo to stop the trace is not sufficient because ++the tracing could have overwritten the data that you wanted to ++record. For this reason, it is sometimes better to disable ++tracing directly from a program. This allows you to stop the ++tracing at the point that you hit the part that you are ++interested in. To disable the tracing directly from a C program, ++something like following code snippet can be used: + + int trace_fd; + [...] +@@ -1070,10 +1155,10 @@ int main(int argc, char *argv[]) { + } + + Note: Here we hard coded the path name. The debugfs mount is not +-guaranteed to be at /debug (and is more commonly at /sys/kernel/debug). +-For simple one time traces, the above is sufficent. For anything else, +-a search through /proc/mounts may be needed to find where the debugfs +-file-system is mounted. ++guaranteed to be at /debug (and is more commonly at ++/sys/kernel/debug). For simple one time traces, the above is ++sufficent. For anything else, a search through /proc/mounts may ++be needed to find where the debugfs file-system is mounted. + + + Single thread tracing +@@ -1152,49 +1237,297 @@ int main (int argc, char **argv) + return 0; + } + ++ ++hw-branch-tracer (x86 only) ++--------------------------- ++ ++This tracer uses the x86 last branch tracing hardware feature to ++collect a branch trace on all cpus with relatively low overhead. ++ ++The tracer uses a fixed-size circular buffer per cpu and only ++traces ring 0 branches. The trace file dumps that buffer in the ++following format: ++ ++# tracer: hw-branch-tracer ++# ++# CPU# TO <- FROM ++ 0 scheduler_tick+0xb5/0x1bf <- task_tick_idle+0x5/0x6 ++ 2 run_posix_cpu_timers+0x2b/0x72a <- run_posix_cpu_timers+0x25/0x72a ++ 0 scheduler_tick+0x139/0x1bf <- scheduler_tick+0xed/0x1bf ++ 0 scheduler_tick+0x17c/0x1bf <- scheduler_tick+0x148/0x1bf ++ 2 run_posix_cpu_timers+0x9e/0x72a <- run_posix_cpu_timers+0x5e/0x72a ++ 0 scheduler_tick+0x1b6/0x1bf <- scheduler_tick+0x1aa/0x1bf ++ ++ ++The tracer may be used to dump the trace for the oops'ing cpu on ++a kernel oops into the system log. To enable this, ++ftrace_dump_on_oops must be set. To set ftrace_dump_on_oops, one ++can either use the sysctl function or set it via the proc system ++interface. 
++ ++ sysctl kernel.ftrace_dump_on_oops=1 ++ ++or ++ ++ echo 1 > /proc/sys/kernel/ftrace_dump_on_oops ++ ++ ++Here's an example of such a dump after a null pointer ++dereference in a kernel module: ++ ++[57848.105921] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 ++[57848.106019] IP: [] open+0x6/0x14 [oops] ++[57848.106019] PGD 2354e9067 PUD 2375e7067 PMD 0 ++[57848.106019] Oops: 0002 [#1] SMP ++[57848.106019] last sysfs file: /sys/devices/pci0000:00/0000:00:1e.0/0000:20:05.0/local_cpus ++[57848.106019] Dumping ftrace buffer: ++[57848.106019] --------------------------------- ++[...] ++[57848.106019] 0 chrdev_open+0xe6/0x165 <- cdev_put+0x23/0x24 ++[57848.106019] 0 chrdev_open+0x117/0x165 <- chrdev_open+0xfa/0x165 ++[57848.106019] 0 chrdev_open+0x120/0x165 <- chrdev_open+0x11c/0x165 ++[57848.106019] 0 chrdev_open+0x134/0x165 <- chrdev_open+0x12b/0x165 ++[57848.106019] 0 open+0x0/0x14 [oops] <- chrdev_open+0x144/0x165 ++[57848.106019] 0 page_fault+0x0/0x30 <- open+0x6/0x14 [oops] ++[57848.106019] 0 error_entry+0x0/0x5b <- page_fault+0x4/0x30 ++[57848.106019] 0 error_kernelspace+0x0/0x31 <- error_entry+0x59/0x5b ++[57848.106019] 0 error_sti+0x0/0x1 <- error_kernelspace+0x2d/0x31 ++[57848.106019] 0 page_fault+0x9/0x30 <- error_sti+0x0/0x1 ++[57848.106019] 0 do_page_fault+0x0/0x881 <- page_fault+0x1a/0x30 ++[...] ++[57848.106019] 0 do_page_fault+0x66b/0x881 <- is_prefetch+0x1ee/0x1f2 ++[57848.106019] 0 do_page_fault+0x6e0/0x881 <- do_page_fault+0x67a/0x881 ++[57848.106019] 0 oops_begin+0x0/0x96 <- do_page_fault+0x6e0/0x881 ++[57848.106019] 0 trace_hw_branch_oops+0x0/0x2d <- oops_begin+0x9/0x96 ++[...] ++[57848.106019] 0 ds_suspend_bts+0x2a/0xe3 <- ds_suspend_bts+0x1a/0xe3 ++[57848.106019] --------------------------------- ++[57848.106019] CPU 0 ++[57848.106019] Modules linked in: oops ++[57848.106019] Pid: 5542, comm: cat Tainted: G W 2.6.28 #23 ++[57848.106019] RIP: 0010:[] [] open+0x6/0x14 [oops] ++[57848.106019] RSP: 0018:ffff880235457d48 EFLAGS: 00010246 ++[...] ++ ++ ++function graph tracer ++--------------------------- ++ ++This tracer is similar to the function tracer except that it ++probes a function on its entry and its exit. This is done by ++using a dynamically allocated stack of return addresses in each ++task_struct. On function entry the tracer overwrites the return ++address of each function traced to set a custom probe. Thus the ++original return address is stored on the stack of return address ++in the task_struct. ++ ++Probing on both ends of a function leads to special features ++such as: ++ ++- measure of a function's time execution ++- having a reliable call stack to draw function calls graph ++ ++This tracer is useful in several situations: ++ ++- you want to find the reason of a strange kernel behavior and ++ need to see what happens in detail on any areas (or specific ++ ones). ++ ++- you are experiencing weird latencies but it's difficult to ++ find its origin. ++ ++- you want to find quickly which path is taken by a specific ++ function ++ ++- you just want to peek inside a working kernel and want to see ++ what happens there. 
++ ++# tracer: function_graph ++# ++# CPU DURATION FUNCTION CALLS ++# | | | | | | | ++ ++ 0) | sys_open() { ++ 0) | do_sys_open() { ++ 0) | getname() { ++ 0) | kmem_cache_alloc() { ++ 0) 1.382 us | __might_sleep(); ++ 0) 2.478 us | } ++ 0) | strncpy_from_user() { ++ 0) | might_fault() { ++ 0) 1.389 us | __might_sleep(); ++ 0) 2.553 us | } ++ 0) 3.807 us | } ++ 0) 7.876 us | } ++ 0) | alloc_fd() { ++ 0) 0.668 us | _spin_lock(); ++ 0) 0.570 us | expand_files(); ++ 0) 0.586 us | _spin_unlock(); ++ ++ ++There are several columns that can be dynamically ++enabled/disabled. You can use every combination of options you ++want, depending on your needs. ++ ++- The cpu number on which the function executed is default ++ enabled. It is sometimes better to only trace one cpu (see ++ tracing_cpu_mask file) or you might sometimes see unordered ++ function calls while cpu tracing switch. ++ ++ hide: echo nofuncgraph-cpu > /debug/tracing/trace_options ++ show: echo funcgraph-cpu > /debug/tracing/trace_options ++ ++- The duration (function's time of execution) is displayed on ++ the closing bracket line of a function or on the same line ++ than the current function in case of a leaf one. It is default ++ enabled. ++ ++ hide: echo nofuncgraph-duration > /debug/tracing/trace_options ++ show: echo funcgraph-duration > /debug/tracing/trace_options ++ ++- The overhead field precedes the duration field in case of ++ reached duration thresholds. ++ ++ hide: echo nofuncgraph-overhead > /debug/tracing/trace_options ++ show: echo funcgraph-overhead > /debug/tracing/trace_options ++ depends on: funcgraph-duration ++ ++ ie: ++ ++ 0) | up_write() { ++ 0) 0.646 us | _spin_lock_irqsave(); ++ 0) 0.684 us | _spin_unlock_irqrestore(); ++ 0) 3.123 us | } ++ 0) 0.548 us | fput(); ++ 0) + 58.628 us | } ++ ++ [...] ++ ++ 0) | putname() { ++ 0) | kmem_cache_free() { ++ 0) 0.518 us | __phys_addr(); ++ 0) 1.757 us | } ++ 0) 2.861 us | } ++ 0) ! 115.305 us | } ++ 0) ! 116.402 us | } ++ ++ + means that the function exceeded 10 usecs. ++ ! means that the function exceeded 100 usecs. ++ ++ ++- The task/pid field displays the thread cmdline and pid which ++ executed the function. It is default disabled. ++ ++ hide: echo nofuncgraph-proc > /debug/tracing/trace_options ++ show: echo funcgraph-proc > /debug/tracing/trace_options ++ ++ ie: ++ ++ # tracer: function_graph ++ # ++ # CPU TASK/PID DURATION FUNCTION CALLS ++ # | | | | | | | | | ++ 0) sh-4802 | | d_free() { ++ 0) sh-4802 | | call_rcu() { ++ 0) sh-4802 | | __call_rcu() { ++ 0) sh-4802 | 0.616 us | rcu_process_gp_end(); ++ 0) sh-4802 | 0.586 us | check_for_new_grace_period(); ++ 0) sh-4802 | 2.899 us | } ++ 0) sh-4802 | 4.040 us | } ++ 0) sh-4802 | 5.151 us | } ++ 0) sh-4802 | + 49.370 us | } ++ ++ ++- The absolute time field is an absolute timestamp given by the ++ system clock since it started. 
A snapshot of this time is ++ given on each entry/exit of functions ++ ++ hide: echo nofuncgraph-abstime > /debug/tracing/trace_options ++ show: echo funcgraph-abstime > /debug/tracing/trace_options ++ ++ ie: ++ ++ # ++ # TIME CPU DURATION FUNCTION CALLS ++ # | | | | | | | | ++ 360.774522 | 1) 0.541 us | } ++ 360.774522 | 1) 4.663 us | } ++ 360.774523 | 1) 0.541 us | __wake_up_bit(); ++ 360.774524 | 1) 6.796 us | } ++ 360.774524 | 1) 7.952 us | } ++ 360.774525 | 1) 9.063 us | } ++ 360.774525 | 1) 0.615 us | journal_mark_dirty(); ++ 360.774527 | 1) 0.578 us | __brelse(); ++ 360.774528 | 1) | reiserfs_prepare_for_journal() { ++ 360.774528 | 1) | unlock_buffer() { ++ 360.774529 | 1) | wake_up_bit() { ++ 360.774529 | 1) | bit_waitqueue() { ++ 360.774530 | 1) 0.594 us | __phys_addr(); ++ ++ ++You can put some comments on specific functions by using ++trace_printk() For example, if you want to put a comment inside ++the __might_sleep() function, you just have to include ++ and call trace_printk() inside __might_sleep() ++ ++trace_printk("I'm a comment!\n") ++ ++will produce: ++ ++ 1) | __might_sleep() { ++ 1) | /* I'm a comment! */ ++ 1) 1.449 us | } ++ ++ ++You might find other useful features for this tracer in the ++following "dynamic ftrace" section such as tracing only specific ++functions or tasks. ++ + dynamic ftrace + -------------- + + If CONFIG_DYNAMIC_FTRACE is set, the system will run with + virtually no overhead when function tracing is disabled. The way + this works is the mcount function call (placed at the start of +-every kernel function, produced by the -pg switch in gcc), starts +-of pointing to a simple return. (Enabling FTRACE will include the +--pg switch in the compiling of the kernel.) ++every kernel function, produced by the -pg switch in gcc), ++starts of pointing to a simple return. (Enabling FTRACE will ++include the -pg switch in the compiling of the kernel.) + + At compile time every C file object is run through the + recordmcount.pl script (located in the scripts directory). This + script will process the C object using objdump to find all the +-locations in the .text section that call mcount. (Note, only +-the .text section is processed, since processing other sections +-like .init.text may cause races due to those sections being freed). +- +-A new section called "__mcount_loc" is created that holds references +-to all the mcount call sites in the .text section. This section is +-compiled back into the original object. The final linker will add +-all these references into a single table. ++locations in the .text section that call mcount. (Note, only the ++.text section is processed, since processing other sections like ++.init.text may cause races due to those sections being freed). ++ ++A new section called "__mcount_loc" is created that holds ++references to all the mcount call sites in the .text section. ++This section is compiled back into the original object. The ++final linker will add all these references into a single table. + + On boot up, before SMP is initialized, the dynamic ftrace code +-scans this table and updates all the locations into nops. It also +-records the locations, which are added to the available_filter_functions +-list. Modules are processed as they are loaded and before they are +-executed. When a module is unloaded, it also removes its functions from +-the ftrace function list. This is automatic in the module unload +-code, and the module author does not need to worry about it. 
+- +-When tracing is enabled, kstop_machine is called to prevent races +-with the CPUS executing code being modified (which can cause the +-CPU to do undesireable things), and the nops are patched back +-to calls. But this time, they do not call mcount (which is just +-a function stub). They now call into the ftrace infrastructure. ++scans this table and updates all the locations into nops. It ++also records the locations, which are added to the ++available_filter_functions list. Modules are processed as they ++are loaded and before they are executed. When a module is ++unloaded, it also removes its functions from the ftrace function ++list. This is automatic in the module unload code, and the ++module author does not need to worry about it. ++ ++When tracing is enabled, kstop_machine is called to prevent ++races with the CPUS executing code being modified (which can ++cause the CPU to do undesireable things), and the nops are ++patched back to calls. But this time, they do not call mcount ++(which is just a function stub). They now call into the ftrace ++infrastructure. + + One special side-effect to the recording of the functions being + traced is that we can now selectively choose which functions we +-wish to trace and which ones we want the mcount calls to remain as +-nops. ++wish to trace and which ones we want the mcount calls to remain ++as nops. + +-Two files are used, one for enabling and one for disabling the tracing +-of specified functions. They are: ++Two files are used, one for enabling and one for disabling the ++tracing of specified functions. They are: + + set_ftrace_filter + +@@ -1202,8 +1535,8 @@ and + + set_ftrace_notrace + +-A list of available functions that you can add to these files is listed +-in: ++A list of available functions that you can add to these files is ++listed in: + + available_filter_functions + +@@ -1240,8 +1573,8 @@ hrtimer_interrupt + sys_nanosleep + + +-Perhaps this is not enough. The filters also allow simple wild cards. +-Only the following are currently available ++Perhaps this is not enough. The filters also allow simple wild ++cards. Only the following are currently available + + * - will match functions that begin with + * - will match functions that end with +@@ -1251,9 +1584,9 @@ These are the only wild cards which are + + * will not work. + +-Note: It is better to use quotes to enclose the wild cards, otherwise +- the shell may expand the parameters into names of files in the local +- directory. ++Note: It is better to use quotes to enclose the wild cards, ++ otherwise the shell may expand the parameters into names ++ of files in the local directory. + + # echo 'hrtimer_*' > /debug/tracing/set_ftrace_filter + +@@ -1299,7 +1632,8 @@ This is because the '>' and '>>' act jus + To rewrite the filters, use '>' + To append to the filters, use '>>' + +-To clear out a filter so that all functions will be recorded again: ++To clear out a filter so that all functions will be recorded ++again: + + # echo > /debug/tracing/set_ftrace_filter + # cat /debug/tracing/set_ftrace_filter +@@ -1331,7 +1665,8 @@ hrtimer_get_res + hrtimer_init_sleeper + + +-The set_ftrace_notrace prevents those functions from being traced. ++The set_ftrace_notrace prevents those functions from being ++traced. + + # echo '*preempt*' '*lock*' > /debug/tracing/set_ftrace_notrace + +@@ -1353,13 +1688,75 @@ Produces: + + We can see that there's no more lock or preempt tracing. 
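The set_ftrace_filter and set_ftrace_notrace files driven with echo above are
ordinary files, so the same thing can be done from a program.  A hedged
userspace sketch follows (it assumes debugfs is mounted at /debug as used
throughout this document; write_ftrace_filter() is an illustrative helper,
not an existing API):

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	/* Illustrative helper, equivalent to:
	 *   echo 'hrtimer_*' > /debug/tracing/set_ftrace_filter
	 * Opening with O_TRUNC mirrors '>'; O_APPEND would mirror '>>'. */
	static int write_ftrace_filter(const char *glob)
	{
		int fd = open("/debug/tracing/set_ftrace_filter",
			      O_WRONLY | O_TRUNC);

		if (fd < 0)
			return -1;

		if (write(fd, glob, strlen(glob)) < 0) {
			close(fd);
			return -1;
		}
		return close(fd);
	}

	int main(void)
	{
		if (write_ftrace_filter("hrtimer_*"))
			perror("write_ftrace_filter");
		return 0;
	}

Note that the quoting caveat above is purely a shell concern; from C the
wild-card string is passed to the kernel as-is.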
+ ++ ++Dynamic ftrace with the function graph tracer ++--------------------------------------------- ++ ++Although what has been explained above concerns both the ++function tracer and the function-graph-tracer, there are some ++special features only available in the function-graph tracer. ++ ++If you want to trace only one function and all of its children, ++you just have to echo its name into set_graph_function: ++ ++ echo __do_fault > set_graph_function ++ ++will produce the following "expanded" trace of the __do_fault() ++function: ++ ++ 0) | __do_fault() { ++ 0) | filemap_fault() { ++ 0) | find_lock_page() { ++ 0) 0.804 us | find_get_page(); ++ 0) | __might_sleep() { ++ 0) 1.329 us | } ++ 0) 3.904 us | } ++ 0) 4.979 us | } ++ 0) 0.653 us | _spin_lock(); ++ 0) 0.578 us | page_add_file_rmap(); ++ 0) 0.525 us | native_set_pte_at(); ++ 0) 0.585 us | _spin_unlock(); ++ 0) | unlock_page() { ++ 0) 0.541 us | page_waitqueue(); ++ 0) 0.639 us | __wake_up_bit(); ++ 0) 2.786 us | } ++ 0) + 14.237 us | } ++ 0) | __do_fault() { ++ 0) | filemap_fault() { ++ 0) | find_lock_page() { ++ 0) 0.698 us | find_get_page(); ++ 0) | __might_sleep() { ++ 0) 1.412 us | } ++ 0) 3.950 us | } ++ 0) 5.098 us | } ++ 0) 0.631 us | _spin_lock(); ++ 0) 0.571 us | page_add_file_rmap(); ++ 0) 0.526 us | native_set_pte_at(); ++ 0) 0.586 us | _spin_unlock(); ++ 0) | unlock_page() { ++ 0) 0.533 us | page_waitqueue(); ++ 0) 0.638 us | __wake_up_bit(); ++ 0) 2.793 us | } ++ 0) + 14.012 us | } ++ ++You can also expand several functions at once: ++ ++ echo sys_open > set_graph_function ++ echo sys_close >> set_graph_function ++ ++Now if you want to go back to trace all functions you can clear ++this special filter via: ++ ++ echo > set_graph_function ++ ++ + trace_pipe + ---------- + +-The trace_pipe outputs the same content as the trace file, but the effect +-on the tracing is different. Every read from trace_pipe is consumed. +-This means that subsequent reads will be different. The trace +-is live. ++The trace_pipe outputs the same content as the trace file, but ++the effect on the tracing is different. Every read from ++trace_pipe is consumed. This means that subsequent reads will be ++different. The trace is live. + + # echo function > /debug/tracing/current_tracer + # cat /debug/tracing/trace_pipe > /tmp/trace.out & +@@ -1387,38 +1784,45 @@ is live. + bash-4043 [00] 41.267111: select_task_rq_rt <-try_to_wake_up + + +-Note, reading the trace_pipe file will block until more input is added. +-By changing the tracer, trace_pipe will issue an EOF. We needed +-to set the function tracer _before_ we "cat" the trace_pipe file. ++Note, reading the trace_pipe file will block until more input is ++added. By changing the tracer, trace_pipe will issue an EOF. We ++needed to set the function tracer _before_ we "cat" the ++trace_pipe file. + + + trace entries + ------------- + +-Having too much or not enough data can be troublesome in diagnosing +-an issue in the kernel. The file buffer_size_kb is used to modify +-the size of the internal trace buffers. The number listed +-is the number of entries that can be recorded per CPU. To know +-the full size, multiply the number of possible CPUS with the +-number of entries. ++Having too much or not enough data can be troublesome in ++diagnosing an issue in the kernel. The file buffer_size_kb is ++used to modify the size of the internal trace buffers. The ++number listed is the number of entries that can be recorded per ++CPU. 
To know the full size, multiply the number of possible CPUS ++with the number of entries. + + # cat /debug/tracing/buffer_size_kb + 1408 (units kilobytes) + +-Note, to modify this, you must have tracing completely disabled. To do that, +-echo "nop" into the current_tracer. If the current_tracer is not set +-to "nop", an EINVAL error will be returned. ++Note, to modify this, you must have tracing completely disabled. ++To do that, echo "nop" into the current_tracer. If the ++current_tracer is not set to "nop", an EINVAL error will be ++returned. + + # echo nop > /debug/tracing/current_tracer + # echo 10000 > /debug/tracing/buffer_size_kb + # cat /debug/tracing/buffer_size_kb + 10000 (units kilobytes) + +-The number of pages which will be allocated is limited to a percentage +-of available memory. Allocating too much will produce an error. ++The number of pages which will be allocated is limited to a ++percentage of available memory. Allocating too much will produce ++an error. + + # echo 1000000000000 > /debug/tracing/buffer_size_kb + -bash: echo: write error: Cannot allocate memory + # cat /debug/tracing/buffer_size_kb + 85 + ++----------- ++ ++More details can be found in the source code, in the ++kernel/tracing/*.c files. +Index: linux-2.6-tip/Documentation/kernel-parameters.txt +=================================================================== +--- linux-2.6-tip.orig/Documentation/kernel-parameters.txt ++++ linux-2.6-tip/Documentation/kernel-parameters.txt +@@ -49,6 +49,7 @@ parameter is applicable: + ISAPNP ISA PnP code is enabled. + ISDN Appropriate ISDN support is enabled. + JOY Appropriate joystick support is enabled. ++ KMEMTRACE kmemtrace is enabled. + LIBATA Libata driver is enabled + LP Printer support is enabled. + LOOP Loopback device support is enabled. +@@ -491,11 +492,23 @@ and is between 256 and 4096 characters. + Range: 0 - 8192 + Default: 64 + ++ dma_debug=off If the kernel is compiled with DMA_API_DEBUG support ++ this option disables the debugging code at boot. ++ ++ dma_debug_entries= ++ This option allows to tune the number of preallocated ++ entries for DMA-API debugging code. One entry is ++ required per DMA-API allocation. Use this if the ++ DMA-API debugging code disables itself because the ++ architectural default is too low. ++ + hpet= [X86-32,HPET] option to control HPET usage +- Format: { enable (default) | disable | force } ++ Format: { enable (default) | disable | force | ++ verbose } + disable: disable HPET and use PIT instead + force: allow force enabled of undocumented chips (ICH4, + VIA, nVidia) ++ verbose: show contents of HPET registers during setup + + com20020= [HW,NET] ARCnet - COM20020 chipset + Format: +@@ -604,6 +617,9 @@ and is between 256 and 4096 characters. + + debug_objects [KNL] Enable object debugging + ++ no_debug_objects ++ [KNL] Disable object debugging ++ + debugpat [X86] Enable PAT debugging + + decnet.addr= [HW,NET] +@@ -1047,6 +1063,15 @@ and is between 256 and 4096 characters. + use the HighMem zone if it exists, and the Normal + zone if it does not. + ++ kmemtrace.enable= [KNL,KMEMTRACE] Format: { yes | no } ++ Controls whether kmemtrace is enabled ++ at boot-time. ++ ++ kmemtrace.subbufs=n [KNL,KMEMTRACE] Overrides the number of ++ subbufs kmemtrace's relay channel has. Set this ++ higher than default (KMEMTRACE_N_SUBBUFS in code) if ++ you experience buffer overruns. 
++ + movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter + is similar to kernelcore except it specifies the + amount of memory used for migratable allocations. +@@ -1310,8 +1335,13 @@ and is between 256 and 4096 characters. + + memtest= [KNL,X86] Enable memtest + Format: +- range: 0,4 : pattern number + default : 0 ++ Specifies the number of memtest passes to be ++ performed. Each pass selects another test ++ pattern from a given set of patterns. Memtest ++ fills the memory with this pattern, validates ++ memory contents and reserves bad memory ++ regions that are detected. + + meye.*= [HW] Set MotionEye Camera parameters + See Documentation/video4linux/meye.txt. +@@ -2329,6 +2359,8 @@ and is between 256 and 4096 characters. + + tp720= [HW,PS2] + ++ trace_buf_size=nn[KMG] [ftrace] will set tracing buffer size. ++ + trix= [HW,OSS] MediaTrix AudioTrix Pro + Format: + ,,,,,,,, +Index: linux-2.6-tip/Documentation/kmemcheck.txt +=================================================================== +--- /dev/null ++++ linux-2.6-tip/Documentation/kmemcheck.txt +@@ -0,0 +1,129 @@ ++Contents ++======== ++ ++ 1. How to use ++ 2. Technical description ++ 3. Changes to the slab allocators ++ 4. Problems ++ 5. Parameters ++ 6. Future enhancements ++ ++ ++How to use (IMPORTANT) ++====================== ++ ++Always remember this: kmemcheck _will_ give false positives. So don't enable ++it and spam the mailing list with its reports; you are not going to be heard, ++and it will make people's skins thicker for when the real errors are found. ++ ++Instead, I encourage maintainers and developers to find errors in _their_ ++_own_ code. And if you find false positives, you can try to work around them, ++try to figure out if it's a real bug or not, or simply ignore them. Most ++developers know their own code and will quickly and efficiently determine the ++root cause of a kmemcheck report. This is therefore also the most efficient ++way to work with kmemcheck. ++ ++If you still want to run kmemcheck to inspect others' code, the rule of thumb ++should be: If it's not obvious (to you), don't tell us about it either. Most ++likely the code is correct and you'll only waste our time. If you can work ++out the error, please do send the maintainer a heads up and/or a patch, but ++don't expect him/her to fix something that wasn't wrong in the first place. ++ ++ ++Technical description ++===================== ++ ++kmemcheck works by marking memory pages non-present. This means that whenever ++somebody attempts to access the page, a page fault is generated. The page ++fault handler notices that the page was in fact only hidden, and so it calls ++on the kmemcheck code to make further investigations. ++ ++When the investigations are completed, kmemcheck "shows" the page by marking ++it present (as it would be under normal circumstances). This way, the ++interrupted code can continue as usual. ++ ++But after the instruction has been executed, we should hide the page again, so ++that we can catch the next access too! Now kmemcheck makes use of a debugging ++feature of the processor, namely single-stepping. When the processor has ++finished the one instruction that generated the memory access, a debug ++exception is raised. From here, we simply hide the page again and continue ++execution, this time with the single-stepping feature turned off. ++ ++ ++Changes to the slab allocators ++============================== ++ ++kmemcheck requires some assistance from the memory allocator in order to work. 
++The memory allocator needs to ++ ++1. Tell kmemcheck about newly allocated pages and pages that are about to ++ be freed. This allows kmemcheck to set up and tear down the shadow memory ++ for the pages in question. The shadow memory stores the status of each byte ++ in the allocation proper, e.g. whether it is initialized or uninitialized. ++2. Tell kmemcheck which parts of memory should be marked uninitialized. There ++ are actually a few more states, such as "not yet allocated" and "recently ++ freed". ++ ++If a slab cache is set up using the SLAB_NOTRACK flag, it will never return ++memory that can take page faults because of kmemcheck. ++ ++If a slab cache is NOT set up using the SLAB_NOTRACK flag, callers can still ++request memory with the __GFP_NOTRACK flag. This does not prevent the page ++faults from occurring, however, but marks the object in question as being ++initialized so that no warnings will ever be produced for this object. ++ ++Currently, the SLAB and SLUB allocators are supported by kmemcheck. ++ ++ ++Problems ++======== ++ ++The most prominent problem seems to be that of bit-fields. kmemcheck can only ++track memory with byte granularity. Therefore, when gcc generates code to ++access only one bit in a bit-field, there is really no way for kmemcheck to ++know which of the other bits will be used or thrown away. Consequently, there ++may be bogus warnings for bit-field accesses. We have added a "bitfields" API ++to get around this problem. See include/linux/kmemcheck.h for detailed ++instructions! ++ ++ ++Parameters ++========== ++ ++In addition to enabling CONFIG_KMEMCHECK before the kernel is compiled, the ++parameter kmemcheck=1 must be passed to the kernel when it is started in order ++to actually do the tracking. So by default, there is only a very small ++(probably negligible) overhead for enabling the config option. ++ ++Similarly, kmemcheck may be turned on or off at run-time using, respectively: ++ ++echo 1 > /proc/sys/kernel/kmemcheck ++ and ++echo 0 > /proc/sys/kernel/kmemcheck ++ ++Note that this is a lazy setting; once turned off, the old allocations will ++still have to take a single page fault exception before tracking is turned off ++for that particular page. Enabling kmemcheck on will only enable tracking for ++allocations made from that point onwards. ++ ++The default mode is the one-shot mode, where only the first error is reported ++before kmemcheck is disabled. This mode can be enabled by passing kmemcheck=2 ++to the kernel at boot, or running ++ ++echo 2 > /proc/sys/kernel/kmemcheck ++ ++when the kernel is already running. ++ ++ ++Future enhancements ++=================== ++ ++There is already some preliminary support for catching use-after-free errors. ++What still needs to be done is delaying kfree() so that memory is not ++reallocated immediately after freeing it. [Suggested by Pekka Enberg.] ++ ++It should be possible to allow SMP systems by duplicating the page tables for ++each processor in the system. This is probably extremely difficult, however. ++[Suggested by Ingo Molnar.] ++ ++Support for instruction set extensions like XMM, SSE2, etc. +Index: linux-2.6-tip/Documentation/lockdep-design.txt +=================================================================== +--- linux-2.6-tip.orig/Documentation/lockdep-design.txt ++++ linux-2.6-tip/Documentation/lockdep-design.txt +@@ -27,33 +27,37 @@ lock-class. 
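(Referring back to the kmemcheck slab-allocator discussion above.)  The
SLAB_NOTRACK and __GFP_NOTRACK opt-outs described there look roughly like
this in use; a kernel-side sketch only, with a made-up cache name, assuming
the flags are as named in that text:

	#include <linux/slab.h>

	/* Hypothetical cache: objects from it never trigger kmemcheck
	 * page faults. */
	static struct kmem_cache *example_cache;

	static int example_alloc(void)
	{
		void *obj;

		example_cache = kmem_cache_create("example_cache", 128, 0,
						  SLAB_NOTRACK, NULL);
		if (!example_cache)
			return -ENOMEM;

		/* Single allocation marked initialized for kmemcheck, so it
		 * never produces warnings even though its cache is tracked. */
		obj = kmalloc(256, GFP_KERNEL | __GFP_NOTRACK);
		kfree(obj);

		return 0;
	}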
+ State + ----- + +-The validator tracks lock-class usage history into 5 separate state bits: ++The validator tracks lock-class usage history into 4n + 1 separate state bits: + +-- 'ever held in hardirq context' [ == hardirq-safe ] +-- 'ever held in softirq context' [ == softirq-safe ] +-- 'ever held with hardirqs enabled' [ == hardirq-unsafe ] +-- 'ever held with softirqs and hardirqs enabled' [ == softirq-unsafe ] ++- 'ever held in STATE context' ++- 'ever head as readlock in STATE context' ++- 'ever head with STATE enabled' ++- 'ever head as readlock with STATE enabled' ++ ++Where STATE can be either one of (kernel/lockdep_states.h) ++ - hardirq ++ - softirq ++ - reclaim_fs + + - 'ever used' [ == !unused ] + +-When locking rules are violated, these 4 state bits are presented in the +-locking error messages, inside curlies. A contrived example: ++When locking rules are violated, these state bits are presented in the ++locking error messages, inside curlies. A contrived example: + + modprobe/2287 is trying to acquire lock: +- (&sio_locks[i].lock){--..}, at: [] mutex_lock+0x21/0x24 ++ (&sio_locks[i].lock){-.-...}, at: [] mutex_lock+0x21/0x24 + + but task is already holding lock: +- (&sio_locks[i].lock){--..}, at: [] mutex_lock+0x21/0x24 ++ (&sio_locks[i].lock){-.-...}, at: [] mutex_lock+0x21/0x24 + + +-The bit position indicates hardirq, softirq, hardirq-read, +-softirq-read respectively, and the character displayed in each +-indicates: ++The bit position indicates STATE, STATE-read, for each of the states listed ++above, and the character displayed in each indicates: + + '.' acquired while irqs disabled + '+' acquired in irq context + '-' acquired with irqs enabled +- '?' read acquired in irq context with irqs enabled. ++ '?' acquired in irq context with irqs enabled. + + Unused mutexes cannot be part of the cause of an error. + +Index: linux-2.6-tip/Documentation/perf_counter/Makefile +=================================================================== +--- /dev/null ++++ linux-2.6-tip/Documentation/perf_counter/Makefile +@@ -0,0 +1,12 @@ ++BINS = kerneltop perfstat ++ ++all: $(BINS) ++ ++kerneltop: kerneltop.c ../../include/linux/perf_counter.h ++ cc -O6 -Wall -lrt -o $@ $< ++ ++perfstat: kerneltop ++ ln -sf kerneltop perfstat ++ ++clean: ++ rm $(BINS) +Index: linux-2.6-tip/Documentation/perf_counter/design.txt +=================================================================== +--- /dev/null ++++ linux-2.6-tip/Documentation/perf_counter/design.txt +@@ -0,0 +1,283 @@ ++ ++Performance Counters for Linux ++------------------------------ ++ ++Performance counters are special hardware registers available on most modern ++CPUs. These registers count the number of certain types of hw events: such ++as instructions executed, cachemisses suffered, or branches mis-predicted - ++without slowing down the kernel or applications. These registers can also ++trigger interrupts when a threshold number of events have passed - and can ++thus be used to profile the code that runs on that CPU. ++ ++The Linux Performance Counter subsystem provides an abstraction of these ++hardware capabilities. It provides per task and per CPU counters, counter ++groups, and it provides event capabilities on top of those. It ++provides "virtual" 64-bit counters, regardless of the width of the ++underlying hardware counters. ++ ++Performance counters are accessed via special file descriptors. ++There's one file descriptor per virtual counter used. 
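Before the details below, here is the open/read flow reduced to a rough
userspace sketch.  The syscall number, header path, struct fields and event
ids are taken from this design text and from the kerneltop.c added later in
this patch (the design text calls the event field 'event_config' while
kerneltop.c uses 'config'; the sketch follows kerneltop.c).  This is the
early perf_counter ABI of this tree, shown for illustration only:

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	#include "../../include/linux/perf_counter.h" /* path as in kerneltop.c */

	#ifndef __NR_perf_counter_open
	#define __NR_perf_counter_open 298	/* x86_64 value from kerneltop.c */
	#endif

	int main(void)
	{
		struct perf_counter_hw_event hw_event;
		unsigned long long count;
		int fd;

		memset(&hw_event, 0, sizeof(hw_event));
		/* PERF_TYPE_HARDWARE is type 0, so a bare hardware event id
		 * is a valid config value: */
		hw_event.config      = PERF_COUNT_INSTRUCTIONS;
		hw_event.record_type = PERF_RECORD_SIMPLE; /* counting, not sampling */

		/* pid == 0: current task, cpu == -1: any CPU,
		 * group_fd == -1: stand-alone counter, flags must be 0. */
		fd = syscall(__NR_perf_counter_open, &hw_event, 0, -1, -1, 0);
		if (fd < 0) {
			perror("perf_counter_open");
			return 1;
		}

		/* ... run the code to be measured ... */

		if (read(fd, &count, sizeof(count)) == sizeof(count))
			printf("%llu instructions\n", count);

		close(fd);
		return 0;
	}

The details of the system call arguments, the event encoding and the
counting versus sampling modes used above are what the rest of this
document describes.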
++ ++The special file descriptor is opened via the perf_counter_open() ++system call: ++ ++ int sys_perf_counter_open(struct perf_counter_hw_event *hw_event_uptr, ++ pid_t pid, int cpu, int group_fd, ++ unsigned long flags); ++ ++The syscall returns the new fd. The fd can be used via the normal ++VFS system calls: read() can be used to read the counter, fcntl() ++can be used to set the blocking mode, etc. ++ ++Multiple counters can be kept open at a time, and the counters ++can be poll()ed. ++ ++When creating a new counter fd, 'perf_counter_hw_event' is: ++ ++/* ++ * Event to monitor via a performance monitoring counter: ++ */ ++struct perf_counter_hw_event { ++ __u64 event_config; ++ ++ __u64 irq_period; ++ __u64 record_type; ++ __u64 read_format; ++ ++ __u64 disabled : 1, /* off by default */ ++ nmi : 1, /* NMI sampling */ ++ inherit : 1, /* children inherit it */ ++ pinned : 1, /* must always be on PMU */ ++ exclusive : 1, /* only group on PMU */ ++ exclude_user : 1, /* don't count user */ ++ exclude_kernel : 1, /* ditto kernel */ ++ exclude_hv : 1, /* ditto hypervisor */ ++ exclude_idle : 1, /* don't count when idle */ ++ ++ __reserved_1 : 55; ++ ++ __u32 extra_config_len; ++ ++ __u32 __reserved_4; ++ __u64 __reserved_2; ++ __u64 __reserved_3; ++}; ++ ++The 'event_config' field specifies what the counter should count. It ++is divided into 3 bit-fields: ++ ++raw_type: 1 bit (most significant bit) 0x8000_0000_0000_0000 ++type: 7 bits (next most significant) 0x7f00_0000_0000_0000 ++event_id: 56 bits (least significant) 0x00ff_0000_0000_0000 ++ ++If 'raw_type' is 1, then the counter will count a hardware event ++specified by the remaining 63 bits of event_config. The encoding is ++machine-specific. ++ ++If 'raw_type' is 0, then the 'type' field says what kind of counter ++this is, with the following encoding: ++ ++enum perf_event_types { ++ PERF_TYPE_HARDWARE = 0, ++ PERF_TYPE_SOFTWARE = 1, ++ PERF_TYPE_TRACEPOINT = 2, ++}; ++ ++A counter of PERF_TYPE_HARDWARE will count the hardware event ++specified by 'event_id': ++ ++/* ++ * Generalized performance counter event types, used by the hw_event.event_id ++ * parameter of the sys_perf_counter_open() syscall: ++ */ ++enum hw_event_ids { ++ /* ++ * Common hardware events, generalized by the kernel: ++ */ ++ PERF_COUNT_CPU_CYCLES = 0, ++ PERF_COUNT_INSTRUCTIONS = 1, ++ PERF_COUNT_CACHE_REFERENCES = 2, ++ PERF_COUNT_CACHE_MISSES = 3, ++ PERF_COUNT_BRANCH_INSTRUCTIONS = 4, ++ PERF_COUNT_BRANCH_MISSES = 5, ++ PERF_COUNT_BUS_CYCLES = 6, ++}; ++ ++These are standardized types of events that work relatively uniformly ++on all CPUs that implement Performance Counters support under Linux, ++although there may be variations (e.g., different CPUs might count ++cache references and misses at different levels of the cache hierarchy). ++If a CPU is not able to count the selected event, then the system call ++will return -EINVAL. ++ ++More hw_event_types are supported as well, but they are CPU-specific ++and accessed as raw events. For example, to count "External bus ++cycles while bus lock signal asserted" events on Intel Core CPUs, pass ++in a 0x4064 event_id value and set hw_event.raw_type to 1. ++ ++A counter of type PERF_TYPE_SOFTWARE will count one of the available ++software events, selected by 'event_id': ++ ++/* ++ * Special "software" counters provided by the kernel, even if the hardware ++ * does not support performance counters. 
These counters measure various ++ * physical and sw events of the kernel (and allow the profiling of them as ++ * well): ++ */ ++enum sw_event_ids { ++ PERF_COUNT_CPU_CLOCK = 0, ++ PERF_COUNT_TASK_CLOCK = 1, ++ PERF_COUNT_PAGE_FAULTS = 2, ++ PERF_COUNT_CONTEXT_SWITCHES = 3, ++ PERF_COUNT_CPU_MIGRATIONS = 4, ++ PERF_COUNT_PAGE_FAULTS_MIN = 5, ++ PERF_COUNT_PAGE_FAULTS_MAJ = 6, ++}; ++ ++Counters come in two flavours: counting counters and sampling ++counters. A "counting" counter is one that is used for counting the ++number of events that occur, and is characterised by having ++irq_period = 0 and record_type = PERF_RECORD_SIMPLE. A read() on a ++counting counter simply returns the current value of the counter as ++an 8-byte number. ++ ++A "sampling" counter is one that is set up to generate an interrupt ++every N events, where N is given by 'irq_period'. A sampling counter ++has irq_period > 0 and record_type != PERF_RECORD_SIMPLE. The ++record_type controls what data is recorded on each interrupt, and the ++available values are currently: ++ ++/* ++ * IRQ-notification data record type: ++ */ ++enum perf_counter_record_type { ++ PERF_RECORD_SIMPLE = 0, ++ PERF_RECORD_IRQ = 1, ++ PERF_RECORD_GROUP = 2, ++}; ++ ++A record_type value of PERF_RECORD_IRQ will record the instruction ++pointer (IP) at which the interrupt occurred. A record_type value of ++PERF_RECORD_GROUP will record the event_config and counter value of ++all of the other counters in the group, and should only be used on a ++group leader (see below). Currently these two values are mutually ++exclusive, but record_type will become a bit-mask in future and ++support other values. ++ ++A sampling counter has an event queue, into which an event is placed ++on each interrupt. A read() on a sampling counter will read the next ++event from the event queue. If the queue is empty, the read() will ++either block or return an EAGAIN error, depending on whether the fd ++has been set to non-blocking mode or not. ++ ++The 'disabled' bit specifies whether the counter starts out disabled ++or enabled. If it is initially disabled, it can be enabled by ioctl ++or prctl (see below). ++ ++The 'nmi' bit specifies, for hardware events, whether the counter ++should be set up to request non-maskable interrupts (NMIs) or normal ++interrupts. This bit is ignored if the user doesn't have ++CAP_SYS_ADMIN privilege (i.e. is not root) or if the CPU doesn't ++generate NMIs from hardware counters. ++ ++The 'inherit' bit, if set, specifies that this counter should count ++events on descendant tasks as well as the task specified. This only ++applies to new descendents, not to any existing descendents at the ++time the counter is created (nor to any new descendents of existing ++descendents). ++ ++The 'pinned' bit, if set, specifies that the counter should always be ++on the CPU if at all possible. It only applies to hardware counters ++and only to group leaders. If a pinned counter cannot be put onto the ++CPU (e.g. because there are not enough hardware counters or because of ++a conflict with some other event), then the counter goes into an ++'error' state, where reads return end-of-file (i.e. read() returns 0) ++until the counter is subsequently enabled or disabled. ++ ++The 'exclusive' bit, if set, specifies that when this counter's group ++is on the CPU, it should be the only group using the CPU's counters. 
++In future, this will allow sophisticated monitoring programs to supply ++extra configuration information via 'extra_config_len' to exploit ++advanced features of the CPU's Performance Monitor Unit (PMU) that are ++not otherwise accessible and that might disrupt other hardware ++counters. ++ ++The 'exclude_user', 'exclude_kernel' and 'exclude_hv' bits provide a ++way to request that counting of events be restricted to times when the ++CPU is in user, kernel and/or hypervisor mode. ++ ++ ++The 'pid' parameter to the perf_counter_open() system call allows the ++counter to be specific to a task: ++ ++ pid == 0: if the pid parameter is zero, the counter is attached to the ++ current task. ++ ++ pid > 0: the counter is attached to a specific task (if the current task ++ has sufficient privilege to do so) ++ ++ pid < 0: all tasks are counted (per cpu counters) ++ ++The 'cpu' parameter allows a counter to be made specific to a CPU: ++ ++ cpu >= 0: the counter is restricted to a specific CPU ++ cpu == -1: the counter counts on all CPUs ++ ++(Note: the combination of 'pid == -1' and 'cpu == -1' is not valid.) ++ ++A 'pid > 0' and 'cpu == -1' counter is a per task counter that counts ++events of that task and 'follows' that task to whatever CPU the task ++gets schedule to. Per task counters can be created by any user, for ++their own tasks. ++ ++A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts ++all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege. ++ ++The 'flags' parameter is currently unused and must be zero. ++ ++The 'group_fd' parameter allows counter "groups" to be set up. A ++counter group has one counter which is the group "leader". The leader ++is created first, with group_fd = -1 in the perf_counter_open call ++that creates it. The rest of the group members are created ++subsequently, with group_fd giving the fd of the group leader. ++(A single counter on its own is created with group_fd = -1 and is ++considered to be a group with only 1 member.) ++ ++A counter group is scheduled onto the CPU as a unit, that is, it will ++only be put onto the CPU if all of the counters in the group can be ++put onto the CPU. This means that the values of the member counters ++can be meaningfully compared, added, divided (to get ratios), etc., ++with each other, since they have counted events for the same set of ++executed instructions. ++ ++Counters can be enabled and disabled in two ways: via ioctl and via ++prctl. When a counter is disabled, it doesn't count or generate ++events but does continue to exist and maintain its count value. ++ ++An individual counter or counter group can be enabled with ++ ++ ioctl(fd, PERF_COUNTER_IOC_ENABLE); ++ ++or disabled with ++ ++ ioctl(fd, PERF_COUNTER_IOC_DISABLE); ++ ++Enabling or disabling the leader of a group enables or disables the ++whole group; that is, while the group leader is disabled, none of the ++counters in the group will count. Enabling or disabling a member of a ++group other than the leader only affects that counter - disabling an ++non-leader stops that counter from counting but doesn't affect any ++other counter. ++ ++A process can enable or disable all the counter groups that are ++attached to it, using prctl: ++ ++ prctl(PR_TASK_PERF_COUNTERS_ENABLE); ++ ++ prctl(PR_TASK_PERF_COUNTERS_DISABLE); ++ ++This applies to all counters on the current process, whether created ++by this process or by another, and doesn't affect any counters that ++this process has created on other processes. 
It only enables or ++disables the group leaders, not any other members in the groups. ++ +Index: linux-2.6-tip/Documentation/perf_counter/kerneltop.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/Documentation/perf_counter/kerneltop.c +@@ -0,0 +1,1328 @@ ++/* ++ * kerneltop.c: show top kernel functions - performance counters showcase ++ ++ Build with: ++ ++ cc -O6 -Wall -c -o kerneltop.o kerneltop.c -lrt ++ ++ Sample output: ++ ++------------------------------------------------------------------------------ ++ KernelTop: 2669 irqs/sec [NMI, cache-misses/cache-refs], (all, cpu: 2) ++------------------------------------------------------------------------------ ++ ++ weight RIP kernel function ++ ______ ________________ _______________ ++ ++ 35.20 - ffffffff804ce74b : skb_copy_and_csum_dev ++ 33.00 - ffffffff804cb740 : sock_alloc_send_skb ++ 31.26 - ffffffff804ce808 : skb_push ++ 22.43 - ffffffff80510004 : tcp_established_options ++ 19.00 - ffffffff8027d250 : find_get_page ++ 15.76 - ffffffff804e4fc9 : eth_type_trans ++ 15.20 - ffffffff804d8baa : dst_release ++ 14.86 - ffffffff804cf5d8 : skb_release_head_state ++ 14.00 - ffffffff802217d5 : read_hpet ++ 12.00 - ffffffff804ffb7f : __ip_local_out ++ 11.97 - ffffffff804fc0c8 : ip_local_deliver_finish ++ 8.54 - ffffffff805001a3 : ip_queue_xmit ++ */ ++ ++/* ++ * perfstat: /usr/bin/time -alike performance counter statistics utility ++ ++ It summarizes the counter events of all tasks (and child tasks), ++ covering all CPUs that the command (or workload) executes on. ++ It only counts the per-task events of the workload started, ++ independent of how many other tasks run on those CPUs. ++ ++ Sample output: ++ ++ $ ./perfstat -e 1 -e 3 -e 5 ls -lR /usr/include/ >/dev/null ++ ++ Performance counter stats for 'ls': ++ ++ 163516953 instructions ++ 2295 cache-misses ++ 2855182 branch-misses ++ */ ++ ++ /* ++ * Copyright (C) 2008, Red Hat Inc, Ingo Molnar ++ * ++ * Improvements and fixes by: ++ * ++ * Arjan van de Ven ++ * Yanmin Zhang ++ * Wu Fengguang ++ * Mike Galbraith ++ * Paul Mackerras ++ * ++ * Released under the GPL v2. (and only v2, not any later version) ++ */ ++ ++#define _GNU_SOURCE ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "../../include/linux/perf_counter.h" ++ ++ ++/* ++ * prctl(PR_TASK_PERF_COUNTERS_DISABLE) will (cheaply) disable all ++ * counters in the current task. 
++ */ ++#define PR_TASK_PERF_COUNTERS_DISABLE 31 ++#define PR_TASK_PERF_COUNTERS_ENABLE 32 ++ ++#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) ++ ++#define rdclock() \ ++({ \ ++ struct timespec ts; \ ++ \ ++ clock_gettime(CLOCK_MONOTONIC, &ts); \ ++ ts.tv_sec * 1000000000ULL + ts.tv_nsec; \ ++}) ++ ++/* ++ * Pick up some kernel type conventions: ++ */ ++#define __user ++#define asmlinkage ++ ++#ifdef __x86_64__ ++#define __NR_perf_counter_open 298 ++#define rmb() asm volatile("lfence" ::: "memory") ++#define cpu_relax() asm volatile("rep; nop" ::: "memory"); ++#endif ++ ++#ifdef __i386__ ++#define __NR_perf_counter_open 336 ++#define rmb() asm volatile("lfence" ::: "memory") ++#define cpu_relax() asm volatile("rep; nop" ::: "memory"); ++#endif ++ ++#ifdef __powerpc__ ++#define __NR_perf_counter_open 319 ++#define rmb() asm volatile ("sync" ::: "memory") ++#define cpu_relax() asm volatile ("" ::: "memory"); ++#endif ++ ++#define unlikely(x) __builtin_expect(!!(x), 0) ++#define min(x, y) ({ \ ++ typeof(x) _min1 = (x); \ ++ typeof(y) _min2 = (y); \ ++ (void) (&_min1 == &_min2); \ ++ _min1 < _min2 ? _min1 : _min2; }) ++ ++asmlinkage int sys_perf_counter_open( ++ struct perf_counter_hw_event *hw_event_uptr __user, ++ pid_t pid, ++ int cpu, ++ int group_fd, ++ unsigned long flags) ++{ ++ return syscall( ++ __NR_perf_counter_open, hw_event_uptr, pid, cpu, group_fd, flags); ++} ++ ++#define MAX_COUNTERS 64 ++#define MAX_NR_CPUS 256 ++ ++#define EID(type, id) (((__u64)(type) << PERF_COUNTER_TYPE_SHIFT) | (id)) ++ ++static int run_perfstat = 0; ++static int system_wide = 0; ++ ++static int nr_counters = 0; ++static __u64 event_id[MAX_COUNTERS] = { ++ EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), ++ EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), ++ EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), ++ EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), ++ ++ EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), ++ EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), ++ EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), ++ EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), ++}; ++static int default_interval = 100000; ++static int event_count[MAX_COUNTERS]; ++static int fd[MAX_NR_CPUS][MAX_COUNTERS]; ++ ++static __u64 count_filter = 100; ++ ++static int tid = -1; ++static int profile_cpu = -1; ++static int nr_cpus = 0; ++static int nmi = 1; ++static int group = 0; ++static unsigned int page_size; ++static unsigned int mmap_pages = 16; ++ ++static char *vmlinux; ++ ++static char *sym_filter; ++static unsigned long filter_start; ++static unsigned long filter_end; ++ ++static int delay_secs = 2; ++static int zero; ++static int dump_symtab; ++ ++struct source_line { ++ uint64_t EIP; ++ unsigned long count; ++ char *line; ++ struct source_line *next; ++}; ++ ++static struct source_line *lines; ++static struct source_line **lines_tail; ++ ++const unsigned int default_count[] = { ++ 1000000, ++ 1000000, ++ 10000, ++ 10000, ++ 1000000, ++ 10000, ++}; ++ ++static char *hw_event_names[] = { ++ "CPU cycles", ++ "instructions", ++ "cache references", ++ "cache misses", ++ "branches", ++ "branch misses", ++ "bus cycles", ++}; ++ ++static char *sw_event_names[] = { ++ "cpu clock ticks", ++ "task clock ticks", ++ "pagefaults", ++ "context switches", ++ "CPU migrations", ++ "minor faults", ++ "major faults", ++}; ++ ++struct event_symbol { ++ __u64 event; ++ char *symbol; ++}; ++ ++static struct event_symbol event_symbols[] = { ++ {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cpu-cycles", }, ++ 
{EID(PERF_TYPE_HARDWARE, PERF_COUNT_CPU_CYCLES), "cycles", }, ++ {EID(PERF_TYPE_HARDWARE, PERF_COUNT_INSTRUCTIONS), "instructions", }, ++ {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_REFERENCES), "cache-references", }, ++ {EID(PERF_TYPE_HARDWARE, PERF_COUNT_CACHE_MISSES), "cache-misses", }, ++ {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branch-instructions", }, ++ {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_INSTRUCTIONS), "branches", }, ++ {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BRANCH_MISSES), "branch-misses", }, ++ {EID(PERF_TYPE_HARDWARE, PERF_COUNT_BUS_CYCLES), "bus-cycles", }, ++ ++ {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK), "cpu-clock", }, ++ {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK), "task-clock", }, ++ {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "page-faults", }, ++ {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS), "faults", }, ++ {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MIN), "minor-faults", }, ++ {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_PAGE_FAULTS_MAJ), "major-faults", }, ++ {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "context-switches", }, ++ {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CONTEXT_SWITCHES), "cs", }, ++ {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "cpu-migrations", }, ++ {EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_MIGRATIONS), "migrations", }, ++}; ++ ++#define __PERF_COUNTER_FIELD(config, name) \ ++ ((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT) ++ ++#define PERF_COUNTER_RAW(config) __PERF_COUNTER_FIELD(config, RAW) ++#define PERF_COUNTER_CONFIG(config) __PERF_COUNTER_FIELD(config, CONFIG) ++#define PERF_COUNTER_TYPE(config) __PERF_COUNTER_FIELD(config, TYPE) ++#define PERF_COUNTER_ID(config) __PERF_COUNTER_FIELD(config, EVENT) ++ ++static void display_events_help(void) ++{ ++ unsigned int i; ++ __u64 e; ++ ++ printf( ++ " -e EVENT --event=EVENT # symbolic-name abbreviations"); ++ ++ for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { ++ int type, id; ++ ++ e = event_symbols[i].event; ++ type = PERF_COUNTER_TYPE(e); ++ id = PERF_COUNTER_ID(e); ++ ++ printf("\n %d:%d: %-20s", ++ type, id, event_symbols[i].symbol); ++ } ++ ++ printf("\n" ++ " rNNN: raw PMU events (eventsel+umask)\n\n"); ++} ++ ++static void display_perfstat_help(void) ++{ ++ printf( ++ "Usage: perfstat [] \n\n" ++ "PerfStat Options (up to %d event types can be specified):\n\n", ++ MAX_COUNTERS); ++ ++ display_events_help(); ++ ++ printf( ++ " -a # system-wide collection\n"); ++ exit(0); ++} ++ ++static void display_help(void) ++{ ++ if (run_perfstat) ++ return display_perfstat_help(); ++ ++ printf( ++ "Usage: kerneltop []\n" ++ " Or: kerneltop -S [] COMMAND [ARGS]\n\n" ++ "KernelTop Options (up to %d event types can be specified at once):\n\n", ++ MAX_COUNTERS); ++ ++ display_events_help(); ++ ++ printf( ++ " -S --stat # perfstat COMMAND\n" ++ " -a # system-wide collection (for perfstat)\n\n" ++ " -c CNT --count=CNT # event period to sample\n\n" ++ " -C CPU --cpu=CPU # CPU (-1 for all) [default: -1]\n" ++ " -p PID --pid=PID # PID of sampled task (-1 for all) [default: -1]\n\n" ++ " -d delay --delay= # sampling/display delay [default: 2]\n" ++ " -f CNT --filter=CNT # min-event-count filter [default: 100]\n\n" ++ " -s symbol --symbol= # function to be showed annotated one-shot\n" ++ " -x path --vmlinux= # the vmlinux binary, required for -s use\n" ++ " -z --zero # zero counts after display\n" ++ " -D --dump_symtab # dump symbol table to stderr on startup\n" ++ " -m pages --mmap_pages= # number of mmap data pages\n" ++ ); ++ ++ exit(0); ++} ++ 
++static char *event_name(int ctr) ++{ ++ __u64 config = event_id[ctr]; ++ int type = PERF_COUNTER_TYPE(config); ++ int id = PERF_COUNTER_ID(config); ++ static char buf[32]; ++ ++ if (PERF_COUNTER_RAW(config)) { ++ sprintf(buf, "raw 0x%llx", PERF_COUNTER_CONFIG(config)); ++ return buf; ++ } ++ ++ switch (type) { ++ case PERF_TYPE_HARDWARE: ++ if (id < PERF_HW_EVENTS_MAX) ++ return hw_event_names[id]; ++ return "unknown-hardware"; ++ ++ case PERF_TYPE_SOFTWARE: ++ if (id < PERF_SW_EVENTS_MAX) ++ return sw_event_names[id]; ++ return "unknown-software"; ++ ++ default: ++ break; ++ } ++ ++ return "unknown"; ++} ++ ++/* ++ * Each event can have multiple symbolic names. ++ * Symbolic names are (almost) exactly matched. ++ */ ++static __u64 match_event_symbols(char *str) ++{ ++ __u64 config, id; ++ int type; ++ unsigned int i; ++ ++ if (sscanf(str, "r%llx", &config) == 1) ++ return config | PERF_COUNTER_RAW_MASK; ++ ++ if (sscanf(str, "%d:%llu", &type, &id) == 2) ++ return EID(type, id); ++ ++ for (i = 0; i < ARRAY_SIZE(event_symbols); i++) { ++ if (!strncmp(str, event_symbols[i].symbol, ++ strlen(event_symbols[i].symbol))) ++ return event_symbols[i].event; ++ } ++ ++ return ~0ULL; ++} ++ ++static int parse_events(char *str) ++{ ++ __u64 config; ++ ++again: ++ if (nr_counters == MAX_COUNTERS) ++ return -1; ++ ++ config = match_event_symbols(str); ++ if (config == ~0ULL) ++ return -1; ++ ++ event_id[nr_counters] = config; ++ nr_counters++; ++ ++ str = strstr(str, ","); ++ if (str) { ++ str++; ++ goto again; ++ } ++ ++ return 0; ++} ++ ++ ++/* ++ * perfstat ++ */ ++ ++char fault_here[1000000]; ++ ++static void create_perfstat_counter(int counter) ++{ ++ struct perf_counter_hw_event hw_event; ++ ++ memset(&hw_event, 0, sizeof(hw_event)); ++ hw_event.config = event_id[counter]; ++ hw_event.record_type = PERF_RECORD_SIMPLE; ++ hw_event.nmi = 0; ++ ++ if (system_wide) { ++ int cpu; ++ for (cpu = 0; cpu < nr_cpus; cpu ++) { ++ fd[cpu][counter] = sys_perf_counter_open(&hw_event, -1, cpu, -1, 0); ++ if (fd[cpu][counter] < 0) { ++ printf("perfstat error: syscall returned with %d (%s)\n", ++ fd[cpu][counter], strerror(errno)); ++ exit(-1); ++ } ++ } ++ } else { ++ hw_event.inherit = 1; ++ hw_event.disabled = 1; ++ ++ fd[0][counter] = sys_perf_counter_open(&hw_event, 0, -1, -1, 0); ++ if (fd[0][counter] < 0) { ++ printf("perfstat error: syscall returned with %d (%s)\n", ++ fd[0][counter], strerror(errno)); ++ exit(-1); ++ } ++ } ++} ++ ++int do_perfstat(int argc, char *argv[]) ++{ ++ unsigned long long t0, t1; ++ int counter; ++ ssize_t res; ++ int status; ++ int pid; ++ ++ if (!system_wide) ++ nr_cpus = 1; ++ ++ for (counter = 0; counter < nr_counters; counter++) ++ create_perfstat_counter(counter); ++ ++ argc -= optind; ++ argv += optind; ++ ++ if (!argc) ++ display_help(); ++ ++ /* ++ * Enable counters and exec the command: ++ */ ++ t0 = rdclock(); ++ prctl(PR_TASK_PERF_COUNTERS_ENABLE); ++ ++ if ((pid = fork()) < 0) ++ perror("failed to fork"); ++ if (!pid) { ++ if (execvp(argv[0], argv)) { ++ perror(argv[0]); ++ exit(-1); ++ } ++ } ++ while (wait(&status) >= 0) ++ ; ++ prctl(PR_TASK_PERF_COUNTERS_DISABLE); ++ t1 = rdclock(); ++ ++ fflush(stdout); ++ ++ fprintf(stderr, "\n"); ++ fprintf(stderr, " Performance counter stats for \'%s\':\n", ++ argv[0]); ++ fprintf(stderr, "\n"); ++ ++ for (counter = 0; counter < nr_counters; counter++) { ++ int cpu; ++ __u64 count, single_count; ++ ++ count = 0; ++ for (cpu = 0; cpu < nr_cpus; cpu ++) { ++ res = read(fd[cpu][counter], ++ (char *) &single_count, 
sizeof(single_count)); ++ assert(res == sizeof(single_count)); ++ count += single_count; ++ } ++ ++ if (event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_CPU_CLOCK) || ++ event_id[counter] == EID(PERF_TYPE_SOFTWARE, PERF_COUNT_TASK_CLOCK)) { ++ ++ double msecs = (double)count / 1000000; ++ ++ fprintf(stderr, " %14.6f %-20s (msecs)\n", ++ msecs, event_name(counter)); ++ } else { ++ fprintf(stderr, " %14Ld %-20s (events)\n", ++ count, event_name(counter)); ++ } ++ } ++ fprintf(stderr, "\n"); ++ fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", ++ (double)(t1-t0)/1e6); ++ fprintf(stderr, "\n"); ++ ++ return 0; ++} ++ ++/* ++ * Symbols ++ */ ++ ++static uint64_t min_ip; ++static uint64_t max_ip = -1ll; ++ ++struct sym_entry { ++ unsigned long long addr; ++ char *sym; ++ unsigned long count[MAX_COUNTERS]; ++ int skip; ++ struct source_line *source; ++}; ++ ++#define MAX_SYMS 100000 ++ ++static int sym_table_count; ++ ++struct sym_entry *sym_filter_entry; ++ ++static struct sym_entry sym_table[MAX_SYMS]; ++ ++static void show_details(struct sym_entry *sym); ++ ++/* ++ * Ordering weight: count-1 * count-2 * ... / count-n ++ */ ++static double sym_weight(const struct sym_entry *sym) ++{ ++ double weight; ++ int counter; ++ ++ weight = sym->count[0]; ++ ++ for (counter = 1; counter < nr_counters-1; counter++) ++ weight *= sym->count[counter]; ++ ++ weight /= (sym->count[counter] + 1); ++ ++ return weight; ++} ++ ++static int compare(const void *__sym1, const void *__sym2) ++{ ++ const struct sym_entry *sym1 = __sym1, *sym2 = __sym2; ++ ++ return sym_weight(sym1) < sym_weight(sym2); ++} ++ ++static time_t last_refresh; ++static long events; ++static long userspace_events; ++static const char CONSOLE_CLEAR[] = ""; ++ ++static struct sym_entry tmp[MAX_SYMS]; ++ ++static void print_sym_table(void) ++{ ++ int i, printed; ++ int counter; ++ float events_per_sec = events/delay_secs; ++ float kevents_per_sec = (events-userspace_events)/delay_secs; ++ ++ memcpy(tmp, sym_table, sizeof(sym_table[0])*sym_table_count); ++ qsort(tmp, sym_table_count, sizeof(tmp[0]), compare); ++ ++ write(1, CONSOLE_CLEAR, strlen(CONSOLE_CLEAR)); ++ ++ printf( ++"------------------------------------------------------------------------------\n"); ++ printf( " KernelTop:%8.0f irqs/sec kernel:%3.1f%% [%s, ", ++ events_per_sec, ++ 100.0 - (100.0*((events_per_sec-kevents_per_sec)/events_per_sec)), ++ nmi ? 
"NMI" : "IRQ"); ++ ++ if (nr_counters == 1) ++ printf("%d ", event_count[0]); ++ ++ for (counter = 0; counter < nr_counters; counter++) { ++ if (counter) ++ printf("/"); ++ ++ printf("%s", event_name(counter)); ++ } ++ ++ printf( "], "); ++ ++ if (tid != -1) ++ printf(" (tid: %d", tid); ++ else ++ printf(" (all"); ++ ++ if (profile_cpu != -1) ++ printf(", cpu: %d)\n", profile_cpu); ++ else { ++ if (tid != -1) ++ printf(")\n"); ++ else ++ printf(", %d CPUs)\n", nr_cpus); ++ } ++ ++ printf("------------------------------------------------------------------------------\n\n"); ++ ++ if (nr_counters == 1) ++ printf(" events"); ++ else ++ printf(" weight events"); ++ ++ printf(" RIP kernel function\n" ++ " ______ ______ ________________ _______________\n\n" ++ ); ++ ++ printed = 0; ++ for (i = 0; i < sym_table_count; i++) { ++ int count; ++ ++ if (nr_counters == 1) { ++ if (printed <= 18 && ++ tmp[i].count[0] >= count_filter) { ++ printf("%19.2f - %016llx : %s\n", ++ sym_weight(tmp + i), tmp[i].addr, tmp[i].sym); ++ printed++; ++ } ++ } else { ++ if (printed <= 18 && ++ tmp[i].count[0] >= count_filter) { ++ printf("%8.1f %10ld - %016llx : %s\n", ++ sym_weight(tmp + i), ++ tmp[i].count[0], ++ tmp[i].addr, tmp[i].sym); ++ printed++; ++ } ++ } ++ /* ++ * Add decay to the counts: ++ */ ++ for (count = 0; count < nr_counters; count++) ++ sym_table[i].count[count] = zero ? 0 : sym_table[i].count[count] * 7 / 8; ++ } ++ ++ if (sym_filter_entry) ++ show_details(sym_filter_entry); ++ ++ last_refresh = time(NULL); ++ ++ { ++ struct pollfd stdin_poll = { .fd = 0, .events = POLLIN }; ++ ++ if (poll(&stdin_poll, 1, 0) == 1) { ++ printf("key pressed - exiting.\n"); ++ exit(0); ++ } ++ } ++} ++ ++static int read_symbol(FILE *in, struct sym_entry *s) ++{ ++ static int filter_match = 0; ++ char *sym, stype; ++ char str[500]; ++ int rc, pos; ++ ++ rc = fscanf(in, "%llx %c %499s", &s->addr, &stype, str); ++ if (rc == EOF) ++ return -1; ++ ++ assert(rc == 3); ++ ++ /* skip until end of line: */ ++ pos = strlen(str); ++ do { ++ rc = fgetc(in); ++ if (rc == '\n' || rc == EOF || pos >= 499) ++ break; ++ str[pos] = rc; ++ pos++; ++ } while (1); ++ str[pos] = 0; ++ ++ sym = str; ++ ++ /* Filter out known duplicates and non-text symbols. */ ++ if (!strcmp(sym, "_text")) ++ return 1; ++ if (!min_ip && !strcmp(sym, "_stext")) ++ return 1; ++ if (!strcmp(sym, "_etext") || !strcmp(sym, "_sinittext")) ++ return 1; ++ if (stype != 'T' && stype != 't') ++ return 1; ++ if (!strncmp("init_module", sym, 11) || !strncmp("cleanup_module", sym, 14)) ++ return 1; ++ if (strstr(sym, "_text_start") || strstr(sym, "_text_end")) ++ return 1; ++ ++ s->sym = malloc(strlen(str)); ++ assert(s->sym); ++ ++ strcpy((char *)s->sym, str); ++ s->skip = 0; ++ ++ /* Tag events to be skipped. 
*/ ++ if (!strcmp("default_idle", s->sym) || !strcmp("cpu_idle", s->sym)) ++ s->skip = 1; ++ else if (!strcmp("enter_idle", s->sym) || !strcmp("exit_idle", s->sym)) ++ s->skip = 1; ++ else if (!strcmp("mwait_idle", s->sym)) ++ s->skip = 1; ++ ++ if (filter_match == 1) { ++ filter_end = s->addr; ++ filter_match = -1; ++ if (filter_end - filter_start > 10000) { ++ printf("hm, too large filter symbol <%s> - skipping.\n", ++ sym_filter); ++ printf("symbol filter start: %016lx\n", filter_start); ++ printf(" end: %016lx\n", filter_end); ++ filter_end = filter_start = 0; ++ sym_filter = NULL; ++ sleep(1); ++ } ++ } ++ if (filter_match == 0 && sym_filter && !strcmp(s->sym, sym_filter)) { ++ filter_match = 1; ++ filter_start = s->addr; ++ } ++ ++ return 0; ++} ++ ++int compare_addr(const void *__sym1, const void *__sym2) ++{ ++ const struct sym_entry *sym1 = __sym1, *sym2 = __sym2; ++ ++ return sym1->addr > sym2->addr; ++} ++ ++static void sort_symbol_table(void) ++{ ++ int i, dups; ++ ++ do { ++ qsort(sym_table, sym_table_count, sizeof(sym_table[0]), compare_addr); ++ for (i = 0, dups = 0; i < sym_table_count; i++) { ++ if (sym_table[i].addr == sym_table[i+1].addr) { ++ sym_table[i+1].addr = -1ll; ++ dups++; ++ } ++ } ++ sym_table_count -= dups; ++ } while(dups); ++} ++ ++static void parse_symbols(void) ++{ ++ struct sym_entry *last; ++ ++ FILE *kallsyms = fopen("/proc/kallsyms", "r"); ++ ++ if (!kallsyms) { ++ printf("Could not open /proc/kallsyms - no CONFIG_KALLSYMS_ALL=y?\n"); ++ exit(-1); ++ } ++ ++ while (!feof(kallsyms)) { ++ if (read_symbol(kallsyms, &sym_table[sym_table_count]) == 0) { ++ sym_table_count++; ++ assert(sym_table_count <= MAX_SYMS); ++ } ++ } ++ ++ sort_symbol_table(); ++ min_ip = sym_table[0].addr; ++ max_ip = sym_table[sym_table_count-1].addr; ++ last = sym_table + sym_table_count++; ++ ++ last->addr = -1ll; ++ last->sym = ""; ++ ++ if (filter_end) { ++ int count; ++ for (count=0; count < sym_table_count; count ++) { ++ if (!strcmp(sym_table[count].sym, sym_filter)) { ++ sym_filter_entry = &sym_table[count]; ++ break; ++ } ++ } ++ } ++ if (dump_symtab) { ++ int i; ++ ++ for (i = 0; i < sym_table_count; i++) ++ fprintf(stderr, "%llx %s\n", ++ sym_table[i].addr, sym_table[i].sym); ++ } ++} ++ ++/* ++ * Source lines ++ */ ++ ++static void parse_vmlinux(char *filename) ++{ ++ FILE *file; ++ char command[PATH_MAX*2]; ++ if (!filename) ++ return; ++ ++ sprintf(command, "objdump --start-address=0x%016lx --stop-address=0x%016lx -dS %s", filter_start, filter_end, filename); ++ ++ file = popen(command, "r"); ++ if (!file) ++ return; ++ ++ lines_tail = &lines; ++ while (!feof(file)) { ++ struct source_line *src; ++ size_t dummy = 0; ++ char *c; ++ ++ src = malloc(sizeof(struct source_line)); ++ assert(src != NULL); ++ memset(src, 0, sizeof(struct source_line)); ++ ++ if (getline(&src->line, &dummy, file) < 0) ++ break; ++ if (!src->line) ++ break; ++ ++ c = strchr(src->line, '\n'); ++ if (c) ++ *c = 0; ++ ++ src->next = NULL; ++ *lines_tail = src; ++ lines_tail = &src->next; ++ ++ if (strlen(src->line)>8 && src->line[8] == ':') ++ src->EIP = strtoull(src->line, NULL, 16); ++ if (strlen(src->line)>8 && src->line[16] == ':') ++ src->EIP = strtoull(src->line, NULL, 16); ++ } ++ pclose(file); ++} ++ ++static void record_precise_ip(uint64_t ip) ++{ ++ struct source_line *line; ++ ++ for (line = lines; line; line = line->next) { ++ if (line->EIP == ip) ++ line->count++; ++ if (line->EIP > ip) ++ break; ++ } ++} ++ ++static void lookup_sym_in_vmlinux(struct sym_entry *sym) ++{ ++ struct 
source_line *line; ++ char pattern[PATH_MAX]; ++ sprintf(pattern, "<%s>:", sym->sym); ++ ++ for (line = lines; line; line = line->next) { ++ if (strstr(line->line, pattern)) { ++ sym->source = line; ++ break; ++ } ++ } ++} ++ ++static void show_lines(struct source_line *line_queue, int line_queue_count) ++{ ++ int i; ++ struct source_line *line; ++ ++ line = line_queue; ++ for (i = 0; i < line_queue_count; i++) { ++ printf("%8li\t%s\n", line->count, line->line); ++ line = line->next; ++ } ++} ++ ++#define TRACE_COUNT 3 ++ ++static void show_details(struct sym_entry *sym) ++{ ++ struct source_line *line; ++ struct source_line *line_queue = NULL; ++ int displayed = 0; ++ int line_queue_count = 0; ++ ++ if (!sym->source) ++ lookup_sym_in_vmlinux(sym); ++ if (!sym->source) ++ return; ++ ++ printf("Showing details for %s\n", sym->sym); ++ ++ line = sym->source; ++ while (line) { ++ if (displayed && strstr(line->line, ">:")) ++ break; ++ ++ if (!line_queue_count) ++ line_queue = line; ++ line_queue_count ++; ++ ++ if (line->count >= count_filter) { ++ show_lines(line_queue, line_queue_count); ++ line_queue_count = 0; ++ line_queue = NULL; ++ } else if (line_queue_count > TRACE_COUNT) { ++ line_queue = line_queue->next; ++ line_queue_count --; ++ } ++ ++ line->count = 0; ++ displayed++; ++ if (displayed > 300) ++ break; ++ line = line->next; ++ } ++} ++ ++/* ++ * Binary search in the histogram table and record the hit: ++ */ ++static void record_ip(uint64_t ip, int counter) ++{ ++ int left_idx, middle_idx, right_idx, idx; ++ unsigned long left, middle, right; ++ ++ record_precise_ip(ip); ++ ++ left_idx = 0; ++ right_idx = sym_table_count-1; ++ assert(ip <= max_ip && ip >= min_ip); ++ ++ while (left_idx + 1 < right_idx) { ++ middle_idx = (left_idx + right_idx) / 2; ++ ++ left = sym_table[ left_idx].addr; ++ middle = sym_table[middle_idx].addr; ++ right = sym_table[ right_idx].addr; ++ ++ if (!(left <= middle && middle <= right)) { ++ printf("%016lx...\n%016lx...\n%016lx\n", left, middle, right); ++ printf("%d %d %d\n", left_idx, middle_idx, right_idx); ++ } ++ assert(left <= middle && middle <= right); ++ if (!(left <= ip && ip <= right)) { ++ printf(" left: %016lx\n", left); ++ printf(" ip: %016lx\n", (unsigned long)ip); ++ printf("right: %016lx\n", right); ++ } ++ assert(left <= ip && ip <= right); ++ /* ++ * [ left .... target .... middle .... right ] ++ * => right := middle ++ */ ++ if (ip < middle) { ++ right_idx = middle_idx; ++ continue; ++ } ++ /* ++ * [ left .... middle ... target ... 
right ] ++ * => left := middle ++ */ ++ left_idx = middle_idx; ++ } ++ ++ idx = left_idx; ++ ++ if (!sym_table[idx].skip) ++ sym_table[idx].count[counter]++; ++ else events--; ++} ++ ++static void process_event(uint64_t ip, int counter) ++{ ++ events++; ++ ++ if (ip < min_ip || ip > max_ip) { ++ userspace_events++; ++ return; ++ } ++ ++ record_ip(ip, counter); ++} ++ ++static void process_options(int argc, char *argv[]) ++{ ++ int error = 0, counter; ++ ++ if (strstr(argv[0], "perfstat")) ++ run_perfstat = 1; ++ ++ for (;;) { ++ int option_index = 0; ++ /** Options for getopt */ ++ static struct option long_options[] = { ++ {"count", required_argument, NULL, 'c'}, ++ {"cpu", required_argument, NULL, 'C'}, ++ {"delay", required_argument, NULL, 'd'}, ++ {"dump_symtab", no_argument, NULL, 'D'}, ++ {"event", required_argument, NULL, 'e'}, ++ {"filter", required_argument, NULL, 'f'}, ++ {"group", required_argument, NULL, 'g'}, ++ {"help", no_argument, NULL, 'h'}, ++ {"nmi", required_argument, NULL, 'n'}, ++ {"pid", required_argument, NULL, 'p'}, ++ {"vmlinux", required_argument, NULL, 'x'}, ++ {"symbol", required_argument, NULL, 's'}, ++ {"stat", no_argument, NULL, 'S'}, ++ {"zero", no_argument, NULL, 'z'}, ++ {"mmap_pages", required_argument, NULL, 'm'}, ++ {NULL, 0, NULL, 0 } ++ }; ++ int c = getopt_long(argc, argv, "+:ac:C:d:De:f:g:hn:m:p:s:Sx:z", ++ long_options, &option_index); ++ if (c == -1) ++ break; ++ ++ switch (c) { ++ case 'a': system_wide = 1; break; ++ case 'c': default_interval = atoi(optarg); break; ++ case 'C': ++ /* CPU and PID are mutually exclusive */ ++ if (tid != -1) { ++ printf("WARNING: CPU switch overriding PID\n"); ++ sleep(1); ++ tid = -1; ++ } ++ profile_cpu = atoi(optarg); break; ++ case 'd': delay_secs = atoi(optarg); break; ++ case 'D': dump_symtab = 1; break; ++ ++ case 'e': error = parse_events(optarg); break; ++ ++ case 'f': count_filter = atoi(optarg); break; ++ case 'g': group = atoi(optarg); break; ++ case 'h': display_help(); break; ++ case 'n': nmi = atoi(optarg); break; ++ case 'p': ++ /* CPU and PID are mutually exclusive */ ++ if (profile_cpu != -1) { ++ printf("WARNING: PID switch overriding CPU\n"); ++ sleep(1); ++ profile_cpu = -1; ++ } ++ tid = atoi(optarg); break; ++ case 's': sym_filter = strdup(optarg); break; ++ case 'S': run_perfstat = 1; break; ++ case 'x': vmlinux = strdup(optarg); break; ++ case 'z': zero = 1; break; ++ case 'm': mmap_pages = atoi(optarg); break; ++ default: error = 1; break; ++ } ++ } ++ if (error) ++ display_help(); ++ ++ if (!nr_counters) { ++ if (run_perfstat) ++ nr_counters = 8; ++ else { ++ nr_counters = 1; ++ event_id[0] = 0; ++ } ++ } ++ ++ for (counter = 0; counter < nr_counters; counter++) { ++ if (event_count[counter]) ++ continue; ++ ++ event_count[counter] = default_interval; ++ } ++} ++ ++struct mmap_data { ++ int counter; ++ void *base; ++ unsigned int mask; ++ unsigned int prev; ++}; ++ ++static unsigned int mmap_read_head(struct mmap_data *md) ++{ ++ struct perf_counter_mmap_page *pc = md->base; ++ unsigned int seq, head; ++ ++repeat: ++ rmb(); ++ seq = pc->lock; ++ ++ if (unlikely(seq & 1)) { ++ cpu_relax(); ++ goto repeat; ++ } ++ ++ head = pc->data_head; ++ ++ rmb(); ++ if (pc->lock != seq) ++ goto repeat; ++ ++ return head; ++} ++ ++struct timeval last_read, this_read; ++ ++static void mmap_read(struct mmap_data *md) ++{ ++ unsigned int head = mmap_read_head(md); ++ unsigned int old = md->prev; ++ unsigned char *data = md->base + page_size; ++ int diff; ++ ++ gettimeofday(&this_read, NULL); ++ ++ /* ++ * 
If we're further behind than half the buffer, there's a chance ++ * the writer will bite our tail and screw up the events under us. ++ * ++ * If we somehow ended up ahead of the head, we got messed up. ++ * ++ * In either case, truncate and restart at head. ++ */ ++ diff = head - old; ++ if (diff > md->mask / 2 || diff < 0) { ++ struct timeval iv; ++ unsigned long msecs; ++ ++ timersub(&this_read, &last_read, &iv); ++ msecs = iv.tv_sec*1000 + iv.tv_usec/1000; ++ ++ fprintf(stderr, "WARNING: failed to keep up with mmap data." ++ " Last read %lu msecs ago.\n", msecs); ++ ++ /* ++ * head points to a known good entry, start there. ++ */ ++ old = head; ++ } ++ ++ last_read = this_read; ++ ++ for (; old != head;) { ++ struct event_struct { ++ struct perf_event_header header; ++ __u64 ip; ++ __u32 pid, tid; ++ } *event = (struct event_struct *)&data[old & md->mask]; ++ struct event_struct event_copy; ++ ++ unsigned int size = event->header.size; ++ ++ /* ++ * Event straddles the mmap boundary -- header should always ++ * be inside due to u64 alignment of output. ++ */ ++ if ((old & md->mask) + size != ((old + size) & md->mask)) { ++ unsigned int offset = old; ++ unsigned int len = sizeof(*event), cpy; ++ void *dst = &event_copy; ++ ++ do { ++ cpy = min(md->mask + 1 - (offset & md->mask), len); ++ memcpy(dst, &data[offset & md->mask], cpy); ++ offset += cpy; ++ dst += cpy; ++ len -= cpy; ++ } while (len); ++ ++ event = &event_copy; ++ } ++ ++ old += size; ++ ++ switch (event->header.type) { ++ case PERF_EVENT_IP: ++ case PERF_EVENT_IP | __PERF_EVENT_TID: ++ process_event(event->ip, md->counter); ++ break; ++ } ++ } ++ ++ md->prev = old; ++} ++ ++int main(int argc, char *argv[]) ++{ ++ struct pollfd event_array[MAX_NR_CPUS * MAX_COUNTERS]; ++ struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; ++ struct perf_counter_hw_event hw_event; ++ int i, counter, group_fd, nr_poll = 0; ++ unsigned int cpu; ++ int ret; ++ ++ page_size = sysconf(_SC_PAGE_SIZE); ++ ++ process_options(argc, argv); ++ ++ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); ++ assert(nr_cpus <= MAX_NR_CPUS); ++ assert(nr_cpus >= 0); ++ ++ if (run_perfstat) ++ return do_perfstat(argc, argv); ++ ++ if (tid != -1 || profile_cpu != -1) ++ nr_cpus = 1; ++ ++ parse_symbols(); ++ if (vmlinux && sym_filter_entry) ++ parse_vmlinux(vmlinux); ++ ++ for (i = 0; i < nr_cpus; i++) { ++ group_fd = -1; ++ for (counter = 0; counter < nr_counters; counter++) { ++ ++ cpu = profile_cpu; ++ if (tid == -1 && profile_cpu == -1) ++ cpu = i; ++ ++ memset(&hw_event, 0, sizeof(hw_event)); ++ hw_event.config = event_id[counter]; ++ hw_event.irq_period = event_count[counter]; ++ hw_event.record_type = PERF_RECORD_IRQ; ++ hw_event.nmi = nmi; ++ hw_event.include_tid = 1; ++ ++ fd[i][counter] = sys_perf_counter_open(&hw_event, tid, cpu, group_fd, 0); ++ if (fd[i][counter] < 0) { ++ int err = errno; ++ printf("kerneltop error: syscall returned with %d (%s)\n", ++ fd[i][counter], strerror(err)); ++ if (err == EPERM) ++ printf("Are you root?\n"); ++ exit(-1); ++ } ++ assert(fd[i][counter] >= 0); ++ fcntl(fd[i][counter], F_SETFL, O_NONBLOCK); ++ ++ /* ++ * First counter acts as the group leader: ++ */ ++ if (group && group_fd == -1) ++ group_fd = fd[i][counter]; ++ ++ event_array[nr_poll].fd = fd[i][counter]; ++ event_array[nr_poll].events = POLLIN; ++ nr_poll++; ++ ++ mmap_array[i][counter].counter = counter; ++ mmap_array[i][counter].prev = 0; ++ mmap_array[i][counter].mask = mmap_pages*page_size - 1; ++ mmap_array[i][counter].base = mmap(NULL, (mmap_pages+1)*page_size, 
++ PROT_READ, MAP_SHARED, fd[i][counter], 0); ++ if (mmap_array[i][counter].base == MAP_FAILED) { ++ printf("kerneltop error: failed to mmap with %d (%s)\n", ++ errno, strerror(errno)); ++ exit(-1); ++ } ++ } ++ } ++ ++ printf("KernelTop refresh period: %d seconds\n", delay_secs); ++ last_refresh = time(NULL); ++ ++ while (1) { ++ int hits = events; ++ ++ for (i = 0; i < nr_cpus; i++) { ++ for (counter = 0; counter < nr_counters; counter++) ++ mmap_read(&mmap_array[i][counter]); ++ } ++ ++ if (time(NULL) >= last_refresh + delay_secs) { ++ print_sym_table(); ++ events = userspace_events = 0; ++ } ++ ++ if (hits == events) ++ ret = poll(event_array, nr_poll, 1000); ++ hits = events; ++ } ++ ++ return 0; ++} +Index: linux-2.6-tip/Documentation/scheduler/00-INDEX +=================================================================== +--- linux-2.6-tip.orig/Documentation/scheduler/00-INDEX ++++ linux-2.6-tip/Documentation/scheduler/00-INDEX +@@ -2,8 +2,6 @@ + - this file. + sched-arch.txt + - CPU Scheduler implementation hints for architecture specific code. +-sched-coding.txt +- - reference for various scheduler-related methods in the O(1) scheduler. + sched-design-CFS.txt + - goals, design and implementation of the Complete Fair Scheduler. + sched-domains.txt +Index: linux-2.6-tip/Documentation/scheduler/sched-coding.txt +=================================================================== +--- linux-2.6-tip.orig/Documentation/scheduler/sched-coding.txt ++++ /dev/null +@@ -1,126 +0,0 @@ +- Reference for various scheduler-related methods in the O(1) scheduler +- Robert Love , MontaVista Software +- +- +-Note most of these methods are local to kernel/sched.c - this is by design. +-The scheduler is meant to be self-contained and abstracted away. This document +-is primarily for understanding the scheduler, not interfacing to it. Some of +-the discussed interfaces, however, are general process/scheduling methods. +-They are typically defined in include/linux/sched.h. +- +- +-Main Scheduling Methods +------------------------ +- +-void load_balance(runqueue_t *this_rq, int idle) +- Attempts to pull tasks from one cpu to another to balance cpu usage, +- if needed. This method is called explicitly if the runqueues are +- imbalanced or periodically by the timer tick. Prior to calling, +- the current runqueue must be locked and interrupts disabled. +- +-void schedule() +- The main scheduling function. Upon return, the highest priority +- process will be active. +- +- +-Locking +-------- +- +-Each runqueue has its own lock, rq->lock. When multiple runqueues need +-to be locked, lock acquires must be ordered by ascending &runqueue value. +- +-A specific runqueue is locked via +- +- task_rq_lock(task_t pid, unsigned long *flags) +- +-which disables preemption, disables interrupts, and locks the runqueue pid is +-running on. Likewise, +- +- task_rq_unlock(task_t pid, unsigned long *flags) +- +-unlocks the runqueue pid is running on, restores interrupts to their previous +-state, and reenables preemption. +- +-The routines +- +- double_rq_lock(runqueue_t *rq1, runqueue_t *rq2) +- +-and +- +- double_rq_unlock(runqueue_t *rq1, runqueue_t *rq2) +- +-safely lock and unlock, respectively, the two specified runqueues. They do +-not, however, disable and restore interrupts. Users are required to do so +-manually before and after calls. +- +- +-Values +------- +- +-MAX_PRIO +- The maximum priority of the system, stored in the task as task->prio. +- Lower priorities are higher. 
Normal (non-RT) priorities range from +- MAX_RT_PRIO to (MAX_PRIO - 1). +-MAX_RT_PRIO +- The maximum real-time priority of the system. Valid RT priorities +- range from 0 to (MAX_RT_PRIO - 1). +-MAX_USER_RT_PRIO +- The maximum real-time priority that is exported to user-space. Should +- always be equal to or less than MAX_RT_PRIO. Setting it less allows +- kernel threads to have higher priorities than any user-space task. +-MIN_TIMESLICE +-MAX_TIMESLICE +- Respectively, the minimum and maximum timeslices (quanta) of a process. +- +-Data +----- +- +-struct runqueue +- The main per-CPU runqueue data structure. +-struct task_struct +- The main per-process data structure. +- +- +-General Methods +---------------- +- +-cpu_rq(cpu) +- Returns the runqueue of the specified cpu. +-this_rq() +- Returns the runqueue of the current cpu. +-task_rq(pid) +- Returns the runqueue which holds the specified pid. +-cpu_curr(cpu) +- Returns the task currently running on the given cpu. +-rt_task(pid) +- Returns true if pid is real-time, false if not. +- +- +-Process Control Methods +------------------------ +- +-void set_user_nice(task_t *p, long nice) +- Sets the "nice" value of task p to the given value. +-int setscheduler(pid_t pid, int policy, struct sched_param *param) +- Sets the scheduling policy and parameters for the given pid. +-int set_cpus_allowed(task_t *p, unsigned long new_mask) +- Sets a given task's CPU affinity and migrates it to a proper cpu. +- Callers must have a valid reference to the task and assure the +- task not exit prematurely. No locks can be held during the call. +-set_task_state(tsk, state_value) +- Sets the given task's state to the given value. +-set_current_state(state_value) +- Sets the current task's state to the given value. +-void set_tsk_need_resched(struct task_struct *tsk) +- Sets need_resched in the given task. +-void clear_tsk_need_resched(struct task_struct *tsk) +- Clears need_resched in the given task. +-void set_need_resched() +- Sets need_resched in the current task. +-void clear_need_resched() +- Clears need_resched in the current task. +-int need_resched() +- Returns true if need_resched is set in the current task, false +- otherwise. +-yield() +- Place the current process at the end of the runqueue and call schedule. +Index: linux-2.6-tip/Documentation/sysrq.txt +=================================================================== +--- linux-2.6-tip.orig/Documentation/sysrq.txt ++++ linux-2.6-tip/Documentation/sysrq.txt +@@ -113,6 +113,8 @@ On all - write a character to /proc/sys + + 'x' - Used by xmon interface on ppc/powerpc platforms. + ++'z' - Dump the ftrace buffer ++ + '0'-'9' - Sets the console log level, controlling which kernel messages + will be printed to your console. ('0', for example would make + it so that only emergency messages like PANICs or OOPSes would +Index: linux-2.6-tip/Documentation/tracepoints.txt +=================================================================== +--- linux-2.6-tip.orig/Documentation/tracepoints.txt ++++ linux-2.6-tip/Documentation/tracepoints.txt +@@ -45,8 +45,8 @@ In include/trace/subsys.h : + #include + + DECLARE_TRACE(subsys_eventname, +- TPPROTO(int firstarg, struct task_struct *p), +- TPARGS(firstarg, p)); ++ TP_PROTO(int firstarg, struct task_struct *p), ++ TP_ARGS(firstarg, p)); + + In subsys/file.c (where the tracing statement must be added) : + +@@ -66,10 +66,10 @@ Where : + - subsys is the name of your subsystem. + - eventname is the name of the event to trace. 
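As an aside on the tracepoints.txt hunk above: it renames the TPPROTO/TPARGS helpers to TP_PROTO/TP_ARGS. A minimal sketch of the documented usage with the renamed macros follows; the subsys/eventname identifiers and the somefct() caller are the placeholder names used by tracepoints.txt itself, not code carried by this patch.

/* include/trace/subsys.h -- declaration (sketch) */
#include <linux/tracepoint.h>

DECLARE_TRACE(subsys_eventname,
	TP_PROTO(int firstarg, struct task_struct *p),
	TP_ARGS(firstarg, p));

/* subsys/file.c -- definition and a call site (sketch) */
#include <trace/subsys.h>

DEFINE_TRACE(subsys_eventname);

void somefct(int firstarg, struct task_struct *p)
{
	/* compiles to a near no-op unless a probe is registered */
	trace_subsys_eventname(firstarg, p);
}
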
+ +-- TPPROTO(int firstarg, struct task_struct *p) is the prototype of the ++- TP_PROTO(int firstarg, struct task_struct *p) is the prototype of the + function called by this tracepoint. + +-- TPARGS(firstarg, p) are the parameters names, same as found in the ++- TP_ARGS(firstarg, p) are the parameters names, same as found in the + prototype. + + Connecting a function (probe) to a tracepoint is done by providing a +@@ -103,13 +103,14 @@ used to export the defined tracepoints. + + * Probe / tracepoint example + +-See the example provided in samples/tracepoints/src ++See the example provided in samples/tracepoints + +-Compile them with your kernel. ++Compile them with your kernel. They are built during 'make' (not ++'make modules') when CONFIG_SAMPLE_TRACEPOINTS=m. + + Run, as root : +-modprobe tracepoint-example (insmod order is not important) +-modprobe tracepoint-probe-example +-cat /proc/tracepoint-example (returns an expected error) +-rmmod tracepoint-example tracepoint-probe-example ++modprobe tracepoint-sample (insmod order is not important) ++modprobe tracepoint-probe-sample ++cat /proc/tracepoint-sample (returns an expected error) ++rmmod tracepoint-sample tracepoint-probe-sample + dmesg +Index: linux-2.6-tip/Documentation/vm/kmemtrace.txt +=================================================================== +--- /dev/null ++++ linux-2.6-tip/Documentation/vm/kmemtrace.txt +@@ -0,0 +1,126 @@ ++ kmemtrace - Kernel Memory Tracer ++ ++ by Eduard - Gabriel Munteanu ++ ++ ++I. Introduction ++=============== ++ ++kmemtrace helps kernel developers figure out two things: ++1) how different allocators (SLAB, SLUB etc.) perform ++2) how kernel code allocates memory and how much ++ ++To do this, we trace every allocation and export information to the userspace ++through the relay interface. We export things such as the number of requested ++bytes, the number of bytes actually allocated (i.e. including internal ++fragmentation), whether this is a slab allocation or a plain kmalloc() and so ++on. ++ ++The actual analysis is performed by a userspace tool (see section III for ++details on where to get it from). It logs the data exported by the kernel, ++processes it and (as of writing this) can provide the following information: ++- the total amount of memory allocated and fragmentation per call-site ++- the amount of memory allocated and fragmentation per allocation ++- total memory allocated and fragmentation in the collected dataset ++- number of cross-CPU allocation and frees (makes sense in NUMA environments) ++ ++Moreover, it can potentially find inconsistent and erroneous behavior in ++kernel code, such as using slab free functions on kmalloc'ed memory or ++allocating less memory than requested (but not truly failed allocations). ++ ++kmemtrace also makes provisions for tracing on some arch and analysing the ++data on another. ++ ++II. Design and goals ++==================== ++ ++kmemtrace was designed to handle rather large amounts of data. Thus, it uses ++the relay interface to export whatever is logged to userspace, which then ++stores it. Analysis and reporting is done asynchronously, that is, after the ++data is collected and stored. By design, it allows one to log and analyse ++on different machines and different arches. ++ ++As of writing this, the ABI is not considered stable, though it might not ++change much. However, no guarantees are made about compatibility yet. When ++deemed stable, the ABI should still allow easy extension while maintaining ++backward compatibility. 
This is described further in Documentation/ABI. ++ ++Summary of design goals: ++ - allow logging and analysis to be done across different machines ++ - be fast and anticipate usage in high-load environments (*) ++ - be reasonably extensible ++ - make it possible for GNU/Linux distributions to have kmemtrace ++ included in their repositories ++ ++(*) - one of the reasons Pekka Enberg's original userspace data analysis ++ tool's code was rewritten from Perl to C (although this is more than a ++ simple conversion) ++ ++ ++III. Quick usage guide ++====================== ++ ++1) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable ++CONFIG_KMEMTRACE). ++ ++2) Get the userspace tool and build it: ++$ git-clone git://repo.or.cz/kmemtrace-user.git # current repository ++$ cd kmemtrace-user/ ++$ ./autogen.sh ++$ ./configure ++$ make ++ ++3) Boot the kmemtrace-enabled kernel if you haven't, preferably in the ++'single' runlevel (so that relay buffers don't fill up easily), and run ++kmemtrace: ++# '$' does not mean user, but root here. ++$ mount -t debugfs none /sys/kernel/debug ++$ mount -t proc none /proc ++$ cd path/to/kmemtrace-user/ ++$ ./kmemtraced ++Wait a bit, then stop it with CTRL+C. ++$ cat /sys/kernel/debug/kmemtrace/total_overruns # Check if we didn't ++ # overrun, should ++ # be zero. ++$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to ++ check its correctness] ++$ ./kmemtrace-report ++ ++Now you should have a nice and short summary of how the allocator performs. ++ ++IV. FAQ and known issues ++======================== ++ ++Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix ++this? Should I worry? ++A: If it's non-zero, this affects kmemtrace's accuracy, depending on how ++large the number is. You can fix it by supplying a higher ++'kmemtrace.subbufs=N' kernel parameter. ++--- ++ ++Q: kmemtrace_check reports errors, how do I fix this? Should I worry? ++A: This is a bug and should be reported. It can occur for a variety of ++reasons: ++ - possible bugs in relay code ++ - possible misuse of relay by kmemtrace ++ - timestamps being collected unorderly ++Or you may fix it yourself and send us a patch. ++--- ++ ++Q: kmemtrace_report shows many errors, how do I fix this? Should I worry? ++A: This is a known issue and I'm working on it. These might be true errors ++in kernel code, which may have inconsistent behavior (e.g. allocating memory ++with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed ++out this behavior may work with SLAB, but may fail with other allocators. ++ ++It may also be due to lack of tracing in some unusual allocator functions. ++ ++We don't want bug reports regarding this issue yet. ++--- ++ ++V. 
See also ++=========== ++ ++Documentation/kernel-parameters.txt ++Documentation/ABI/testing/debugfs-kmemtrace ++ +Index: linux-2.6-tip/Documentation/x86/boot.txt +=================================================================== +--- linux-2.6-tip.orig/Documentation/x86/boot.txt ++++ linux-2.6-tip/Documentation/x86/boot.txt +@@ -158,7 +158,7 @@ Offset Proto Name Meaning + 0202/4 2.00+ header Magic signature "HdrS" + 0206/2 2.00+ version Boot protocol version supported + 0208/4 2.00+ realmode_swtch Boot loader hook (see below) +-020C/2 2.00+ start_sys The load-low segment (0x1000) (obsolete) ++020C/2 2.00+ start_sys_seg The load-low segment (0x1000) (obsolete) + 020E/2 2.00+ kernel_version Pointer to kernel version string + 0210/1 2.00+ type_of_loader Boot loader identifier + 0211/1 2.00+ loadflags Boot protocol option flags +@@ -170,10 +170,11 @@ Offset Proto Name Meaning + 0224/2 2.01+ heap_end_ptr Free memory after setup end + 0226/2 N/A pad1 Unused + 0228/4 2.02+ cmd_line_ptr 32-bit pointer to the kernel command line +-022C/4 2.03+ initrd_addr_max Highest legal initrd address ++022C/4 2.03+ ramdisk_max Highest legal initrd address + 0230/4 2.05+ kernel_alignment Physical addr alignment required for kernel + 0234/1 2.05+ relocatable_kernel Whether kernel is relocatable or not +-0235/3 N/A pad2 Unused ++0235/1 N/A pad2 Unused ++0236/2 N/A pad3 Unused + 0238/4 2.06+ cmdline_size Maximum size of the kernel command line + 023C/4 2.07+ hardware_subarch Hardware subarchitecture + 0240/8 2.07+ hardware_subarch_data Subarchitecture-specific data +@@ -299,14 +300,14 @@ Protocol: 2.00+ + e.g. 0x0204 for version 2.04, and 0x0a11 for a hypothetical version + 10.17. + +-Field name: readmode_swtch ++Field name: realmode_swtch + Type: modify (optional) + Offset/size: 0x208/4 + Protocol: 2.00+ + + Boot loader hook (see ADVANCED BOOT LOADER HOOKS below.) + +-Field name: start_sys ++Field name: start_sys_seg + Type: read + Offset/size: 0x20c/2 + Protocol: 2.00+ +@@ -468,7 +469,7 @@ Protocol: 2.02+ + zero, the kernel will assume that your boot loader does not support + the 2.02+ protocol. + +-Field name: initrd_addr_max ++Field name: ramdisk_max + Type: read + Offset/size: 0x22c/4 + Protocol: 2.03+ +@@ -542,7 +543,10 @@ Protocol: 2.08+ + + The payload may be compressed. The format of both the compressed and + uncompressed data should be determined using the standard magic +- numbers. Currently only gzip compressed ELF is used. ++ numbers. The currently supported compression formats are gzip ++ (magic numbers 1F 8B or 1F 9E), bzip2 (magic number 42 5A) and LZMA ++ (magic number 5D 00). The uncompressed payload is currently always ELF ++ (magic number 7F 45 4C 46). + + Field name: payload_length + Type: read +Index: linux-2.6-tip/Documentation/x86/earlyprintk.txt +=================================================================== +--- /dev/null ++++ linux-2.6-tip/Documentation/x86/earlyprintk.txt +@@ -0,0 +1,101 @@ ++ ++Mini-HOWTO for using the earlyprintk=dbgp boot option with a ++USB2 Debug port key and a debug cable, on x86 systems. ++ ++You need two computers, the 'USB debug key' special gadget and ++and two USB cables, connected like this: ++ ++ [host/target] <-------> [USB debug key] <-------> [client/console] ++ ++1. There are three specific hardware requirements: ++ ++ a.) Host/target system needs to have USB debug port capability. ++ ++ You can check this capability by looking at a 'Debug port' bit in ++ the lspci -vvv output: ++ ++ # lspci -vvv ++ ... 
++ 00:1d.7 USB Controller: Intel Corporation 82801H (ICH8 Family) USB2 EHCI Controller #1 (rev 03) (prog-if 20 [EHCI]) ++ Subsystem: Lenovo ThinkPad T61 ++ Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx- ++ Status: Cap+ 66MHz- UDF- FastB2B+ ParErr- DEVSEL=medium >TAbort- SERR- /proc/sysrq-trigger ++ ++ On the host/target system you should see this help line in "dmesg" output: ++ ++ SysRq : HELP : loglevel(0-9) reBoot Crashdump terminate-all-tasks(E) memory-full-oom-kill(F) kill-all-tasks(I) saK show-backtrace-all-active-cpus(L) show-memory-usage(M) nice-all-RT-tasks(N) powerOff show-registers(P) show-all-timers(Q) unRaw Sync show-task-states(T) Unmount show-blocked-tasks(W) dump-ftrace-buffer(Z) ++ ++ On the client/console system do: ++ ++ cat /dev/ttyUSB0 ++ ++ And you should see the help line above displayed shortly after you've ++ provoked it on the host system. ++ ++If it does not work then please ask about it on the linux-kernel@vger.kernel.org ++mailing list or contact the x86 maintainers. +Index: linux-2.6-tip/MAINTAINERS +=================================================================== +--- linux-2.6-tip.orig/MAINTAINERS ++++ linux-2.6-tip/MAINTAINERS +@@ -1952,6 +1952,15 @@ L: linux-media@vger.kernel.org + T: git kernel.org:/pub/scm/linux/kernel/git/mchehab/linux-2.6.git + S: Maintained + ++HARDWARE LATENCY DETECTOR ++P: Jon Masters ++M: jcm@jonmasters.org ++W: http://www.kernel.org/pub/linux/kernel/people/jcm/hwlat_detector/ ++S: Supported ++L: linux-kernel@vger.kernel.org ++F: Documentation/hwlat_detector.txt ++F: drivers/misc/hwlat_detector.c ++ + HARDWARE MONITORING + L: lm-sensors@lm-sensors.org + W: http://www.lm-sensors.org/ +@@ -2621,6 +2630,20 @@ M: jason.wessel@windriver.com + L: kgdb-bugreport@lists.sourceforge.net + S: Maintained + ++KMEMCHECK ++P: Vegard Nossum ++M: vegardno@ifi.uio.no ++P Pekka Enberg ++M: penberg@cs.helsinki.fi ++L: linux-kernel@vger.kernel.org ++S: Maintained ++ ++KMEMTRACE ++P: Eduard - Gabriel Munteanu ++M: eduard.munteanu@linux360.ro ++L: linux-kernel@vger.kernel.org ++S: Maintained ++ + KPROBES + P: Ananth N Mavinakayanahalli + M: ananth@in.ibm.com +Index: linux-2.6-tip/Makefile +=================================================================== +--- linux-2.6-tip.orig/Makefile ++++ linux-2.6-tip/Makefile +@@ -533,8 +533,9 @@ KBUILD_CFLAGS += $(call cc-option,-Wfram + endif + + # Force gcc to behave correct even for buggy distributions +-# Arch Makefiles may override this setting ++ifndef CONFIG_CC_STACKPROTECTOR + KBUILD_CFLAGS += $(call cc-option, -fno-stack-protector) ++endif + + ifdef CONFIG_FRAME_POINTER + KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls +@@ -551,6 +552,10 @@ ifdef CONFIG_FUNCTION_TRACER + KBUILD_CFLAGS += -pg + endif + ++ifndef CONFIG_ALLOW_WARNINGS ++KBUILD_CFLAGS += -Werror ${WERROR} ++endif ++ + # We trigger additional mismatches with less inlining + ifdef CONFIG_DEBUG_SECTION_MISMATCH + KBUILD_CFLAGS += $(call cc-option, -fno-inline-functions-called-once) +Index: linux-2.6-tip/arch/Kconfig +=================================================================== +--- linux-2.6-tip.orig/arch/Kconfig ++++ linux-2.6-tip/arch/Kconfig +@@ -6,6 +6,7 @@ config OPROFILE + tristate "OProfile system profiling (EXPERIMENTAL)" + depends on PROFILING + depends on HAVE_OPROFILE ++ depends on TRACING_SUPPORT + select TRACING + select RING_BUFFER + help +@@ -32,6 +33,11 @@ config OPROFILE_IBS + config HAVE_OPROFILE + bool + ++config PROFILE_NMI ++ bool ++ 
depends on OPROFILE ++ default y ++ + config KPROBES + bool "Kprobes" + depends on KALLSYMS && MODULES +@@ -106,3 +112,5 @@ config HAVE_CLK + The calls support software clock gating and + thus are a key power management tool on many systems. + ++config HAVE_DMA_API_DEBUG ++ bool +Index: linux-2.6-tip/arch/alpha/include/asm/ftrace.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/alpha/include/asm/ftrace.h +@@ -0,0 +1 @@ ++/* empty */ +Index: linux-2.6-tip/arch/alpha/include/asm/hardirq.h +=================================================================== +--- linux-2.6-tip.orig/arch/alpha/include/asm/hardirq.h ++++ linux-2.6-tip/arch/alpha/include/asm/hardirq.h +@@ -14,17 +14,4 @@ typedef struct { + + void ack_bad_irq(unsigned int irq); + +-#define HARDIRQ_BITS 12 +- +-/* +- * The hardirq mask has to be large enough to have +- * space for potentially nestable IRQ sources in the system +- * to nest on a single CPU. On Alpha, interrupts are masked at the CPU +- * by IPL as well as at the system level. We only have 8 IPLs (UNIX PALcode) +- * so we really only have 8 nestable IRQs, but allow some overhead +- */ +-#if (1 << HARDIRQ_BITS) < 16 +-#error HARDIRQ_BITS is too low! +-#endif +- + #endif /* _ALPHA_HARDIRQ_H */ +Index: linux-2.6-tip/arch/alpha/include/asm/statfs.h +=================================================================== +--- linux-2.6-tip.orig/arch/alpha/include/asm/statfs.h ++++ linux-2.6-tip/arch/alpha/include/asm/statfs.h +@@ -1,6 +1,8 @@ + #ifndef _ALPHA_STATFS_H + #define _ALPHA_STATFS_H + ++#include ++ + /* Alpha is the only 64-bit platform with 32-bit statfs. And doesn't + even seem to implement statfs64 */ + #define __statfs_word __u32 +Index: linux-2.6-tip/arch/alpha/include/asm/swab.h +=================================================================== +--- linux-2.6-tip.orig/arch/alpha/include/asm/swab.h ++++ linux-2.6-tip/arch/alpha/include/asm/swab.h +@@ -1,7 +1,7 @@ + #ifndef _ALPHA_SWAB_H + #define _ALPHA_SWAB_H + +-#include ++#include + #include + #include + +Index: linux-2.6-tip/arch/alpha/kernel/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/alpha/kernel/irq.c ++++ linux-2.6-tip/arch/alpha/kernel/irq.c +@@ -55,7 +55,7 @@ int irq_select_affinity(unsigned int irq + cpu = (cpu < (NR_CPUS-1) ? 
cpu + 1 : 0); + last_cpu = cpu; + +- irq_desc[irq].affinity = cpumask_of_cpu(cpu); ++ cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); + irq_desc[irq].chip->set_affinity(irq, cpumask_of(cpu)); + return 0; + } +@@ -90,7 +90,7 @@ show_interrupts(struct seq_file *p, void + seq_printf(p, "%10u ", kstat_irqs(irq)); + #else + for_each_online_cpu(j) +- seq_printf(p, "%10u ", kstat_cpu(j).irqs[irq]); ++ seq_printf(p, "%10u ", kstat_irqs_cpu(irq, j)); + #endif + seq_printf(p, " %14s", irq_desc[irq].chip->typename); + seq_printf(p, " %c%s", +Index: linux-2.6-tip/arch/alpha/kernel/irq_alpha.c +=================================================================== +--- linux-2.6-tip.orig/arch/alpha/kernel/irq_alpha.c ++++ linux-2.6-tip/arch/alpha/kernel/irq_alpha.c +@@ -64,7 +64,7 @@ do_entInt(unsigned long type, unsigned l + smp_percpu_timer_interrupt(regs); + cpu = smp_processor_id(); + if (cpu != boot_cpuid) { +- kstat_cpu(cpu).irqs[RTC_IRQ]++; ++ kstat_incr_irqs_this_cpu(RTC_IRQ, irq_to_desc(RTC_IRQ)); + } else { + handle_irq(RTC_IRQ); + } +Index: linux-2.6-tip/arch/alpha/mm/init.c +=================================================================== +--- linux-2.6-tip.orig/arch/alpha/mm/init.c ++++ linux-2.6-tip/arch/alpha/mm/init.c +@@ -189,9 +189,21 @@ callback_init(void * kernel_end) + + if (alpha_using_srm) { + static struct vm_struct console_remap_vm; +- unsigned long vaddr = VMALLOC_START; ++ unsigned long nr_pages = 0; ++ unsigned long vaddr; + unsigned long i, j; + ++ /* calculate needed size */ ++ for (i = 0; i < crb->map_entries; ++i) ++ nr_pages += crb->map[i].count; ++ ++ /* register the vm area */ ++ console_remap_vm.flags = VM_ALLOC; ++ console_remap_vm.size = nr_pages << PAGE_SHIFT; ++ vm_area_register_early(&console_remap_vm, PAGE_SIZE); ++ ++ vaddr = (unsigned long)console_remap_vm.addr; ++ + /* Set up the third level PTEs and update the virtual + addresses of the CRB entries. */ + for (i = 0; i < crb->map_entries; ++i) { +@@ -213,12 +225,6 @@ callback_init(void * kernel_end) + vaddr += PAGE_SIZE; + } + } +- +- /* Let vmalloc know that we've allocated some space. 
*/ +- console_remap_vm.flags = VM_ALLOC; +- console_remap_vm.addr = (void *) VMALLOC_START; +- console_remap_vm.size = vaddr - VMALLOC_START; +- vmlist = &console_remap_vm; + } + + callback_init_done = 1; +Index: linux-2.6-tip/arch/arm/include/asm/a.out.h +=================================================================== +--- linux-2.6-tip.orig/arch/arm/include/asm/a.out.h ++++ linux-2.6-tip/arch/arm/include/asm/a.out.h +@@ -2,7 +2,7 @@ + #define __ARM_A_OUT_H__ + + #include +-#include ++#include + + struct exec + { +Index: linux-2.6-tip/arch/arm/include/asm/setup.h +=================================================================== +--- linux-2.6-tip.orig/arch/arm/include/asm/setup.h ++++ linux-2.6-tip/arch/arm/include/asm/setup.h +@@ -14,7 +14,7 @@ + #ifndef __ASMARM_SETUP_H + #define __ASMARM_SETUP_H + +-#include ++#include + + #define COMMAND_LINE_SIZE 1024 + +Index: linux-2.6-tip/arch/arm/include/asm/swab.h +=================================================================== +--- linux-2.6-tip.orig/arch/arm/include/asm/swab.h ++++ linux-2.6-tip/arch/arm/include/asm/swab.h +@@ -16,7 +16,7 @@ + #define __ASM_ARM_SWAB_H + + #include +-#include ++#include + + #if !defined(__STRICT_ANSI__) || defined(__KERNEL__) + # define __SWAB_64_THRU_32__ +Index: linux-2.6-tip/arch/arm/kernel/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/arm/kernel/irq.c ++++ linux-2.6-tip/arch/arm/kernel/irq.c +@@ -76,7 +76,7 @@ int show_interrupts(struct seq_file *p, + + seq_printf(p, "%3d: ", i); + for_each_present_cpu(cpu) +- seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]); ++ seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu)); + seq_printf(p, " %10s", irq_desc[i].chip->name ? : "-"); + seq_printf(p, " %s", action->name); + for (action = action->next; action; action = action->next) +@@ -101,9 +101,14 @@ unlock: + /* Handle bad interrupts */ + static struct irq_desc bad_irq_desc = { + .handle_irq = handle_bad_irq, +- .lock = __SPIN_LOCK_UNLOCKED(bad_irq_desc.lock), ++ .lock = RAW_SPIN_LOCK_UNLOCKED(bad_irq_desc.lock), + }; + ++#ifdef CONFIG_CPUMASK_OFFSTACK ++/* We are not allocating bad_irq_desc.affinity or .pending_mask */ ++#error "ARM architecture does not support CONFIG_CPUMASK_OFFSTACK." ++#endif ++ + /* + * do_IRQ handles all hardware IRQ's. Decoded IRQs should not + * come via this function. 
Instead, they should provide their +@@ -161,7 +166,7 @@ void __init init_IRQ(void) + irq_desc[irq].status |= IRQ_NOREQUEST | IRQ_NOPROBE; + + #ifdef CONFIG_SMP +- bad_irq_desc.affinity = CPU_MASK_ALL; ++ cpumask_setall(bad_irq_desc.affinity); + bad_irq_desc.cpu = smp_processor_id(); + #endif + init_arch_irq(); +@@ -191,15 +196,16 @@ void migrate_irqs(void) + struct irq_desc *desc = irq_desc + i; + + if (desc->cpu == cpu) { +- unsigned int newcpu = any_online_cpu(desc->affinity); +- +- if (newcpu == NR_CPUS) { ++ unsigned int newcpu = cpumask_any_and(desc->affinity, ++ cpu_online_mask); ++ if (newcpu >= nr_cpu_ids) { + if (printk_ratelimit()) + printk(KERN_INFO "IRQ%u no longer affine to CPU%u\n", + i, cpu); + +- cpus_setall(desc->affinity); +- newcpu = any_online_cpu(desc->affinity); ++ cpumask_setall(desc->affinity); ++ newcpu = cpumask_any_and(desc->affinity, ++ cpu_online_mask); + } + + route_irq(desc, i, newcpu); +Index: linux-2.6-tip/arch/arm/kernel/vmlinux.lds.S +=================================================================== +--- linux-2.6-tip.orig/arch/arm/kernel/vmlinux.lds.S ++++ linux-2.6-tip/arch/arm/kernel/vmlinux.lds.S +@@ -64,7 +64,9 @@ SECTIONS + __initramfs_end = .; + #endif + . = ALIGN(4096); ++ __per_cpu_load = .; + __per_cpu_start = .; ++ *(.data.percpu.page_aligned) + *(.data.percpu) + *(.data.percpu.shared_aligned) + __per_cpu_end = .; +Index: linux-2.6-tip/arch/arm/mach-ns9xxx/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/arm/mach-ns9xxx/irq.c ++++ linux-2.6-tip/arch/arm/mach-ns9xxx/irq.c +@@ -63,7 +63,6 @@ static struct irq_chip ns9xxx_chip = { + #else + static void handle_prio_irq(unsigned int irq, struct irq_desc *desc) + { +- unsigned int cpu = smp_processor_id(); + struct irqaction *action; + irqreturn_t action_ret; + +@@ -72,7 +71,7 @@ static void handle_prio_irq(unsigned int + BUG_ON(desc->status & IRQ_INPROGRESS); + + desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); +- kstat_cpu(cpu).irqs[irq]++; ++ kstat_incr_irqs_this_cpu(irq, desc); + + action = desc->action; + if (unlikely(!action || (desc->status & IRQ_DISABLED))) +Index: linux-2.6-tip/arch/arm/oprofile/op_model_mpcore.c +=================================================================== +--- linux-2.6-tip.orig/arch/arm/oprofile/op_model_mpcore.c ++++ linux-2.6-tip/arch/arm/oprofile/op_model_mpcore.c +@@ -263,7 +263,7 @@ static void em_route_irq(int irq, unsign + const struct cpumask *mask = cpumask_of(cpu); + + spin_lock_irq(&desc->lock); +- desc->affinity = *mask; ++ cpumask_copy(desc->affinity, mask); + desc->chip->set_affinity(irq, mask); + spin_unlock_irq(&desc->lock); + } +Index: linux-2.6-tip/arch/avr32/Kconfig +=================================================================== +--- linux-2.6-tip.orig/arch/avr32/Kconfig ++++ linux-2.6-tip/arch/avr32/Kconfig +@@ -181,7 +181,7 @@ source "kernel/Kconfig.preempt" + config QUICKLIST + def_bool y + +-config HAVE_ARCH_BOOTMEM_NODE ++config HAVE_ARCH_BOOTMEM + def_bool n + + config ARCH_HAVE_MEMORY_PRESENT +Index: linux-2.6-tip/arch/avr32/include/asm/ftrace.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/avr32/include/asm/ftrace.h +@@ -0,0 +1 @@ ++/* empty */ +Index: linux-2.6-tip/arch/avr32/include/asm/hardirq.h +=================================================================== +--- linux-2.6-tip.orig/arch/avr32/include/asm/hardirq.h ++++ linux-2.6-tip/arch/avr32/include/asm/hardirq.h +@@ -20,15 +20,4 @@ void ack_bad_irq(unsigned 
int irq); + + #endif /* __ASSEMBLY__ */ + +-#define HARDIRQ_BITS 12 +- +-/* +- * The hardirq mask has to be large enough to have +- * space for potentially all IRQ sources in the system +- * nesting on a single CPU: +- */ +-#if (1 << HARDIRQ_BITS) < NR_IRQS +-# error HARDIRQ_BITS is too low! +-#endif +- + #endif /* __ASM_AVR32_HARDIRQ_H */ +Index: linux-2.6-tip/arch/avr32/include/asm/swab.h +=================================================================== +--- linux-2.6-tip.orig/arch/avr32/include/asm/swab.h ++++ linux-2.6-tip/arch/avr32/include/asm/swab.h +@@ -4,7 +4,7 @@ + #ifndef __ASM_AVR32_SWAB_H + #define __ASM_AVR32_SWAB_H + +-#include ++#include + #include + + #define __SWAB_64_THRU_32__ +Index: linux-2.6-tip/arch/avr32/kernel/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/avr32/kernel/irq.c ++++ linux-2.6-tip/arch/avr32/kernel/irq.c +@@ -58,7 +58,7 @@ int show_interrupts(struct seq_file *p, + + seq_printf(p, "%3d: ", i); + for_each_online_cpu(cpu) +- seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]); ++ seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu)); + seq_printf(p, " %8s", irq_desc[i].chip->name ? : "-"); + seq_printf(p, " %s", action->name); + for (action = action->next; action; action = action->next) +Index: linux-2.6-tip/arch/blackfin/include/asm/ftrace.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/blackfin/include/asm/ftrace.h +@@ -0,0 +1 @@ ++/* empty */ +Index: linux-2.6-tip/arch/blackfin/include/asm/percpu.h +=================================================================== +--- linux-2.6-tip.orig/arch/blackfin/include/asm/percpu.h ++++ linux-2.6-tip/arch/blackfin/include/asm/percpu.h +@@ -3,14 +3,4 @@ + + #include + +-#ifdef CONFIG_MODULES +-#define PERCPU_MODULE_RESERVE 8192 +-#else +-#define PERCPU_MODULE_RESERVE 0 +-#endif +- +-#define PERCPU_ENOUGH_ROOM \ +- (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ +- PERCPU_MODULE_RESERVE) +- + #endif /* __ARCH_BLACKFIN_PERCPU__ */ +Index: linux-2.6-tip/arch/blackfin/include/asm/swab.h +=================================================================== +--- linux-2.6-tip.orig/arch/blackfin/include/asm/swab.h ++++ linux-2.6-tip/arch/blackfin/include/asm/swab.h +@@ -1,7 +1,7 @@ + #ifndef _BLACKFIN_SWAB_H + #define _BLACKFIN_SWAB_H + +-#include ++#include + #include + + #if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__) +Index: linux-2.6-tip/arch/blackfin/kernel/irqchip.c +=================================================================== +--- linux-2.6-tip.orig/arch/blackfin/kernel/irqchip.c ++++ linux-2.6-tip/arch/blackfin/kernel/irqchip.c +@@ -70,6 +70,11 @@ static struct irq_desc bad_irq_desc = { + #endif + }; + ++#ifdef CONFIG_CPUMASK_OFFSTACK ++/* We are not allocating a variable-sized bad_irq_desc.affinity */ ++#error "Blackfin architecture does not support CONFIG_CPUMASK_OFFSTACK." 
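Stepping back from the per-architecture hunks in this stretch of the patch: they all replace open-coded kstat_cpu(cpu).irqs[irq] accounting with the kstat_irqs_cpu() helper when printing /proc/interrupts. A condensed sketch of the resulting pattern is below; the show_one_irq() wrapper is illustrative only, the helpers are the ones the hunks themselves use, and the usual seq_file plumbing around it is assumed.

/* sketch: one /proc/interrupts row after the kstat_irqs_cpu() conversion */
#include <linux/irq.h>
#include <linux/kernel_stat.h>
#include <linux/seq_file.h>

static void show_one_irq(struct seq_file *p, int irq)
{
	int cpu;

	seq_printf(p, "%3d: ", irq);
	for_each_online_cpu(cpu)
		/* was: kstat_cpu(cpu).irqs[irq] */
		seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
	seq_printf(p, " %8s\n", irq_desc[irq].chip->name ? : "-");
}
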
++#endif ++ + int show_interrupts(struct seq_file *p, void *v) + { + int i = *(loff_t *) v, j; +@@ -83,7 +88,7 @@ int show_interrupts(struct seq_file *p, + goto skip; + seq_printf(p, "%3d: ", i); + for_each_online_cpu(j) +- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); ++ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + seq_printf(p, " %8s", irq_desc[i].chip->name); + seq_printf(p, " %s", action->name); + for (action = action->next; action; action = action->next) +Index: linux-2.6-tip/arch/cris/include/asm/ftrace.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/cris/include/asm/ftrace.h +@@ -0,0 +1 @@ ++/* empty */ +Index: linux-2.6-tip/arch/cris/kernel/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/cris/kernel/irq.c ++++ linux-2.6-tip/arch/cris/kernel/irq.c +@@ -66,7 +66,7 @@ int show_interrupts(struct seq_file *p, + seq_printf(p, "%10u ", kstat_irqs(i)); + #else + for_each_online_cpu(j) +- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); ++ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + #endif + seq_printf(p, " %14s", irq_desc[i].chip->typename); + seq_printf(p, " %s", action->name); +Index: linux-2.6-tip/arch/frv/kernel/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/frv/kernel/irq.c ++++ linux-2.6-tip/arch/frv/kernel/irq.c +@@ -74,7 +74,7 @@ int show_interrupts(struct seq_file *p, + if (action) { + seq_printf(p, "%3d: ", i); + for_each_present_cpu(cpu) +- seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]); ++ seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu)); + seq_printf(p, " %10s", irq_desc[i].chip->name ? : "-"); + seq_printf(p, " %s", action->name); + for (action = action->next; +Index: linux-2.6-tip/arch/h8300/include/asm/ftrace.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/h8300/include/asm/ftrace.h +@@ -0,0 +1 @@ ++/* empty */ +Index: linux-2.6-tip/arch/h8300/include/asm/swab.h +=================================================================== +--- linux-2.6-tip.orig/arch/h8300/include/asm/swab.h ++++ linux-2.6-tip/arch/h8300/include/asm/swab.h +@@ -1,7 +1,7 @@ + #ifndef _H8300_SWAB_H + #define _H8300_SWAB_H + +-#include ++#include + + #if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__) + # define __SWAB_64_THRU_32__ +Index: linux-2.6-tip/arch/h8300/kernel/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/h8300/kernel/irq.c ++++ linux-2.6-tip/arch/h8300/kernel/irq.c +@@ -183,7 +183,7 @@ asmlinkage void do_IRQ(int irq) + #if defined(CONFIG_PROC_FS) + int show_interrupts(struct seq_file *p, void *v) + { +- int i = *(loff_t *) v, j; ++ int i = *(loff_t *) v; + struct irqaction * action; + unsigned long flags; + +@@ -196,7 +196,7 @@ int show_interrupts(struct seq_file *p, + if (!action) + goto unlock; + seq_printf(p, "%3d: ",i); +- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); ++ seq_printf(p, "%10u ", kstat_irqs(i)); + seq_printf(p, " %14s", irq_desc[i].chip->name); + seq_printf(p, "-%-8s", irq_desc[i].name); + seq_printf(p, " %s", action->name); +Index: linux-2.6-tip/arch/ia64/Kconfig +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/Kconfig ++++ linux-2.6-tip/arch/ia64/Kconfig +@@ -22,6 +22,9 @@ config IA64 + select HAVE_OPROFILE + select HAVE_KPROBES + select HAVE_KRETPROBES ++ select HAVE_FTRACE_MCOUNT_RECORD ++ select 
HAVE_DYNAMIC_FTRACE if (!ITANIUM) ++ select HAVE_FUNCTION_TRACER + select HAVE_DMA_ATTRS + select HAVE_KVM + select HAVE_ARCH_TRACEHOOK +Index: linux-2.6-tip/arch/ia64/dig/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/dig/Makefile ++++ linux-2.6-tip/arch/ia64/dig/Makefile +@@ -7,8 +7,8 @@ + + obj-y := setup.o + ifeq ($(CONFIG_DMAR), y) +-obj-$(CONFIG_IA64_GENERIC) += machvec.o machvec_vtd.o dig_vtd_iommu.o ++obj-$(CONFIG_IA64_GENERIC) += machvec.o machvec_vtd.o + else + obj-$(CONFIG_IA64_GENERIC) += machvec.o + endif +-obj-$(CONFIG_IA64_DIG_VTD) += dig_vtd_iommu.o ++ +Index: linux-2.6-tip/arch/ia64/dig/dig_vtd_iommu.c +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/dig/dig_vtd_iommu.c ++++ /dev/null +@@ -1,59 +0,0 @@ +-#include +-#include +-#include +-#include +- +-void * +-vtd_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, +- gfp_t flags) +-{ +- return intel_alloc_coherent(dev, size, dma_handle, flags); +-} +-EXPORT_SYMBOL_GPL(vtd_alloc_coherent); +- +-void +-vtd_free_coherent(struct device *dev, size_t size, void *vaddr, +- dma_addr_t dma_handle) +-{ +- intel_free_coherent(dev, size, vaddr, dma_handle); +-} +-EXPORT_SYMBOL_GPL(vtd_free_coherent); +- +-dma_addr_t +-vtd_map_single_attrs(struct device *dev, void *addr, size_t size, +- int dir, struct dma_attrs *attrs) +-{ +- return intel_map_single(dev, (phys_addr_t)addr, size, dir); +-} +-EXPORT_SYMBOL_GPL(vtd_map_single_attrs); +- +-void +-vtd_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, +- int dir, struct dma_attrs *attrs) +-{ +- intel_unmap_single(dev, iova, size, dir); +-} +-EXPORT_SYMBOL_GPL(vtd_unmap_single_attrs); +- +-int +-vtd_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents, +- int dir, struct dma_attrs *attrs) +-{ +- return intel_map_sg(dev, sglist, nents, dir); +-} +-EXPORT_SYMBOL_GPL(vtd_map_sg_attrs); +- +-void +-vtd_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, +- int nents, int dir, struct dma_attrs *attrs) +-{ +- intel_unmap_sg(dev, sglist, nents, dir); +-} +-EXPORT_SYMBOL_GPL(vtd_unmap_sg_attrs); +- +-int +-vtd_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +-{ +- return 0; +-} +-EXPORT_SYMBOL_GPL(vtd_dma_mapping_error); +Index: linux-2.6-tip/arch/ia64/hp/common/hwsw_iommu.c +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/hp/common/hwsw_iommu.c ++++ linux-2.6-tip/arch/ia64/hp/common/hwsw_iommu.c +@@ -13,48 +13,33 @@ + */ + + #include ++#include + #include +- + #include + ++extern struct dma_map_ops sba_dma_ops, swiotlb_dma_ops; ++ + /* swiotlb declarations & definitions: */ + extern int swiotlb_late_init_with_default_size (size_t size); + +-/* hwiommu declarations & definitions: */ +- +-extern ia64_mv_dma_alloc_coherent sba_alloc_coherent; +-extern ia64_mv_dma_free_coherent sba_free_coherent; +-extern ia64_mv_dma_map_single_attrs sba_map_single_attrs; +-extern ia64_mv_dma_unmap_single_attrs sba_unmap_single_attrs; +-extern ia64_mv_dma_map_sg_attrs sba_map_sg_attrs; +-extern ia64_mv_dma_unmap_sg_attrs sba_unmap_sg_attrs; +-extern ia64_mv_dma_supported sba_dma_supported; +-extern ia64_mv_dma_mapping_error sba_dma_mapping_error; +- +-#define hwiommu_alloc_coherent sba_alloc_coherent +-#define hwiommu_free_coherent sba_free_coherent +-#define hwiommu_map_single_attrs sba_map_single_attrs +-#define hwiommu_unmap_single_attrs sba_unmap_single_attrs 
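The wider point of the ia64 rework that starts here is that each machvec now hands back a struct dma_map_ops and the generic wrappers dispatch through it, so driver-facing code is unchanged. A small, hypothetical driver fragment exercising the converted streaming path is sketched below; the buffer handling and the hypothetical_xfer() name are illustrative, not from this patch.

/* sketch: generic streaming DMA usage that now routes through dma_map_ops */
#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/slab.h>

static int hypothetical_xfer(struct device *dev, size_t len)
{
	void *buf = kmalloc(len, GFP_KERNEL);
	dma_addr_t handle;

	if (!buf)
		return -ENOMEM;

	/* on ia64 this now ends up in platform_dma_get_ops(dev)->map_page() */
	handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
	if (dma_mapping_error(dev, handle)) {
		kfree(buf);
		return -EIO;
	}

	/* ... kick off the transfer and wait for it to finish ... */

	dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
	kfree(buf);
	return 0;
}
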
+-#define hwiommu_map_sg_attrs sba_map_sg_attrs +-#define hwiommu_unmap_sg_attrs sba_unmap_sg_attrs +-#define hwiommu_dma_supported sba_dma_supported +-#define hwiommu_dma_mapping_error sba_dma_mapping_error +-#define hwiommu_sync_single_for_cpu machvec_dma_sync_single +-#define hwiommu_sync_sg_for_cpu machvec_dma_sync_sg +-#define hwiommu_sync_single_for_device machvec_dma_sync_single +-#define hwiommu_sync_sg_for_device machvec_dma_sync_sg +- +- + /* + * Note: we need to make the determination of whether or not to use + * the sw I/O TLB based purely on the device structure. Anything else + * would be unreliable or would be too intrusive. + */ +-static inline int +-use_swiotlb (struct device *dev) ++static inline int use_swiotlb(struct device *dev) ++{ ++ return dev && dev->dma_mask && ++ !sba_dma_ops.dma_supported(dev, *dev->dma_mask); ++} ++ ++struct dma_map_ops *hwsw_dma_get_ops(struct device *dev) + { +- return dev && dev->dma_mask && !hwiommu_dma_supported(dev, *dev->dma_mask); ++ if (use_swiotlb(dev)) ++ return &swiotlb_dma_ops; ++ return &sba_dma_ops; + } ++EXPORT_SYMBOL(hwsw_dma_get_ops); + + void __init + hwsw_init (void) +@@ -71,125 +56,3 @@ hwsw_init (void) + #endif + } + } +- +-void * +-hwsw_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags) +-{ +- if (use_swiotlb(dev)) +- return swiotlb_alloc_coherent(dev, size, dma_handle, flags); +- else +- return hwiommu_alloc_coherent(dev, size, dma_handle, flags); +-} +- +-void +-hwsw_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) +-{ +- if (use_swiotlb(dev)) +- swiotlb_free_coherent(dev, size, vaddr, dma_handle); +- else +- hwiommu_free_coherent(dev, size, vaddr, dma_handle); +-} +- +-dma_addr_t +-hwsw_map_single_attrs(struct device *dev, void *addr, size_t size, int dir, +- struct dma_attrs *attrs) +-{ +- if (use_swiotlb(dev)) +- return swiotlb_map_single_attrs(dev, addr, size, dir, attrs); +- else +- return hwiommu_map_single_attrs(dev, addr, size, dir, attrs); +-} +-EXPORT_SYMBOL(hwsw_map_single_attrs); +- +-void +-hwsw_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, +- int dir, struct dma_attrs *attrs) +-{ +- if (use_swiotlb(dev)) +- return swiotlb_unmap_single_attrs(dev, iova, size, dir, attrs); +- else +- return hwiommu_unmap_single_attrs(dev, iova, size, dir, attrs); +-} +-EXPORT_SYMBOL(hwsw_unmap_single_attrs); +- +-int +-hwsw_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents, +- int dir, struct dma_attrs *attrs) +-{ +- if (use_swiotlb(dev)) +- return swiotlb_map_sg_attrs(dev, sglist, nents, dir, attrs); +- else +- return hwiommu_map_sg_attrs(dev, sglist, nents, dir, attrs); +-} +-EXPORT_SYMBOL(hwsw_map_sg_attrs); +- +-void +-hwsw_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents, +- int dir, struct dma_attrs *attrs) +-{ +- if (use_swiotlb(dev)) +- return swiotlb_unmap_sg_attrs(dev, sglist, nents, dir, attrs); +- else +- return hwiommu_unmap_sg_attrs(dev, sglist, nents, dir, attrs); +-} +-EXPORT_SYMBOL(hwsw_unmap_sg_attrs); +- +-void +-hwsw_sync_single_for_cpu (struct device *dev, dma_addr_t addr, size_t size, int dir) +-{ +- if (use_swiotlb(dev)) +- swiotlb_sync_single_for_cpu(dev, addr, size, dir); +- else +- hwiommu_sync_single_for_cpu(dev, addr, size, dir); +-} +- +-void +-hwsw_sync_sg_for_cpu (struct device *dev, struct scatterlist *sg, int nelems, int dir) +-{ +- if (use_swiotlb(dev)) +- swiotlb_sync_sg_for_cpu(dev, sg, nelems, dir); +- else +- hwiommu_sync_sg_for_cpu(dev, sg, nelems, 
dir); +-} +- +-void +-hwsw_sync_single_for_device (struct device *dev, dma_addr_t addr, size_t size, int dir) +-{ +- if (use_swiotlb(dev)) +- swiotlb_sync_single_for_device(dev, addr, size, dir); +- else +- hwiommu_sync_single_for_device(dev, addr, size, dir); +-} +- +-void +-hwsw_sync_sg_for_device (struct device *dev, struct scatterlist *sg, int nelems, int dir) +-{ +- if (use_swiotlb(dev)) +- swiotlb_sync_sg_for_device(dev, sg, nelems, dir); +- else +- hwiommu_sync_sg_for_device(dev, sg, nelems, dir); +-} +- +-int +-hwsw_dma_supported (struct device *dev, u64 mask) +-{ +- if (hwiommu_dma_supported(dev, mask)) +- return 1; +- return swiotlb_dma_supported(dev, mask); +-} +- +-int +-hwsw_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) +-{ +- return hwiommu_dma_mapping_error(dev, dma_addr) || +- swiotlb_dma_mapping_error(dev, dma_addr); +-} +- +-EXPORT_SYMBOL(hwsw_dma_mapping_error); +-EXPORT_SYMBOL(hwsw_dma_supported); +-EXPORT_SYMBOL(hwsw_alloc_coherent); +-EXPORT_SYMBOL(hwsw_free_coherent); +-EXPORT_SYMBOL(hwsw_sync_single_for_cpu); +-EXPORT_SYMBOL(hwsw_sync_single_for_device); +-EXPORT_SYMBOL(hwsw_sync_sg_for_cpu); +-EXPORT_SYMBOL(hwsw_sync_sg_for_device); +Index: linux-2.6-tip/arch/ia64/hp/common/sba_iommu.c +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/hp/common/sba_iommu.c ++++ linux-2.6-tip/arch/ia64/hp/common/sba_iommu.c +@@ -36,6 +36,7 @@ + #include /* hweight64() */ + #include + #include ++#include + + #include /* ia64_get_itc() */ + #include +@@ -908,11 +909,13 @@ sba_mark_invalid(struct ioc *ioc, dma_ad + * + * See Documentation/PCI/PCI-DMA-mapping.txt + */ +-dma_addr_t +-sba_map_single_attrs(struct device *dev, void *addr, size_t size, int dir, +- struct dma_attrs *attrs) ++static dma_addr_t sba_map_page(struct device *dev, struct page *page, ++ unsigned long poff, size_t size, ++ enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + struct ioc *ioc; ++ void *addr = page_address(page) + poff; + dma_addr_t iovp; + dma_addr_t offset; + u64 *pdir_start; +@@ -990,7 +993,14 @@ sba_map_single_attrs(struct device *dev, + #endif + return SBA_IOVA(ioc, iovp, offset); + } +-EXPORT_SYMBOL(sba_map_single_attrs); ++ ++static dma_addr_t sba_map_single_attrs(struct device *dev, void *addr, ++ size_t size, enum dma_data_direction dir, ++ struct dma_attrs *attrs) ++{ ++ return sba_map_page(dev, virt_to_page(addr), ++ (unsigned long)addr & ~PAGE_MASK, size, dir, attrs); ++} + + #ifdef ENABLE_MARK_CLEAN + static SBA_INLINE void +@@ -1026,8 +1036,8 @@ sba_mark_clean(struct ioc *ioc, dma_addr + * + * See Documentation/PCI/PCI-DMA-mapping.txt + */ +-void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, +- int dir, struct dma_attrs *attrs) ++static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size, ++ enum dma_data_direction dir, struct dma_attrs *attrs) + { + struct ioc *ioc; + #if DELAYED_RESOURCE_CNT > 0 +@@ -1094,7 +1104,12 @@ void sba_unmap_single_attrs(struct devic + spin_unlock_irqrestore(&ioc->res_lock, flags); + #endif /* DELAYED_RESOURCE_CNT == 0 */ + } +-EXPORT_SYMBOL(sba_unmap_single_attrs); ++ ++void sba_unmap_single_attrs(struct device *dev, dma_addr_t iova, size_t size, ++ enum dma_data_direction dir, struct dma_attrs *attrs) ++{ ++ sba_unmap_page(dev, iova, size, dir, attrs); ++} + + /** + * sba_alloc_coherent - allocate/map shared mem for DMA +@@ -1104,7 +1119,7 @@ EXPORT_SYMBOL(sba_unmap_single_attrs); + * + * See Documentation/PCI/PCI-DMA-mapping.txt + */ +-void 
* ++static void * + sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags) + { + struct ioc *ioc; +@@ -1167,7 +1182,8 @@ sba_alloc_coherent (struct device *dev, + * + * See Documentation/PCI/PCI-DMA-mapping.txt + */ +-void sba_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) ++static void sba_free_coherent (struct device *dev, size_t size, void *vaddr, ++ dma_addr_t dma_handle) + { + sba_unmap_single_attrs(dev, dma_handle, size, 0, NULL); + free_pages((unsigned long) vaddr, get_order(size)); +@@ -1422,8 +1438,9 @@ sba_coalesce_chunks(struct ioc *ioc, str + * + * See Documentation/PCI/PCI-DMA-mapping.txt + */ +-int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, int nents, +- int dir, struct dma_attrs *attrs) ++static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, ++ int nents, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + struct ioc *ioc; + int coalesced, filled = 0; +@@ -1502,7 +1519,6 @@ int sba_map_sg_attrs(struct device *dev, + + return filled; + } +-EXPORT_SYMBOL(sba_map_sg_attrs); + + /** + * sba_unmap_sg_attrs - unmap Scatter/Gather list +@@ -1514,8 +1530,9 @@ EXPORT_SYMBOL(sba_map_sg_attrs); + * + * See Documentation/PCI/PCI-DMA-mapping.txt + */ +-void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, +- int nents, int dir, struct dma_attrs *attrs) ++static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, ++ int nents, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + #ifdef ASSERT_PDIR_SANITY + struct ioc *ioc; +@@ -1551,7 +1568,6 @@ void sba_unmap_sg_attrs(struct device *d + #endif + + } +-EXPORT_SYMBOL(sba_unmap_sg_attrs); + + /************************************************************** + * +@@ -2064,6 +2080,8 @@ static struct acpi_driver acpi_sba_ioc_d + }, + }; + ++extern struct dma_map_ops swiotlb_dma_ops; ++ + static int __init + sba_init(void) + { +@@ -2077,6 +2095,7 @@ sba_init(void) + * a successful kdump kernel boot is to use the swiotlb. + */ + if (is_kdump_kernel()) { ++ dma_ops = &swiotlb_dma_ops; + if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0) + panic("Unable to initialize software I/O TLB:" + " Try machvec=dig boot option"); +@@ -2092,6 +2111,7 @@ sba_init(void) + * If we didn't find something sba_iommu can claim, we + * need to setup the swiotlb and switch to the dig machvec. 
+ */ ++ dma_ops = &swiotlb_dma_ops; + if (swiotlb_late_init_with_default_size(64 * (1<<20)) != 0) + panic("Unable to find SBA IOMMU or initialize " + "software I/O TLB: Try machvec=dig boot option"); +@@ -2138,15 +2158,13 @@ nosbagart(char *str) + return 1; + } + +-int +-sba_dma_supported (struct device *dev, u64 mask) ++static int sba_dma_supported (struct device *dev, u64 mask) + { + /* make sure it's at least 32bit capable */ + return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL); + } + +-int +-sba_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) ++static int sba_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) + { + return 0; + } +@@ -2176,7 +2194,22 @@ sba_page_override(char *str) + + __setup("sbapagesize=",sba_page_override); + +-EXPORT_SYMBOL(sba_dma_mapping_error); +-EXPORT_SYMBOL(sba_dma_supported); +-EXPORT_SYMBOL(sba_alloc_coherent); +-EXPORT_SYMBOL(sba_free_coherent); ++struct dma_map_ops sba_dma_ops = { ++ .alloc_coherent = sba_alloc_coherent, ++ .free_coherent = sba_free_coherent, ++ .map_page = sba_map_page, ++ .unmap_page = sba_unmap_page, ++ .map_sg = sba_map_sg_attrs, ++ .unmap_sg = sba_unmap_sg_attrs, ++ .sync_single_for_cpu = machvec_dma_sync_single, ++ .sync_sg_for_cpu = machvec_dma_sync_sg, ++ .sync_single_for_device = machvec_dma_sync_single, ++ .sync_sg_for_device = machvec_dma_sync_sg, ++ .dma_supported = sba_dma_supported, ++ .mapping_error = sba_dma_mapping_error, ++}; ++ ++void sba_dma_init(void) ++{ ++ dma_ops = &sba_dma_ops; ++} +Index: linux-2.6-tip/arch/ia64/include/asm/dma-mapping.h +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/include/asm/dma-mapping.h ++++ linux-2.6-tip/arch/ia64/include/asm/dma-mapping.h +@@ -11,99 +11,128 @@ + + #define ARCH_HAS_DMA_GET_REQUIRED_MASK + +-struct dma_mapping_ops { +- int (*mapping_error)(struct device *dev, +- dma_addr_t dma_addr); +- void* (*alloc_coherent)(struct device *dev, size_t size, +- dma_addr_t *dma_handle, gfp_t gfp); +- void (*free_coherent)(struct device *dev, size_t size, +- void *vaddr, dma_addr_t dma_handle); +- dma_addr_t (*map_single)(struct device *hwdev, unsigned long ptr, +- size_t size, int direction); +- void (*unmap_single)(struct device *dev, dma_addr_t addr, +- size_t size, int direction); +- void (*sync_single_for_cpu)(struct device *hwdev, +- dma_addr_t dma_handle, size_t size, +- int direction); +- void (*sync_single_for_device)(struct device *hwdev, +- dma_addr_t dma_handle, size_t size, +- int direction); +- void (*sync_single_range_for_cpu)(struct device *hwdev, +- dma_addr_t dma_handle, unsigned long offset, +- size_t size, int direction); +- void (*sync_single_range_for_device)(struct device *hwdev, +- dma_addr_t dma_handle, unsigned long offset, +- size_t size, int direction); +- void (*sync_sg_for_cpu)(struct device *hwdev, +- struct scatterlist *sg, int nelems, +- int direction); +- void (*sync_sg_for_device)(struct device *hwdev, +- struct scatterlist *sg, int nelems, +- int direction); +- int (*map_sg)(struct device *hwdev, struct scatterlist *sg, +- int nents, int direction); +- void (*unmap_sg)(struct device *hwdev, +- struct scatterlist *sg, int nents, +- int direction); +- int (*dma_supported_op)(struct device *hwdev, u64 mask); +- int is_phys; +-}; +- +-extern struct dma_mapping_ops *dma_ops; ++extern struct dma_map_ops *dma_ops; + extern struct ia64_machine_vector ia64_mv; + extern void set_iommu_machvec(void); + +-#define dma_alloc_coherent(dev, size, handle, gfp) \ +- platform_dma_alloc_coherent(dev, size, 
handle, (gfp) | GFP_DMA) ++extern void machvec_dma_sync_single(struct device *, dma_addr_t, size_t, ++ enum dma_data_direction); ++extern void machvec_dma_sync_sg(struct device *, struct scatterlist *, int, ++ enum dma_data_direction); + +-/* coherent mem. is cheap */ +-static inline void * +-dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, +- gfp_t flag) ++static inline void *dma_alloc_coherent(struct device *dev, size_t size, ++ dma_addr_t *daddr, gfp_t gfp) + { +- return dma_alloc_coherent(dev, size, dma_handle, flag); ++ struct dma_map_ops *ops = platform_dma_get_ops(dev); ++ return ops->alloc_coherent(dev, size, daddr, gfp); + } +-#define dma_free_coherent platform_dma_free_coherent +-static inline void +-dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr, +- dma_addr_t dma_handle) ++ ++static inline void dma_free_coherent(struct device *dev, size_t size, ++ void *caddr, dma_addr_t daddr) + { +- dma_free_coherent(dev, size, cpu_addr, dma_handle); ++ struct dma_map_ops *ops = platform_dma_get_ops(dev); ++ ops->free_coherent(dev, size, caddr, daddr); + } +-#define dma_map_single_attrs platform_dma_map_single_attrs +-static inline dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, +- size_t size, int dir) ++ ++#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) ++#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) ++ ++static inline dma_addr_t dma_map_single_attrs(struct device *dev, ++ void *caddr, size_t size, ++ enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { +- return dma_map_single_attrs(dev, cpu_addr, size, dir, NULL); ++ struct dma_map_ops *ops = platform_dma_get_ops(dev); ++ return ops->map_page(dev, virt_to_page(caddr), ++ (unsigned long)caddr & ~PAGE_MASK, size, ++ dir, attrs); + } +-#define dma_map_sg_attrs platform_dma_map_sg_attrs +-static inline int dma_map_sg(struct device *dev, struct scatterlist *sgl, +- int nents, int dir) ++ ++static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t daddr, ++ size_t size, ++ enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { +- return dma_map_sg_attrs(dev, sgl, nents, dir, NULL); ++ struct dma_map_ops *ops = platform_dma_get_ops(dev); ++ ops->unmap_page(dev, daddr, size, dir, attrs); + } +-#define dma_unmap_single_attrs platform_dma_unmap_single_attrs +-static inline void dma_unmap_single(struct device *dev, dma_addr_t cpu_addr, +- size_t size, int dir) ++ ++#define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, NULL) ++#define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, NULL) ++ ++static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, ++ int nents, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { +- return dma_unmap_single_attrs(dev, cpu_addr, size, dir, NULL); ++ struct dma_map_ops *ops = platform_dma_get_ops(dev); ++ return ops->map_sg(dev, sgl, nents, dir, attrs); + } +-#define dma_unmap_sg_attrs platform_dma_unmap_sg_attrs +-static inline void dma_unmap_sg(struct device *dev, struct scatterlist *sgl, +- int nents, int dir) ++ ++static inline void dma_unmap_sg_attrs(struct device *dev, ++ struct scatterlist *sgl, int nents, ++ enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { +- return dma_unmap_sg_attrs(dev, sgl, nents, dir, NULL); ++ struct dma_map_ops *ops = platform_dma_get_ops(dev); ++ ops->unmap_sg(dev, sgl, nents, dir, attrs); + } +-#define dma_sync_single_for_cpu platform_dma_sync_single_for_cpu +-#define 
dma_sync_sg_for_cpu platform_dma_sync_sg_for_cpu +-#define dma_sync_single_for_device platform_dma_sync_single_for_device +-#define dma_sync_sg_for_device platform_dma_sync_sg_for_device +-#define dma_mapping_error platform_dma_mapping_error + +-#define dma_map_page(dev, pg, off, size, dir) \ +- dma_map_single(dev, page_address(pg) + (off), (size), (dir)) +-#define dma_unmap_page(dev, dma_addr, size, dir) \ +- dma_unmap_single(dev, dma_addr, size, dir) ++#define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, NULL) ++#define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, NULL) ++ ++static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t daddr, ++ size_t size, ++ enum dma_data_direction dir) ++{ ++ struct dma_map_ops *ops = platform_dma_get_ops(dev); ++ ops->sync_single_for_cpu(dev, daddr, size, dir); ++} ++ ++static inline void dma_sync_sg_for_cpu(struct device *dev, ++ struct scatterlist *sgl, ++ int nents, enum dma_data_direction dir) ++{ ++ struct dma_map_ops *ops = platform_dma_get_ops(dev); ++ ops->sync_sg_for_cpu(dev, sgl, nents, dir); ++} ++ ++static inline void dma_sync_single_for_device(struct device *dev, ++ dma_addr_t daddr, ++ size_t size, ++ enum dma_data_direction dir) ++{ ++ struct dma_map_ops *ops = platform_dma_get_ops(dev); ++ ops->sync_single_for_device(dev, daddr, size, dir); ++} ++ ++static inline void dma_sync_sg_for_device(struct device *dev, ++ struct scatterlist *sgl, ++ int nents, ++ enum dma_data_direction dir) ++{ ++ struct dma_map_ops *ops = platform_dma_get_ops(dev); ++ ops->sync_sg_for_device(dev, sgl, nents, dir); ++} ++ ++static inline int dma_mapping_error(struct device *dev, dma_addr_t daddr) ++{ ++ struct dma_map_ops *ops = platform_dma_get_ops(dev); ++ return ops->mapping_error(dev, daddr); ++} ++ ++static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, ++ size_t offset, size_t size, ++ enum dma_data_direction dir) ++{ ++ struct dma_map_ops *ops = platform_dma_get_ops(dev); ++ return ops->map_page(dev, page, offset, size, dir, NULL); ++} ++ ++static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, ++ size_t size, enum dma_data_direction dir) ++{ ++ dma_unmap_single(dev, addr, size, dir); ++} + + /* + * Rest of this file is part of the "Advanced DMA API". Use at your own risk. +@@ -115,7 +144,11 @@ static inline void dma_unmap_sg(struct d + #define dma_sync_single_range_for_device(dev, dma_handle, offset, size, dir) \ + dma_sync_single_for_device(dev, dma_handle, size, dir) + +-#define dma_supported platform_dma_supported ++static inline int dma_supported(struct device *dev, u64 mask) ++{ ++ struct dma_map_ops *ops = platform_dma_get_ops(dev); ++ return ops->dma_supported(dev, mask); ++} + + static inline int + dma_set_mask (struct device *dev, u64 mask) +@@ -141,11 +174,4 @@ dma_cache_sync (struct device *dev, void + + #define dma_is_consistent(d, h) (1) /* all we do is coherent memory... 
*/ + +-static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) +-{ +- return dma_ops; +-} +- +- +- + #endif /* _ASM_IA64_DMA_MAPPING_H */ +Index: linux-2.6-tip/arch/ia64/include/asm/fpu.h +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/include/asm/fpu.h ++++ linux-2.6-tip/arch/ia64/include/asm/fpu.h +@@ -6,8 +6,6 @@ + * David Mosberger-Tang + */ + +-#include +- + /* floating point status register: */ + #define FPSR_TRAP_VD (1 << 0) /* invalid op trap disabled */ + #define FPSR_TRAP_DD (1 << 1) /* denormal trap disabled */ +Index: linux-2.6-tip/arch/ia64/include/asm/ftrace.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/ia64/include/asm/ftrace.h +@@ -0,0 +1,28 @@ ++#ifndef _ASM_IA64_FTRACE_H ++#define _ASM_IA64_FTRACE_H ++ ++#ifdef CONFIG_FUNCTION_TRACER ++#define MCOUNT_INSN_SIZE 32 /* sizeof mcount call */ ++ ++#ifndef __ASSEMBLY__ ++extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0); ++#define mcount _mcount ++ ++#include ++/* In IA64, MCOUNT_ADDR is set in link time, so it's not a constant at compile time */ ++#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip) ++#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip) ++ ++static inline unsigned long ftrace_call_adjust(unsigned long addr) ++{ ++ /* second bundle, insn 2 */ ++ return addr - 0x12; ++} ++ ++struct dyn_arch_ftrace { ++}; ++#endif ++ ++#endif /* CONFIG_FUNCTION_TRACER */ ++ ++#endif /* _ASM_IA64_FTRACE_H */ +Index: linux-2.6-tip/arch/ia64/include/asm/gcc_intrin.h +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/include/asm/gcc_intrin.h ++++ linux-2.6-tip/arch/ia64/include/asm/gcc_intrin.h +@@ -6,6 +6,7 @@ + * Copyright (C) 2002,2003 Suresh Siddha + */ + ++#include + #include + + /* define this macro to get some asm stmts included in 'c' files */ +Index: linux-2.6-tip/arch/ia64/include/asm/hardirq.h +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/include/asm/hardirq.h ++++ linux-2.6-tip/arch/ia64/include/asm/hardirq.h +@@ -20,16 +20,6 @@ + + #define local_softirq_pending() (local_cpu_data->softirq_pending) + +-#define HARDIRQ_BITS 14 +- +-/* +- * The hardirq mask has to be large enough to have space for potentially all IRQ sources +- * in the system nesting on a single CPU: +- */ +-#if (1 << HARDIRQ_BITS) < NR_IRQS +-# error HARDIRQ_BITS is too low! 
+-#endif +- + extern void __iomem *ipi_base_addr; + + void ack_bad_irq(unsigned int irq); +Index: linux-2.6-tip/arch/ia64/include/asm/intrinsics.h +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/include/asm/intrinsics.h ++++ linux-2.6-tip/arch/ia64/include/asm/intrinsics.h +@@ -10,6 +10,7 @@ + + #ifndef __ASSEMBLY__ + ++#include + /* include compiler specific intrinsics */ + #include + #ifdef __INTEL_COMPILER +Index: linux-2.6-tip/arch/ia64/include/asm/kvm.h +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/include/asm/kvm.h ++++ linux-2.6-tip/arch/ia64/include/asm/kvm.h +@@ -21,8 +21,7 @@ + * + */ + +-#include +- ++#include + #include + + /* Select x86 specific features in */ +Index: linux-2.6-tip/arch/ia64/include/asm/machvec.h +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/include/asm/machvec.h ++++ linux-2.6-tip/arch/ia64/include/asm/machvec.h +@@ -11,7 +11,6 @@ + #define _ASM_IA64_MACHVEC_H + + #include +-#include + + /* forward declarations: */ + struct device; +@@ -45,24 +44,8 @@ typedef void ia64_mv_kernel_launch_event + + /* DMA-mapping interface: */ + typedef void ia64_mv_dma_init (void); +-typedef void *ia64_mv_dma_alloc_coherent (struct device *, size_t, dma_addr_t *, gfp_t); +-typedef void ia64_mv_dma_free_coherent (struct device *, size_t, void *, dma_addr_t); +-typedef dma_addr_t ia64_mv_dma_map_single (struct device *, void *, size_t, int); +-typedef void ia64_mv_dma_unmap_single (struct device *, dma_addr_t, size_t, int); +-typedef int ia64_mv_dma_map_sg (struct device *, struct scatterlist *, int, int); +-typedef void ia64_mv_dma_unmap_sg (struct device *, struct scatterlist *, int, int); +-typedef void ia64_mv_dma_sync_single_for_cpu (struct device *, dma_addr_t, size_t, int); +-typedef void ia64_mv_dma_sync_sg_for_cpu (struct device *, struct scatterlist *, int, int); +-typedef void ia64_mv_dma_sync_single_for_device (struct device *, dma_addr_t, size_t, int); +-typedef void ia64_mv_dma_sync_sg_for_device (struct device *, struct scatterlist *, int, int); +-typedef int ia64_mv_dma_mapping_error(struct device *, dma_addr_t dma_addr); +-typedef int ia64_mv_dma_supported (struct device *, u64); +- +-typedef dma_addr_t ia64_mv_dma_map_single_attrs (struct device *, void *, size_t, int, struct dma_attrs *); +-typedef void ia64_mv_dma_unmap_single_attrs (struct device *, dma_addr_t, size_t, int, struct dma_attrs *); +-typedef int ia64_mv_dma_map_sg_attrs (struct device *, struct scatterlist *, int, int, struct dma_attrs *); +-typedef void ia64_mv_dma_unmap_sg_attrs (struct device *, struct scatterlist *, int, int, struct dma_attrs *); + typedef u64 ia64_mv_dma_get_required_mask (struct device *); ++typedef struct dma_map_ops *ia64_mv_dma_get_ops(struct device *); + + /* + * WARNING: The legacy I/O space is _architected_. 
Platforms are +@@ -114,8 +97,6 @@ machvec_noop_bus (struct pci_bus *bus) + + extern void machvec_setup (char **); + extern void machvec_timer_interrupt (int, void *); +-extern void machvec_dma_sync_single (struct device *, dma_addr_t, size_t, int); +-extern void machvec_dma_sync_sg (struct device *, struct scatterlist *, int, int); + extern void machvec_tlb_migrate_finish (struct mm_struct *); + + # if defined (CONFIG_IA64_HP_SIM) +@@ -148,19 +129,8 @@ extern void machvec_tlb_migrate_finish ( + # define platform_global_tlb_purge ia64_mv.global_tlb_purge + # define platform_tlb_migrate_finish ia64_mv.tlb_migrate_finish + # define platform_dma_init ia64_mv.dma_init +-# define platform_dma_alloc_coherent ia64_mv.dma_alloc_coherent +-# define platform_dma_free_coherent ia64_mv.dma_free_coherent +-# define platform_dma_map_single_attrs ia64_mv.dma_map_single_attrs +-# define platform_dma_unmap_single_attrs ia64_mv.dma_unmap_single_attrs +-# define platform_dma_map_sg_attrs ia64_mv.dma_map_sg_attrs +-# define platform_dma_unmap_sg_attrs ia64_mv.dma_unmap_sg_attrs +-# define platform_dma_sync_single_for_cpu ia64_mv.dma_sync_single_for_cpu +-# define platform_dma_sync_sg_for_cpu ia64_mv.dma_sync_sg_for_cpu +-# define platform_dma_sync_single_for_device ia64_mv.dma_sync_single_for_device +-# define platform_dma_sync_sg_for_device ia64_mv.dma_sync_sg_for_device +-# define platform_dma_mapping_error ia64_mv.dma_mapping_error +-# define platform_dma_supported ia64_mv.dma_supported + # define platform_dma_get_required_mask ia64_mv.dma_get_required_mask ++# define platform_dma_get_ops ia64_mv.dma_get_ops + # define platform_irq_to_vector ia64_mv.irq_to_vector + # define platform_local_vector_to_irq ia64_mv.local_vector_to_irq + # define platform_pci_get_legacy_mem ia64_mv.pci_get_legacy_mem +@@ -203,19 +173,8 @@ struct ia64_machine_vector { + ia64_mv_global_tlb_purge_t *global_tlb_purge; + ia64_mv_tlb_migrate_finish_t *tlb_migrate_finish; + ia64_mv_dma_init *dma_init; +- ia64_mv_dma_alloc_coherent *dma_alloc_coherent; +- ia64_mv_dma_free_coherent *dma_free_coherent; +- ia64_mv_dma_map_single_attrs *dma_map_single_attrs; +- ia64_mv_dma_unmap_single_attrs *dma_unmap_single_attrs; +- ia64_mv_dma_map_sg_attrs *dma_map_sg_attrs; +- ia64_mv_dma_unmap_sg_attrs *dma_unmap_sg_attrs; +- ia64_mv_dma_sync_single_for_cpu *dma_sync_single_for_cpu; +- ia64_mv_dma_sync_sg_for_cpu *dma_sync_sg_for_cpu; +- ia64_mv_dma_sync_single_for_device *dma_sync_single_for_device; +- ia64_mv_dma_sync_sg_for_device *dma_sync_sg_for_device; +- ia64_mv_dma_mapping_error *dma_mapping_error; +- ia64_mv_dma_supported *dma_supported; + ia64_mv_dma_get_required_mask *dma_get_required_mask; ++ ia64_mv_dma_get_ops *dma_get_ops; + ia64_mv_irq_to_vector *irq_to_vector; + ia64_mv_local_vector_to_irq *local_vector_to_irq; + ia64_mv_pci_get_legacy_mem_t *pci_get_legacy_mem; +@@ -254,19 +213,8 @@ struct ia64_machine_vector { + platform_global_tlb_purge, \ + platform_tlb_migrate_finish, \ + platform_dma_init, \ +- platform_dma_alloc_coherent, \ +- platform_dma_free_coherent, \ +- platform_dma_map_single_attrs, \ +- platform_dma_unmap_single_attrs, \ +- platform_dma_map_sg_attrs, \ +- platform_dma_unmap_sg_attrs, \ +- platform_dma_sync_single_for_cpu, \ +- platform_dma_sync_sg_for_cpu, \ +- platform_dma_sync_single_for_device, \ +- platform_dma_sync_sg_for_device, \ +- platform_dma_mapping_error, \ +- platform_dma_supported, \ + platform_dma_get_required_mask, \ ++ platform_dma_get_ops, \ + platform_irq_to_vector, \ + platform_local_vector_to_irq, \ 
+ platform_pci_get_legacy_mem, \ +@@ -302,6 +250,9 @@ extern void machvec_init_from_cmdline(co + # error Unknown configuration. Update arch/ia64/include/asm/machvec.h. + # endif /* CONFIG_IA64_GENERIC */ + ++extern void swiotlb_dma_init(void); ++extern struct dma_map_ops *dma_get_ops(struct device *); ++ + /* + * Define default versions so we can extend machvec for new platforms without having + * to update the machvec files for all existing platforms. +@@ -332,43 +283,10 @@ extern void machvec_init_from_cmdline(co + # define platform_kernel_launch_event machvec_noop + #endif + #ifndef platform_dma_init +-# define platform_dma_init swiotlb_init +-#endif +-#ifndef platform_dma_alloc_coherent +-# define platform_dma_alloc_coherent swiotlb_alloc_coherent +-#endif +-#ifndef platform_dma_free_coherent +-# define platform_dma_free_coherent swiotlb_free_coherent +-#endif +-#ifndef platform_dma_map_single_attrs +-# define platform_dma_map_single_attrs swiotlb_map_single_attrs +-#endif +-#ifndef platform_dma_unmap_single_attrs +-# define platform_dma_unmap_single_attrs swiotlb_unmap_single_attrs +-#endif +-#ifndef platform_dma_map_sg_attrs +-# define platform_dma_map_sg_attrs swiotlb_map_sg_attrs +-#endif +-#ifndef platform_dma_unmap_sg_attrs +-# define platform_dma_unmap_sg_attrs swiotlb_unmap_sg_attrs +-#endif +-#ifndef platform_dma_sync_single_for_cpu +-# define platform_dma_sync_single_for_cpu swiotlb_sync_single_for_cpu +-#endif +-#ifndef platform_dma_sync_sg_for_cpu +-# define platform_dma_sync_sg_for_cpu swiotlb_sync_sg_for_cpu +-#endif +-#ifndef platform_dma_sync_single_for_device +-# define platform_dma_sync_single_for_device swiotlb_sync_single_for_device +-#endif +-#ifndef platform_dma_sync_sg_for_device +-# define platform_dma_sync_sg_for_device swiotlb_sync_sg_for_device +-#endif +-#ifndef platform_dma_mapping_error +-# define platform_dma_mapping_error swiotlb_dma_mapping_error ++# define platform_dma_init swiotlb_dma_init + #endif +-#ifndef platform_dma_supported +-# define platform_dma_supported swiotlb_dma_supported ++#ifndef platform_dma_get_ops ++# define platform_dma_get_ops dma_get_ops + #endif + #ifndef platform_dma_get_required_mask + # define platform_dma_get_required_mask ia64_dma_get_required_mask +Index: linux-2.6-tip/arch/ia64/include/asm/machvec_dig_vtd.h +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/include/asm/machvec_dig_vtd.h ++++ linux-2.6-tip/arch/ia64/include/asm/machvec_dig_vtd.h +@@ -2,14 +2,6 @@ + #define _ASM_IA64_MACHVEC_DIG_VTD_h + + extern ia64_mv_setup_t dig_setup; +-extern ia64_mv_dma_alloc_coherent vtd_alloc_coherent; +-extern ia64_mv_dma_free_coherent vtd_free_coherent; +-extern ia64_mv_dma_map_single_attrs vtd_map_single_attrs; +-extern ia64_mv_dma_unmap_single_attrs vtd_unmap_single_attrs; +-extern ia64_mv_dma_map_sg_attrs vtd_map_sg_attrs; +-extern ia64_mv_dma_unmap_sg_attrs vtd_unmap_sg_attrs; +-extern ia64_mv_dma_supported iommu_dma_supported; +-extern ia64_mv_dma_mapping_error vtd_dma_mapping_error; + extern ia64_mv_dma_init pci_iommu_alloc; + + /* +@@ -22,17 +14,5 @@ extern ia64_mv_dma_init pci_iommu_allo + #define platform_name "dig_vtd" + #define platform_setup dig_setup + #define platform_dma_init pci_iommu_alloc +-#define platform_dma_alloc_coherent vtd_alloc_coherent +-#define platform_dma_free_coherent vtd_free_coherent +-#define platform_dma_map_single_attrs vtd_map_single_attrs +-#define platform_dma_unmap_single_attrs vtd_unmap_single_attrs +-#define platform_dma_map_sg_attrs 
vtd_map_sg_attrs +-#define platform_dma_unmap_sg_attrs vtd_unmap_sg_attrs +-#define platform_dma_sync_single_for_cpu machvec_dma_sync_single +-#define platform_dma_sync_sg_for_cpu machvec_dma_sync_sg +-#define platform_dma_sync_single_for_device machvec_dma_sync_single +-#define platform_dma_sync_sg_for_device machvec_dma_sync_sg +-#define platform_dma_supported iommu_dma_supported +-#define platform_dma_mapping_error vtd_dma_mapping_error + + #endif /* _ASM_IA64_MACHVEC_DIG_VTD_h */ +Index: linux-2.6-tip/arch/ia64/include/asm/machvec_hpzx1.h +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/include/asm/machvec_hpzx1.h ++++ linux-2.6-tip/arch/ia64/include/asm/machvec_hpzx1.h +@@ -2,14 +2,7 @@ + #define _ASM_IA64_MACHVEC_HPZX1_h + + extern ia64_mv_setup_t dig_setup; +-extern ia64_mv_dma_alloc_coherent sba_alloc_coherent; +-extern ia64_mv_dma_free_coherent sba_free_coherent; +-extern ia64_mv_dma_map_single_attrs sba_map_single_attrs; +-extern ia64_mv_dma_unmap_single_attrs sba_unmap_single_attrs; +-extern ia64_mv_dma_map_sg_attrs sba_map_sg_attrs; +-extern ia64_mv_dma_unmap_sg_attrs sba_unmap_sg_attrs; +-extern ia64_mv_dma_supported sba_dma_supported; +-extern ia64_mv_dma_mapping_error sba_dma_mapping_error; ++extern ia64_mv_dma_init sba_dma_init; + + /* + * This stuff has dual use! +@@ -20,18 +13,6 @@ extern ia64_mv_dma_mapping_error sba_dma + */ + #define platform_name "hpzx1" + #define platform_setup dig_setup +-#define platform_dma_init machvec_noop +-#define platform_dma_alloc_coherent sba_alloc_coherent +-#define platform_dma_free_coherent sba_free_coherent +-#define platform_dma_map_single_attrs sba_map_single_attrs +-#define platform_dma_unmap_single_attrs sba_unmap_single_attrs +-#define platform_dma_map_sg_attrs sba_map_sg_attrs +-#define platform_dma_unmap_sg_attrs sba_unmap_sg_attrs +-#define platform_dma_sync_single_for_cpu machvec_dma_sync_single +-#define platform_dma_sync_sg_for_cpu machvec_dma_sync_sg +-#define platform_dma_sync_single_for_device machvec_dma_sync_single +-#define platform_dma_sync_sg_for_device machvec_dma_sync_sg +-#define platform_dma_supported sba_dma_supported +-#define platform_dma_mapping_error sba_dma_mapping_error ++#define platform_dma_init sba_dma_init + + #endif /* _ASM_IA64_MACHVEC_HPZX1_h */ +Index: linux-2.6-tip/arch/ia64/include/asm/machvec_hpzx1_swiotlb.h +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/include/asm/machvec_hpzx1_swiotlb.h ++++ linux-2.6-tip/arch/ia64/include/asm/machvec_hpzx1_swiotlb.h +@@ -2,18 +2,7 @@ + #define _ASM_IA64_MACHVEC_HPZX1_SWIOTLB_h + + extern ia64_mv_setup_t dig_setup; +-extern ia64_mv_dma_alloc_coherent hwsw_alloc_coherent; +-extern ia64_mv_dma_free_coherent hwsw_free_coherent; +-extern ia64_mv_dma_map_single_attrs hwsw_map_single_attrs; +-extern ia64_mv_dma_unmap_single_attrs hwsw_unmap_single_attrs; +-extern ia64_mv_dma_map_sg_attrs hwsw_map_sg_attrs; +-extern ia64_mv_dma_unmap_sg_attrs hwsw_unmap_sg_attrs; +-extern ia64_mv_dma_supported hwsw_dma_supported; +-extern ia64_mv_dma_mapping_error hwsw_dma_mapping_error; +-extern ia64_mv_dma_sync_single_for_cpu hwsw_sync_single_for_cpu; +-extern ia64_mv_dma_sync_sg_for_cpu hwsw_sync_sg_for_cpu; +-extern ia64_mv_dma_sync_single_for_device hwsw_sync_single_for_device; +-extern ia64_mv_dma_sync_sg_for_device hwsw_sync_sg_for_device; ++extern ia64_mv_dma_get_ops hwsw_dma_get_ops; + + /* + * This stuff has dual use! 
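Every machvec touched by these hunks is converted the same way: the long list of per-operation platform_dma_* macros collapses into one dma_map_ops table per platform, reached either through an init hook that installs it in the global dma_ops (sba_dma_init, sn_dma_init, swiotlb_dma_init, pci_iommu_alloc) or through a platform_dma_get_ops hook that returns it directly (hwsw_dma_get_ops). A minimal sketch of that shape follows; it is an illustration only, and the example_* names are placeholders that do not appear anywhere in the patch.

/*
 * Illustrative sketch of the converted-platform pattern -- not part of the
 * patch. All example_* identifiers are hypothetical; only the dma_map_ops
 * fields actually used by the patch (alloc_coherent, map_page, map_sg,
 * sync_*, dma_supported, mapping_error, ...) are referenced.
 */
#include <linux/dma-mapping.h>	/* assumed home of struct dma_map_ops / dma_ops in this tree */

static void *example_alloc_coherent(struct device *dev, size_t size,
				    dma_addr_t *dma_handle, gfp_t gfp)
{
	/* platform-specific coherent allocation would go here */
	return NULL;
}

static struct dma_map_ops example_dma_ops = {
	.alloc_coherent	= example_alloc_coherent,
	/*
	 * .free_coherent, .map_page, .unmap_page, .map_sg, .unmap_sg,
	 * .sync_single/sg_for_cpu/device, .dma_supported, .mapping_error
	 * would be filled in the same way as sba_dma_ops or sn_dma_ops.
	 */
};

/* wired up as platform_dma_init in the platform's machvec header */
void example_dma_init(void)
{
	dma_ops = &example_dma_ops;	/* dma_get_ops() then returns this table */
}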
+@@ -23,20 +12,8 @@ extern ia64_mv_dma_sync_sg_for_device h + * the macros are used directly. + */ + #define platform_name "hpzx1_swiotlb" +- + #define platform_setup dig_setup + #define platform_dma_init machvec_noop +-#define platform_dma_alloc_coherent hwsw_alloc_coherent +-#define platform_dma_free_coherent hwsw_free_coherent +-#define platform_dma_map_single_attrs hwsw_map_single_attrs +-#define platform_dma_unmap_single_attrs hwsw_unmap_single_attrs +-#define platform_dma_map_sg_attrs hwsw_map_sg_attrs +-#define platform_dma_unmap_sg_attrs hwsw_unmap_sg_attrs +-#define platform_dma_supported hwsw_dma_supported +-#define platform_dma_mapping_error hwsw_dma_mapping_error +-#define platform_dma_sync_single_for_cpu hwsw_sync_single_for_cpu +-#define platform_dma_sync_sg_for_cpu hwsw_sync_sg_for_cpu +-#define platform_dma_sync_single_for_device hwsw_sync_single_for_device +-#define platform_dma_sync_sg_for_device hwsw_sync_sg_for_device ++#define platform_dma_get_ops hwsw_dma_get_ops + + #endif /* _ASM_IA64_MACHVEC_HPZX1_SWIOTLB_h */ +Index: linux-2.6-tip/arch/ia64/include/asm/machvec_sn2.h +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/include/asm/machvec_sn2.h ++++ linux-2.6-tip/arch/ia64/include/asm/machvec_sn2.h +@@ -55,19 +55,8 @@ extern ia64_mv_readb_t __sn_readb_relaxe + extern ia64_mv_readw_t __sn_readw_relaxed; + extern ia64_mv_readl_t __sn_readl_relaxed; + extern ia64_mv_readq_t __sn_readq_relaxed; +-extern ia64_mv_dma_alloc_coherent sn_dma_alloc_coherent; +-extern ia64_mv_dma_free_coherent sn_dma_free_coherent; +-extern ia64_mv_dma_map_single_attrs sn_dma_map_single_attrs; +-extern ia64_mv_dma_unmap_single_attrs sn_dma_unmap_single_attrs; +-extern ia64_mv_dma_map_sg_attrs sn_dma_map_sg_attrs; +-extern ia64_mv_dma_unmap_sg_attrs sn_dma_unmap_sg_attrs; +-extern ia64_mv_dma_sync_single_for_cpu sn_dma_sync_single_for_cpu; +-extern ia64_mv_dma_sync_sg_for_cpu sn_dma_sync_sg_for_cpu; +-extern ia64_mv_dma_sync_single_for_device sn_dma_sync_single_for_device; +-extern ia64_mv_dma_sync_sg_for_device sn_dma_sync_sg_for_device; +-extern ia64_mv_dma_mapping_error sn_dma_mapping_error; +-extern ia64_mv_dma_supported sn_dma_supported; + extern ia64_mv_dma_get_required_mask sn_dma_get_required_mask; ++extern ia64_mv_dma_init sn_dma_init; + extern ia64_mv_migrate_t sn_migrate; + extern ia64_mv_kernel_launch_event_t sn_kernel_launch_event; + extern ia64_mv_setup_msi_irq_t sn_setup_msi_irq; +@@ -111,20 +100,8 @@ extern ia64_mv_pci_fixup_bus_t sn_pci_f + #define platform_pci_get_legacy_mem sn_pci_get_legacy_mem + #define platform_pci_legacy_read sn_pci_legacy_read + #define platform_pci_legacy_write sn_pci_legacy_write +-#define platform_dma_init machvec_noop +-#define platform_dma_alloc_coherent sn_dma_alloc_coherent +-#define platform_dma_free_coherent sn_dma_free_coherent +-#define platform_dma_map_single_attrs sn_dma_map_single_attrs +-#define platform_dma_unmap_single_attrs sn_dma_unmap_single_attrs +-#define platform_dma_map_sg_attrs sn_dma_map_sg_attrs +-#define platform_dma_unmap_sg_attrs sn_dma_unmap_sg_attrs +-#define platform_dma_sync_single_for_cpu sn_dma_sync_single_for_cpu +-#define platform_dma_sync_sg_for_cpu sn_dma_sync_sg_for_cpu +-#define platform_dma_sync_single_for_device sn_dma_sync_single_for_device +-#define platform_dma_sync_sg_for_device sn_dma_sync_sg_for_device +-#define platform_dma_mapping_error sn_dma_mapping_error +-#define platform_dma_supported sn_dma_supported + #define platform_dma_get_required_mask 
sn_dma_get_required_mask ++#define platform_dma_init sn_dma_init + #define platform_migrate sn_migrate + #define platform_kernel_launch_event sn_kernel_launch_event + #ifdef CONFIG_PCI_MSI +Index: linux-2.6-tip/arch/ia64/include/asm/percpu.h +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/include/asm/percpu.h ++++ linux-2.6-tip/arch/ia64/include/asm/percpu.h +@@ -27,12 +27,12 @@ extern void *per_cpu_init(void); + + #else /* ! SMP */ + +-#define PER_CPU_ATTRIBUTES __attribute__((__section__(".data.percpu"))) +- + #define per_cpu_init() (__phys_per_cpu_start) + + #endif /* SMP */ + ++#define PER_CPU_BASE_SECTION ".data.percpu" ++ + /* + * Be extremely careful when taking the address of this variable! Due to virtual + * remapping, it is different from the canonical address returned by __get_cpu_var(var)! +Index: linux-2.6-tip/arch/ia64/include/asm/swab.h +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/include/asm/swab.h ++++ linux-2.6-tip/arch/ia64/include/asm/swab.h +@@ -6,7 +6,7 @@ + * David Mosberger-Tang , Hewlett-Packard Co. + */ + +-#include ++#include + #include + #include + +Index: linux-2.6-tip/arch/ia64/include/asm/topology.h +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/include/asm/topology.h ++++ linux-2.6-tip/arch/ia64/include/asm/topology.h +@@ -84,7 +84,7 @@ void build_cpu_to_node_map(void); + .child = NULL, \ + .groups = NULL, \ + .min_interval = 8, \ +- .max_interval = 8*(min(num_online_cpus(), 32)), \ ++ .max_interval = 8*(min(num_online_cpus(), 32U)), \ + .busy_factor = 64, \ + .imbalance_pct = 125, \ + .cache_nice_tries = 2, \ +Index: linux-2.6-tip/arch/ia64/include/asm/uv/uv.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/ia64/include/asm/uv/uv.h +@@ -0,0 +1,13 @@ ++#ifndef _ASM_IA64_UV_UV_H ++#define _ASM_IA64_UV_UV_H ++ ++#include ++#include ++ ++static inline int is_uv_system(void) ++{ ++ /* temporary support for running on hardware simulator */ ++ return IS_MEDUSA() || ia64_platform_is("uv"); ++} ++ ++#endif /* _ASM_IA64_UV_UV_H */ +Index: linux-2.6-tip/arch/ia64/kernel/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/kernel/Makefile ++++ linux-2.6-tip/arch/ia64/kernel/Makefile +@@ -2,12 +2,16 @@ + # Makefile for the linux kernel. 
+ # + ++ifdef CONFIG_DYNAMIC_FTRACE ++CFLAGS_REMOVE_ftrace.o = -pg ++endif ++ + extra-y := head.o init_task.o vmlinux.lds + + obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ + irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \ + salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ +- unwind.o mca.o mca_asm.o topology.o ++ unwind.o mca.o mca_asm.o topology.o dma-mapping.o + + obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o + obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o +@@ -28,6 +32,7 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o + obj-$(CONFIG_CPU_FREQ) += cpufreq/ + obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o + obj-$(CONFIG_KPROBES) += kprobes.o jprobes.o ++obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o + obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o + obj-$(CONFIG_CRASH_DUMP) += crash_dump.o + obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o +@@ -43,9 +48,7 @@ ifneq ($(CONFIG_IA64_ESI),) + obj-y += esi_stub.o # must be in kernel proper + endif + obj-$(CONFIG_DMAR) += pci-dma.o +-ifeq ($(CONFIG_DMAR), y) + obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o +-endif + + # The gate DSO image is built using a special linker script. + targets += gate.so gate-syms.o +Index: linux-2.6-tip/arch/ia64/kernel/acpi.c +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/kernel/acpi.c ++++ linux-2.6-tip/arch/ia64/kernel/acpi.c +@@ -199,6 +199,10 @@ char *__init __acpi_map_table(unsigned l + return __va(phys_addr); + } + ++void __init __acpi_unmap_table(char *map, unsigned long size) ++{ ++} ++ + /* -------------------------------------------------------------------------- + Boot-time Table Parsing + -------------------------------------------------------------------------- */ +Index: linux-2.6-tip/arch/ia64/kernel/dma-mapping.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/ia64/kernel/dma-mapping.c +@@ -0,0 +1,13 @@ ++#include ++ ++/* Set this to 1 if there is a HW IOMMU in the system */ ++int iommu_detected __read_mostly; ++ ++struct dma_map_ops *dma_ops; ++EXPORT_SYMBOL(dma_ops); ++ ++struct dma_map_ops *dma_get_ops(struct device *dev) ++{ ++ return dma_ops; ++} ++EXPORT_SYMBOL(dma_get_ops); +Index: linux-2.6-tip/arch/ia64/kernel/entry.S +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/kernel/entry.S ++++ linux-2.6-tip/arch/ia64/kernel/entry.S +@@ -47,6 +47,7 @@ + #include + #include + #include ++#include + + #include "minstate.h" + +@@ -1404,6 +1405,105 @@ GLOBAL_ENTRY(unw_init_running) + br.ret.sptk.many rp + END(unw_init_running) + ++#ifdef CONFIG_FUNCTION_TRACER ++#ifdef CONFIG_DYNAMIC_FTRACE ++GLOBAL_ENTRY(_mcount) ++ br ftrace_stub ++END(_mcount) ++ ++.here: ++ br.ret.sptk.many b0 ++ ++GLOBAL_ENTRY(ftrace_caller) ++ alloc out0 = ar.pfs, 8, 0, 4, 0 ++ mov out3 = r0 ++ ;; ++ mov out2 = b0 ++ add r3 = 0x20, r3 ++ mov out1 = r1; ++ br.call.sptk.many b0 = ftrace_patch_gp ++ //this might be called from module, so we must patch gp ++ftrace_patch_gp: ++ movl gp=__gp ++ mov b0 = r3 ++ ;; ++.global ftrace_call; ++ftrace_call: ++{ ++ .mlx ++ nop.m 0x0 ++ movl r3 = .here;; ++} ++ alloc loc0 = ar.pfs, 4, 4, 2, 0 ++ ;; ++ mov loc1 = b0 ++ mov out0 = b0 ++ mov loc2 = r8 ++ mov loc3 = r15 ++ ;; ++ adds out0 = -MCOUNT_INSN_SIZE, out0 ++ mov out1 = in2 ++ mov b6 = r3 ++ ++ br.call.sptk.many b0 = b6 ++ ;; ++ mov ar.pfs = loc0 ++ mov b0 = loc1 ++ mov r8 = loc2 ++ mov r15 
= loc3 ++ br ftrace_stub ++ ;; ++END(ftrace_caller) ++ ++#else ++GLOBAL_ENTRY(_mcount) ++ movl r2 = ftrace_stub ++ movl r3 = ftrace_trace_function;; ++ ld8 r3 = [r3];; ++ ld8 r3 = [r3];; ++ cmp.eq p7,p0 = r2, r3 ++(p7) br.sptk.many ftrace_stub ++ ;; ++ ++ alloc loc0 = ar.pfs, 4, 4, 2, 0 ++ ;; ++ mov loc1 = b0 ++ mov out0 = b0 ++ mov loc2 = r8 ++ mov loc3 = r15 ++ ;; ++ adds out0 = -MCOUNT_INSN_SIZE, out0 ++ mov out1 = in2 ++ mov b6 = r3 ++ ++ br.call.sptk.many b0 = b6 ++ ;; ++ mov ar.pfs = loc0 ++ mov b0 = loc1 ++ mov r8 = loc2 ++ mov r15 = loc3 ++ br ftrace_stub ++ ;; ++END(_mcount) ++#endif ++ ++GLOBAL_ENTRY(ftrace_stub) ++ mov r3 = b0 ++ movl r2 = _mcount_ret_helper ++ ;; ++ mov b6 = r2 ++ mov b7 = r3 ++ br.ret.sptk.many b6 ++ ++_mcount_ret_helper: ++ mov b0 = r42 ++ mov r1 = r41 ++ mov ar.pfs = r40 ++ br b7 ++END(ftrace_stub) ++ ++#endif /* CONFIG_FUNCTION_TRACER */ ++ + .rodata + .align 8 + .globl sys_call_table +Index: linux-2.6-tip/arch/ia64/kernel/ftrace.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/ia64/kernel/ftrace.c +@@ -0,0 +1,206 @@ ++/* ++ * Dynamic function tracing support. ++ * ++ * Copyright (C) 2008 Shaohua Li ++ * ++ * For licencing details, see COPYING. ++ * ++ * Defines low-level handling of mcount calls when the kernel ++ * is compiled with the -pg flag. When using dynamic ftrace, the ++ * mcount call-sites get patched lazily with NOP till they are ++ * enabled. All code mutation routines here take effect atomically. ++ */ ++ ++#include ++#include ++ ++#include ++#include ++ ++/* In IA64, each function will be added below two bundles with -pg option */ ++static unsigned char __attribute__((aligned(8))) ++ftrace_orig_code[MCOUNT_INSN_SIZE] = { ++ 0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */ ++ 0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */ ++ 0x05, 0x00, 0xc4, 0x00, /* mov r42=b0 */ ++ 0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */ ++ 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */ ++ 0x08, 0x00, 0x00, 0x50 /* br.call.sptk.many b0 = _mcount;; */ ++}; ++ ++struct ftrace_orig_insn { ++ u64 dummy1, dummy2, dummy3; ++ u64 dummy4:64-41+13; ++ u64 imm20:20; ++ u64 dummy5:3; ++ u64 sign:1; ++ u64 dummy6:4; ++}; ++ ++/* mcount stub will be converted below for nop */ ++static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = { ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */ ++ 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */ ++ 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */ ++ 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */ ++ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */ ++ 0x00, 0x00, 0x04, 0x00 ++}; ++ ++static unsigned char *ftrace_nop_replace(void) ++{ ++ return ftrace_nop_code; ++} ++ ++/* ++ * mcount stub will be converted below for call ++ * Note: Just the last instruction is changed against nop ++ * */ ++static unsigned char __attribute__((aligned(8))) ++ftrace_call_code[MCOUNT_INSN_SIZE] = { ++ 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */ ++ 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */ ++ 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */ ++ 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */ ++ 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/ ++ 0xf8, 0xff, 0xff, 0xc8 ++}; ++ ++struct ftrace_call_insn { ++ u64 dummy1, dummy2; ++ u64 dummy3:48; ++ u64 imm39_l:16; ++ u64 imm39_h:23; ++ u64 dummy4:13; ++ u64 imm20:20; ++ u64 dummy5:3; ++ u64 i:1; ++ u64 dummy6:4; ++}; ++ ++static unsigned char 
*ftrace_call_replace(unsigned long ip, unsigned long addr) ++{ ++ struct ftrace_call_insn *code = (void *)ftrace_call_code; ++ unsigned long offset = addr - (ip + 0x10); ++ ++ code->imm39_l = offset >> 24; ++ code->imm39_h = offset >> 40; ++ code->imm20 = offset >> 4; ++ code->i = offset >> 63; ++ return ftrace_call_code; ++} ++ ++static int ++ftrace_modify_code(unsigned long ip, unsigned char *old_code, ++ unsigned char *new_code, int do_check) ++{ ++ unsigned char replaced[MCOUNT_INSN_SIZE]; ++ ++ /* ++ * Note: Due to modules and __init, code can ++ * disappear and change, we need to protect against faulting ++ * as well as code changing. We do this by using the ++ * probe_kernel_* functions. ++ * ++ * No real locking needed, this code is run through ++ * kstop_machine, or before SMP starts. ++ */ ++ ++ if (!do_check) ++ goto skip_check; ++ ++ /* read the text we want to modify */ ++ if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) ++ return -EFAULT; ++ ++ /* Make sure it is what we expect it to be */ ++ if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) ++ return -EINVAL; ++ ++skip_check: ++ /* replace the text with the new text */ ++ if (probe_kernel_write(((void *)ip), new_code, MCOUNT_INSN_SIZE)) ++ return -EPERM; ++ flush_icache_range(ip, ip + MCOUNT_INSN_SIZE); ++ ++ return 0; ++} ++ ++static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr) ++{ ++ unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE]; ++ unsigned long ip = rec->ip; ++ ++ if (probe_kernel_read(replaced, (void *)ip, MCOUNT_INSN_SIZE)) ++ return -EFAULT; ++ if (rec->flags & FTRACE_FL_CONVERTED) { ++ struct ftrace_call_insn *call_insn, *tmp_call; ++ ++ call_insn = (void *)ftrace_call_code; ++ tmp_call = (void *)replaced; ++ call_insn->imm39_l = tmp_call->imm39_l; ++ call_insn->imm39_h = tmp_call->imm39_h; ++ call_insn->imm20 = tmp_call->imm20; ++ call_insn->i = tmp_call->i; ++ if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0) ++ return -EINVAL; ++ return 0; ++ } else { ++ struct ftrace_orig_insn *call_insn, *tmp_call; ++ ++ call_insn = (void *)ftrace_orig_code; ++ tmp_call = (void *)replaced; ++ call_insn->sign = tmp_call->sign; ++ call_insn->imm20 = tmp_call->imm20; ++ if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0) ++ return -EINVAL; ++ return 0; ++ } ++} ++ ++int ftrace_make_nop(struct module *mod, ++ struct dyn_ftrace *rec, unsigned long addr) ++{ ++ int ret; ++ char *new; ++ ++ ret = ftrace_make_nop_check(rec, addr); ++ if (ret) ++ return ret; ++ new = ftrace_nop_replace(); ++ return ftrace_modify_code(rec->ip, NULL, new, 0); ++} ++ ++int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) ++{ ++ unsigned long ip = rec->ip; ++ unsigned char *old, *new; ++ ++ old= ftrace_nop_replace(); ++ new = ftrace_call_replace(ip, addr); ++ return ftrace_modify_code(ip, old, new, 1); ++} ++ ++/* in IA64, _mcount can't directly call ftrace_stub. 
Only jump is ok */ ++int ftrace_update_ftrace_func(ftrace_func_t func) ++{ ++ unsigned long ip; ++ unsigned long addr = ((struct fnptr *)ftrace_call)->ip; ++ ++ if (func == ftrace_stub) ++ return 0; ++ ip = ((struct fnptr *)func)->ip; ++ ++ ia64_patch_imm64(addr + 2, ip); ++ ++ flush_icache_range(addr, addr + 16); ++ return 0; ++} ++ ++/* run from kstop_machine */ ++int __init ftrace_dyn_arch_init(void *data) ++{ ++ *(unsigned long *)data = 0; ++ ++ return 0; ++} +Index: linux-2.6-tip/arch/ia64/kernel/ia64_ksyms.c +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/kernel/ia64_ksyms.c ++++ linux-2.6-tip/arch/ia64/kernel/ia64_ksyms.c +@@ -112,3 +112,9 @@ EXPORT_SYMBOL_GPL(esi_call_phys); + #endif + extern char ia64_ivt[]; + EXPORT_SYMBOL(ia64_ivt); ++ ++#include ++#ifdef CONFIG_FUNCTION_TRACER ++/* mcount is defined in assembly */ ++EXPORT_SYMBOL(_mcount); ++#endif +Index: linux-2.6-tip/arch/ia64/kernel/iosapic.c +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/kernel/iosapic.c ++++ linux-2.6-tip/arch/ia64/kernel/iosapic.c +@@ -880,7 +880,7 @@ iosapic_unregister_intr (unsigned int gs + if (iosapic_intr_info[irq].count == 0) { + #ifdef CONFIG_SMP + /* Clear affinity */ +- cpus_setall(idesc->affinity); ++ cpumask_setall(idesc->affinity); + #endif + /* Clear the interrupt information */ + iosapic_intr_info[irq].dest = 0; +Index: linux-2.6-tip/arch/ia64/kernel/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/kernel/irq.c ++++ linux-2.6-tip/arch/ia64/kernel/irq.c +@@ -80,7 +80,7 @@ int show_interrupts(struct seq_file *p, + seq_printf(p, "%10u ", kstat_irqs(i)); + #else + for_each_online_cpu(j) { +- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); ++ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + } + #endif + seq_printf(p, " %14s", irq_desc[i].chip->name); +@@ -103,7 +103,7 @@ static char irq_redir [NR_IRQS]; // = { + void set_irq_affinity_info (unsigned int irq, int hwid, int redir) + { + if (irq < NR_IRQS) { +- cpumask_copy(&irq_desc[irq].affinity, ++ cpumask_copy(irq_desc[irq].affinity, + cpumask_of(cpu_logical_id(hwid))); + irq_redir[irq] = (char) (redir & 0xff); + } +@@ -148,7 +148,7 @@ static void migrate_irqs(void) + if (desc->status == IRQ_PER_CPU) + continue; + +- if (cpumask_any_and(&irq_desc[irq].affinity, cpu_online_mask) ++ if (cpumask_any_and(irq_desc[irq].affinity, cpu_online_mask) + >= nr_cpu_ids) { + /* + * Save it for phase 2 processing +Index: linux-2.6-tip/arch/ia64/kernel/irq_ia64.c +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/kernel/irq_ia64.c ++++ linux-2.6-tip/arch/ia64/kernel/irq_ia64.c +@@ -493,11 +493,13 @@ ia64_handle_irq (ia64_vector vector, str + saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); + ia64_srlz_d(); + while (vector != IA64_SPURIOUS_INT_VECTOR) { ++ struct irq_desc *desc = irq_to_desc(vector); ++ + if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { + smp_local_flush_tlb(); +- kstat_this_cpu.irqs[vector]++; ++ kstat_incr_irqs_this_cpu(vector, desc); + } else if (unlikely(IS_RESCHEDULE(vector))) +- kstat_this_cpu.irqs[vector]++; ++ kstat_incr_irqs_this_cpu(vector, desc); + else { + int irq = local_vector_to_irq(vector); + +@@ -551,11 +553,13 @@ void ia64_process_pending_intr(void) + * Perform normal interrupt style processing + */ + while (vector != IA64_SPURIOUS_INT_VECTOR) { ++ struct irq_desc *desc = irq_to_desc(vector); ++ + if 
(unlikely(IS_LOCAL_TLB_FLUSH(vector))) { + smp_local_flush_tlb(); +- kstat_this_cpu.irqs[vector]++; ++ kstat_incr_irqs_this_cpu(vector, desc); + } else if (unlikely(IS_RESCHEDULE(vector))) +- kstat_this_cpu.irqs[vector]++; ++ kstat_incr_irqs_this_cpu(vector, desc); + else { + struct pt_regs *old_regs = set_irq_regs(NULL); + int irq = local_vector_to_irq(vector); +Index: linux-2.6-tip/arch/ia64/kernel/machvec.c +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/kernel/machvec.c ++++ linux-2.6-tip/arch/ia64/kernel/machvec.c +@@ -1,5 +1,5 @@ + #include +- ++#include + #include + #include + +@@ -75,14 +75,16 @@ machvec_timer_interrupt (int irq, void * + EXPORT_SYMBOL(machvec_timer_interrupt); + + void +-machvec_dma_sync_single (struct device *hwdev, dma_addr_t dma_handle, size_t size, int dir) ++machvec_dma_sync_single(struct device *hwdev, dma_addr_t dma_handle, size_t size, ++ enum dma_data_direction dir) + { + mb(); + } + EXPORT_SYMBOL(machvec_dma_sync_single); + + void +-machvec_dma_sync_sg (struct device *hwdev, struct scatterlist *sg, int n, int dir) ++machvec_dma_sync_sg(struct device *hwdev, struct scatterlist *sg, int n, ++ enum dma_data_direction dir) + { + mb(); + } +Index: linux-2.6-tip/arch/ia64/kernel/msi_ia64.c +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/kernel/msi_ia64.c ++++ linux-2.6-tip/arch/ia64/kernel/msi_ia64.c +@@ -75,7 +75,7 @@ static void ia64_set_msi_irq_affinity(un + msg.data = data; + + write_msi_msg(irq, &msg); +- irq_desc[irq].affinity = cpumask_of_cpu(cpu); ++ cpumask_copy(irq_desc[irq].affinity, cpumask_of(cpu)); + } + #endif /* CONFIG_SMP */ + +@@ -187,7 +187,7 @@ static void dmar_msi_set_affinity(unsign + msg.address_lo |= MSI_ADDR_DESTID_CPU(cpu_physical_id(cpu)); + + dmar_msi_write(irq, &msg); +- irq_desc[irq].affinity = *mask; ++ cpumask_copy(irq_desc[irq].affinity, mask); + } + #endif /* CONFIG_SMP */ + +Index: linux-2.6-tip/arch/ia64/kernel/pci-dma.c +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/kernel/pci-dma.c ++++ linux-2.6-tip/arch/ia64/kernel/pci-dma.c +@@ -32,9 +32,6 @@ int force_iommu __read_mostly = 1; + int force_iommu __read_mostly; + #endif + +-/* Set this to 1 if there is a HW IOMMU in the system */ +-int iommu_detected __read_mostly; +- + /* Dummy device used for NULL arguments (normally ISA). Better would + be probably a smaller DMA mask, but this is bug-to-bug compatible + to i386. */ +@@ -44,18 +41,7 @@ struct device fallback_dev = { + .dma_mask = &fallback_dev.coherent_dma_mask, + }; + +-void __init pci_iommu_alloc(void) +-{ +- /* +- * The order of these functions is important for +- * fall-back/fail-over reasons +- */ +- detect_intel_iommu(); +- +-#ifdef CONFIG_SWIOTLB +- pci_swiotlb_init(); +-#endif +-} ++extern struct dma_map_ops intel_dma_ops; + + static int __init pci_iommu_init(void) + { +@@ -79,15 +65,12 @@ iommu_dma_init(void) + return; + } + +-struct dma_mapping_ops *dma_ops; +-EXPORT_SYMBOL(dma_ops); +- + int iommu_dma_supported(struct device *dev, u64 mask) + { +- struct dma_mapping_ops *ops = get_dma_ops(dev); ++ struct dma_map_ops *ops = platform_dma_get_ops(dev); + +- if (ops->dma_supported_op) +- return ops->dma_supported_op(dev, mask); ++ if (ops->dma_supported) ++ return ops->dma_supported(dev, mask); + + /* Copied from i386. Doesn't make much sense, because it will + only work for pci_alloc_coherent. 
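The inline wrappers added to asm/dma-mapping.h earlier in this patch keep the driver-facing API unchanged: dma_map_single(), dma_mapping_error(), dma_unmap_single() and friends now simply fetch platform_dma_get_ops(dev) and call through whichever dma_map_ops table was installed (swiotlb, sba, intel, sn2 or hwsw). A hypothetical caller, shown for illustration only:

/*
 * Hypothetical driver fragment -- not part of the patch. example_send(),
 * dev and buf are placeholders; the comments note which dma_map_ops hook
 * each wrapper dispatches to after this conversion.
 */
static int example_send(struct device *dev, void *buf, size_t len)
{
	dma_addr_t daddr;

	/* dma_map_single_attrs() -> ops->map_page() */
	daddr = dma_map_single(dev, buf, len, DMA_TO_DEVICE);

	/* ops->mapping_error() */
	if (dma_mapping_error(dev, daddr))
		return -ENOMEM;

	/* ... hand daddr to the hardware and wait for completion ... */

	/* dma_unmap_single_attrs() -> ops->unmap_page() */
	dma_unmap_single(dev, daddr, len, DMA_TO_DEVICE);
	return 0;
}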
+@@ -116,4 +99,25 @@ int iommu_dma_supported(struct device *d + } + EXPORT_SYMBOL(iommu_dma_supported); + ++void __init pci_iommu_alloc(void) ++{ ++ dma_ops = &intel_dma_ops; ++ ++ dma_ops->sync_single_for_cpu = machvec_dma_sync_single; ++ dma_ops->sync_sg_for_cpu = machvec_dma_sync_sg; ++ dma_ops->sync_single_for_device = machvec_dma_sync_single; ++ dma_ops->sync_sg_for_device = machvec_dma_sync_sg; ++ dma_ops->dma_supported = iommu_dma_supported; ++ ++ /* ++ * The order of these functions is important for ++ * fall-back/fail-over reasons ++ */ ++ detect_intel_iommu(); ++ ++#ifdef CONFIG_SWIOTLB ++ pci_swiotlb_init(); ++#endif ++} ++ + #endif +Index: linux-2.6-tip/arch/ia64/kernel/pci-swiotlb.c +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/kernel/pci-swiotlb.c ++++ linux-2.6-tip/arch/ia64/kernel/pci-swiotlb.c +@@ -13,23 +13,37 @@ + int swiotlb __read_mostly; + EXPORT_SYMBOL(swiotlb); + +-struct dma_mapping_ops swiotlb_dma_ops = { +- .mapping_error = swiotlb_dma_mapping_error, +- .alloc_coherent = swiotlb_alloc_coherent, ++static void *ia64_swiotlb_alloc_coherent(struct device *dev, size_t size, ++ dma_addr_t *dma_handle, gfp_t gfp) ++{ ++ if (dev->coherent_dma_mask != DMA_64BIT_MASK) ++ gfp |= GFP_DMA; ++ return swiotlb_alloc_coherent(dev, size, dma_handle, gfp); ++} ++ ++struct dma_map_ops swiotlb_dma_ops = { ++ .alloc_coherent = ia64_swiotlb_alloc_coherent, + .free_coherent = swiotlb_free_coherent, +- .map_single = swiotlb_map_single, +- .unmap_single = swiotlb_unmap_single, ++ .map_page = swiotlb_map_page, ++ .unmap_page = swiotlb_unmap_page, ++ .map_sg = swiotlb_map_sg_attrs, ++ .unmap_sg = swiotlb_unmap_sg_attrs, + .sync_single_for_cpu = swiotlb_sync_single_for_cpu, + .sync_single_for_device = swiotlb_sync_single_for_device, + .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, + .sync_single_range_for_device = swiotlb_sync_single_range_for_device, + .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, + .sync_sg_for_device = swiotlb_sync_sg_for_device, +- .map_sg = swiotlb_map_sg, +- .unmap_sg = swiotlb_unmap_sg, +- .dma_supported_op = swiotlb_dma_supported, ++ .dma_supported = swiotlb_dma_supported, ++ .mapping_error = swiotlb_dma_mapping_error, + }; + ++void __init swiotlb_dma_init(void) ++{ ++ dma_ops = &swiotlb_dma_ops; ++ swiotlb_init(); ++} ++ + void __init pci_swiotlb_init(void) + { + if (!iommu_detected) { +Index: linux-2.6-tip/arch/ia64/kernel/vmlinux.lds.S +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/kernel/vmlinux.lds.S ++++ linux-2.6-tip/arch/ia64/kernel/vmlinux.lds.S +@@ -213,16 +213,9 @@ SECTIONS + { *(.data.cacheline_aligned) } + + /* Per-cpu data: */ +- percpu : { } :percpu + . = ALIGN(PERCPU_PAGE_SIZE); +- __phys_per_cpu_start = .; +- .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET) +- { +- __per_cpu_start = .; +- *(.data.percpu) +- *(.data.percpu.shared_aligned) +- __per_cpu_end = .; +- } ++ PERCPU_VADDR(PERCPU_ADDR, :percpu) ++ __phys_per_cpu_start = __per_cpu_load; + . 
= __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits + * into percpu page size + */ +Index: linux-2.6-tip/arch/ia64/sn/kernel/msi_sn.c +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/sn/kernel/msi_sn.c ++++ linux-2.6-tip/arch/ia64/sn/kernel/msi_sn.c +@@ -205,7 +205,7 @@ static void sn_set_msi_irq_affinity(unsi + msg.address_lo = (u32)(bus_addr & 0x00000000ffffffff); + + write_msi_msg(irq, &msg); +- irq_desc[irq].affinity = *cpu_mask; ++ cpumask_copy(irq_desc[irq].affinity, cpu_mask); + } + #endif /* CONFIG_SMP */ + +Index: linux-2.6-tip/arch/ia64/sn/pci/pci_dma.c +=================================================================== +--- linux-2.6-tip.orig/arch/ia64/sn/pci/pci_dma.c ++++ linux-2.6-tip/arch/ia64/sn/pci/pci_dma.c +@@ -10,7 +10,7 @@ + */ + + #include +-#include ++#include + #include + #include + #include +@@ -31,7 +31,7 @@ + * this function. Of course, SN only supports devices that have 32 or more + * address bits when using the PMU. + */ +-int sn_dma_supported(struct device *dev, u64 mask) ++static int sn_dma_supported(struct device *dev, u64 mask) + { + BUG_ON(dev->bus != &pci_bus_type); + +@@ -39,7 +39,6 @@ int sn_dma_supported(struct device *dev, + return 0; + return 1; + } +-EXPORT_SYMBOL(sn_dma_supported); + + /** + * sn_dma_set_mask - set the DMA mask +@@ -75,8 +74,8 @@ EXPORT_SYMBOL(sn_dma_set_mask); + * queue for a SCSI controller). See Documentation/DMA-API.txt for + * more information. + */ +-void *sn_dma_alloc_coherent(struct device *dev, size_t size, +- dma_addr_t * dma_handle, gfp_t flags) ++static void *sn_dma_alloc_coherent(struct device *dev, size_t size, ++ dma_addr_t * dma_handle, gfp_t flags) + { + void *cpuaddr; + unsigned long phys_addr; +@@ -124,7 +123,6 @@ void *sn_dma_alloc_coherent(struct devic + + return cpuaddr; + } +-EXPORT_SYMBOL(sn_dma_alloc_coherent); + + /** + * sn_pci_free_coherent - free memory associated with coherent DMAable region +@@ -136,8 +134,8 @@ EXPORT_SYMBOL(sn_dma_alloc_coherent); + * Frees the memory allocated by dma_alloc_coherent(), potentially unmapping + * any associated IOMMU mappings. + */ +-void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, +- dma_addr_t dma_handle) ++static void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, ++ dma_addr_t dma_handle) + { + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); +@@ -147,7 +145,6 @@ void sn_dma_free_coherent(struct device + provider->dma_unmap(pdev, dma_handle, 0); + free_pages((unsigned long)cpu_addr, get_order(size)); + } +-EXPORT_SYMBOL(sn_dma_free_coherent); + + /** + * sn_dma_map_single_attrs - map a single page for DMA +@@ -173,10 +170,12 @@ EXPORT_SYMBOL(sn_dma_free_coherent); + * TODO: simplify our interface; + * figure out how to save dmamap handle so can use two step. 
+ */ +-dma_addr_t sn_dma_map_single_attrs(struct device *dev, void *cpu_addr, +- size_t size, int direction, +- struct dma_attrs *attrs) ++static dma_addr_t sn_dma_map_page(struct device *dev, struct page *page, ++ unsigned long offset, size_t size, ++ enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { ++ void *cpu_addr = page_address(page) + offset; + dma_addr_t dma_addr; + unsigned long phys_addr; + struct pci_dev *pdev = to_pci_dev(dev); +@@ -201,7 +200,6 @@ dma_addr_t sn_dma_map_single_attrs(struc + } + return dma_addr; + } +-EXPORT_SYMBOL(sn_dma_map_single_attrs); + + /** + * sn_dma_unmap_single_attrs - unamp a DMA mapped page +@@ -215,21 +213,20 @@ EXPORT_SYMBOL(sn_dma_map_single_attrs); + * by @dma_handle into the coherence domain. On SN, we're always cache + * coherent, so we just need to free any ATEs associated with this mapping. + */ +-void sn_dma_unmap_single_attrs(struct device *dev, dma_addr_t dma_addr, +- size_t size, int direction, +- struct dma_attrs *attrs) ++static void sn_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, ++ size_t size, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + struct pci_dev *pdev = to_pci_dev(dev); + struct sn_pcibus_provider *provider = SN_PCIDEV_BUSPROVIDER(pdev); + + BUG_ON(dev->bus != &pci_bus_type); + +- provider->dma_unmap(pdev, dma_addr, direction); ++ provider->dma_unmap(pdev, dma_addr, dir); + } +-EXPORT_SYMBOL(sn_dma_unmap_single_attrs); + + /** +- * sn_dma_unmap_sg_attrs - unmap a DMA scatterlist ++ * sn_dma_unmap_sg - unmap a DMA scatterlist + * @dev: device to unmap + * @sg: scatterlist to unmap + * @nhwentries: number of scatterlist entries +@@ -238,9 +235,9 @@ EXPORT_SYMBOL(sn_dma_unmap_single_attrs) + * + * Unmap a set of streaming mode DMA translations. + */ +-void sn_dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sgl, +- int nhwentries, int direction, +- struct dma_attrs *attrs) ++static void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, ++ int nhwentries, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + int i; + struct pci_dev *pdev = to_pci_dev(dev); +@@ -250,15 +247,14 @@ void sn_dma_unmap_sg_attrs(struct device + BUG_ON(dev->bus != &pci_bus_type); + + for_each_sg(sgl, sg, nhwentries, i) { +- provider->dma_unmap(pdev, sg->dma_address, direction); ++ provider->dma_unmap(pdev, sg->dma_address, dir); + sg->dma_address = (dma_addr_t) NULL; + sg->dma_length = 0; + } + } +-EXPORT_SYMBOL(sn_dma_unmap_sg_attrs); + + /** +- * sn_dma_map_sg_attrs - map a scatterlist for DMA ++ * sn_dma_map_sg - map a scatterlist for DMA + * @dev: device to map for + * @sg: scatterlist to map + * @nhwentries: number of entries +@@ -272,8 +268,9 @@ EXPORT_SYMBOL(sn_dma_unmap_sg_attrs); + * + * Maps each entry of @sg for DMA. + */ +-int sn_dma_map_sg_attrs(struct device *dev, struct scatterlist *sgl, +- int nhwentries, int direction, struct dma_attrs *attrs) ++static int sn_dma_map_sg(struct device *dev, struct scatterlist *sgl, ++ int nhwentries, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + unsigned long phys_addr; + struct scatterlist *saved_sg = sgl, *sg; +@@ -310,8 +307,7 @@ int sn_dma_map_sg_attrs(struct device *d + * Free any successfully allocated entries. 
+ */ + if (i > 0) +- sn_dma_unmap_sg_attrs(dev, saved_sg, i, +- direction, attrs); ++ sn_dma_unmap_sg(dev, saved_sg, i, dir, attrs); + return 0; + } + +@@ -320,41 +316,36 @@ int sn_dma_map_sg_attrs(struct device *d + + return nhwentries; + } +-EXPORT_SYMBOL(sn_dma_map_sg_attrs); + +-void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, +- size_t size, int direction) ++static void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, ++ size_t size, enum dma_data_direction dir) + { + BUG_ON(dev->bus != &pci_bus_type); + } +-EXPORT_SYMBOL(sn_dma_sync_single_for_cpu); + +-void sn_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, +- size_t size, int direction) ++static void sn_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, ++ size_t size, ++ enum dma_data_direction dir) + { + BUG_ON(dev->bus != &pci_bus_type); + } +-EXPORT_SYMBOL(sn_dma_sync_single_for_device); + +-void sn_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, +- int nelems, int direction) ++static void sn_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, ++ int nelems, enum dma_data_direction dir) + { + BUG_ON(dev->bus != &pci_bus_type); + } +-EXPORT_SYMBOL(sn_dma_sync_sg_for_cpu); + +-void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, +- int nelems, int direction) ++static void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, ++ int nelems, enum dma_data_direction dir) + { + BUG_ON(dev->bus != &pci_bus_type); + } +-EXPORT_SYMBOL(sn_dma_sync_sg_for_device); + +-int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) ++static int sn_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) + { + return 0; + } +-EXPORT_SYMBOL(sn_dma_mapping_error); + + u64 sn_dma_get_required_mask(struct device *dev) + { +@@ -471,3 +462,23 @@ int sn_pci_legacy_write(struct pci_bus * + out: + return ret; + } ++ ++static struct dma_map_ops sn_dma_ops = { ++ .alloc_coherent = sn_dma_alloc_coherent, ++ .free_coherent = sn_dma_free_coherent, ++ .map_page = sn_dma_map_page, ++ .unmap_page = sn_dma_unmap_page, ++ .map_sg = sn_dma_map_sg, ++ .unmap_sg = sn_dma_unmap_sg, ++ .sync_single_for_cpu = sn_dma_sync_single_for_cpu, ++ .sync_sg_for_cpu = sn_dma_sync_sg_for_cpu, ++ .sync_single_for_device = sn_dma_sync_single_for_device, ++ .sync_sg_for_device = sn_dma_sync_sg_for_device, ++ .mapping_error = sn_dma_mapping_error, ++ .dma_supported = sn_dma_supported, ++}; ++ ++void sn_dma_init(void) ++{ ++ dma_ops = &sn_dma_ops; ++} +Index: linux-2.6-tip/arch/m32r/kernel/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/m32r/kernel/irq.c ++++ linux-2.6-tip/arch/m32r/kernel/irq.c +@@ -49,7 +49,7 @@ int show_interrupts(struct seq_file *p, + seq_printf(p, "%10u ", kstat_irqs(i)); + #else + for_each_online_cpu(j) +- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); ++ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + #endif + seq_printf(p, " %14s", irq_desc[i].chip->typename); + seq_printf(p, " %s", action->name); +Index: linux-2.6-tip/arch/m68k/include/asm/ftrace.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/m68k/include/asm/ftrace.h +@@ -0,0 +1 @@ ++/* empty */ +Index: linux-2.6-tip/arch/mips/include/asm/ftrace.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/mips/include/asm/ftrace.h +@@ -0,0 +1 @@ ++/* empty */ +Index: 
linux-2.6-tip/arch/mips/include/asm/irq.h +=================================================================== +--- linux-2.6-tip.orig/arch/mips/include/asm/irq.h ++++ linux-2.6-tip/arch/mips/include/asm/irq.h +@@ -66,7 +66,7 @@ extern void smtc_forward_irq(unsigned in + */ + #define IRQ_AFFINITY_HOOK(irq) \ + do { \ +- if (!cpu_isset(smp_processor_id(), irq_desc[irq].affinity)) { \ ++ if (!cpumask_test_cpu(smp_processor_id(), irq_desc[irq].affinity)) {\ + smtc_forward_irq(irq); \ + irq_exit(); \ + return; \ +Index: linux-2.6-tip/arch/mips/include/asm/sigcontext.h +=================================================================== +--- linux-2.6-tip.orig/arch/mips/include/asm/sigcontext.h ++++ linux-2.6-tip/arch/mips/include/asm/sigcontext.h +@@ -9,6 +9,7 @@ + #ifndef _ASM_SIGCONTEXT_H + #define _ASM_SIGCONTEXT_H + ++#include + #include + + #if _MIPS_SIM == _MIPS_SIM_ABI32 +Index: linux-2.6-tip/arch/mips/include/asm/swab.h +=================================================================== +--- linux-2.6-tip.orig/arch/mips/include/asm/swab.h ++++ linux-2.6-tip/arch/mips/include/asm/swab.h +@@ -9,7 +9,7 @@ + #define _ASM_SWAB_H + + #include +-#include ++#include + + #define __SWAB_64_THRU_32__ + +Index: linux-2.6-tip/arch/mips/kernel/irq-gic.c +=================================================================== +--- linux-2.6-tip.orig/arch/mips/kernel/irq-gic.c ++++ linux-2.6-tip/arch/mips/kernel/irq-gic.c +@@ -187,7 +187,7 @@ static void gic_set_affinity(unsigned in + set_bit(irq, pcpu_masks[first_cpu(tmp)].pcpu_mask); + + } +- irq_desc[irq].affinity = *cpumask; ++ cpumask_copy(irq_desc[irq].affinity, cpumask); + spin_unlock_irqrestore(&gic_lock, flags); + + } +Index: linux-2.6-tip/arch/mips/kernel/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/mips/kernel/irq.c ++++ linux-2.6-tip/arch/mips/kernel/irq.c +@@ -108,7 +108,7 @@ int show_interrupts(struct seq_file *p, + seq_printf(p, "%10u ", kstat_irqs(i)); + #else + for_each_online_cpu(j) +- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); ++ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + #endif + seq_printf(p, " %14s", irq_desc[i].chip->name); + seq_printf(p, " %s", action->name); +Index: linux-2.6-tip/arch/mips/kernel/smtc.c +=================================================================== +--- linux-2.6-tip.orig/arch/mips/kernel/smtc.c ++++ linux-2.6-tip/arch/mips/kernel/smtc.c +@@ -686,7 +686,7 @@ void smtc_forward_irq(unsigned int irq) + * and efficiency, we just pick the easiest one to find. 
+ */ + +- target = first_cpu(irq_desc[irq].affinity); ++ target = cpumask_first(irq_desc[irq].affinity); + + /* + * We depend on the platform code to have correctly processed +@@ -921,11 +921,13 @@ void ipi_decode(struct smtc_ipi *pipi) + struct clock_event_device *cd; + void *arg_copy = pipi->arg; + int type_copy = pipi->type; ++ int irq = MIPS_CPU_IRQ_BASE + 1; ++ + smtc_ipi_nq(&freeIPIq, pipi); + switch (type_copy) { + case SMTC_CLOCK_TICK: + irq_enter(); +- kstat_this_cpu.irqs[MIPS_CPU_IRQ_BASE + 1]++; ++ kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + cd = &per_cpu(mips_clockevent_device, cpu); + cd->event_handler(cd); + irq_exit(); +Index: linux-2.6-tip/arch/mips/mti-malta/malta-smtc.c +=================================================================== +--- linux-2.6-tip.orig/arch/mips/mti-malta/malta-smtc.c ++++ linux-2.6-tip/arch/mips/mti-malta/malta-smtc.c +@@ -116,7 +116,7 @@ struct plat_smp_ops msmtc_smp_ops = { + + void plat_set_irq_affinity(unsigned int irq, const struct cpumask *affinity) + { +- cpumask_t tmask = *affinity; ++ cpumask_t tmask; + int cpu = 0; + void smtc_set_irq_affinity(unsigned int irq, cpumask_t aff); + +@@ -139,11 +139,12 @@ void plat_set_irq_affinity(unsigned int + * be made to forward to an offline "CPU". + */ + ++ cpumask_copy(&tmask, affinity); + for_each_cpu(cpu, affinity) { + if ((cpu_data[cpu].vpe_id != 0) || !cpu_online(cpu)) + cpu_clear(cpu, tmask); + } +- irq_desc[irq].affinity = tmask; ++ cpumask_copy(irq_desc[irq].affinity, &tmask); + + if (cpus_empty(tmask)) + /* +Index: linux-2.6-tip/arch/mips/sgi-ip22/ip22-int.c +=================================================================== +--- linux-2.6-tip.orig/arch/mips/sgi-ip22/ip22-int.c ++++ linux-2.6-tip/arch/mips/sgi-ip22/ip22-int.c +@@ -155,7 +155,7 @@ static void indy_buserror_irq(void) + int irq = SGI_BUSERR_IRQ; + + irq_enter(); +- kstat_this_cpu.irqs[irq]++; ++ kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + ip22_be_interrupt(irq); + irq_exit(); + } +Index: linux-2.6-tip/arch/mips/sgi-ip22/ip22-time.c +=================================================================== +--- linux-2.6-tip.orig/arch/mips/sgi-ip22/ip22-time.c ++++ linux-2.6-tip/arch/mips/sgi-ip22/ip22-time.c +@@ -122,7 +122,7 @@ void indy_8254timer_irq(void) + char c; + + irq_enter(); +- kstat_this_cpu.irqs[irq]++; ++ kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + printk(KERN_ALERT "Oops, got 8254 interrupt.\n"); + ArcRead(0, &c, 1, &cnt); + ArcEnterInteractiveMode(); +Index: linux-2.6-tip/arch/mips/sibyte/bcm1480/smp.c +=================================================================== +--- linux-2.6-tip.orig/arch/mips/sibyte/bcm1480/smp.c ++++ linux-2.6-tip/arch/mips/sibyte/bcm1480/smp.c +@@ -178,9 +178,10 @@ struct plat_smp_ops bcm1480_smp_ops = { + void bcm1480_mailbox_interrupt(void) + { + int cpu = smp_processor_id(); ++ int irq = K_BCM1480_INT_MBOX_0_0; + unsigned int action; + +- kstat_this_cpu.irqs[K_BCM1480_INT_MBOX_0_0]++; ++ kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + /* Load the mailbox register to figure out what we're supposed to do */ + action = (__raw_readq(mailbox_0_regs[cpu]) >> 48) & 0xffff; + +Index: linux-2.6-tip/arch/mips/sibyte/sb1250/smp.c +=================================================================== +--- linux-2.6-tip.orig/arch/mips/sibyte/sb1250/smp.c ++++ linux-2.6-tip/arch/mips/sibyte/sb1250/smp.c +@@ -166,9 +166,10 @@ struct plat_smp_ops sb_smp_ops = { + void sb1250_mailbox_interrupt(void) + { + int cpu = smp_processor_id(); ++ int irq = K_INT_MBOX_0; + unsigned int 
action; + +- kstat_this_cpu.irqs[K_INT_MBOX_0]++; ++ kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + /* Load the mailbox register to figure out what we're supposed to do */ + action = (____raw_readq(mailbox_regs[cpu]) >> 48) & 0xffff; + +Index: linux-2.6-tip/arch/mn10300/kernel/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/mn10300/kernel/irq.c ++++ linux-2.6-tip/arch/mn10300/kernel/irq.c +@@ -221,7 +221,7 @@ int show_interrupts(struct seq_file *p, + if (action) { + seq_printf(p, "%3d: ", i); + for_each_present_cpu(cpu) +- seq_printf(p, "%10u ", kstat_cpu(cpu).irqs[i]); ++ seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu)); + seq_printf(p, " %14s.%u", irq_desc[i].chip->name, + (GxICR(i) & GxICR_LEVEL) >> + GxICR_LEVEL_SHIFT); +Index: linux-2.6-tip/arch/mn10300/kernel/mn10300-watchdog.c +=================================================================== +--- linux-2.6-tip.orig/arch/mn10300/kernel/mn10300-watchdog.c ++++ linux-2.6-tip/arch/mn10300/kernel/mn10300-watchdog.c +@@ -130,6 +130,7 @@ void watchdog_interrupt(struct pt_regs * + * the stack NMI-atomically, it's safe to use smp_processor_id(). + */ + int sum, cpu = smp_processor_id(); ++ int irq = NMIIRQ; + u8 wdt, tmp; + + wdt = WDCTR & ~WDCTR_WDCNE; +@@ -138,7 +139,7 @@ void watchdog_interrupt(struct pt_regs * + NMICR = NMICR_WDIF; + + nmi_count(cpu)++; +- kstat_this_cpu.irqs[NMIIRQ]++; ++ kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); + sum = irq_stat[cpu].__irq_count; + + if (last_irq_sums[cpu] == sum) { +Index: linux-2.6-tip/arch/parisc/include/asm/ftrace.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/parisc/include/asm/ftrace.h +@@ -0,0 +1 @@ ++/* empty */ +Index: linux-2.6-tip/arch/parisc/include/asm/pdc.h +=================================================================== +--- linux-2.6-tip.orig/arch/parisc/include/asm/pdc.h ++++ linux-2.6-tip/arch/parisc/include/asm/pdc.h +@@ -336,10 +336,11 @@ + #define NUM_PDC_RESULT 32 + + #if !defined(__ASSEMBLY__) +-#ifdef __KERNEL__ + + #include + ++#ifdef __KERNEL__ ++ + extern int pdc_type; + + /* Values for pdc_type */ +Index: linux-2.6-tip/arch/parisc/include/asm/swab.h +=================================================================== +--- linux-2.6-tip.orig/arch/parisc/include/asm/swab.h ++++ linux-2.6-tip/arch/parisc/include/asm/swab.h +@@ -1,7 +1,7 @@ + #ifndef _PARISC_SWAB_H + #define _PARISC_SWAB_H + +-#include ++#include + #include + + #define __SWAB_64_THRU_32__ +Index: linux-2.6-tip/arch/parisc/kernel/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/parisc/kernel/irq.c ++++ linux-2.6-tip/arch/parisc/kernel/irq.c +@@ -138,7 +138,7 @@ static void cpu_set_affinity_irq(unsigne + if (cpu_dest < 0) + return; + +- cpumask_copy(&irq_desc[irq].affinity, &cpumask_of_cpu(cpu_dest)); ++ cpumask_copy(&irq_desc[irq].affinity, dest); + } + #endif + +@@ -185,7 +185,7 @@ int show_interrupts(struct seq_file *p, + seq_printf(p, "%3d: ", i); + #ifdef CONFIG_SMP + for_each_online_cpu(j) +- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); ++ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + #else + seq_printf(p, "%10u ", kstat_irqs(i)); + #endif +Index: linux-2.6-tip/arch/powerpc/include/asm/bootx.h +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/include/asm/bootx.h ++++ linux-2.6-tip/arch/powerpc/include/asm/bootx.h +@@ -9,7 +9,7 @@ + #ifndef __ASM_BOOTX_H__ + 
#define __ASM_BOOTX_H__ + +-#include ++#include + + #ifdef macintosh + #include +Index: linux-2.6-tip/arch/powerpc/include/asm/elf.h +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/include/asm/elf.h ++++ linux-2.6-tip/arch/powerpc/include/asm/elf.h +@@ -7,7 +7,7 @@ + #include + #endif + +-#include ++#include + #include + #include + #include +Index: linux-2.6-tip/arch/powerpc/include/asm/hw_irq.h +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/include/asm/hw_irq.h ++++ linux-2.6-tip/arch/powerpc/include/asm/hw_irq.h +@@ -131,5 +131,44 @@ static inline int irqs_disabled_flags(un + */ + struct hw_interrupt_type; + ++#ifdef CONFIG_PERF_COUNTERS ++static inline unsigned long get_perf_counter_pending(void) ++{ ++ unsigned long x; ++ ++ asm volatile("lbz %0,%1(13)" ++ : "=r" (x) ++ : "i" (offsetof(struct paca_struct, perf_counter_pending))); ++ return x; ++} ++ ++static inline void set_perf_counter_pending(void) ++{ ++ asm volatile("stb %0,%1(13)" : : ++ "r" (1), ++ "i" (offsetof(struct paca_struct, perf_counter_pending))); ++} ++ ++static inline void clear_perf_counter_pending(void) ++{ ++ asm volatile("stb %0,%1(13)" : : ++ "r" (0), ++ "i" (offsetof(struct paca_struct, perf_counter_pending))); ++} ++ ++extern void perf_counter_do_pending(void); ++ ++#else ++ ++static inline unsigned long get_perf_counter_pending(void) ++{ ++ return 0; ++} ++ ++static inline void set_perf_counter_pending(void) {} ++static inline void clear_perf_counter_pending(void) {} ++static inline void perf_counter_do_pending(void) {} ++#endif /* CONFIG_PERF_COUNTERS */ ++ + #endif /* __KERNEL__ */ + #endif /* _ASM_POWERPC_HW_IRQ_H */ +Index: linux-2.6-tip/arch/powerpc/include/asm/kvm.h +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/include/asm/kvm.h ++++ linux-2.6-tip/arch/powerpc/include/asm/kvm.h +@@ -20,7 +20,7 @@ + #ifndef __LINUX_KVM_POWERPC_H + #define __LINUX_KVM_POWERPC_H + +-#include ++#include + + struct kvm_regs { + __u64 pc; +Index: linux-2.6-tip/arch/powerpc/include/asm/mmzone.h +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/include/asm/mmzone.h ++++ linux-2.6-tip/arch/powerpc/include/asm/mmzone.h +@@ -8,6 +8,7 @@ + #define _ASM_MMZONE_H_ + #ifdef __KERNEL__ + ++#include + + /* + * generic non-linear memory support: +Index: linux-2.6-tip/arch/powerpc/include/asm/paca.h +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/include/asm/paca.h ++++ linux-2.6-tip/arch/powerpc/include/asm/paca.h +@@ -99,6 +99,7 @@ struct paca_struct { + u8 soft_enabled; /* irq soft-enable flag */ + u8 hard_enabled; /* set if irqs are enabled in MSR */ + u8 io_sync; /* writel() needs spin_unlock sync */ ++ u8 perf_counter_pending; /* PM interrupt while soft-disabled */ + + /* Stuff for accurate time accounting */ + u64 user_time; /* accumulated usermode TB ticks */ +Index: linux-2.6-tip/arch/powerpc/include/asm/perf_counter.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/powerpc/include/asm/perf_counter.h +@@ -0,0 +1,72 @@ ++/* ++ * Performance counter support - PowerPC-specific definitions. ++ * ++ * Copyright 2008-2009 Paul Mackerras, IBM Corporation. 
++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ */ ++#include ++ ++#define MAX_HWCOUNTERS 8 ++#define MAX_EVENT_ALTERNATIVES 8 ++ ++/* ++ * This struct provides the constants and functions needed to ++ * describe the PMU on a particular POWER-family CPU. ++ */ ++struct power_pmu { ++ int n_counter; ++ int max_alternatives; ++ u64 add_fields; ++ u64 test_adder; ++ int (*compute_mmcr)(unsigned int events[], int n_ev, ++ unsigned int hwc[], u64 mmcr[]); ++ int (*get_constraint)(unsigned int event, u64 *mskp, u64 *valp); ++ int (*get_alternatives)(unsigned int event, unsigned int alt[]); ++ void (*disable_pmc)(unsigned int pmc, u64 mmcr[]); ++ int n_generic; ++ int *generic_events; ++}; ++ ++extern struct power_pmu *ppmu; ++ ++/* ++ * The power_pmu.get_constraint function returns a 64-bit value and ++ * a 64-bit mask that express the constraints between this event and ++ * other events. ++ * ++ * The value and mask are divided up into (non-overlapping) bitfields ++ * of three different types: ++ * ++ * Select field: this expresses the constraint that some set of bits ++ * in MMCR* needs to be set to a specific value for this event. For a ++ * select field, the mask contains 1s in every bit of the field, and ++ * the value contains a unique value for each possible setting of the ++ * MMCR* bits. The constraint checking code will ensure that two events ++ * that set the same field in their masks have the same value in their ++ * value dwords. ++ * ++ * Add field: this expresses the constraint that there can be at most ++ * N events in a particular class. A field of k bits can be used for ++ * N <= 2^(k-1) - 1. The mask has the most significant bit of the field ++ * set (and the other bits 0), and the value has only the least significant ++ * bit of the field set. In addition, the 'add_fields' and 'test_adder' ++ * in the struct power_pmu for this processor come into play. The ++ * add_fields value contains 1 in the LSB of the field, and the ++ * test_adder contains 2^(k-1) - 1 - N in the field. ++ * ++ * NAND field: this expresses the constraint that you may not have events ++ * in all of a set of classes. (For example, on PPC970, you can't select ++ * events from the FPU, ISU and IDU simultaneously, although any two are ++ * possible.) For N classes, the field is N+1 bits wide, and each class ++ * is assigned one bit from the least-significant N bits. The mask has ++ * only the most-significant bit set, and the value has only the bit ++ * for the event's class set. The test_adder has the least significant ++ * bit set in the field. ++ * ++ * If an event is not subject to the constraint expressed by a particular ++ * field, then it will have 0 in both the mask and value for that field. 
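++ *
++ * As a purely illustrative example of an add field: with a k = 3 bit
++ * field and N = 3, each event contributes value 0b001 and mask 0b100,
++ * add_fields has 0b001 in the field, and test_adder has
++ * 2^(k-1) - 1 - N = 0 there.  Three events sum to 0b011; a fourth
++ * carries into the mask bit and the constraint check rejects it.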
++ */ +Index: linux-2.6-tip/arch/powerpc/include/asm/ps3fb.h +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/include/asm/ps3fb.h ++++ linux-2.6-tip/arch/powerpc/include/asm/ps3fb.h +@@ -19,6 +19,7 @@ + #ifndef _ASM_POWERPC_PS3FB_H_ + #define _ASM_POWERPC_PS3FB_H_ + ++#include + #include + + /* ioctl */ +Index: linux-2.6-tip/arch/powerpc/include/asm/spu_info.h +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/include/asm/spu_info.h ++++ linux-2.6-tip/arch/powerpc/include/asm/spu_info.h +@@ -23,9 +23,10 @@ + #ifndef _SPU_INFO_H + #define _SPU_INFO_H + ++#include ++ + #ifdef __KERNEL__ + #include +-#include + #else + struct mfc_cq_sr { + __u64 mfc_cq_data0_RW; +Index: linux-2.6-tip/arch/powerpc/include/asm/swab.h +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/include/asm/swab.h ++++ linux-2.6-tip/arch/powerpc/include/asm/swab.h +@@ -8,7 +8,7 @@ + * 2 of the License, or (at your option) any later version. + */ + +-#include ++#include + #include + + #ifdef __GNUC__ +Index: linux-2.6-tip/arch/powerpc/include/asm/systbl.h +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/include/asm/systbl.h ++++ linux-2.6-tip/arch/powerpc/include/asm/systbl.h +@@ -322,3 +322,4 @@ SYSCALL_SPU(epoll_create1) + SYSCALL_SPU(dup3) + SYSCALL_SPU(pipe2) + SYSCALL(inotify_init1) ++SYSCALL_SPU(perf_counter_open) +Index: linux-2.6-tip/arch/powerpc/include/asm/unistd.h +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/include/asm/unistd.h ++++ linux-2.6-tip/arch/powerpc/include/asm/unistd.h +@@ -341,10 +341,11 @@ + #define __NR_dup3 316 + #define __NR_pipe2 317 + #define __NR_inotify_init1 318 ++#define __NR_perf_counter_open 319 + + #ifdef __KERNEL__ + +-#define __NR_syscalls 319 ++#define __NR_syscalls 320 + + #define __NR__exit __NR_exit + #define NR_syscalls __NR_syscalls +Index: linux-2.6-tip/arch/powerpc/kernel/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/kernel/Makefile ++++ linux-2.6-tip/arch/powerpc/kernel/Makefile +@@ -94,6 +94,8 @@ obj-$(CONFIG_AUDIT) += audit.o + obj64-$(CONFIG_AUDIT) += compat_audit.o + + obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o ++obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o power4-pmu.o ppc970-pmu.o \ ++ power5-pmu.o power5+-pmu.o power6-pmu.o + + obj-$(CONFIG_8XX_MINIMAL_FPEMU) += softemu8xx.o + +Index: linux-2.6-tip/arch/powerpc/kernel/asm-offsets.c +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/kernel/asm-offsets.c ++++ linux-2.6-tip/arch/powerpc/kernel/asm-offsets.c +@@ -131,6 +131,7 @@ int main(void) + DEFINE(PACAKMSR, offsetof(struct paca_struct, kernel_msr)); + DEFINE(PACASOFTIRQEN, offsetof(struct paca_struct, soft_enabled)); + DEFINE(PACAHARDIRQEN, offsetof(struct paca_struct, hard_enabled)); ++ DEFINE(PACAPERFPEND, offsetof(struct paca_struct, perf_counter_pending)); + DEFINE(PACASLBCACHE, offsetof(struct paca_struct, slb_cache)); + DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); + DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); +Index: linux-2.6-tip/arch/powerpc/kernel/entry_64.S +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/kernel/entry_64.S ++++ 
linux-2.6-tip/arch/powerpc/kernel/entry_64.S +@@ -526,6 +526,15 @@ ALT_FW_FTR_SECTION_END_IFCLR(FW_FEATURE_ + 2: + TRACE_AND_RESTORE_IRQ(r5); + ++#ifdef CONFIG_PERF_COUNTERS ++ /* check paca->perf_counter_pending if we're enabling ints */ ++ lbz r3,PACAPERFPEND(r13) ++ and. r3,r3,r5 ++ beq 27f ++ bl .perf_counter_do_pending ++27: ++#endif /* CONFIG_PERF_COUNTERS */ ++ + /* extract EE bit and use it to restore paca->hard_enabled */ + ld r3,_MSR(r1) + rldicl r4,r3,49,63 /* r0 = (r3 >> 15) & 1 */ +@@ -616,44 +625,52 @@ do_work: + bne restore + /* here we are preempting the current task */ + 1: ++ /* ++ * preempt_schedule_irq() expects interrupts disabled and returns ++ * with interrupts disabled. No need to check preemption again, ++ * preempt_schedule_irq just did that for us. ++ */ ++ bl .preempt_schedule_irq + #ifdef CONFIG_TRACE_IRQFLAGS + bl .trace_hardirqs_on ++#endif /* CONFIG_TRACE_IRQFLAGS */ ++ + /* Note: we just clobbered r10 which used to contain the previous + * MSR before the hard-disabling done by the caller of do_work. + * We don't have that value anymore, but it doesn't matter as + * we will hard-enable unconditionally, we can just reload the + * current MSR into r10 + */ ++ bl .preempt_schedule_irq + mfmsr r10 +-#endif /* CONFIG_TRACE_IRQFLAGS */ +- li r0,1 +- stb r0,PACASOFTIRQEN(r13) +- stb r0,PACAHARDIRQEN(r13) +- ori r10,r10,MSR_EE +- mtmsrd r10,1 /* reenable interrupts */ +- bl .preempt_schedule +- mfmsr r10 +- clrrdi r9,r1,THREAD_SHIFT +- rldicl r10,r10,48,1 /* disable interrupts again */ +- rotldi r10,r10,16 +- mtmsrd r10,1 +- ld r4,TI_FLAGS(r9) +- andi. r0,r4,_TIF_NEED_RESCHED +- bne 1b ++ clrrdi r9,r1,THREAD_SHIFT ++ rldicl r10,r10,48,1 /* disable interrupts again */ ++ rotldi r10,r10,16 ++ mtmsrd r10,1 ++ ld r4,TI_FLAGS(r9) ++ andi. r0,r4,(_TIF_NEED_RESCHED) ++ bne 1b + b restore + + user_work: + #endif +- /* Enable interrupts */ +- ori r10,r10,MSR_EE +- mtmsrd r10,1 +- + andi. r0,r4,_TIF_NEED_RESCHED + beq 1f +- bl .schedule ++ ++ /* preempt_schedule_irq() expects interrupts disabled. 
*/ ++ bl .preempt_schedule_irq + b .ret_from_except_lite + +-1: bl .save_nvgprs ++ /* here we are preempting the current task */ ++1: li r0,1 ++ stb r0,PACASOFTIRQEN(r13) ++ stb r0,PACAHARDIRQEN(r13) ++ ++ /* Enable interrupts */ ++ ori r10,r10,MSR_EE ++ mtmsrd r10,1 ++ ++ bl .save_nvgprs + addi r3,r1,STACK_FRAME_OVERHEAD + bl .do_signal + b .ret_from_except +Index: linux-2.6-tip/arch/powerpc/kernel/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/kernel/irq.c ++++ linux-2.6-tip/arch/powerpc/kernel/irq.c +@@ -135,6 +135,11 @@ notrace void raw_local_irq_restore(unsig + iseries_handle_interrupts(); + } + ++ if (get_perf_counter_pending()) { ++ clear_perf_counter_pending(); ++ perf_counter_do_pending(); ++ } ++ + /* + * if (get_paca()->hard_enabled) return; + * But again we need to take care that gcc gets hard_enabled directly +@@ -190,7 +195,7 @@ int show_interrupts(struct seq_file *p, + seq_printf(p, "%3d: ", i); + #ifdef CONFIG_SMP + for_each_online_cpu(j) +- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); ++ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + #else + seq_printf(p, "%10u ", kstat_irqs(i)); + #endif /* CONFIG_SMP */ +@@ -231,7 +236,7 @@ void fixup_irqs(cpumask_t map) + if (irq_desc[irq].status & IRQ_PER_CPU) + continue; + +- cpus_and(mask, irq_desc[irq].affinity, map); ++ cpumask_and(&mask, irq_desc[irq].affinity, &map); + if (any_online_cpu(mask) == NR_CPUS) { + printk("Breaking affinity for irq %i\n", irq); + mask = map; +@@ -438,7 +443,7 @@ void do_softirq(void) + */ + + static LIST_HEAD(irq_hosts); +-static DEFINE_SPINLOCK(irq_big_lock); ++static DEFINE_RAW_SPINLOCK(irq_big_lock); + static unsigned int revmap_trees_allocated; + static DEFINE_MUTEX(revmap_trees_mutex); + struct irq_map_entry irq_map[NR_IRQS]; +Index: linux-2.6-tip/arch/powerpc/kernel/perf_counter.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/powerpc/kernel/perf_counter.c +@@ -0,0 +1,827 @@ ++/* ++ * Performance counter support - powerpc architecture code ++ * ++ * Copyright 2008-2009 Paul Mackerras, IBM Corporation. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct cpu_hw_counters { ++ int n_counters; ++ int n_percpu; ++ int disabled; ++ int n_added; ++ struct perf_counter *counter[MAX_HWCOUNTERS]; ++ unsigned int events[MAX_HWCOUNTERS]; ++ u64 mmcr[3]; ++ u8 pmcs_enabled; ++}; ++DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters); ++ ++struct power_pmu *ppmu; ++ ++/* ++ * Normally, to ignore kernel events we set the FCS (freeze counters ++ * in supervisor mode) bit in MMCR0, but if the kernel runs with the ++ * hypervisor bit set in the MSR, or if we are running on a processor ++ * where the hypervisor bit is forced to 1 (as on Apple G5 processors), ++ * then we need to use the FCHV bit to ignore kernel events. ++ */ ++static unsigned int freeze_counters_kernel = MMCR0_FCS; ++ ++void perf_counter_print_debug(void) ++{ ++} ++ ++/* ++ * Read one performance monitor counter (PMC). 
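++ * idx is 1-based, matching SPRN_PMC1..SPRN_PMC8; an out-of-range idx
++ * logs an error and reads back as 0.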
++ */ ++static unsigned long read_pmc(int idx) ++{ ++ unsigned long val; ++ ++ switch (idx) { ++ case 1: ++ val = mfspr(SPRN_PMC1); ++ break; ++ case 2: ++ val = mfspr(SPRN_PMC2); ++ break; ++ case 3: ++ val = mfspr(SPRN_PMC3); ++ break; ++ case 4: ++ val = mfspr(SPRN_PMC4); ++ break; ++ case 5: ++ val = mfspr(SPRN_PMC5); ++ break; ++ case 6: ++ val = mfspr(SPRN_PMC6); ++ break; ++ case 7: ++ val = mfspr(SPRN_PMC7); ++ break; ++ case 8: ++ val = mfspr(SPRN_PMC8); ++ break; ++ default: ++ printk(KERN_ERR "oops trying to read PMC%d\n", idx); ++ val = 0; ++ } ++ return val; ++} ++ ++/* ++ * Write one PMC. ++ */ ++static void write_pmc(int idx, unsigned long val) ++{ ++ switch (idx) { ++ case 1: ++ mtspr(SPRN_PMC1, val); ++ break; ++ case 2: ++ mtspr(SPRN_PMC2, val); ++ break; ++ case 3: ++ mtspr(SPRN_PMC3, val); ++ break; ++ case 4: ++ mtspr(SPRN_PMC4, val); ++ break; ++ case 5: ++ mtspr(SPRN_PMC5, val); ++ break; ++ case 6: ++ mtspr(SPRN_PMC6, val); ++ break; ++ case 7: ++ mtspr(SPRN_PMC7, val); ++ break; ++ case 8: ++ mtspr(SPRN_PMC8, val); ++ break; ++ default: ++ printk(KERN_ERR "oops trying to write PMC%d\n", idx); ++ } ++} ++ ++/* ++ * Check if a set of events can all go on the PMU at once. ++ * If they can't, this will look at alternative codes for the events ++ * and see if any combination of alternative codes is feasible. ++ * The feasible set is returned in event[]. ++ */ ++static int power_check_constraints(unsigned int event[], int n_ev) ++{ ++ u64 mask, value, nv; ++ unsigned int alternatives[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; ++ u64 amasks[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; ++ u64 avalues[MAX_HWCOUNTERS][MAX_EVENT_ALTERNATIVES]; ++ u64 smasks[MAX_HWCOUNTERS], svalues[MAX_HWCOUNTERS]; ++ int n_alt[MAX_HWCOUNTERS], choice[MAX_HWCOUNTERS]; ++ int i, j; ++ u64 addf = ppmu->add_fields; ++ u64 tadd = ppmu->test_adder; ++ ++ if (n_ev > ppmu->n_counter) ++ return -1; ++ ++ /* First see if the events will go on as-is */ ++ for (i = 0; i < n_ev; ++i) { ++ alternatives[i][0] = event[i]; ++ if (ppmu->get_constraint(event[i], &amasks[i][0], ++ &avalues[i][0])) ++ return -1; ++ choice[i] = 0; ++ } ++ value = mask = 0; ++ for (i = 0; i < n_ev; ++i) { ++ nv = (value | avalues[i][0]) + (value & avalues[i][0] & addf); ++ if ((((nv + tadd) ^ value) & mask) != 0 || ++ (((nv + tadd) ^ avalues[i][0]) & amasks[i][0]) != 0) ++ break; ++ value = nv; ++ mask |= amasks[i][0]; ++ } ++ if (i == n_ev) ++ return 0; /* all OK */ ++ ++ /* doesn't work, gather alternatives... */ ++ if (!ppmu->get_alternatives) ++ return -1; ++ for (i = 0; i < n_ev; ++i) { ++ n_alt[i] = ppmu->get_alternatives(event[i], alternatives[i]); ++ for (j = 1; j < n_alt[i]; ++j) ++ ppmu->get_constraint(alternatives[i][j], ++ &amasks[i][j], &avalues[i][j]); ++ } ++ ++ /* enumerate all possibilities and see if any will work */ ++ i = 0; ++ j = -1; ++ value = mask = nv = 0; ++ while (i < n_ev) { ++ if (j >= 0) { ++ /* we're backtracking, restore context */ ++ value = svalues[i]; ++ mask = smasks[i]; ++ j = choice[i]; ++ } ++ /* ++ * See if any alternative k for event i, ++ * where k > j, will satisfy the constraints. ++ */ ++ while (++j < n_alt[i]) { ++ nv = (value | avalues[i][j]) + ++ (value & avalues[i][j] & addf); ++ if ((((nv + tadd) ^ value) & mask) == 0 && ++ (((nv + tadd) ^ avalues[i][j]) ++ & amasks[i][j]) == 0) ++ break; ++ } ++ if (j >= n_alt[i]) { ++ /* ++ * No feasible alternative, backtrack ++ * to event i-1 and continue enumerating its ++ * alternatives from where we got up to. 
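++ * (Backing up past event 0 means no combination of alternatives can
++ * satisfy the constraints, so we return -1.)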
++ */ ++ if (--i < 0) ++ return -1; ++ } else { ++ /* ++ * Found a feasible alternative for event i, ++ * remember where we got up to with this event, ++ * go on to the next event, and start with ++ * the first alternative for it. ++ */ ++ choice[i] = j; ++ svalues[i] = value; ++ smasks[i] = mask; ++ value = nv; ++ mask |= amasks[i][j]; ++ ++i; ++ j = -1; ++ } ++ } ++ ++ /* OK, we have a feasible combination, tell the caller the solution */ ++ for (i = 0; i < n_ev; ++i) ++ event[i] = alternatives[i][choice[i]]; ++ return 0; ++} ++ ++/* ++ * Check if newly-added counters have consistent settings for ++ * exclude_{user,kernel,hv} with each other and any previously ++ * added counters. ++ */ ++static int check_excludes(struct perf_counter **ctrs, int n_prev, int n_new) ++{ ++ int eu, ek, eh; ++ int i, n; ++ struct perf_counter *counter; ++ ++ n = n_prev + n_new; ++ if (n <= 1) ++ return 0; ++ ++ eu = ctrs[0]->hw_event.exclude_user; ++ ek = ctrs[0]->hw_event.exclude_kernel; ++ eh = ctrs[0]->hw_event.exclude_hv; ++ if (n_prev == 0) ++ n_prev = 1; ++ for (i = n_prev; i < n; ++i) { ++ counter = ctrs[i]; ++ if (counter->hw_event.exclude_user != eu || ++ counter->hw_event.exclude_kernel != ek || ++ counter->hw_event.exclude_hv != eh) ++ return -EAGAIN; ++ } ++ return 0; ++} ++ ++static void power_perf_read(struct perf_counter *counter) ++{ ++ long val, delta, prev; ++ ++ if (!counter->hw.idx) ++ return; ++ /* ++ * Performance monitor interrupts come even when interrupts ++ * are soft-disabled, as long as interrupts are hard-enabled. ++ * Therefore we treat them like NMIs. ++ */ ++ do { ++ prev = atomic64_read(&counter->hw.prev_count); ++ barrier(); ++ val = read_pmc(counter->hw.idx); ++ } while (atomic64_cmpxchg(&counter->hw.prev_count, prev, val) != prev); ++ ++ /* The counters are only 32 bits wide */ ++ delta = (val - prev) & 0xfffffffful; ++ atomic64_add(delta, &counter->count); ++ atomic64_sub(delta, &counter->hw.period_left); ++} ++ ++/* ++ * Disable all counters to prevent PMU interrupts and to allow ++ * counters to be added or removed. ++ */ ++u64 hw_perf_save_disable(void) ++{ ++ struct cpu_hw_counters *cpuhw; ++ unsigned long ret; ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ cpuhw = &__get_cpu_var(cpu_hw_counters); ++ ++ ret = cpuhw->disabled; ++ if (!ret) { ++ cpuhw->disabled = 1; ++ cpuhw->n_added = 0; ++ ++ /* ++ * Check if we ever enabled the PMU on this cpu. ++ */ ++ if (!cpuhw->pmcs_enabled) { ++ if (ppc_md.enable_pmcs) ++ ppc_md.enable_pmcs(); ++ cpuhw->pmcs_enabled = 1; ++ } ++ ++ /* ++ * Set the 'freeze counters' bit. ++ * The barrier is to make sure the mtspr has been ++ * executed and the PMU has frozen the counters ++ * before we return. ++ */ ++ mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) | MMCR0_FC); ++ mb(); ++ } ++ local_irq_restore(flags); ++ return ret; ++} ++ ++/* ++ * Re-enable all counters if disable == 0. ++ * If we were previously disabled and counters were added, then ++ * put the new config on the PMU. ++ */ ++void hw_perf_restore(u64 disable) ++{ ++ struct perf_counter *counter; ++ struct cpu_hw_counters *cpuhw; ++ unsigned long flags; ++ long i; ++ unsigned long val; ++ s64 left; ++ unsigned int hwc_index[MAX_HWCOUNTERS]; ++ ++ if (disable) ++ return; ++ local_irq_save(flags); ++ cpuhw = &__get_cpu_var(cpu_hw_counters); ++ cpuhw->disabled = 0; ++ ++ /* ++ * If we didn't change anything, or only removed counters, ++ * no need to recalculate MMCR* settings and reset the PMCs. 
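++ * (cpuhw->n_added counts the counters added since the PMU was
++ * disabled, so zero here means there is nothing new to program.)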
++ * Just reenable the PMU with the current MMCR* settings ++ * (possibly updated for removal of counters). ++ */ ++ if (!cpuhw->n_added) { ++ mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); ++ mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); ++ mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); ++ if (cpuhw->n_counters == 0) ++ get_lppaca()->pmcregs_in_use = 0; ++ goto out; ++ } ++ ++ /* ++ * Compute MMCR* values for the new set of counters ++ */ ++ if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_counters, hwc_index, ++ cpuhw->mmcr)) { ++ /* shouldn't ever get here */ ++ printk(KERN_ERR "oops compute_mmcr failed\n"); ++ goto out; ++ } ++ ++ /* ++ * Add in MMCR0 freeze bits corresponding to the ++ * hw_event.exclude_* bits for the first counter. ++ * We have already checked that all counters have the ++ * same values for these bits as the first counter. ++ */ ++ counter = cpuhw->counter[0]; ++ if (counter->hw_event.exclude_user) ++ cpuhw->mmcr[0] |= MMCR0_FCP; ++ if (counter->hw_event.exclude_kernel) ++ cpuhw->mmcr[0] |= freeze_counters_kernel; ++ if (counter->hw_event.exclude_hv) ++ cpuhw->mmcr[0] |= MMCR0_FCHV; ++ ++ /* ++ * Write the new configuration to MMCR* with the freeze ++ * bit set and set the hardware counters to their initial values. ++ * Then unfreeze the counters. ++ */ ++ get_lppaca()->pmcregs_in_use = 1; ++ mtspr(SPRN_MMCRA, cpuhw->mmcr[2]); ++ mtspr(SPRN_MMCR1, cpuhw->mmcr[1]); ++ mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE)) ++ | MMCR0_FC); ++ ++ /* ++ * Read off any pre-existing counters that need to move ++ * to another PMC. ++ */ ++ for (i = 0; i < cpuhw->n_counters; ++i) { ++ counter = cpuhw->counter[i]; ++ if (counter->hw.idx && counter->hw.idx != hwc_index[i] + 1) { ++ power_perf_read(counter); ++ write_pmc(counter->hw.idx, 0); ++ counter->hw.idx = 0; ++ } ++ } ++ ++ /* ++ * Initialize the PMCs for all the new and moved counters. ++ */ ++ for (i = 0; i < cpuhw->n_counters; ++i) { ++ counter = cpuhw->counter[i]; ++ if (counter->hw.idx) ++ continue; ++ val = 0; ++ if (counter->hw_event.irq_period) { ++ left = atomic64_read(&counter->hw.period_left); ++ if (left < 0x80000000L) ++ val = 0x80000000L - left; ++ } ++ atomic64_set(&counter->hw.prev_count, val); ++ counter->hw.idx = hwc_index[i] + 1; ++ write_pmc(counter->hw.idx, val); ++ perf_counter_update_userpage(counter); ++ } ++ mb(); ++ cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; ++ mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); ++ ++ out: ++ local_irq_restore(flags); ++} ++ ++static int collect_events(struct perf_counter *group, int max_count, ++ struct perf_counter *ctrs[], unsigned int *events) ++{ ++ int n = 0; ++ struct perf_counter *counter; ++ ++ if (!is_software_counter(group)) { ++ if (n >= max_count) ++ return -1; ++ ctrs[n] = group; ++ events[n++] = group->hw.config; ++ } ++ list_for_each_entry(counter, &group->sibling_list, list_entry) { ++ if (!is_software_counter(counter) && ++ counter->state != PERF_COUNTER_STATE_OFF) { ++ if (n >= max_count) ++ return -1; ++ ctrs[n] = counter; ++ events[n++] = counter->hw.config; ++ } ++ } ++ return n; ++} ++ ++static void counter_sched_in(struct perf_counter *counter, int cpu) ++{ ++ counter->state = PERF_COUNTER_STATE_ACTIVE; ++ counter->oncpu = cpu; ++ counter->tstamp_running += counter->ctx->time_now - ++ counter->tstamp_stopped; ++ if (is_software_counter(counter)) ++ counter->hw_ops->enable(counter); ++} ++ ++/* ++ * Called to enable a whole group of counters. ++ * Returns 1 if the group was enabled, or -EAGAIN if it could not be. 
++ * Assumes the caller has disabled interrupts and has ++ * frozen the PMU with hw_perf_save_disable. ++ */ ++int hw_perf_group_sched_in(struct perf_counter *group_leader, ++ struct perf_cpu_context *cpuctx, ++ struct perf_counter_context *ctx, int cpu) ++{ ++ struct cpu_hw_counters *cpuhw; ++ long i, n, n0; ++ struct perf_counter *sub; ++ ++ cpuhw = &__get_cpu_var(cpu_hw_counters); ++ n0 = cpuhw->n_counters; ++ n = collect_events(group_leader, ppmu->n_counter - n0, ++ &cpuhw->counter[n0], &cpuhw->events[n0]); ++ if (n < 0) ++ return -EAGAIN; ++ if (check_excludes(cpuhw->counter, n0, n)) ++ return -EAGAIN; ++ if (power_check_constraints(cpuhw->events, n + n0)) ++ return -EAGAIN; ++ cpuhw->n_counters = n0 + n; ++ cpuhw->n_added += n; ++ ++ /* ++ * OK, this group can go on; update counter states etc., ++ * and enable any software counters ++ */ ++ for (i = n0; i < n0 + n; ++i) ++ cpuhw->counter[i]->hw.config = cpuhw->events[i]; ++ cpuctx->active_oncpu += n; ++ n = 1; ++ counter_sched_in(group_leader, cpu); ++ list_for_each_entry(sub, &group_leader->sibling_list, list_entry) { ++ if (sub->state != PERF_COUNTER_STATE_OFF) { ++ counter_sched_in(sub, cpu); ++ ++n; ++ } ++ } ++ ctx->nr_active += n; ++ ++ return 1; ++} ++ ++/* ++ * Add a counter to the PMU. ++ * If all counters are not already frozen, then we disable and ++ * re-enable the PMU in order to get hw_perf_restore to do the ++ * actual work of reconfiguring the PMU. ++ */ ++static int power_perf_enable(struct perf_counter *counter) ++{ ++ struct cpu_hw_counters *cpuhw; ++ unsigned long flags; ++ u64 pmudis; ++ int n0; ++ int ret = -EAGAIN; ++ ++ local_irq_save(flags); ++ pmudis = hw_perf_save_disable(); ++ ++ /* ++ * Add the counter to the list (if there is room) ++ * and check whether the total set is still feasible. ++ */ ++ cpuhw = &__get_cpu_var(cpu_hw_counters); ++ n0 = cpuhw->n_counters; ++ if (n0 >= ppmu->n_counter) ++ goto out; ++ cpuhw->counter[n0] = counter; ++ cpuhw->events[n0] = counter->hw.config; ++ if (check_excludes(cpuhw->counter, n0, 1)) ++ goto out; ++ if (power_check_constraints(cpuhw->events, n0 + 1)) ++ goto out; ++ ++ counter->hw.config = cpuhw->events[n0]; ++ ++cpuhw->n_counters; ++ ++cpuhw->n_added; ++ ++ ret = 0; ++ out: ++ hw_perf_restore(pmudis); ++ local_irq_restore(flags); ++ return ret; ++} ++ ++/* ++ * Remove a counter from the PMU. 
++ */ ++static void power_perf_disable(struct perf_counter *counter) ++{ ++ struct cpu_hw_counters *cpuhw; ++ long i; ++ u64 pmudis; ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ pmudis = hw_perf_save_disable(); ++ ++ power_perf_read(counter); ++ ++ cpuhw = &__get_cpu_var(cpu_hw_counters); ++ for (i = 0; i < cpuhw->n_counters; ++i) { ++ if (counter == cpuhw->counter[i]) { ++ while (++i < cpuhw->n_counters) ++ cpuhw->counter[i-1] = cpuhw->counter[i]; ++ --cpuhw->n_counters; ++ ppmu->disable_pmc(counter->hw.idx - 1, cpuhw->mmcr); ++ write_pmc(counter->hw.idx, 0); ++ counter->hw.idx = 0; ++ perf_counter_update_userpage(counter); ++ break; ++ } ++ } ++ if (cpuhw->n_counters == 0) { ++ /* disable exceptions if no counters are running */ ++ cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE); ++ } ++ ++ hw_perf_restore(pmudis); ++ local_irq_restore(flags); ++} ++ ++struct hw_perf_counter_ops power_perf_ops = { ++ .enable = power_perf_enable, ++ .disable = power_perf_disable, ++ .read = power_perf_read ++}; ++ ++const struct hw_perf_counter_ops * ++hw_perf_counter_init(struct perf_counter *counter) ++{ ++ unsigned long ev; ++ struct perf_counter *ctrs[MAX_HWCOUNTERS]; ++ unsigned int events[MAX_HWCOUNTERS]; ++ int n; ++ ++ if (!ppmu) ++ return NULL; ++ if ((s64)counter->hw_event.irq_period < 0) ++ return NULL; ++ if (!perf_event_raw(&counter->hw_event)) { ++ ev = perf_event_id(&counter->hw_event); ++ if (ev >= ppmu->n_generic || ppmu->generic_events[ev] == 0) ++ return NULL; ++ ev = ppmu->generic_events[ev]; ++ } else { ++ ev = perf_event_config(&counter->hw_event); ++ } ++ counter->hw.config_base = ev; ++ counter->hw.idx = 0; ++ ++ /* ++ * If we are not running on a hypervisor, force the ++ * exclude_hv bit to 0 so that we don't care what ++ * the user set it to. ++ */ ++ if (!firmware_has_feature(FW_FEATURE_LPAR)) ++ counter->hw_event.exclude_hv = 0; ++ ++ /* ++ * If this is in a group, check if it can go on with all the ++ * other hardware counters in the group. We assume the counter ++ * hasn't been linked into its leader's sibling list at this point. ++ */ ++ n = 0; ++ if (counter->group_leader != counter) { ++ n = collect_events(counter->group_leader, ppmu->n_counter - 1, ++ ctrs, events); ++ if (n < 0) ++ return NULL; ++ } ++ events[n] = ev; ++ ctrs[n] = counter; ++ if (check_excludes(ctrs, n, 1)) ++ return NULL; ++ if (power_check_constraints(events, n + 1)) ++ return NULL; ++ ++ counter->hw.config = events[n]; ++ atomic64_set(&counter->hw.period_left, counter->hw_event.irq_period); ++ return &power_perf_ops; ++} ++ ++/* ++ * Handle wakeups. ++ */ ++void perf_counter_do_pending(void) ++{ ++ int i; ++ struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); ++ struct perf_counter *counter; ++ ++ for (i = 0; i < cpuhw->n_counters; ++i) { ++ counter = cpuhw->counter[i]; ++ if (counter && counter->wakeup_pending) { ++ counter->wakeup_pending = 0; ++ wake_up(&counter->waitq); ++ } ++ } ++} ++ ++/* ++ * A counter has overflowed; update its count and record ++ * things if requested. Note that interrupts are hard-disabled ++ * here so there is no possibility of being interrupted. 
++ */ ++static void record_and_restart(struct perf_counter *counter, long val, ++ struct pt_regs *regs) ++{ ++ s64 prev, delta, left; ++ int record = 0; ++ ++ /* we don't have to worry about interrupts here */ ++ prev = atomic64_read(&counter->hw.prev_count); ++ delta = (val - prev) & 0xfffffffful; ++ atomic64_add(delta, &counter->count); ++ ++ /* ++ * See if the total period for this counter has expired, ++ * and update for the next period. ++ */ ++ val = 0; ++ left = atomic64_read(&counter->hw.period_left) - delta; ++ if (counter->hw_event.irq_period) { ++ if (left <= 0) { ++ left += counter->hw_event.irq_period; ++ if (left <= 0) ++ left = counter->hw_event.irq_period; ++ record = 1; ++ } ++ if (left < 0x80000000L) ++ val = 0x80000000L - left; ++ } ++ write_pmc(counter->hw.idx, val); ++ atomic64_set(&counter->hw.prev_count, val); ++ atomic64_set(&counter->hw.period_left, left); ++ perf_counter_update_userpage(counter); ++ ++ /* ++ * Finally record data if requested. ++ */ ++ if (record) ++ perf_counter_output(counter, 1, regs); ++} ++ ++/* ++ * Performance monitor interrupt stuff ++ */ ++static void perf_counter_interrupt(struct pt_regs *regs) ++{ ++ int i; ++ struct cpu_hw_counters *cpuhw = &__get_cpu_var(cpu_hw_counters); ++ struct perf_counter *counter; ++ long val; ++ int need_wakeup = 0, found = 0; ++ ++ for (i = 0; i < cpuhw->n_counters; ++i) { ++ counter = cpuhw->counter[i]; ++ val = read_pmc(counter->hw.idx); ++ if ((int)val < 0) { ++ /* counter has overflowed */ ++ found = 1; ++ record_and_restart(counter, val, regs); ++ } ++ } ++ ++ /* ++ * In case we didn't find and reset the counter that caused ++ * the interrupt, scan all counters and reset any that are ++ * negative, to avoid getting continual interrupts. ++ * Any that we processed in the previous loop will not be negative. ++ */ ++ if (!found) { ++ for (i = 0; i < ppmu->n_counter; ++i) { ++ val = read_pmc(i + 1); ++ if ((int)val < 0) ++ write_pmc(i + 1, 0); ++ } ++ } ++ ++ /* ++ * Reset MMCR0 to its normal value. This will set PMXE and ++ * clear FC (freeze counters) and PMAO (perf mon alert occurred) ++ * and thus allow interrupts to occur again. ++ * XXX might want to use MSR.PM to keep the counters frozen until ++ * we get back out of this interrupt. ++ */ ++ mtspr(SPRN_MMCR0, cpuhw->mmcr[0]); ++ ++ /* ++ * If we need a wakeup, check whether interrupts were soft-enabled ++ * when we took the interrupt. If they were, we can wake stuff up ++ * immediately; otherwise we'll have do the wakeup when interrupts ++ * get soft-enabled. 
++ */ ++ if (get_perf_counter_pending() && regs->softe) { ++ irq_enter(); ++ clear_perf_counter_pending(); ++ perf_counter_do_pending(); ++ irq_exit(); ++ } ++} ++ ++void hw_perf_counter_setup(int cpu) ++{ ++ struct cpu_hw_counters *cpuhw = &per_cpu(cpu_hw_counters, cpu); ++ ++ memset(cpuhw, 0, sizeof(*cpuhw)); ++ cpuhw->mmcr[0] = MMCR0_FC; ++} ++ ++extern struct power_pmu power4_pmu; ++extern struct power_pmu ppc970_pmu; ++extern struct power_pmu power5_pmu; ++extern struct power_pmu power5p_pmu; ++extern struct power_pmu power6_pmu; ++ ++static int init_perf_counters(void) ++{ ++ unsigned long pvr; ++ ++ if (reserve_pmc_hardware(perf_counter_interrupt)) { ++ printk(KERN_ERR "Couldn't init performance monitor subsystem\n"); ++ return -EBUSY; ++ } ++ ++ /* XXX should get this from cputable */ ++ pvr = mfspr(SPRN_PVR); ++ switch (PVR_VER(pvr)) { ++ case PV_POWER4: ++ case PV_POWER4p: ++ ppmu = &power4_pmu; ++ break; ++ case PV_970: ++ case PV_970FX: ++ case PV_970MP: ++ ppmu = &ppc970_pmu; ++ break; ++ case PV_POWER5: ++ ppmu = &power5_pmu; ++ break; ++ case PV_POWER5p: ++ ppmu = &power5p_pmu; ++ break; ++ case 0x3e: ++ ppmu = &power6_pmu; ++ break; ++ } ++ ++ /* ++ * Use FCHV to ignore kernel events if MSR.HV is set. ++ */ ++ if (mfmsr() & MSR_HV) ++ freeze_counters_kernel = MMCR0_FCHV; ++ ++ return 0; ++} ++ ++arch_initcall(init_perf_counters); +Index: linux-2.6-tip/arch/powerpc/kernel/power4-pmu.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/powerpc/kernel/power4-pmu.c +@@ -0,0 +1,557 @@ ++/* ++ * Performance counter support for POWER4 (GP) and POWER4+ (GQ) processors. ++ * ++ * Copyright 2009 Paul Mackerras, IBM Corporation. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. 
++ */ ++#include ++#include ++#include ++ ++/* ++ * Bits in event code for POWER4 ++ */ ++#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ ++#define PM_PMC_MSK 0xf ++#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ ++#define PM_UNIT_MSK 0xf ++#define PM_LOWER_SH 6 ++#define PM_LOWER_MSK 1 ++#define PM_LOWER_MSKS 0x40 ++#define PM_BYTE_SH 4 /* Byte number of event bus to use */ ++#define PM_BYTE_MSK 3 ++#define PM_PMCSEL_MSK 7 ++ ++/* ++ * Unit code values ++ */ ++#define PM_FPU 1 ++#define PM_ISU1 2 ++#define PM_IFU 3 ++#define PM_IDU0 4 ++#define PM_ISU1_ALT 6 ++#define PM_ISU2 7 ++#define PM_IFU_ALT 8 ++#define PM_LSU0 9 ++#define PM_LSU1 0xc ++#define PM_GPS 0xf ++ ++/* ++ * Bits in MMCR0 for POWER4 ++ */ ++#define MMCR0_PMC1SEL_SH 8 ++#define MMCR0_PMC2SEL_SH 1 ++#define MMCR_PMCSEL_MSK 0x1f ++ ++/* ++ * Bits in MMCR1 for POWER4 ++ */ ++#define MMCR1_TTM0SEL_SH 62 ++#define MMCR1_TTC0SEL_SH 61 ++#define MMCR1_TTM1SEL_SH 59 ++#define MMCR1_TTC1SEL_SH 58 ++#define MMCR1_TTM2SEL_SH 56 ++#define MMCR1_TTC2SEL_SH 55 ++#define MMCR1_TTM3SEL_SH 53 ++#define MMCR1_TTC3SEL_SH 52 ++#define MMCR1_TTMSEL_MSK 3 ++#define MMCR1_TD_CP_DBG0SEL_SH 50 ++#define MMCR1_TD_CP_DBG1SEL_SH 48 ++#define MMCR1_TD_CP_DBG2SEL_SH 46 ++#define MMCR1_TD_CP_DBG3SEL_SH 44 ++#define MMCR1_DEBUG0SEL_SH 43 ++#define MMCR1_DEBUG1SEL_SH 42 ++#define MMCR1_DEBUG2SEL_SH 41 ++#define MMCR1_DEBUG3SEL_SH 40 ++#define MMCR1_PMC1_ADDER_SEL_SH 39 ++#define MMCR1_PMC2_ADDER_SEL_SH 38 ++#define MMCR1_PMC6_ADDER_SEL_SH 37 ++#define MMCR1_PMC5_ADDER_SEL_SH 36 ++#define MMCR1_PMC8_ADDER_SEL_SH 35 ++#define MMCR1_PMC7_ADDER_SEL_SH 34 ++#define MMCR1_PMC3_ADDER_SEL_SH 33 ++#define MMCR1_PMC4_ADDER_SEL_SH 32 ++#define MMCR1_PMC3SEL_SH 27 ++#define MMCR1_PMC4SEL_SH 22 ++#define MMCR1_PMC5SEL_SH 17 ++#define MMCR1_PMC6SEL_SH 12 ++#define MMCR1_PMC7SEL_SH 7 ++#define MMCR1_PMC8SEL_SH 2 /* note bit 0 is in MMCRA for GP */ ++ ++static short mmcr1_adder_bits[8] = { ++ MMCR1_PMC1_ADDER_SEL_SH, ++ MMCR1_PMC2_ADDER_SEL_SH, ++ MMCR1_PMC3_ADDER_SEL_SH, ++ MMCR1_PMC4_ADDER_SEL_SH, ++ MMCR1_PMC5_ADDER_SEL_SH, ++ MMCR1_PMC6_ADDER_SEL_SH, ++ MMCR1_PMC7_ADDER_SEL_SH, ++ MMCR1_PMC8_ADDER_SEL_SH ++}; ++ ++/* ++ * Bits in MMCRA ++ */ ++#define MMCRA_PMC8SEL0_SH 17 /* PMC8SEL bit 0 for GP */ ++ ++/* ++ * Layout of constraint bits: ++ * 6666555555555544444444443333333333222222222211111111110000000000 ++ * 3210987654321098765432109876543210987654321098765432109876543210 ++ * |[ >[ >[ >|||[ >[ >< >< >< >< ><><><><><><><><> ++ * | UC1 UC2 UC3 ||| PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 ++ * \SMPL ||\TTC3SEL ++ * |\TTC_IFU_SEL ++ * \TTM2SEL0 ++ * ++ * SMPL - SAMPLE_ENABLE constraint ++ * 56: SAMPLE_ENABLE value 0x0100_0000_0000_0000 ++ * ++ * UC1 - unit constraint 1: can't have all three of FPU/ISU1/IDU0|ISU2 ++ * 55: UC1 error 0x0080_0000_0000_0000 ++ * 54: FPU events needed 0x0040_0000_0000_0000 ++ * 53: ISU1 events needed 0x0020_0000_0000_0000 ++ * 52: IDU0|ISU2 events needed 0x0010_0000_0000_0000 ++ * ++ * UC2 - unit constraint 2: can't have all three of FPU/IFU/LSU0 ++ * 51: UC2 error 0x0008_0000_0000_0000 ++ * 50: FPU events needed 0x0004_0000_0000_0000 ++ * 49: IFU events needed 0x0002_0000_0000_0000 ++ * 48: LSU0 events needed 0x0001_0000_0000_0000 ++ * ++ * UC3 - unit constraint 3: can't have all four of LSU0/IFU/IDU0|ISU2/ISU1 ++ * 47: UC3 error 0x8000_0000_0000 ++ * 46: LSU0 events needed 0x4000_0000_0000 ++ * 45: IFU events needed 0x2000_0000_0000 ++ * 44: IDU0|ISU2 events needed 0x1000_0000_0000 ++ * 43: ISU1 events 
needed 0x0800_0000_0000 ++ * ++ * TTM2SEL0 ++ * 42: 0 = IDU0 events needed ++ * 1 = ISU2 events needed 0x0400_0000_0000 ++ * ++ * TTC_IFU_SEL ++ * 41: 0 = IFU.U events needed ++ * 1 = IFU.L events needed 0x0200_0000_0000 ++ * ++ * TTC3SEL ++ * 40: 0 = LSU1.U events needed ++ * 1 = LSU1.L events needed 0x0100_0000_0000 ++ * ++ * PS1 ++ * 39: PS1 error 0x0080_0000_0000 ++ * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 ++ * ++ * PS2 ++ * 35: PS2 error 0x0008_0000_0000 ++ * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 ++ * ++ * B0 ++ * 28-31: Byte 0 event source 0xf000_0000 ++ * 1 = FPU ++ * 2 = ISU1 ++ * 3 = IFU ++ * 4 = IDU0 ++ * 7 = ISU2 ++ * 9 = LSU0 ++ * c = LSU1 ++ * f = GPS ++ * ++ * B1, B2, B3 ++ * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources ++ * ++ * P8 ++ * 15: P8 error 0x8000 ++ * 14-15: Count of events needing PMC8 ++ * ++ * P1..P7 ++ * 0-13: Count of events needing PMC1..PMC7 ++ * ++ * Note: this doesn't allow events using IFU.U to be combined with events ++ * using IFU.L, though that is feasible (using TTM0 and TTM2). However ++ * there are no listed events for IFU.L (they are debug events not ++ * verified for performance monitoring) so this shouldn't cause a ++ * problem. ++ */ ++ ++static struct unitinfo { ++ u64 value, mask; ++ int unit; ++ int lowerbit; ++} p4_unitinfo[16] = { ++ [PM_FPU] = { 0x44000000000000ull, 0x88000000000000ull, PM_FPU, 0 }, ++ [PM_ISU1] = { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, ++ [PM_ISU1_ALT] = ++ { 0x20080000000000ull, 0x88000000000000ull, PM_ISU1, 0 }, ++ [PM_IFU] = { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, ++ [PM_IFU_ALT] = ++ { 0x02200000000000ull, 0x08820000000000ull, PM_IFU, 41 }, ++ [PM_IDU0] = { 0x10100000000000ull, 0x80840000000000ull, PM_IDU0, 1 }, ++ [PM_ISU2] = { 0x10140000000000ull, 0x80840000000000ull, PM_ISU2, 0 }, ++ [PM_LSU0] = { 0x01400000000000ull, 0x08800000000000ull, PM_LSU0, 0 }, ++ [PM_LSU1] = { 0x00000000000000ull, 0x00010000000000ull, PM_LSU1, 40 }, ++ [PM_GPS] = { 0x00000000000000ull, 0x00000000000000ull, PM_GPS, 0 } ++}; ++ ++static unsigned char direct_marked_event[8] = { ++ (1<<2) | (1<<3), /* PMC1: PM_MRK_GRP_DISP, PM_MRK_ST_CMPL */ ++ (1<<3) | (1<<5), /* PMC2: PM_THRESH_TIMEO, PM_MRK_BRU_FIN */ ++ (1<<3), /* PMC3: PM_MRK_ST_CMPL_INT */ ++ (1<<4) | (1<<5), /* PMC4: PM_MRK_GRP_CMPL, PM_MRK_CRU_FIN */ ++ (1<<4) | (1<<5), /* PMC5: PM_MRK_GRP_TIMEO */ ++ (1<<3) | (1<<4) | (1<<5), ++ /* PMC6: PM_MRK_ST_GPS, PM_MRK_FXU_FIN, PM_MRK_GRP_ISSUED */ ++ (1<<4) | (1<<5), /* PMC7: PM_MRK_FPU_FIN, PM_MRK_INST_FIN */ ++ (1<<4), /* PMC8: PM_MRK_LSU_FIN */ ++}; ++ ++/* ++ * Returns 1 if event counts things relating to marked instructions ++ * and thus needs the MMCRA_SAMPLE_ENABLE bit set, or 0 if not. ++ */ ++static int p4_marked_instr_event(unsigned int event) ++{ ++ int pmc, psel, unit, byte, bit; ++ unsigned int mask; ++ ++ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; ++ psel = event & PM_PMCSEL_MSK; ++ if (pmc) { ++ if (direct_marked_event[pmc - 1] & (1 << psel)) ++ return 1; ++ if (psel == 0) /* add events */ ++ bit = (pmc <= 4)? 
pmc - 1: 8 - pmc; ++ else if (psel == 6) /* decode events */ ++ bit = 4; ++ else ++ return 0; ++ } else ++ bit = psel; ++ ++ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; ++ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; ++ mask = 0; ++ switch (unit) { ++ case PM_LSU1: ++ if (event & PM_LOWER_MSKS) ++ mask = 1 << 28; /* byte 7 bit 4 */ ++ else ++ mask = 6 << 24; /* byte 3 bits 1 and 2 */ ++ break; ++ case PM_LSU0: ++ /* byte 3, bit 3; byte 2 bits 0,2,3,4,5; byte 1 */ ++ mask = 0x083dff00; ++ } ++ return (mask >> (byte * 8 + bit)) & 1; ++} ++ ++static int p4_get_constraint(unsigned int event, u64 *maskp, u64 *valp) ++{ ++ int pmc, byte, unit, lower, sh; ++ u64 mask = 0, value = 0; ++ int grp = -1; ++ ++ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; ++ if (pmc) { ++ if (pmc > 8) ++ return -1; ++ sh = (pmc - 1) * 2; ++ mask |= 2 << sh; ++ value |= 1 << sh; ++ grp = ((pmc - 1) >> 1) & 1; ++ } ++ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; ++ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; ++ if (unit) { ++ lower = (event >> PM_LOWER_SH) & PM_LOWER_MSK; ++ ++ /* ++ * Bus events on bytes 0 and 2 can be counted ++ * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8. ++ */ ++ if (!pmc) ++ grp = byte & 1; ++ ++ if (!p4_unitinfo[unit].unit) ++ return -1; ++ mask |= p4_unitinfo[unit].mask; ++ value |= p4_unitinfo[unit].value; ++ sh = p4_unitinfo[unit].lowerbit; ++ if (sh > 1) ++ value |= (u64)lower << sh; ++ else if (lower != sh) ++ return -1; ++ unit = p4_unitinfo[unit].unit; ++ ++ /* Set byte lane select field */ ++ mask |= 0xfULL << (28 - 4 * byte); ++ value |= (u64)unit << (28 - 4 * byte); ++ } ++ if (grp == 0) { ++ /* increment PMC1/2/5/6 field */ ++ mask |= 0x8000000000ull; ++ value |= 0x1000000000ull; ++ } else { ++ /* increment PMC3/4/7/8 field */ ++ mask |= 0x800000000ull; ++ value |= 0x100000000ull; ++ } ++ ++ /* Marked instruction events need sample_enable set */ ++ if (p4_marked_instr_event(event)) { ++ mask |= 1ull << 56; ++ value |= 1ull << 56; ++ } ++ ++ /* PMCSEL=6 decode events on byte 2 need sample_enable clear */ ++ if (pmc && (event & PM_PMCSEL_MSK) == 6 && byte == 2) ++ mask |= 1ull << 56; ++ ++ *maskp = mask; ++ *valp = value; ++ return 0; ++} ++ ++static unsigned int ppc_inst_cmpl[] = { ++ 0x1001, 0x4001, 0x6001, 0x7001, 0x8001 ++}; ++ ++static int p4_get_alternatives(unsigned int event, unsigned int alt[]) ++{ ++ int i, j, na; ++ ++ alt[0] = event; ++ na = 1; ++ ++ /* 2 possibilities for PM_GRP_DISP_REJECT */ ++ if (event == 0x8003 || event == 0x0224) { ++ alt[1] = event ^ (0x8003 ^ 0x0224); ++ return 2; ++ } ++ ++ /* 2 possibilities for PM_ST_MISS_L1 */ ++ if (event == 0x0c13 || event == 0x0c23) { ++ alt[1] = event ^ (0x0c13 ^ 0x0c23); ++ return 2; ++ } ++ ++ /* several possibilities for PM_INST_CMPL */ ++ for (i = 0; i < ARRAY_SIZE(ppc_inst_cmpl); ++i) { ++ if (event == ppc_inst_cmpl[i]) { ++ for (j = 0; j < ARRAY_SIZE(ppc_inst_cmpl); ++j) ++ if (j != i) ++ alt[na++] = ppc_inst_cmpl[j]; ++ break; ++ } ++ } ++ ++ return na; ++} ++ ++static int p4_compute_mmcr(unsigned int event[], int n_ev, ++ unsigned int hwc[], u64 mmcr[]) ++{ ++ u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; ++ unsigned int pmc, unit, byte, psel, lower; ++ unsigned int ttm, grp; ++ unsigned int pmc_inuse = 0; ++ unsigned int pmc_grp_use[2]; ++ unsigned char busbyte[4]; ++ unsigned char unituse[16]; ++ unsigned int unitlower = 0; ++ int i; ++ ++ if (n_ev > 8) ++ return -1; ++ ++ /* First pass to count resource use */ ++ pmc_grp_use[0] = pmc_grp_use[1] = 0; ++ memset(busbyte, 0, sizeof(busbyte)); ++ memset(unituse, 0, sizeof(unituse)); ++ 
for (i = 0; i < n_ev; ++i) { ++ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; ++ if (pmc) { ++ if (pmc_inuse & (1 << (pmc - 1))) ++ return -1; ++ pmc_inuse |= 1 << (pmc - 1); ++ /* count 1/2/5/6 vs 3/4/7/8 use */ ++ ++pmc_grp_use[((pmc - 1) >> 1) & 1]; ++ } ++ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; ++ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; ++ lower = (event[i] >> PM_LOWER_SH) & PM_LOWER_MSK; ++ if (unit) { ++ if (!pmc) ++ ++pmc_grp_use[byte & 1]; ++ if (unit == 6 || unit == 8) ++ /* map alt ISU1/IFU codes: 6->2, 8->3 */ ++ unit = (unit >> 1) - 1; ++ if (busbyte[byte] && busbyte[byte] != unit) ++ return -1; ++ busbyte[byte] = unit; ++ lower <<= unit; ++ if (unituse[unit] && lower != (unitlower & lower)) ++ return -1; ++ unituse[unit] = 1; ++ unitlower |= lower; ++ } ++ } ++ if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) ++ return -1; ++ ++ /* ++ * Assign resources and set multiplexer selects. ++ * ++ * Units 1,2,3 are on TTM0, 4,6,7 on TTM1, 8,10 on TTM2. ++ * Each TTMx can only select one unit, but since ++ * units 2 and 6 are both ISU1, and 3 and 8 are both IFU, ++ * we have some choices. ++ */ ++ if (unituse[2] & (unituse[1] | (unituse[3] & unituse[9]))) { ++ unituse[6] = 1; /* Move 2 to 6 */ ++ unituse[2] = 0; ++ } ++ if (unituse[3] & (unituse[1] | unituse[2])) { ++ unituse[8] = 1; /* Move 3 to 8 */ ++ unituse[3] = 0; ++ unitlower = (unitlower & ~8) | ((unitlower & 8) << 5); ++ } ++ /* Check only one unit per TTMx */ ++ if (unituse[1] + unituse[2] + unituse[3] > 1 || ++ unituse[4] + unituse[6] + unituse[7] > 1 || ++ unituse[8] + unituse[9] > 1 || ++ (unituse[5] | unituse[10] | unituse[11] | ++ unituse[13] | unituse[14])) ++ return -1; ++ ++ /* Set TTMxSEL fields. Note, units 1-3 => TTM0SEL codes 0-2 */ ++ mmcr1 |= (u64)(unituse[3] * 2 + unituse[2]) << MMCR1_TTM0SEL_SH; ++ mmcr1 |= (u64)(unituse[7] * 3 + unituse[6] * 2) << MMCR1_TTM1SEL_SH; ++ mmcr1 |= (u64)unituse[9] << MMCR1_TTM2SEL_SH; ++ ++ /* Set TTCxSEL fields. */ ++ if (unitlower & 0xe) ++ mmcr1 |= 1ull << MMCR1_TTC0SEL_SH; ++ if (unitlower & 0xf0) ++ mmcr1 |= 1ull << MMCR1_TTC1SEL_SH; ++ if (unitlower & 0xf00) ++ mmcr1 |= 1ull << MMCR1_TTC2SEL_SH; ++ if (unitlower & 0x7000) ++ mmcr1 |= 1ull << MMCR1_TTC3SEL_SH; ++ ++ /* Set byte lane select fields. 
*/ ++ for (byte = 0; byte < 4; ++byte) { ++ unit = busbyte[byte]; ++ if (!unit) ++ continue; ++ if (unit == 0xf) { ++ /* special case for GPS */ ++ mmcr1 |= 1ull << (MMCR1_DEBUG0SEL_SH - byte); ++ } else { ++ if (!unituse[unit]) ++ ttm = unit - 1; /* 2->1, 3->2 */ ++ else ++ ttm = unit >> 2; ++ mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2*byte); ++ } ++ } ++ ++ /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ ++ for (i = 0; i < n_ev; ++i) { ++ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; ++ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; ++ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; ++ psel = event[i] & PM_PMCSEL_MSK; ++ if (!pmc) { ++ /* Bus event or 00xxx direct event (off or cycles) */ ++ if (unit) ++ psel |= 0x10 | ((byte & 2) << 2); ++ for (pmc = 0; pmc < 8; ++pmc) { ++ if (pmc_inuse & (1 << pmc)) ++ continue; ++ grp = (pmc >> 1) & 1; ++ if (unit) { ++ if (grp == (byte & 1)) ++ break; ++ } else if (pmc_grp_use[grp] < 4) { ++ ++pmc_grp_use[grp]; ++ break; ++ } ++ } ++ pmc_inuse |= 1 << pmc; ++ } else { ++ /* Direct event */ ++ --pmc; ++ if (psel == 0 && (byte & 2)) ++ /* add events on higher-numbered bus */ ++ mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; ++ else if (psel == 6 && byte == 3) ++ /* seem to need to set sample_enable here */ ++ mmcra |= MMCRA_SAMPLE_ENABLE; ++ psel |= 8; ++ } ++ if (pmc <= 1) ++ mmcr0 |= psel << (MMCR0_PMC1SEL_SH - 7 * pmc); ++ else ++ mmcr1 |= psel << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); ++ if (pmc == 7) /* PMC8 */ ++ mmcra |= (psel & 1) << MMCRA_PMC8SEL0_SH; ++ hwc[i] = pmc; ++ if (p4_marked_instr_event(event[i])) ++ mmcra |= MMCRA_SAMPLE_ENABLE; ++ } ++ ++ if (pmc_inuse & 1) ++ mmcr0 |= MMCR0_PMC1CE; ++ if (pmc_inuse & 0xfe) ++ mmcr0 |= MMCR0_PMCjCE; ++ ++ mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ ++ ++ /* Return MMCRx values */ ++ mmcr[0] = mmcr0; ++ mmcr[1] = mmcr1; ++ mmcr[2] = mmcra; ++ return 0; ++} ++ ++static void p4_disable_pmc(unsigned int pmc, u64 mmcr[]) ++{ ++ /* ++ * Setting the PMCxSEL field to 0 disables PMC x. ++ * (Note that pmc is 0-based here, not 1-based.) ++ */ ++ if (pmc <= 1) { ++ mmcr[0] &= ~(0x1fUL << (MMCR0_PMC1SEL_SH - 7 * pmc)); ++ } else { ++ mmcr[1] &= ~(0x1fUL << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2))); ++ if (pmc == 7) ++ mmcr[2] &= ~(1UL << MMCRA_PMC8SEL0_SH); ++ } ++} ++ ++static int p4_generic_events[] = { ++ [PERF_COUNT_CPU_CYCLES] = 7, ++ [PERF_COUNT_INSTRUCTIONS] = 0x1001, ++ [PERF_COUNT_CACHE_REFERENCES] = 0x8c10, /* PM_LD_REF_L1 */ ++ [PERF_COUNT_CACHE_MISSES] = 0x3c10, /* PM_LD_MISS_L1 */ ++ [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x330, /* PM_BR_ISSUED */ ++ [PERF_COUNT_BRANCH_MISSES] = 0x331, /* PM_BR_MPRED_CR */ ++}; ++ ++struct power_pmu power4_pmu = { ++ .n_counter = 8, ++ .max_alternatives = 5, ++ .add_fields = 0x0000001100005555ull, ++ .test_adder = 0x0011083300000000ull, ++ .compute_mmcr = p4_compute_mmcr, ++ .get_constraint = p4_get_constraint, ++ .get_alternatives = p4_get_alternatives, ++ .disable_pmc = p4_disable_pmc, ++ .n_generic = ARRAY_SIZE(p4_generic_events), ++ .generic_events = p4_generic_events, ++}; +Index: linux-2.6-tip/arch/powerpc/kernel/power5+-pmu.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/powerpc/kernel/power5+-pmu.c +@@ -0,0 +1,452 @@ ++/* ++ * Performance counter support for POWER5 (not POWER5++) processors. ++ * ++ * Copyright 2009 Paul Mackerras, IBM Corporation. 
++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. ++ */ ++#include ++#include ++#include ++ ++/* ++ * Bits in event code for POWER5+ (POWER5 GS) and POWER5++ (POWER5 GS DD3) ++ */ ++#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ ++#define PM_PMC_MSK 0xf ++#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) ++#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ ++#define PM_UNIT_MSK 0xf ++#define PM_BYTE_SH 12 /* Byte number of event bus to use */ ++#define PM_BYTE_MSK 7 ++#define PM_GRS_SH 8 /* Storage subsystem mux select */ ++#define PM_GRS_MSK 7 ++#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ ++#define PM_PMCSEL_MSK 0x7f ++ ++/* Values in PM_UNIT field */ ++#define PM_FPU 0 ++#define PM_ISU0 1 ++#define PM_IFU 2 ++#define PM_ISU1 3 ++#define PM_IDU 4 ++#define PM_ISU0_ALT 6 ++#define PM_GRS 7 ++#define PM_LSU0 8 ++#define PM_LSU1 0xc ++#define PM_LASTUNIT 0xc ++ ++/* ++ * Bits in MMCR1 for POWER5+ ++ */ ++#define MMCR1_TTM0SEL_SH 62 ++#define MMCR1_TTM1SEL_SH 60 ++#define MMCR1_TTM2SEL_SH 58 ++#define MMCR1_TTM3SEL_SH 56 ++#define MMCR1_TTMSEL_MSK 3 ++#define MMCR1_TD_CP_DBG0SEL_SH 54 ++#define MMCR1_TD_CP_DBG1SEL_SH 52 ++#define MMCR1_TD_CP_DBG2SEL_SH 50 ++#define MMCR1_TD_CP_DBG3SEL_SH 48 ++#define MMCR1_GRS_L2SEL_SH 46 ++#define MMCR1_GRS_L2SEL_MSK 3 ++#define MMCR1_GRS_L3SEL_SH 44 ++#define MMCR1_GRS_L3SEL_MSK 3 ++#define MMCR1_GRS_MCSEL_SH 41 ++#define MMCR1_GRS_MCSEL_MSK 7 ++#define MMCR1_GRS_FABSEL_SH 39 ++#define MMCR1_GRS_FABSEL_MSK 3 ++#define MMCR1_PMC1_ADDER_SEL_SH 35 ++#define MMCR1_PMC2_ADDER_SEL_SH 34 ++#define MMCR1_PMC3_ADDER_SEL_SH 33 ++#define MMCR1_PMC4_ADDER_SEL_SH 32 ++#define MMCR1_PMC1SEL_SH 25 ++#define MMCR1_PMC2SEL_SH 17 ++#define MMCR1_PMC3SEL_SH 9 ++#define MMCR1_PMC4SEL_SH 1 ++#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) ++#define MMCR1_PMCSEL_MSK 0x7f ++ ++/* ++ * Bits in MMCRA ++ */ ++ ++/* ++ * Layout of constraint bits: ++ * 6666555555555544444444443333333333222222222211111111110000000000 ++ * 3210987654321098765432109876543210987654321098765432109876543210 ++ * [ ><><>< ><> <><>[ > < >< >< >< ><><><><> ++ * NC G0G1G2 G3 T0T1 UC B0 B1 B2 B3 P4P3P2P1 ++ * ++ * NC - number of counters ++ * 51: NC error 0x0008_0000_0000_0000 ++ * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 ++ * ++ * G0..G3 - GRS mux constraints ++ * 46-47: GRS_L2SEL value ++ * 44-45: GRS_L3SEL value ++ * 41-44: GRS_MCSEL value ++ * 39-40: GRS_FABSEL value ++ * Note that these match up with their bit positions in MMCR1 ++ * ++ * T0 - TTM0 constraint ++ * 36-37: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0x30_0000_0000 ++ * ++ * T1 - TTM1 constraint ++ * 34-35: TTM1SEL value (0=IDU, 3=GRS) 0x0c_0000_0000 ++ * ++ * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS ++ * 33: UC3 error 0x02_0000_0000 ++ * 32: FPU|IFU|ISU1 events needed 0x01_0000_0000 ++ * 31: ISU0 events needed 0x01_8000_0000 ++ * 30: IDU|GRS events needed 0x00_4000_0000 ++ * ++ * B0 ++ * 20-23: Byte 0 event source 0x00f0_0000 ++ * Encoding as for the event code ++ * ++ * B1, B2, B3 ++ * 16-19, 12-15, 8-11: Byte 1, 2, 3 event sources ++ * ++ * P4 ++ * 7: P1 error 0x80 ++ * 6-7: Count of events needing PMC4 ++ * ++ * P1..P3 ++ * 0-6: Count of events needing PMC1..PMC3 ++ */ ++ ++static const int grsel_shift[8] = { ++ 
MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, ++ MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, ++ MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH ++}; ++ ++/* Masks and values for using events from the various units */ ++static u64 unit_cons[PM_LASTUNIT+1][2] = { ++ [PM_FPU] = { 0x3200000000ull, 0x0100000000ull }, ++ [PM_ISU0] = { 0x0200000000ull, 0x0080000000ull }, ++ [PM_ISU1] = { 0x3200000000ull, 0x3100000000ull }, ++ [PM_IFU] = { 0x3200000000ull, 0x2100000000ull }, ++ [PM_IDU] = { 0x0e00000000ull, 0x0040000000ull }, ++ [PM_GRS] = { 0x0e00000000ull, 0x0c40000000ull }, ++}; ++ ++static int power5p_get_constraint(unsigned int event, u64 *maskp, u64 *valp) ++{ ++ int pmc, byte, unit, sh; ++ int bit, fmask; ++ u64 mask = 0, value = 0; ++ ++ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; ++ if (pmc) { ++ if (pmc > 4) ++ return -1; ++ sh = (pmc - 1) * 2; ++ mask |= 2 << sh; ++ value |= 1 << sh; ++ } ++ if (event & PM_BUSEVENT_MSK) { ++ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; ++ if (unit > PM_LASTUNIT) ++ return -1; ++ if (unit == PM_ISU0_ALT) ++ unit = PM_ISU0; ++ mask |= unit_cons[unit][0]; ++ value |= unit_cons[unit][1]; ++ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; ++ if (byte >= 4) { ++ if (unit != PM_LSU1) ++ return -1; ++ /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ ++ ++unit; ++ byte &= 3; ++ } ++ if (unit == PM_GRS) { ++ bit = event & 7; ++ fmask = (bit == 6)? 7: 3; ++ sh = grsel_shift[bit]; ++ mask |= (u64)fmask << sh; ++ value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; ++ } ++ /* Set byte lane select field */ ++ mask |= 0xfULL << (20 - 4 * byte); ++ value |= (u64)unit << (20 - 4 * byte); ++ } ++ mask |= 0x8000000000000ull; ++ value |= 0x1000000000000ull; ++ *maskp = mask; ++ *valp = value; ++ return 0; ++} ++ ++#define MAX_ALT 3 /* at most 3 alternatives for any event */ ++ ++static const unsigned int event_alternatives[][MAX_ALT] = { ++ { 0x100c0, 0x40001f }, /* PM_GCT_FULL_CYC */ ++ { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */ ++ { 0x230e2, 0x323087 }, /* PM_BR_PRED_CR */ ++ { 0x230e3, 0x223087, 0x3230a0 }, /* PM_BR_PRED_TA */ ++ { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ ++ { 0x800c4, 0xc20e0 }, /* PM_DTLB_MISS */ ++ { 0xc50c6, 0xc60e0 }, /* PM_MRK_DTLB_MISS */ ++ { 0x100009, 0x200009 }, /* PM_INST_CMPL */ ++ { 0x200015, 0x300015 }, /* PM_LSU_LMQ_SRQ_EMPTY_CYC */ ++ { 0x300009, 0x400009 }, /* PM_INST_DISP */ ++}; ++ ++/* ++ * Scan the alternatives table for a match and return the ++ * index into the alternatives table if found, else -1. ++ */ ++static int find_alternative(unsigned int event) ++{ ++ int i, j; ++ ++ for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { ++ if (event < event_alternatives[i][0]) ++ break; ++ for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) ++ if (event == event_alternatives[i][j]) ++ return i; ++ } ++ return -1; ++} ++ ++static const unsigned char bytedecode_alternatives[4][4] = { ++ /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 }, ++ /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e }, ++ /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 }, ++ /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e } ++}; ++ ++/* ++ * Some direct events for decodes of event bus byte 3 have alternative ++ * PMCSEL values on other counters. This returns the alternative ++ * event code for those that do, or -1 otherwise. This also handles ++ * alternative PCMSEL values for add events. 
++ */ ++static int find_alternative_bdecode(unsigned int event) ++{ ++ int pmc, altpmc, pp, j; ++ ++ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; ++ if (pmc == 0 || pmc > 4) ++ return -1; ++ altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ ++ pp = event & PM_PMCSEL_MSK; ++ for (j = 0; j < 4; ++j) { ++ if (bytedecode_alternatives[pmc - 1][j] == pp) { ++ return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | ++ (altpmc << PM_PMC_SH) | ++ bytedecode_alternatives[altpmc - 1][j]; ++ } ++ } ++ ++ /* new decode alternatives for power5+ */ ++ if (pmc == 1 && (pp == 0x0d || pp == 0x0e)) ++ return event + (2 << PM_PMC_SH) + (0x2e - 0x0d); ++ if (pmc == 3 && (pp == 0x2e || pp == 0x2f)) ++ return event - (2 << PM_PMC_SH) - (0x2e - 0x0d); ++ ++ /* alternative add event encodings */ ++ if (pp == 0x10 || pp == 0x28) ++ return ((event ^ (0x10 ^ 0x28)) & ~PM_PMC_MSKS) | ++ (altpmc << PM_PMC_SH); ++ ++ return -1; ++} ++ ++static int power5p_get_alternatives(unsigned int event, unsigned int alt[]) ++{ ++ int i, j, ae, nalt = 1; ++ ++ alt[0] = event; ++ nalt = 1; ++ i = find_alternative(event); ++ if (i >= 0) { ++ for (j = 0; j < MAX_ALT; ++j) { ++ ae = event_alternatives[i][j]; ++ if (ae && ae != event) ++ alt[nalt++] = ae; ++ } ++ } else { ++ ae = find_alternative_bdecode(event); ++ if (ae > 0) ++ alt[nalt++] = ae; ++ } ++ return nalt; ++} ++ ++static int power5p_compute_mmcr(unsigned int event[], int n_ev, ++ unsigned int hwc[], u64 mmcr[]) ++{ ++ u64 mmcr1 = 0; ++ unsigned int pmc, unit, byte, psel; ++ unsigned int ttm; ++ int i, isbus, bit, grsel; ++ unsigned int pmc_inuse = 0; ++ unsigned char busbyte[4]; ++ unsigned char unituse[16]; ++ int ttmuse; ++ ++ if (n_ev > 4) ++ return -1; ++ ++ /* First pass to count resource use */ ++ memset(busbyte, 0, sizeof(busbyte)); ++ memset(unituse, 0, sizeof(unituse)); ++ for (i = 0; i < n_ev; ++i) { ++ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; ++ if (pmc) { ++ if (pmc > 4) ++ return -1; ++ if (pmc_inuse & (1 << (pmc - 1))) ++ return -1; ++ pmc_inuse |= 1 << (pmc - 1); ++ } ++ if (event[i] & PM_BUSEVENT_MSK) { ++ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; ++ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; ++ if (unit > PM_LASTUNIT) ++ return -1; ++ if (unit == PM_ISU0_ALT) ++ unit = PM_ISU0; ++ if (byte >= 4) { ++ if (unit != PM_LSU1) ++ return -1; ++ ++unit; ++ byte &= 3; ++ } ++ if (busbyte[byte] && busbyte[byte] != unit) ++ return -1; ++ busbyte[byte] = unit; ++ unituse[unit] = 1; ++ } ++ } ++ ++ /* ++ * Assign resources and set multiplexer selects. ++ * ++ * PM_ISU0 can go either on TTM0 or TTM1, but that's the only ++ * choice we have to deal with. ++ */ ++ if (unituse[PM_ISU0] & ++ (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { ++ unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ ++ unituse[PM_ISU0] = 0; ++ } ++ /* Set TTM[01]SEL fields. */ ++ ttmuse = 0; ++ for (i = PM_FPU; i <= PM_ISU1; ++i) { ++ if (!unituse[i]) ++ continue; ++ if (ttmuse++) ++ return -1; ++ mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; ++ } ++ ttmuse = 0; ++ for (; i <= PM_GRS; ++i) { ++ if (!unituse[i]) ++ continue; ++ if (ttmuse++) ++ return -1; ++ mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; ++ } ++ if (ttmuse > 1) ++ return -1; ++ ++ /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. 
*/ ++ for (byte = 0; byte < 4; ++byte) { ++ unit = busbyte[byte]; ++ if (!unit) ++ continue; ++ if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { ++ /* get ISU0 through TTM1 rather than TTM0 */ ++ unit = PM_ISU0_ALT; ++ } else if (unit == PM_LSU1 + 1) { ++ /* select lower word of LSU1 for this byte */ ++ mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); ++ } ++ ttm = unit >> 2; ++ mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); ++ } ++ ++ /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ ++ for (i = 0; i < n_ev; ++i) { ++ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; ++ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; ++ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; ++ psel = event[i] & PM_PMCSEL_MSK; ++ isbus = event[i] & PM_BUSEVENT_MSK; ++ if (!pmc) { ++ /* Bus event or any-PMC direct event */ ++ for (pmc = 0; pmc < 4; ++pmc) { ++ if (!(pmc_inuse & (1 << pmc))) ++ break; ++ } ++ if (pmc >= 4) ++ return -1; ++ pmc_inuse |= 1 << pmc; ++ } else { ++ /* Direct event */ ++ --pmc; ++ if (isbus && (byte & 2) && ++ (psel == 8 || psel == 0x10 || psel == 0x28)) ++ /* add events on higher-numbered bus */ ++ mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); ++ } ++ if (isbus && unit == PM_GRS) { ++ bit = psel & 7; ++ grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; ++ mmcr1 |= (u64)grsel << grsel_shift[bit]; ++ } ++ if ((psel & 0x58) == 0x40 && (byte & 1) != ((pmc >> 1) & 1)) ++ /* select alternate byte lane */ ++ psel |= 0x10; ++ if (pmc <= 3) ++ mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); ++ hwc[i] = pmc; ++ } ++ ++ /* Return MMCRx values */ ++ mmcr[0] = 0; ++ if (pmc_inuse & 1) ++ mmcr[0] = MMCR0_PMC1CE; ++ if (pmc_inuse & 0x3e) ++ mmcr[0] |= MMCR0_PMCjCE; ++ mmcr[1] = mmcr1; ++ mmcr[2] = 0; ++ return 0; ++} ++ ++static void power5p_disable_pmc(unsigned int pmc, u64 mmcr[]) ++{ ++ if (pmc <= 3) ++ mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); ++} ++ ++static int power5p_generic_events[] = { ++ [PERF_COUNT_CPU_CYCLES] = 0xf, ++ [PERF_COUNT_INSTRUCTIONS] = 0x100009, ++ [PERF_COUNT_CACHE_REFERENCES] = 0x1c10a8, /* LD_REF_L1 */ ++ [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ ++ [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ ++ [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ ++}; ++ ++struct power_pmu power5p_pmu = { ++ .n_counter = 4, ++ .max_alternatives = MAX_ALT, ++ .add_fields = 0x7000000000055ull, ++ .test_adder = 0x3000040000000ull, ++ .compute_mmcr = power5p_compute_mmcr, ++ .get_constraint = power5p_get_constraint, ++ .get_alternatives = power5p_get_alternatives, ++ .disable_pmc = power5p_disable_pmc, ++ .n_generic = ARRAY_SIZE(power5p_generic_events), ++ .generic_events = power5p_generic_events, ++}; +Index: linux-2.6-tip/arch/powerpc/kernel/power5-pmu.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/powerpc/kernel/power5-pmu.c +@@ -0,0 +1,475 @@ ++/* ++ * Performance counter support for POWER5 (not POWER5++) processors. ++ * ++ * Copyright 2009 Paul Mackerras, IBM Corporation. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. 
++ */ ++#include ++#include ++#include ++ ++/* ++ * Bits in event code for POWER5 (not POWER5++) ++ */ ++#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ ++#define PM_PMC_MSK 0xf ++#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) ++#define PM_UNIT_SH 16 /* TTMMUX number and setting - unit select */ ++#define PM_UNIT_MSK 0xf ++#define PM_BYTE_SH 12 /* Byte number of event bus to use */ ++#define PM_BYTE_MSK 7 ++#define PM_GRS_SH 8 /* Storage subsystem mux select */ ++#define PM_GRS_MSK 7 ++#define PM_BUSEVENT_MSK 0x80 /* Set if event uses event bus */ ++#define PM_PMCSEL_MSK 0x7f ++ ++/* Values in PM_UNIT field */ ++#define PM_FPU 0 ++#define PM_ISU0 1 ++#define PM_IFU 2 ++#define PM_ISU1 3 ++#define PM_IDU 4 ++#define PM_ISU0_ALT 6 ++#define PM_GRS 7 ++#define PM_LSU0 8 ++#define PM_LSU1 0xc ++#define PM_LASTUNIT 0xc ++ ++/* ++ * Bits in MMCR1 for POWER5 ++ */ ++#define MMCR1_TTM0SEL_SH 62 ++#define MMCR1_TTM1SEL_SH 60 ++#define MMCR1_TTM2SEL_SH 58 ++#define MMCR1_TTM3SEL_SH 56 ++#define MMCR1_TTMSEL_MSK 3 ++#define MMCR1_TD_CP_DBG0SEL_SH 54 ++#define MMCR1_TD_CP_DBG1SEL_SH 52 ++#define MMCR1_TD_CP_DBG2SEL_SH 50 ++#define MMCR1_TD_CP_DBG3SEL_SH 48 ++#define MMCR1_GRS_L2SEL_SH 46 ++#define MMCR1_GRS_L2SEL_MSK 3 ++#define MMCR1_GRS_L3SEL_SH 44 ++#define MMCR1_GRS_L3SEL_MSK 3 ++#define MMCR1_GRS_MCSEL_SH 41 ++#define MMCR1_GRS_MCSEL_MSK 7 ++#define MMCR1_GRS_FABSEL_SH 39 ++#define MMCR1_GRS_FABSEL_MSK 3 ++#define MMCR1_PMC1_ADDER_SEL_SH 35 ++#define MMCR1_PMC2_ADDER_SEL_SH 34 ++#define MMCR1_PMC3_ADDER_SEL_SH 33 ++#define MMCR1_PMC4_ADDER_SEL_SH 32 ++#define MMCR1_PMC1SEL_SH 25 ++#define MMCR1_PMC2SEL_SH 17 ++#define MMCR1_PMC3SEL_SH 9 ++#define MMCR1_PMC4SEL_SH 1 ++#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) ++#define MMCR1_PMCSEL_MSK 0x7f ++ ++/* ++ * Bits in MMCRA ++ */ ++ ++/* ++ * Layout of constraint bits: ++ * 6666555555555544444444443333333333222222222211111111110000000000 ++ * 3210987654321098765432109876543210987654321098765432109876543210 ++ * <><>[ ><><>< ><> [ >[ >[ >< >< >< >< ><><><><><><> ++ * T0T1 NC G0G1G2 G3 UC PS1PS2 B0 B1 B2 B3 P6P5P4P3P2P1 ++ * ++ * T0 - TTM0 constraint ++ * 54-55: TTM0SEL value (0=FPU, 2=IFU, 3=ISU1) 0xc0_0000_0000_0000 ++ * ++ * T1 - TTM1 constraint ++ * 52-53: TTM1SEL value (0=IDU, 3=GRS) 0x30_0000_0000_0000 ++ * ++ * NC - number of counters ++ * 51: NC error 0x0008_0000_0000_0000 ++ * 48-50: number of events needing PMC1-4 0x0007_0000_0000_0000 ++ * ++ * G0..G3 - GRS mux constraints ++ * 46-47: GRS_L2SEL value ++ * 44-45: GRS_L3SEL value ++ * 41-44: GRS_MCSEL value ++ * 39-40: GRS_FABSEL value ++ * Note that these match up with their bit positions in MMCR1 ++ * ++ * UC - unit constraint: can't have all three of FPU|IFU|ISU1, ISU0, IDU|GRS ++ * 37: UC3 error 0x20_0000_0000 ++ * 36: FPU|IFU|ISU1 events needed 0x10_0000_0000 ++ * 35: ISU0 events needed 0x08_0000_0000 ++ * 34: IDU|GRS events needed 0x04_0000_0000 ++ * ++ * PS1 ++ * 33: PS1 error 0x2_0000_0000 ++ * 31-32: count of events needing PMC1/2 0x1_8000_0000 ++ * ++ * PS2 ++ * 30: PS2 error 0x4000_0000 ++ * 28-29: count of events needing PMC3/4 0x3000_0000 ++ * ++ * B0 ++ * 24-27: Byte 0 event source 0x0f00_0000 ++ * Encoding as for the event code ++ * ++ * B1, B2, B3 ++ * 20-23, 16-19, 12-15: Byte 1, 2, 3 event sources ++ * ++ * P1..P6 ++ * 0-11: Count of events needing PMC1..PMC6 ++ */ ++ ++static const int grsel_shift[8] = { ++ MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, MMCR1_GRS_L2SEL_SH, ++ MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, MMCR1_GRS_L3SEL_SH, ++ 
MMCR1_GRS_MCSEL_SH, MMCR1_GRS_FABSEL_SH ++}; ++ ++/* Masks and values for using events from the various units */ ++static u64 unit_cons[PM_LASTUNIT+1][2] = { ++ [PM_FPU] = { 0xc0002000000000ull, 0x00001000000000ull }, ++ [PM_ISU0] = { 0x00002000000000ull, 0x00000800000000ull }, ++ [PM_ISU1] = { 0xc0002000000000ull, 0xc0001000000000ull }, ++ [PM_IFU] = { 0xc0002000000000ull, 0x80001000000000ull }, ++ [PM_IDU] = { 0x30002000000000ull, 0x00000400000000ull }, ++ [PM_GRS] = { 0x30002000000000ull, 0x30000400000000ull }, ++}; ++ ++static int power5_get_constraint(unsigned int event, u64 *maskp, u64 *valp) ++{ ++ int pmc, byte, unit, sh; ++ int bit, fmask; ++ u64 mask = 0, value = 0; ++ int grp = -1; ++ ++ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; ++ if (pmc) { ++ if (pmc > 6) ++ return -1; ++ sh = (pmc - 1) * 2; ++ mask |= 2 << sh; ++ value |= 1 << sh; ++ if (pmc <= 4) ++ grp = (pmc - 1) >> 1; ++ else if (event != 0x500009 && event != 0x600005) ++ return -1; ++ } ++ if (event & PM_BUSEVENT_MSK) { ++ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; ++ if (unit > PM_LASTUNIT) ++ return -1; ++ if (unit == PM_ISU0_ALT) ++ unit = PM_ISU0; ++ mask |= unit_cons[unit][0]; ++ value |= unit_cons[unit][1]; ++ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; ++ if (byte >= 4) { ++ if (unit != PM_LSU1) ++ return -1; ++ /* Map LSU1 low word (bytes 4-7) to unit LSU1+1 */ ++ ++unit; ++ byte &= 3; ++ } ++ if (unit == PM_GRS) { ++ bit = event & 7; ++ fmask = (bit == 6)? 7: 3; ++ sh = grsel_shift[bit]; ++ mask |= (u64)fmask << sh; ++ value |= (u64)((event >> PM_GRS_SH) & fmask) << sh; ++ } ++ /* ++ * Bus events on bytes 0 and 2 can be counted ++ * on PMC1/2; bytes 1 and 3 on PMC3/4. ++ */ ++ if (!pmc) ++ grp = byte & 1; ++ /* Set byte lane select field */ ++ mask |= 0xfULL << (24 - 4 * byte); ++ value |= (u64)unit << (24 - 4 * byte); ++ } ++ if (grp == 0) { ++ /* increment PMC1/2 field */ ++ mask |= 0x200000000ull; ++ value |= 0x080000000ull; ++ } else if (grp == 1) { ++ /* increment PMC3/4 field */ ++ mask |= 0x40000000ull; ++ value |= 0x10000000ull; ++ } ++ if (pmc < 5) { ++ /* need a counter from PMC1-4 set */ ++ mask |= 0x8000000000000ull; ++ value |= 0x1000000000000ull; ++ } ++ *maskp = mask; ++ *valp = value; ++ return 0; ++} ++ ++#define MAX_ALT 3 /* at most 3 alternatives for any event */ ++ ++static const unsigned int event_alternatives[][MAX_ALT] = { ++ { 0x120e4, 0x400002 }, /* PM_GRP_DISP_REJECT */ ++ { 0x410c7, 0x441084 }, /* PM_THRD_L2MISS_BOTH_CYC */ ++ { 0x100005, 0x600005 }, /* PM_RUN_CYC */ ++ { 0x100009, 0x200009, 0x500009 }, /* PM_INST_CMPL */ ++ { 0x300009, 0x400009 }, /* PM_INST_DISP */ ++}; ++ ++/* ++ * Scan the alternatives table for a match and return the ++ * index into the alternatives table if found, else -1. ++ */ ++static int find_alternative(unsigned int event) ++{ ++ int i, j; ++ ++ for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { ++ if (event < event_alternatives[i][0]) ++ break; ++ for (j = 0; j < MAX_ALT && event_alternatives[i][j]; ++j) ++ if (event == event_alternatives[i][j]) ++ return i; ++ } ++ return -1; ++} ++ ++static const unsigned char bytedecode_alternatives[4][4] = { ++ /* PMC 1 */ { 0x21, 0x23, 0x25, 0x27 }, ++ /* PMC 2 */ { 0x07, 0x17, 0x0e, 0x1e }, ++ /* PMC 3 */ { 0x20, 0x22, 0x24, 0x26 }, ++ /* PMC 4 */ { 0x07, 0x17, 0x0e, 0x1e } ++}; ++ ++/* ++ * Some direct events for decodes of event bus byte 3 have alternative ++ * PMCSEL values on other counters. This returns the alternative ++ * event code for those that do, or -1 otherwise. 
++ */ ++static int find_alternative_bdecode(unsigned int event) ++{ ++ int pmc, altpmc, pp, j; ++ ++ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; ++ if (pmc == 0 || pmc > 4) ++ return -1; ++ altpmc = 5 - pmc; /* 1 <-> 4, 2 <-> 3 */ ++ pp = event & PM_PMCSEL_MSK; ++ for (j = 0; j < 4; ++j) { ++ if (bytedecode_alternatives[pmc - 1][j] == pp) { ++ return (event & ~(PM_PMC_MSKS | PM_PMCSEL_MSK)) | ++ (altpmc << PM_PMC_SH) | ++ bytedecode_alternatives[altpmc - 1][j]; ++ } ++ } ++ return -1; ++} ++ ++static int power5_get_alternatives(unsigned int event, unsigned int alt[]) ++{ ++ int i, j, ae, nalt = 1; ++ ++ alt[0] = event; ++ nalt = 1; ++ i = find_alternative(event); ++ if (i >= 0) { ++ for (j = 0; j < MAX_ALT; ++j) { ++ ae = event_alternatives[i][j]; ++ if (ae && ae != event) ++ alt[nalt++] = ae; ++ } ++ } else { ++ ae = find_alternative_bdecode(event); ++ if (ae > 0) ++ alt[nalt++] = ae; ++ } ++ return nalt; ++} ++ ++static int power5_compute_mmcr(unsigned int event[], int n_ev, ++ unsigned int hwc[], u64 mmcr[]) ++{ ++ u64 mmcr1 = 0; ++ unsigned int pmc, unit, byte, psel; ++ unsigned int ttm, grp; ++ int i, isbus, bit, grsel; ++ unsigned int pmc_inuse = 0; ++ unsigned int pmc_grp_use[2]; ++ unsigned char busbyte[4]; ++ unsigned char unituse[16]; ++ int ttmuse; ++ ++ if (n_ev > 6) ++ return -1; ++ ++ /* First pass to count resource use */ ++ pmc_grp_use[0] = pmc_grp_use[1] = 0; ++ memset(busbyte, 0, sizeof(busbyte)); ++ memset(unituse, 0, sizeof(unituse)); ++ for (i = 0; i < n_ev; ++i) { ++ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; ++ if (pmc) { ++ if (pmc > 6) ++ return -1; ++ if (pmc_inuse & (1 << (pmc - 1))) ++ return -1; ++ pmc_inuse |= 1 << (pmc - 1); ++ /* count 1/2 vs 3/4 use */ ++ if (pmc <= 4) ++ ++pmc_grp_use[(pmc - 1) >> 1]; ++ } ++ if (event[i] & PM_BUSEVENT_MSK) { ++ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; ++ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; ++ if (unit > PM_LASTUNIT) ++ return -1; ++ if (unit == PM_ISU0_ALT) ++ unit = PM_ISU0; ++ if (byte >= 4) { ++ if (unit != PM_LSU1) ++ return -1; ++ ++unit; ++ byte &= 3; ++ } ++ if (!pmc) ++ ++pmc_grp_use[byte & 1]; ++ if (busbyte[byte] && busbyte[byte] != unit) ++ return -1; ++ busbyte[byte] = unit; ++ unituse[unit] = 1; ++ } ++ } ++ if (pmc_grp_use[0] > 2 || pmc_grp_use[1] > 2) ++ return -1; ++ ++ /* ++ * Assign resources and set multiplexer selects. ++ * ++ * PM_ISU0 can go either on TTM0 or TTM1, but that's the only ++ * choice we have to deal with. ++ */ ++ if (unituse[PM_ISU0] & ++ (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_ISU1])) { ++ unituse[PM_ISU0_ALT] = 1; /* move ISU to TTM1 */ ++ unituse[PM_ISU0] = 0; ++ } ++ /* Set TTM[01]SEL fields. */ ++ ttmuse = 0; ++ for (i = PM_FPU; i <= PM_ISU1; ++i) { ++ if (!unituse[i]) ++ continue; ++ if (ttmuse++) ++ return -1; ++ mmcr1 |= (u64)i << MMCR1_TTM0SEL_SH; ++ } ++ ttmuse = 0; ++ for (; i <= PM_GRS; ++i) { ++ if (!unituse[i]) ++ continue; ++ if (ttmuse++) ++ return -1; ++ mmcr1 |= (u64)(i & 3) << MMCR1_TTM1SEL_SH; ++ } ++ if (ttmuse > 1) ++ return -1; ++ ++ /* Set byte lane select fields, TTM[23]SEL and GRS_*SEL. 
*/ ++ for (byte = 0; byte < 4; ++byte) { ++ unit = busbyte[byte]; ++ if (!unit) ++ continue; ++ if (unit == PM_ISU0 && unituse[PM_ISU0_ALT]) { ++ /* get ISU0 through TTM1 rather than TTM0 */ ++ unit = PM_ISU0_ALT; ++ } else if (unit == PM_LSU1 + 1) { ++ /* select lower word of LSU1 for this byte */ ++ mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); ++ } ++ ttm = unit >> 2; ++ mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); ++ } ++ ++ /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ ++ for (i = 0; i < n_ev; ++i) { ++ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; ++ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; ++ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; ++ psel = event[i] & PM_PMCSEL_MSK; ++ isbus = event[i] & PM_BUSEVENT_MSK; ++ if (!pmc) { ++ /* Bus event or any-PMC direct event */ ++ for (pmc = 0; pmc < 4; ++pmc) { ++ if (pmc_inuse & (1 << pmc)) ++ continue; ++ grp = (pmc >> 1) & 1; ++ if (isbus) { ++ if (grp == (byte & 1)) ++ break; ++ } else if (pmc_grp_use[grp] < 2) { ++ ++pmc_grp_use[grp]; ++ break; ++ } ++ } ++ pmc_inuse |= 1 << pmc; ++ } else if (pmc <= 4) { ++ /* Direct event */ ++ --pmc; ++ if ((psel == 8 || psel == 0x10) && isbus && (byte & 2)) ++ /* add events on higher-numbered bus */ ++ mmcr1 |= 1ull << (MMCR1_PMC1_ADDER_SEL_SH - pmc); ++ } else { ++ /* Instructions or run cycles on PMC5/6 */ ++ --pmc; ++ } ++ if (isbus && unit == PM_GRS) { ++ bit = psel & 7; ++ grsel = (event[i] >> PM_GRS_SH) & PM_GRS_MSK; ++ mmcr1 |= (u64)grsel << grsel_shift[bit]; ++ } ++ if (pmc <= 3) ++ mmcr1 |= psel << MMCR1_PMCSEL_SH(pmc); ++ hwc[i] = pmc; ++ } ++ ++ /* Return MMCRx values */ ++ mmcr[0] = 0; ++ if (pmc_inuse & 1) ++ mmcr[0] = MMCR0_PMC1CE; ++ if (pmc_inuse & 0x3e) ++ mmcr[0] |= MMCR0_PMCjCE; ++ mmcr[1] = mmcr1; ++ mmcr[2] = 0; ++ return 0; ++} ++ ++static void power5_disable_pmc(unsigned int pmc, u64 mmcr[]) ++{ ++ if (pmc <= 3) ++ mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc)); ++} ++ ++static int power5_generic_events[] = { ++ [PERF_COUNT_CPU_CYCLES] = 0xf, ++ [PERF_COUNT_INSTRUCTIONS] = 0x100009, ++ [PERF_COUNT_CACHE_REFERENCES] = 0x4c1090, /* LD_REF_L1 */ ++ [PERF_COUNT_CACHE_MISSES] = 0x3c1088, /* LD_MISS_L1 */ ++ [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x230e4, /* BR_ISSUED */ ++ [PERF_COUNT_BRANCH_MISSES] = 0x230e5, /* BR_MPRED_CR */ ++}; ++ ++struct power_pmu power5_pmu = { ++ .n_counter = 6, ++ .max_alternatives = MAX_ALT, ++ .add_fields = 0x7000090000555ull, ++ .test_adder = 0x3000490000000ull, ++ .compute_mmcr = power5_compute_mmcr, ++ .get_constraint = power5_get_constraint, ++ .get_alternatives = power5_get_alternatives, ++ .disable_pmc = power5_disable_pmc, ++ .n_generic = ARRAY_SIZE(power5_generic_events), ++ .generic_events = power5_generic_events, ++}; +Index: linux-2.6-tip/arch/powerpc/kernel/power6-pmu.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/powerpc/kernel/power6-pmu.c +@@ -0,0 +1,283 @@ ++/* ++ * Performance counter support for POWER6 processors. ++ * ++ * Copyright 2008-2009 Paul Mackerras, IBM Corporation. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. 
++ */ ++#include ++#include ++#include ++ ++/* ++ * Bits in event code for POWER6 ++ */ ++#define PM_PMC_SH 20 /* PMC number (1-based) for direct events */ ++#define PM_PMC_MSK 0x7 ++#define PM_PMC_MSKS (PM_PMC_MSK << PM_PMC_SH) ++#define PM_UNIT_SH 16 /* Unit event comes (TTMxSEL encoding) */ ++#define PM_UNIT_MSK 0xf ++#define PM_UNIT_MSKS (PM_UNIT_MSK << PM_UNIT_SH) ++#define PM_LLAV 0x8000 /* Load lookahead match value */ ++#define PM_LLA 0x4000 /* Load lookahead match enable */ ++#define PM_BYTE_SH 12 /* Byte of event bus to use */ ++#define PM_BYTE_MSK 3 ++#define PM_SUBUNIT_SH 8 /* Subunit event comes from (NEST_SEL enc.) */ ++#define PM_SUBUNIT_MSK 7 ++#define PM_SUBUNIT_MSKS (PM_SUBUNIT_MSK << PM_SUBUNIT_SH) ++#define PM_PMCSEL_MSK 0xff /* PMCxSEL value */ ++#define PM_BUSEVENT_MSK 0xf3700 ++ ++/* ++ * Bits in MMCR1 for POWER6 ++ */ ++#define MMCR1_TTM0SEL_SH 60 ++#define MMCR1_TTMSEL_SH(n) (MMCR1_TTM0SEL_SH - (n) * 4) ++#define MMCR1_TTMSEL_MSK 0xf ++#define MMCR1_TTMSEL(m, n) (((m) >> MMCR1_TTMSEL_SH(n)) & MMCR1_TTMSEL_MSK) ++#define MMCR1_NESTSEL_SH 45 ++#define MMCR1_NESTSEL_MSK 0x7 ++#define MMCR1_NESTSEL(m) (((m) >> MMCR1_NESTSEL_SH) & MMCR1_NESTSEL_MSK) ++#define MMCR1_PMC1_LLA ((u64)1 << 44) ++#define MMCR1_PMC1_LLA_VALUE ((u64)1 << 39) ++#define MMCR1_PMC1_ADDR_SEL ((u64)1 << 35) ++#define MMCR1_PMC1SEL_SH 24 ++#define MMCR1_PMCSEL_SH(n) (MMCR1_PMC1SEL_SH - (n) * 8) ++#define MMCR1_PMCSEL_MSK 0xff ++ ++/* ++ * Assign PMC numbers and compute MMCR1 value for a set of events ++ */ ++static int p6_compute_mmcr(unsigned int event[], int n_ev, ++ unsigned int hwc[], u64 mmcr[]) ++{ ++ u64 mmcr1 = 0; ++ int i; ++ unsigned int pmc, ev, b, u, s, psel; ++ unsigned int ttmset = 0; ++ unsigned int pmc_inuse = 0; ++ ++ if (n_ev > 4) ++ return -1; ++ for (i = 0; i < n_ev; ++i) { ++ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; ++ if (pmc) { ++ if (pmc_inuse & (1 << (pmc - 1))) ++ return -1; /* collision! 
*/ ++ pmc_inuse |= 1 << (pmc - 1); ++ } ++ } ++ for (i = 0; i < n_ev; ++i) { ++ ev = event[i]; ++ pmc = (ev >> PM_PMC_SH) & PM_PMC_MSK; ++ if (pmc) { ++ --pmc; ++ } else { ++ /* can go on any PMC; find a free one */ ++ for (pmc = 0; pmc < 4; ++pmc) ++ if (!(pmc_inuse & (1 << pmc))) ++ break; ++ pmc_inuse |= 1 << pmc; ++ } ++ hwc[i] = pmc; ++ psel = ev & PM_PMCSEL_MSK; ++ if (ev & PM_BUSEVENT_MSK) { ++ /* this event uses the event bus */ ++ b = (ev >> PM_BYTE_SH) & PM_BYTE_MSK; ++ u = (ev >> PM_UNIT_SH) & PM_UNIT_MSK; ++ /* check for conflict on this byte of event bus */ ++ if ((ttmset & (1 << b)) && MMCR1_TTMSEL(mmcr1, b) != u) ++ return -1; ++ mmcr1 |= (u64)u << MMCR1_TTMSEL_SH(b); ++ ttmset |= 1 << b; ++ if (u == 5) { ++ /* Nest events have a further mux */ ++ s = (ev >> PM_SUBUNIT_SH) & PM_SUBUNIT_MSK; ++ if ((ttmset & 0x10) && ++ MMCR1_NESTSEL(mmcr1) != s) ++ return -1; ++ ttmset |= 0x10; ++ mmcr1 |= (u64)s << MMCR1_NESTSEL_SH; ++ } ++ if (0x30 <= psel && psel <= 0x3d) { ++ /* these need the PMCx_ADDR_SEL bits */ ++ if (b >= 2) ++ mmcr1 |= MMCR1_PMC1_ADDR_SEL >> pmc; ++ } ++ /* bus select values are different for PMC3/4 */ ++ if (pmc >= 2 && (psel & 0x90) == 0x80) ++ psel ^= 0x20; ++ } ++ if (ev & PM_LLA) { ++ mmcr1 |= MMCR1_PMC1_LLA >> pmc; ++ if (ev & PM_LLAV) ++ mmcr1 |= MMCR1_PMC1_LLA_VALUE >> pmc; ++ } ++ mmcr1 |= (u64)psel << MMCR1_PMCSEL_SH(pmc); ++ } ++ mmcr[0] = 0; ++ if (pmc_inuse & 1) ++ mmcr[0] = MMCR0_PMC1CE; ++ if (pmc_inuse & 0xe) ++ mmcr[0] |= MMCR0_PMCjCE; ++ mmcr[1] = mmcr1; ++ mmcr[2] = 0; ++ return 0; ++} ++ ++/* ++ * Layout of constraint bits: ++ * ++ * 0-1 add field: number of uses of PMC1 (max 1) ++ * 2-3, 4-5, 6-7: ditto for PMC2, 3, 4 ++ * 8-10 select field: nest (subunit) event selector ++ * 16-19 select field: unit on byte 0 of event bus ++ * 20-23, 24-27, 28-31 ditto for bytes 1, 2, 3 ++ */ ++static int p6_get_constraint(unsigned int event, u64 *maskp, u64 *valp) ++{ ++ int pmc, byte, sh; ++ unsigned int mask = 0, value = 0; ++ ++ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; ++ if (pmc) { ++ if (pmc > 4) ++ return -1; ++ sh = (pmc - 1) * 2; ++ mask |= 2 << sh; ++ value |= 1 << sh; ++ } ++ if (event & PM_BUSEVENT_MSK) { ++ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; ++ sh = byte * 4; ++ mask |= PM_UNIT_MSKS << sh; ++ value |= (event & PM_UNIT_MSKS) << sh; ++ if ((event & PM_UNIT_MSKS) == (5 << PM_UNIT_SH)) { ++ mask |= PM_SUBUNIT_MSKS; ++ value |= event & PM_SUBUNIT_MSKS; ++ } ++ } ++ *maskp = mask; ++ *valp = value; ++ return 0; ++} ++ ++#define MAX_ALT 4 /* at most 4 alternatives for any event */ ++ ++static const unsigned int event_alternatives[][MAX_ALT] = { ++ { 0x0130e8, 0x2000f6, 0x3000fc }, /* PM_PTEG_RELOAD_VALID */ ++ { 0x080080, 0x10000d, 0x30000c, 0x4000f0 }, /* PM_LD_MISS_L1 */ ++ { 0x080088, 0x200054, 0x3000f0 }, /* PM_ST_MISS_L1 */ ++ { 0x10000a, 0x2000f4 }, /* PM_RUN_CYC */ ++ { 0x10000b, 0x2000f5 }, /* PM_RUN_COUNT */ ++ { 0x10000e, 0x400010 }, /* PM_PURR */ ++ { 0x100010, 0x4000f8 }, /* PM_FLUSH */ ++ { 0x10001a, 0x200010 }, /* PM_MRK_INST_DISP */ ++ { 0x100026, 0x3000f8 }, /* PM_TB_BIT_TRANS */ ++ { 0x100054, 0x2000f0 }, /* PM_ST_FIN */ ++ { 0x100056, 0x2000fc }, /* PM_L1_ICACHE_MISS */ ++ { 0x1000f0, 0x40000a }, /* PM_INST_IMC_MATCH_CMPL */ ++ { 0x1000f8, 0x200008 }, /* PM_GCT_EMPTY_CYC */ ++ { 0x1000fc, 0x400006 }, /* PM_LSU_DERAT_MISS_CYC */ ++ { 0x20000e, 0x400007 }, /* PM_LSU_DERAT_MISS */ ++ { 0x200012, 0x300012 }, /* PM_INST_DISP */ ++ { 0x2000f2, 0x3000f2 }, /* PM_INST_DISP */ ++ { 0x2000f8, 0x300010 }, /* PM_EXT_INT */ ++ { 0x2000fe, 
0x300056 }, /* PM_DATA_FROM_L2MISS */ ++ { 0x2d0030, 0x30001a }, /* PM_MRK_FPU_FIN */ ++ { 0x30000a, 0x400018 }, /* PM_MRK_INST_FIN */ ++ { 0x3000f6, 0x40000e }, /* PM_L1_DCACHE_RELOAD_VALID */ ++ { 0x3000fe, 0x400056 }, /* PM_DATA_FROM_L3MISS */ ++}; ++ ++/* ++ * This could be made more efficient with a binary search on ++ * a presorted list, if necessary ++ */ ++static int find_alternatives_list(unsigned int event) ++{ ++ int i, j; ++ unsigned int alt; ++ ++ for (i = 0; i < ARRAY_SIZE(event_alternatives); ++i) { ++ if (event < event_alternatives[i][0]) ++ return -1; ++ for (j = 0; j < MAX_ALT; ++j) { ++ alt = event_alternatives[i][j]; ++ if (!alt || event < alt) ++ break; ++ if (event == alt) ++ return i; ++ } ++ } ++ return -1; ++} ++ ++static int p6_get_alternatives(unsigned int event, unsigned int alt[]) ++{ ++ int i, j; ++ unsigned int aevent, psel, pmc; ++ unsigned int nalt = 1; ++ ++ alt[0] = event; ++ ++ /* check the alternatives table */ ++ i = find_alternatives_list(event); ++ if (i >= 0) { ++ /* copy out alternatives from list */ ++ for (j = 0; j < MAX_ALT; ++j) { ++ aevent = event_alternatives[i][j]; ++ if (!aevent) ++ break; ++ if (aevent != event) ++ alt[nalt++] = aevent; ++ } ++ ++ } else { ++ /* Check for alternative ways of computing sum events */ ++ /* PMCSEL 0x32 counter N == PMCSEL 0x34 counter 5-N */ ++ psel = event & (PM_PMCSEL_MSK & ~1); /* ignore edge bit */ ++ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; ++ if (pmc && (psel == 0x32 || psel == 0x34)) ++ alt[nalt++] = ((event ^ 0x6) & ~PM_PMC_MSKS) | ++ ((5 - pmc) << PM_PMC_SH); ++ ++ /* PMCSEL 0x38 counter N == PMCSEL 0x3a counter N+/-2 */ ++ if (pmc && (psel == 0x38 || psel == 0x3a)) ++ alt[nalt++] = ((event ^ 0x2) & ~PM_PMC_MSKS) | ++ ((pmc > 2? pmc - 2: pmc + 2) << PM_PMC_SH); ++ } ++ ++ return nalt; ++} ++ ++static void p6_disable_pmc(unsigned int pmc, u64 mmcr[]) ++{ ++ /* Set PMCxSEL to 0 to disable PMCx */ ++ mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc)); ++} ++ ++static int power6_generic_events[] = { ++ [PERF_COUNT_CPU_CYCLES] = 0x1e, ++ [PERF_COUNT_INSTRUCTIONS] = 2, ++ [PERF_COUNT_CACHE_REFERENCES] = 0x280030, /* LD_REF_L1 */ ++ [PERF_COUNT_CACHE_MISSES] = 0x30000c, /* LD_MISS_L1 */ ++ [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x410a0, /* BR_PRED */ ++ [PERF_COUNT_BRANCH_MISSES] = 0x400052, /* BR_MPRED */ ++}; ++ ++struct power_pmu power6_pmu = { ++ .n_counter = 4, ++ .max_alternatives = MAX_ALT, ++ .add_fields = 0x55, ++ .test_adder = 0, ++ .compute_mmcr = p6_compute_mmcr, ++ .get_constraint = p6_get_constraint, ++ .get_alternatives = p6_get_alternatives, ++ .disable_pmc = p6_disable_pmc, ++ .n_generic = ARRAY_SIZE(power6_generic_events), ++ .generic_events = power6_generic_events, ++}; +Index: linux-2.6-tip/arch/powerpc/kernel/ppc970-pmu.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/powerpc/kernel/ppc970-pmu.c +@@ -0,0 +1,375 @@ ++/* ++ * Performance counter support for PPC970-family processors. ++ * ++ * Copyright 2008-2009 Paul Mackerras, IBM Corporation. ++ * ++ * This program is free software; you can redistribute it and/or ++ * modify it under the terms of the GNU General Public License ++ * as published by the Free Software Foundation; either version ++ * 2 of the License, or (at your option) any later version. 
++ */ ++#include ++#include ++#include ++ ++/* ++ * Bits in event code for PPC970 ++ */ ++#define PM_PMC_SH 12 /* PMC number (1-based) for direct events */ ++#define PM_PMC_MSK 0xf ++#define PM_UNIT_SH 8 /* TTMMUX number and setting - unit select */ ++#define PM_UNIT_MSK 0xf ++#define PM_BYTE_SH 4 /* Byte number of event bus to use */ ++#define PM_BYTE_MSK 3 ++#define PM_PMCSEL_MSK 0xf ++ ++/* Values in PM_UNIT field */ ++#define PM_NONE 0 ++#define PM_FPU 1 ++#define PM_VPU 2 ++#define PM_ISU 3 ++#define PM_IFU 4 ++#define PM_IDU 5 ++#define PM_STS 6 ++#define PM_LSU0 7 ++#define PM_LSU1U 8 ++#define PM_LSU1L 9 ++#define PM_LASTUNIT 9 ++ ++/* ++ * Bits in MMCR0 for PPC970 ++ */ ++#define MMCR0_PMC1SEL_SH 8 ++#define MMCR0_PMC2SEL_SH 1 ++#define MMCR_PMCSEL_MSK 0x1f ++ ++/* ++ * Bits in MMCR1 for PPC970 ++ */ ++#define MMCR1_TTM0SEL_SH 62 ++#define MMCR1_TTM1SEL_SH 59 ++#define MMCR1_TTM3SEL_SH 53 ++#define MMCR1_TTMSEL_MSK 3 ++#define MMCR1_TD_CP_DBG0SEL_SH 50 ++#define MMCR1_TD_CP_DBG1SEL_SH 48 ++#define MMCR1_TD_CP_DBG2SEL_SH 46 ++#define MMCR1_TD_CP_DBG3SEL_SH 44 ++#define MMCR1_PMC1_ADDER_SEL_SH 39 ++#define MMCR1_PMC2_ADDER_SEL_SH 38 ++#define MMCR1_PMC6_ADDER_SEL_SH 37 ++#define MMCR1_PMC5_ADDER_SEL_SH 36 ++#define MMCR1_PMC8_ADDER_SEL_SH 35 ++#define MMCR1_PMC7_ADDER_SEL_SH 34 ++#define MMCR1_PMC3_ADDER_SEL_SH 33 ++#define MMCR1_PMC4_ADDER_SEL_SH 32 ++#define MMCR1_PMC3SEL_SH 27 ++#define MMCR1_PMC4SEL_SH 22 ++#define MMCR1_PMC5SEL_SH 17 ++#define MMCR1_PMC6SEL_SH 12 ++#define MMCR1_PMC7SEL_SH 7 ++#define MMCR1_PMC8SEL_SH 2 ++ ++static short mmcr1_adder_bits[8] = { ++ MMCR1_PMC1_ADDER_SEL_SH, ++ MMCR1_PMC2_ADDER_SEL_SH, ++ MMCR1_PMC3_ADDER_SEL_SH, ++ MMCR1_PMC4_ADDER_SEL_SH, ++ MMCR1_PMC5_ADDER_SEL_SH, ++ MMCR1_PMC6_ADDER_SEL_SH, ++ MMCR1_PMC7_ADDER_SEL_SH, ++ MMCR1_PMC8_ADDER_SEL_SH ++}; ++ ++/* ++ * Bits in MMCRA ++ */ ++ ++/* ++ * Layout of constraint bits: ++ * 6666555555555544444444443333333333222222222211111111110000000000 ++ * 3210987654321098765432109876543210987654321098765432109876543210 ++ * <><>[ >[ >[ >< >< >< >< ><><><><><><><><> ++ * T0T1 UC PS1 PS2 B0 B1 B2 B3 P1P2P3P4P5P6P7P8 ++ * ++ * T0 - TTM0 constraint ++ * 46-47: TTM0SEL value (0=FPU, 2=IFU, 3=VPU) 0xC000_0000_0000 ++ * ++ * T1 - TTM1 constraint ++ * 44-45: TTM1SEL value (0=IDU, 3=STS) 0x3000_0000_0000 ++ * ++ * UC - unit constraint: can't have all three of FPU|IFU|VPU, ISU, IDU|STS ++ * 43: UC3 error 0x0800_0000_0000 ++ * 42: FPU|IFU|VPU events needed 0x0400_0000_0000 ++ * 41: ISU events needed 0x0200_0000_0000 ++ * 40: IDU|STS events needed 0x0100_0000_0000 ++ * ++ * PS1 ++ * 39: PS1 error 0x0080_0000_0000 ++ * 36-38: count of events needing PMC1/2/5/6 0x0070_0000_0000 ++ * ++ * PS2 ++ * 35: PS2 error 0x0008_0000_0000 ++ * 32-34: count of events needing PMC3/4/7/8 0x0007_0000_0000 ++ * ++ * B0 ++ * 28-31: Byte 0 event source 0xf000_0000 ++ * Encoding as for the event code ++ * ++ * B1, B2, B3 ++ * 24-27, 20-23, 16-19: Byte 1, 2, 3 event sources ++ * ++ * P1 ++ * 15: P1 error 0x8000 ++ * 14-15: Count of events needing PMC1 ++ * ++ * P2..P8 ++ * 0-13: Count of events needing PMC2..PMC8 ++ */ ++ ++/* Masks and values for using events from the various units */ ++static u64 unit_cons[PM_LASTUNIT+1][2] = { ++ [PM_FPU] = { 0xc80000000000ull, 0x040000000000ull }, ++ [PM_VPU] = { 0xc80000000000ull, 0xc40000000000ull }, ++ [PM_ISU] = { 0x080000000000ull, 0x020000000000ull }, ++ [PM_IFU] = { 0xc80000000000ull, 0x840000000000ull }, ++ [PM_IDU] = { 0x380000000000ull, 0x010000000000ull }, ++ [PM_STS] = { 
0x380000000000ull, 0x310000000000ull }, ++}; ++ ++static int p970_get_constraint(unsigned int event, u64 *maskp, u64 *valp) ++{ ++ int pmc, byte, unit, sh; ++ u64 mask = 0, value = 0; ++ int grp = -1; ++ ++ pmc = (event >> PM_PMC_SH) & PM_PMC_MSK; ++ if (pmc) { ++ if (pmc > 8) ++ return -1; ++ sh = (pmc - 1) * 2; ++ mask |= 2 << sh; ++ value |= 1 << sh; ++ grp = ((pmc - 1) >> 1) & 1; ++ } ++ unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK; ++ if (unit) { ++ if (unit > PM_LASTUNIT) ++ return -1; ++ mask |= unit_cons[unit][0]; ++ value |= unit_cons[unit][1]; ++ byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK; ++ /* ++ * Bus events on bytes 0 and 2 can be counted ++ * on PMC1/2/5/6; bytes 1 and 3 on PMC3/4/7/8. ++ */ ++ if (!pmc) ++ grp = byte & 1; ++ /* Set byte lane select field */ ++ mask |= 0xfULL << (28 - 4 * byte); ++ value |= (u64)unit << (28 - 4 * byte); ++ } ++ if (grp == 0) { ++ /* increment PMC1/2/5/6 field */ ++ mask |= 0x8000000000ull; ++ value |= 0x1000000000ull; ++ } else if (grp == 1) { ++ /* increment PMC3/4/7/8 field */ ++ mask |= 0x800000000ull; ++ value |= 0x100000000ull; ++ } ++ *maskp = mask; ++ *valp = value; ++ return 0; ++} ++ ++static int p970_get_alternatives(unsigned int event, unsigned int alt[]) ++{ ++ alt[0] = event; ++ ++ /* 2 alternatives for LSU empty */ ++ if (event == 0x2002 || event == 0x3002) { ++ alt[1] = event ^ 0x1000; ++ return 2; ++ } ++ ++ return 1; ++} ++ ++static int p970_compute_mmcr(unsigned int event[], int n_ev, ++ unsigned int hwc[], u64 mmcr[]) ++{ ++ u64 mmcr0 = 0, mmcr1 = 0, mmcra = 0; ++ unsigned int pmc, unit, byte, psel; ++ unsigned int ttm, grp; ++ unsigned int pmc_inuse = 0; ++ unsigned int pmc_grp_use[2]; ++ unsigned char busbyte[4]; ++ unsigned char unituse[16]; ++ unsigned char unitmap[] = { 0, 0<<3, 3<<3, 1<<3, 2<<3, 0|4, 3|4 }; ++ unsigned char ttmuse[2]; ++ unsigned char pmcsel[8]; ++ int i; ++ ++ if (n_ev > 8) ++ return -1; ++ ++ /* First pass to count resource use */ ++ pmc_grp_use[0] = pmc_grp_use[1] = 0; ++ memset(busbyte, 0, sizeof(busbyte)); ++ memset(unituse, 0, sizeof(unituse)); ++ for (i = 0; i < n_ev; ++i) { ++ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; ++ if (pmc) { ++ if (pmc_inuse & (1 << (pmc - 1))) ++ return -1; ++ pmc_inuse |= 1 << (pmc - 1); ++ /* count 1/2/5/6 vs 3/4/7/8 use */ ++ ++pmc_grp_use[((pmc - 1) >> 1) & 1]; ++ } ++ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; ++ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; ++ if (unit) { ++ if (unit > PM_LASTUNIT) ++ return -1; ++ if (!pmc) ++ ++pmc_grp_use[byte & 1]; ++ if (busbyte[byte] && busbyte[byte] != unit) ++ return -1; ++ busbyte[byte] = unit; ++ unituse[unit] = 1; ++ } ++ } ++ if (pmc_grp_use[0] > 4 || pmc_grp_use[1] > 4) ++ return -1; ++ ++ /* ++ * Assign resources and set multiplexer selects. ++ * ++ * PM_ISU can go either on TTM0 or TTM1, but that's the only ++ * choice we have to deal with. ++ */ ++ if (unituse[PM_ISU] & ++ (unituse[PM_FPU] | unituse[PM_IFU] | unituse[PM_VPU])) ++ unitmap[PM_ISU] = 2 | 4; /* move ISU to TTM1 */ ++ /* Set TTM[01]SEL fields. */ ++ ttmuse[0] = ttmuse[1] = 0; ++ for (i = PM_FPU; i <= PM_STS; ++i) { ++ if (!unituse[i]) ++ continue; ++ ttm = unitmap[i]; ++ ++ttmuse[(ttm >> 2) & 1]; ++ mmcr1 |= (u64)(ttm & ~4) << MMCR1_TTM1SEL_SH; ++ } ++ /* Check only one unit per TTMx */ ++ if (ttmuse[0] > 1 || ttmuse[1] > 1) ++ return -1; ++ ++ /* Set byte lane select fields and TTM3SEL. 
*/ ++ for (byte = 0; byte < 4; ++byte) { ++ unit = busbyte[byte]; ++ if (!unit) ++ continue; ++ if (unit <= PM_STS) ++ ttm = (unitmap[unit] >> 2) & 1; ++ else if (unit == PM_LSU0) ++ ttm = 2; ++ else { ++ ttm = 3; ++ if (unit == PM_LSU1L && byte >= 2) ++ mmcr1 |= 1ull << (MMCR1_TTM3SEL_SH + 3 - byte); ++ } ++ mmcr1 |= (u64)ttm << (MMCR1_TD_CP_DBG0SEL_SH - 2 * byte); ++ } ++ ++ /* Second pass: assign PMCs, set PMCxSEL and PMCx_ADDER_SEL fields */ ++ memset(pmcsel, 0x8, sizeof(pmcsel)); /* 8 means don't count */ ++ for (i = 0; i < n_ev; ++i) { ++ pmc = (event[i] >> PM_PMC_SH) & PM_PMC_MSK; ++ unit = (event[i] >> PM_UNIT_SH) & PM_UNIT_MSK; ++ byte = (event[i] >> PM_BYTE_SH) & PM_BYTE_MSK; ++ psel = event[i] & PM_PMCSEL_MSK; ++ if (!pmc) { ++ /* Bus event or any-PMC direct event */ ++ if (unit) ++ psel |= 0x10 | ((byte & 2) << 2); ++ else ++ psel |= 8; ++ for (pmc = 0; pmc < 8; ++pmc) { ++ if (pmc_inuse & (1 << pmc)) ++ continue; ++ grp = (pmc >> 1) & 1; ++ if (unit) { ++ if (grp == (byte & 1)) ++ break; ++ } else if (pmc_grp_use[grp] < 4) { ++ ++pmc_grp_use[grp]; ++ break; ++ } ++ } ++ pmc_inuse |= 1 << pmc; ++ } else { ++ /* Direct event */ ++ --pmc; ++ if (psel == 0 && (byte & 2)) ++ /* add events on higher-numbered bus */ ++ mmcr1 |= 1ull << mmcr1_adder_bits[pmc]; ++ } ++ pmcsel[pmc] = psel; ++ hwc[i] = pmc; ++ } ++ for (pmc = 0; pmc < 2; ++pmc) ++ mmcr0 |= pmcsel[pmc] << (MMCR0_PMC1SEL_SH - 7 * pmc); ++ for (; pmc < 8; ++pmc) ++ mmcr1 |= (u64)pmcsel[pmc] << (MMCR1_PMC3SEL_SH - 5 * (pmc - 2)); ++ if (pmc_inuse & 1) ++ mmcr0 |= MMCR0_PMC1CE; ++ if (pmc_inuse & 0xfe) ++ mmcr0 |= MMCR0_PMCjCE; ++ ++ mmcra |= 0x2000; /* mark only one IOP per PPC instruction */ ++ ++ /* Return MMCRx values */ ++ mmcr[0] = mmcr0; ++ mmcr[1] = mmcr1; ++ mmcr[2] = mmcra; ++ return 0; ++} ++ ++static void p970_disable_pmc(unsigned int pmc, u64 mmcr[]) ++{ ++ int shift, i; ++ ++ if (pmc <= 1) { ++ shift = MMCR0_PMC1SEL_SH - 7 * pmc; ++ i = 0; ++ } else { ++ shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2); ++ i = 1; ++ } ++ /* ++ * Setting the PMCxSEL field to 0x08 disables PMC x. ++ */ ++ mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift); ++} ++ ++static int ppc970_generic_events[] = { ++ [PERF_COUNT_CPU_CYCLES] = 7, ++ [PERF_COUNT_INSTRUCTIONS] = 1, ++ [PERF_COUNT_CACHE_REFERENCES] = 0x8810, /* PM_LD_REF_L1 */ ++ [PERF_COUNT_CACHE_MISSES] = 0x3810, /* PM_LD_MISS_L1 */ ++ [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x431, /* PM_BR_ISSUED */ ++ [PERF_COUNT_BRANCH_MISSES] = 0x327, /* PM_GRP_BR_MPRED */ ++}; ++ ++struct power_pmu ppc970_pmu = { ++ .n_counter = 8, ++ .max_alternatives = 2, ++ .add_fields = 0x001100005555ull, ++ .test_adder = 0x013300000000ull, ++ .compute_mmcr = p970_compute_mmcr, ++ .get_constraint = p970_get_constraint, ++ .get_alternatives = p970_get_alternatives, ++ .disable_pmc = p970_disable_pmc, ++ .n_generic = ARRAY_SIZE(ppc970_generic_events), ++ .generic_events = ppc970_generic_events, ++}; +Index: linux-2.6-tip/arch/powerpc/kernel/vmlinux.lds.S +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/kernel/vmlinux.lds.S ++++ linux-2.6-tip/arch/powerpc/kernel/vmlinux.lds.S +@@ -181,13 +181,7 @@ SECTIONS + __initramfs_end = .; + } + #endif +- . = ALIGN(PAGE_SIZE); +- .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { +- __per_cpu_start = .; +- *(.data.percpu) +- *(.data.percpu.shared_aligned) +- __per_cpu_end = .; +- } ++ PERCPU(PAGE_SIZE) + + . 
= ALIGN(8); + .machine.desc : AT(ADDR(.machine.desc) - LOAD_OFFSET) { +Index: linux-2.6-tip/arch/powerpc/mm/fault.c +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/mm/fault.c ++++ linux-2.6-tip/arch/powerpc/mm/fault.c +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -158,7 +159,7 @@ int __kprobes do_page_fault(struct pt_re + } + #endif /* !(CONFIG_4xx || CONFIG_BOOKE)*/ + +- if (in_atomic() || mm == NULL) { ++ if (in_atomic() || mm == NULL || current->pagefault_disabled) { + if (!user_mode(regs)) + return SIGSEGV; + /* in_atomic() in user mode is really bad, +@@ -170,6 +171,8 @@ int __kprobes do_page_fault(struct pt_re + die("Weird page fault", regs, SIGSEGV); + } + ++ perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs); ++ + /* When running in the kernel we expect faults to occur only to + * addresses in user space. All other faults represent errors in the + * kernel and should generate an OOPS. Unfortunately, in the case of an +@@ -321,6 +324,7 @@ good_area: + } + if (ret & VM_FAULT_MAJOR) { + current->maj_flt++; ++ perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs); + #ifdef CONFIG_PPC_SMLPAR + if (firmware_has_feature(FW_FEATURE_CMO)) { + preempt_disable(); +@@ -328,8 +332,10 @@ good_area: + preempt_enable(); + } + #endif +- } else ++ } else { + current->min_flt++; ++ perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs); ++ } + up_read(&mm->mmap_sem); + return 0; + +Index: linux-2.6-tip/arch/powerpc/platforms/Kconfig.cputype +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/platforms/Kconfig.cputype ++++ linux-2.6-tip/arch/powerpc/platforms/Kconfig.cputype +@@ -1,6 +1,7 @@ + config PPC64 + bool "64-bit kernel" + default n ++ select HAVE_PERF_COUNTERS + help + This option selects whether a 32-bit or a 64-bit kernel + will be built. 
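
The per-CPU PMU tables added above (power4, power5+, power5, power6, ppc970) all follow the same pattern: a raw event code packs the PMC number, unit (TTM mux) select, event-bus byte and PMCSEL value into bit fields, and get_constraint()/compute_mmcr() recover them with the PM_*_SH and PM_*_MSK shifts and masks defined at the top of each file. The standalone C sketch below decodes two of the POWER5 generic event codes that way; the constants are copied from power5-pmu.c in this patch, while decode() and main() are illustrative helpers and are not part of the patch.

/*
 * Illustrative, standalone sketch (not part of the patch): decode a raw
 * POWER5 event code into its bit fields, mirroring the PM_*_SH/PM_*_MSK
 * constants defined in power5-pmu.c above.
 */
#include <stdio.h>

#define PM_PMC_SH	20	/* PMC number (1-based) for direct events */
#define PM_PMC_MSK	0xf
#define PM_UNIT_SH	16	/* TTMMUX number and setting - unit select */
#define PM_UNIT_MSK	0xf
#define PM_BYTE_SH	12	/* Byte number of event bus to use */
#define PM_BYTE_MSK	7
#define PM_GRS_SH	8	/* Storage subsystem mux select */
#define PM_GRS_MSK	7
#define PM_BUSEVENT_MSK	0x80	/* Set if event uses event bus */
#define PM_PMCSEL_MSK	0x7f

static void decode(unsigned int event)
{
	unsigned int pmc  = (event >> PM_PMC_SH) & PM_PMC_MSK;
	unsigned int unit = (event >> PM_UNIT_SH) & PM_UNIT_MSK;
	unsigned int byte = (event >> PM_BYTE_SH) & PM_BYTE_MSK;
	unsigned int grs  = (event >> PM_GRS_SH) & PM_GRS_MSK;
	unsigned int psel = event & PM_PMCSEL_MSK;

	printf("event 0x%x: pmc=%u unit=0x%x byte=%u grsel=%u psel=0x%x bus=%s\n",
	       event, pmc, unit, byte, grs, psel,
	       (event & PM_BUSEVENT_MSK) ? "yes" : "no");
}

int main(void)
{
	decode(0x100009);	/* PM_INST_CMPL: direct event on PMC1, PMCSEL 0x09 */
	decode(0x3c1088);	/* LD_MISS_L1: PMC3, unit 0xc (LSU1), byte 1, bus event */
	return 0;
}
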
+Index: linux-2.6-tip/arch/powerpc/platforms/cell/interrupt.c +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/platforms/cell/interrupt.c ++++ linux-2.6-tip/arch/powerpc/platforms/cell/interrupt.c +@@ -237,8 +237,6 @@ extern int noirqdebug; + + static void handle_iic_irq(unsigned int irq, struct irq_desc *desc) + { +- const unsigned int cpu = smp_processor_id(); +- + spin_lock(&desc->lock); + + desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); +@@ -254,7 +252,7 @@ static void handle_iic_irq(unsigned int + goto out_eoi; + } + +- kstat_cpu(cpu).irqs[irq]++; ++ kstat_incr_irqs_this_cpu(irq, desc); + + /* Mark the IRQ currently in progress.*/ + desc->status |= IRQ_INPROGRESS; +Index: linux-2.6-tip/arch/powerpc/platforms/cell/spufs/sched.c +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/platforms/cell/spufs/sched.c ++++ linux-2.6-tip/arch/powerpc/platforms/cell/spufs/sched.c +@@ -508,7 +508,7 @@ static void __spu_add_to_rq(struct spu_c + list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]); + set_bit(ctx->prio, spu_prio->bitmap); + if (!spu_prio->nr_waiting++) +- __mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); ++ mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK); + } + } + +Index: linux-2.6-tip/arch/powerpc/platforms/pseries/xics.c +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/platforms/pseries/xics.c ++++ linux-2.6-tip/arch/powerpc/platforms/pseries/xics.c +@@ -153,9 +153,10 @@ static int get_irq_server(unsigned int v + { + int server; + /* For the moment only implement delivery to all cpus or one cpu */ +- cpumask_t cpumask = irq_desc[virq].affinity; ++ cpumask_t cpumask; + cpumask_t tmp = CPU_MASK_NONE; + ++ cpumask_copy(&cpumask, irq_desc[virq].affinity); + if (!distribute_irqs) + return default_server; + +@@ -869,7 +870,7 @@ void xics_migrate_irqs_away(void) + virq, cpu); + + /* Reset affinity to all cpus */ +- irq_desc[virq].affinity = CPU_MASK_ALL; ++ cpumask_setall(irq_desc[virq].affinity); + desc->chip->set_affinity(virq, cpu_all_mask); + unlock: + spin_unlock_irqrestore(&desc->lock, flags); +Index: linux-2.6-tip/arch/powerpc/sysdev/mpic.c +=================================================================== +--- linux-2.6-tip.orig/arch/powerpc/sysdev/mpic.c ++++ linux-2.6-tip/arch/powerpc/sysdev/mpic.c +@@ -46,7 +46,7 @@ + + static struct mpic *mpics; + static struct mpic *mpic_primary; +-static DEFINE_SPINLOCK(mpic_lock); ++static DEFINE_RAW_SPINLOCK(mpic_lock); + + #ifdef CONFIG_PPC32 /* XXX for now */ + #ifdef CONFIG_IRQ_ALL_CPUS +@@ -566,9 +566,10 @@ static void __init mpic_scan_ht_pics(str + #ifdef CONFIG_SMP + static int irq_choose_cpu(unsigned int virt_irq) + { +- cpumask_t mask = irq_desc[virt_irq].affinity; ++ cpumask_t mask; + int cpuid; + ++ cpumask_copy(&mask, irq_desc[virt_irq].affinity); + if (cpus_equal(mask, CPU_MASK_ALL)) { + static int irq_rover; + static DEFINE_SPINLOCK(irq_rover_lock); +Index: linux-2.6-tip/arch/s390/include/asm/smp.h +=================================================================== +--- linux-2.6-tip.orig/arch/s390/include/asm/smp.h ++++ linux-2.6-tip/arch/s390/include/asm/smp.h +@@ -97,12 +97,6 @@ extern void arch_send_call_function_ipi( + #endif + + #ifndef CONFIG_SMP +-static inline void smp_send_stop(void) +-{ +- /* Disable all interrupts/machine checks */ +- __load_psw_mask(psw_kernel_bits & ~PSW_MASK_MCHECK); +-} +- + #define hard_smp_processor_id() 0 + #define 
smp_cpu_not_running(cpu) 1 + #endif +Index: linux-2.6-tip/arch/sh/kernel/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/sh/kernel/irq.c ++++ linux-2.6-tip/arch/sh/kernel/irq.c +@@ -51,7 +51,7 @@ int show_interrupts(struct seq_file *p, + goto unlock; + seq_printf(p, "%3d: ",i); + for_each_online_cpu(j) +- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); ++ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + seq_printf(p, " %14s", irq_desc[i].chip->name); + seq_printf(p, "-%-8s", irq_desc[i].name); + seq_printf(p, " %s", action->name); +Index: linux-2.6-tip/arch/sparc/include/asm/mmzone.h +=================================================================== +--- linux-2.6-tip.orig/arch/sparc/include/asm/mmzone.h ++++ linux-2.6-tip/arch/sparc/include/asm/mmzone.h +@@ -3,6 +3,8 @@ + + #ifdef CONFIG_NEED_MULTIPLE_NODES + ++#include ++ + extern struct pglist_data *node_data[]; + + #define NODE_DATA(nid) (node_data[nid]) +Index: linux-2.6-tip/arch/sparc/kernel/irq_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/sparc/kernel/irq_64.c ++++ linux-2.6-tip/arch/sparc/kernel/irq_64.c +@@ -185,7 +185,7 @@ int show_interrupts(struct seq_file *p, + seq_printf(p, "%10u ", kstat_irqs(i)); + #else + for_each_online_cpu(j) +- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); ++ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + #endif + seq_printf(p, " %9s", irq_desc[i].chip->typename); + seq_printf(p, " %s", action->name); +@@ -252,9 +252,10 @@ struct irq_handler_data { + #ifdef CONFIG_SMP + static int irq_choose_cpu(unsigned int virt_irq) + { +- cpumask_t mask = irq_desc[virt_irq].affinity; ++ cpumask_t mask; + int cpuid; + ++ cpumask_copy(&mask, irq_desc[virt_irq].affinity); + if (cpus_equal(mask, CPU_MASK_ALL)) { + static int irq_rover; + static DEFINE_SPINLOCK(irq_rover_lock); +@@ -805,7 +806,7 @@ void fixup_irqs(void) + !(irq_desc[irq].status & IRQ_PER_CPU)) { + if (irq_desc[irq].chip->set_affinity) + irq_desc[irq].chip->set_affinity(irq, +- &irq_desc[irq].affinity); ++ irq_desc[irq].affinity); + } + spin_unlock_irqrestore(&irq_desc[irq].lock, flags); + } +Index: linux-2.6-tip/arch/sparc/kernel/time_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/sparc/kernel/time_64.c ++++ linux-2.6-tip/arch/sparc/kernel/time_64.c +@@ -36,10 +36,10 @@ + #include + #include + #include ++#include + + #include + #include +-#include + #include + #include + #include +@@ -729,7 +729,7 @@ void timer_interrupt(int irq, struct pt_ + + irq_enter(); + +- kstat_this_cpu.irqs[0]++; ++ kstat_incr_irqs_this_cpu(0, irq_to_desc(0)); + + if (unlikely(!evt->event_handler)) { + printk(KERN_WARNING +Index: linux-2.6-tip/arch/um/include/asm/ftrace.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/um/include/asm/ftrace.h +@@ -0,0 +1 @@ ++/* empty */ +Index: linux-2.6-tip/arch/um/kernel/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/um/kernel/irq.c ++++ linux-2.6-tip/arch/um/kernel/irq.c +@@ -42,7 +42,7 @@ int show_interrupts(struct seq_file *p, + seq_printf(p, "%10u ", kstat_irqs(i)); + #else + for_each_online_cpu(j) +- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); ++ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + #endif + seq_printf(p, " %14s", irq_desc[i].chip->typename); + seq_printf(p, " %s", action->name); +Index: linux-2.6-tip/arch/x86/Kconfig 
+=================================================================== +--- linux-2.6-tip.orig/arch/x86/Kconfig ++++ linux-2.6-tip/arch/x86/Kconfig +@@ -5,7 +5,7 @@ mainmenu "Linux Kernel Configuration for + config 64BIT + bool "64-bit kernel" if ARCH = "x86" + default ARCH = "x86_64" +- help ++ ---help--- + Say yes to build a 64-bit kernel - formerly known as x86_64 + Say no to build a 32-bit kernel - formerly known as i386 + +@@ -34,12 +34,19 @@ config X86 + select HAVE_FUNCTION_TRACER + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACE_MCOUNT_TEST +- select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) +- select HAVE_ARCH_KGDB if !X86_VOYAGER ++ select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE ++ select HAVE_FTRACE_SYSCALLS ++ select HAVE_KVM ++ select HAVE_ARCH_KGDB + select HAVE_ARCH_TRACEHOOK + select HAVE_GENERIC_DMA_COHERENT if X86_32 + select HAVE_EFFICIENT_UNALIGNED_ACCESS + select USER_STACKTRACE_SUPPORT ++ select HAVE_KERNEL_GZIP ++ select HAVE_KERNEL_BZIP2 ++ select HAVE_KERNEL_LZMA ++ select HAVE_ARCH_KMEMCHECK ++ select HAVE_DMA_API_DEBUG + + config ARCH_DEFCONFIG + string +@@ -108,10 +115,18 @@ config ARCH_MAY_HAVE_PC_FDC + def_bool y + + config RWSEM_GENERIC_SPINLOCK +- def_bool !X86_XADD ++ bool ++ depends on !X86_XADD || PREEMPT_RT ++ default y ++ ++config ASM_SEMAPHORES ++ bool ++ default y + + config RWSEM_XCHGADD_ALGORITHM +- def_bool X86_XADD ++ bool ++ depends on X86_XADD && !RWSEM_GENERIC_SPINLOCK && !PREEMPT_RT ++ default y + + config ARCH_HAS_CPU_IDLE_WAIT + def_bool y +@@ -133,18 +148,19 @@ config ARCH_HAS_CACHE_LINE_SIZE + def_bool y + + config HAVE_SETUP_PER_CPU_AREA +- def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER) ++ def_bool y ++ ++config HAVE_DYNAMIC_PER_CPU_AREA ++ def_bool y + + config HAVE_CPUMASK_OF_CPU_MAP + def_bool X86_64_SMP + + config ARCH_HIBERNATION_POSSIBLE + def_bool y +- depends on !SMP || !X86_VOYAGER + + config ARCH_SUSPEND_POSSIBLE + def_bool y +- depends on !X86_VOYAGER + + config ZONE_DMA32 + bool +@@ -165,6 +181,9 @@ config GENERIC_HARDIRQS + bool + default y + ++config GENERIC_HARDIRQS_NO__DO_IRQ ++ def_bool y ++ + config GENERIC_IRQ_PROBE + bool + default y +@@ -174,11 +193,6 @@ config GENERIC_PENDING_IRQ + depends on GENERIC_HARDIRQS && SMP + default y + +-config X86_SMP +- bool +- depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64) +- default y +- + config USE_GENERIC_SMP_HELPERS + def_bool y + depends on SMP +@@ -194,19 +208,17 @@ config X86_64_SMP + config X86_HT + bool + depends on SMP +- depends on (X86_32 && !X86_VOYAGER) || X86_64 +- default y +- +-config X86_BIOS_REBOOT +- bool +- depends on !X86_VOYAGER + default y + + config X86_TRAMPOLINE + bool +- depends on X86_SMP || (X86_VOYAGER && SMP) || (64BIT && ACPI_SLEEP) ++ depends on SMP || (64BIT && ACPI_SLEEP) + default y + ++config X86_32_LAZY_GS ++ def_bool y ++ depends on X86_32 && !CC_STACKPROTECTOR ++ + config KTIME_SCALAR + def_bool X86_32 + source "init/Kconfig" +@@ -244,14 +256,24 @@ config SMP + + If you don't know what to do here, say N. + +-config X86_HAS_BOOT_CPU_ID +- def_bool y +- depends on X86_VOYAGER ++config X86_X2APIC ++ bool "Support x2apic" ++ depends on X86_LOCAL_APIC && X86_64 ++ ---help--- ++ This enables x2apic support on CPUs that have this feature. ++ ++ This allows 32-bit apic IDs (so it can support very large systems), ++ and accesses the local apic via MSRs not via mmio. ++ ++ ( On certain CPU models you may need to enable INTR_REMAP too, ++ to get functional x2apic mode. 
) ++ ++ If you don't know what to do here, say N. + + config SPARSE_IRQ + bool "Support sparse irq numbering" + depends on PCI_MSI || HT_IRQ +- help ++ ---help--- + This enables support for sparse irqs. This is useful for distro + kernels that want to define a high CONFIG_NR_CPUS value but still + want to have low kernel memory footprint on smaller machines. +@@ -265,114 +287,140 @@ config NUMA_MIGRATE_IRQ_DESC + bool "Move irq desc when changing irq smp_affinity" + depends on SPARSE_IRQ && NUMA + default n +- help ++ ---help--- + This enables moving irq_desc to cpu/node that irq will use handled. + + If you don't know what to do here, say N. + +-config X86_FIND_SMP_CONFIG +- def_bool y +- depends on X86_MPPARSE || X86_VOYAGER +- + config X86_MPPARSE + bool "Enable MPS table" if ACPI + default y + depends on X86_LOCAL_APIC +- help ++ ---help--- + For old smp systems that do not have proper acpi support. Newer systems + (esp with 64bit cpus) with acpi support, MADT and DSDT will override it + +-choice +- prompt "Subarchitecture Type" +- default X86_PC ++config X86_BIGSMP ++ bool "Support for big SMP systems with more than 8 CPUs" ++ depends on X86_32 && SMP ++ ---help--- ++ This option is needed for the systems that have more than 8 CPUs + +-config X86_PC +- bool "PC-compatible" +- help +- Choose this option if your computer is a standard PC or compatible. ++if X86_32 ++config X86_EXTENDED_PLATFORM ++ bool "Support for extended (non-PC) x86 platforms" ++ default y ++ ---help--- ++ If you disable this option then the kernel will only support ++ standard PC platforms. (which covers the vast majority of ++ systems out there.) ++ ++ If you enable this option then you'll be able to select support ++ for the following (non-PC) 32 bit x86 platforms: ++ AMD Elan ++ NUMAQ (IBM/Sequent) ++ RDC R-321x SoC ++ SGI 320/540 (Visual Workstation) ++ Summit/EXA (IBM x440) ++ Unisys ES7000 IA32 series ++ ++ If you have one of these systems, or if you want to build a ++ generic distribution kernel, say Y here - otherwise say N. ++endif ++ ++if X86_64 ++config X86_EXTENDED_PLATFORM ++ bool "Support for extended (non-PC) x86 platforms" ++ default y ++ ---help--- ++ If you disable this option then the kernel will only support ++ standard PC platforms. (which covers the vast majority of ++ systems out there.) ++ ++ If you enable this option then you'll be able to select support ++ for the following (non-PC) 64 bit x86 platforms: ++ ScaleMP vSMP ++ SGI Ultraviolet ++ ++ If you have one of these systems, or if you want to build a ++ generic distribution kernel, say Y here - otherwise say N. ++endif ++# This is an alphabetically sorted list of 64 bit extended platforms ++# Please maintain the alphabetic order if and when there are additions ++ ++config X86_VSMP ++ bool "ScaleMP vSMP" ++ select PARAVIRT ++ depends on X86_64 && PCI ++ depends on X86_EXTENDED_PLATFORM ++ ---help--- ++ Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is ++ supposed to run on these EM64T-based machines. Only choose this option ++ if you have one of these machines. ++ ++config X86_UV ++ bool "SGI Ultraviolet" ++ depends on X86_64 ++ depends on X86_EXTENDED_PLATFORM ++ select X86_X2APIC ++ ---help--- ++ This option is needed in order to support SGI Ultraviolet systems. ++ If you don't have one of these, you should say N here. 
++ ++# Following is an alphabetically sorted list of 32 bit extended platforms ++# Please maintain the alphabetic order if and when there are additions + + config X86_ELAN + bool "AMD Elan" + depends on X86_32 +- help ++ depends on X86_EXTENDED_PLATFORM ++ ---help--- + Select this for an AMD Elan processor. + + Do not use this option for K6/Athlon/Opteron processors! + + If unsure, choose "PC-compatible" instead. + +-config X86_VOYAGER +- bool "Voyager (NCR)" +- depends on X86_32 && (SMP || BROKEN) && !PCI +- help +- Voyager is an MCA-based 32-way capable SMP architecture proprietary +- to NCR Corp. Machine classes 345x/35xx/4100/51xx are Voyager-based. +- +- *** WARNING *** +- +- If you do not specifically know you have a Voyager based machine, +- say N here, otherwise the kernel you build will not be bootable. +- +-config X86_GENERICARCH +- bool "Generic architecture" ++config X86_RDC321X ++ bool "RDC R-321x SoC" + depends on X86_32 +- help +- This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default ++ depends on X86_EXTENDED_PLATFORM ++ select M486 ++ select X86_REBOOTFIXUPS ++ ---help--- ++ This option is needed for RDC R-321x system-on-chip, also known ++ as R-8610-(G). ++ If you don't have one of these chips, you should say N here. ++ ++config X86_32_NON_STANDARD ++ bool "Support non-standard 32-bit SMP architectures" ++ depends on X86_32 && SMP ++ depends on X86_EXTENDED_PLATFORM ++ ---help--- ++ This option compiles in the NUMAQ, Summit, bigsmp, ES7000, default + subarchitectures. It is intended for a generic binary kernel. + if you select them all, kernel will probe it one by one. and will + fallback to default. + +-if X86_GENERICARCH ++# Alphabetically sorted list of Non standard 32 bit platforms + + config X86_NUMAQ + bool "NUMAQ (IBM/Sequent)" +- depends on SMP && X86_32 && PCI && X86_MPPARSE ++ depends on X86_32_NON_STANDARD + select NUMA +- help ++ select X86_MPPARSE ++ ---help--- + This option is used for getting Linux to run on a NUMAQ (IBM/Sequent) + NUMA multiquad box. This changes the way that processors are + bootstrapped, and uses Clustered Logical APIC addressing mode instead + of Flat Logical. You will need a new lynxer.elf file to flash your + firmware with - send email to . + +-config X86_SUMMIT +- bool "Summit/EXA (IBM x440)" +- depends on X86_32 && SMP +- help +- This option is needed for IBM systems that use the Summit/EXA chipset. +- In particular, it is needed for the x440. +- +-config X86_ES7000 +- bool "Support for Unisys ES7000 IA32 series" +- depends on X86_32 && SMP +- help +- Support for Unisys ES7000 systems. Say 'Y' here if this kernel is +- supposed to run on an IA32-based Unisys ES7000 system. +- +-config X86_BIGSMP +- bool "Support for big SMP systems with more than 8 CPUs" +- depends on X86_32 && SMP +- help +- This option is needed for the systems that have more than 8 CPUs +- and if the system is not of any sub-arch type above. +- +-endif +- +-config X86_VSMP +- bool "Support for ScaleMP vSMP" +- select PARAVIRT +- depends on X86_64 && PCI +- help +- Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is +- supposed to run on these EM64T-based machines. Only choose this option +- if you have one of these machines. 
+- +-endchoice +- + config X86_VISWS + bool "SGI 320/540 (Visual Workstation)" +- depends on X86_32 && PCI && !X86_VOYAGER && X86_MPPARSE && PCI_GODIRECT +- help ++ depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT ++ depends on X86_32_NON_STANDARD ++ ---help--- + The SGI Visual Workstation series is an IA32-based workstation + based on SGI systems chips with some legacy PC hardware attached. + +@@ -381,21 +429,25 @@ config X86_VISWS + A kernel compiled for the Visual Workstation will run on general + PCs as well. See for details. + +-config X86_RDC321X +- bool "RDC R-321x SoC" +- depends on X86_32 +- select M486 +- select X86_REBOOTFIXUPS +- help +- This option is needed for RDC R-321x system-on-chip, also known +- as R-8610-(G). +- If you don't have one of these chips, you should say N here. ++config X86_SUMMIT ++ bool "Summit/EXA (IBM x440)" ++ depends on X86_32_NON_STANDARD ++ ---help--- ++ This option is needed for IBM systems that use the Summit/EXA chipset. ++ In particular, it is needed for the x440. ++ ++config X86_ES7000 ++ bool "Unisys ES7000 IA32 series" ++ depends on X86_32_NON_STANDARD && X86_BIGSMP ++ ---help--- ++ Support for Unisys ES7000 systems. Say 'Y' here if this kernel is ++ supposed to run on an IA32-based Unisys ES7000 system. + + config SCHED_OMIT_FRAME_POINTER + def_bool y + prompt "Single-depth WCHAN output" + depends on X86 +- help ++ ---help--- + Calculate simpler /proc//wchan values. If this option + is disabled then wchan values will recurse back to the + caller function. This provides more accurate wchan values, +@@ -405,7 +457,7 @@ config SCHED_OMIT_FRAME_POINTER + + menuconfig PARAVIRT_GUEST + bool "Paravirtualized guest support" +- help ++ ---help--- + Say Y here to get to see options related to running Linux under + various hypervisors. This option alone does not add any kernel code. + +@@ -419,8 +471,7 @@ config VMI + bool "VMI Guest support" + select PARAVIRT + depends on X86_32 +- depends on !X86_VOYAGER +- help ++ ---help--- + VMI provides a paravirtualized interface to the VMware ESX server + (it could be used by other hypervisors in theory too, but is not + at the moment), by linking the kernel to a GPL-ed ROM module +@@ -430,8 +481,7 @@ config KVM_CLOCK + bool "KVM paravirtualized clock" + select PARAVIRT + select PARAVIRT_CLOCK +- depends on !X86_VOYAGER +- help ++ ---help--- + Turning on this option will allow you to run a paravirtualized clock + when running over the KVM hypervisor. Instead of relying on a PIT + (or probably other) emulation by the underlying device model, the host +@@ -441,17 +491,15 @@ config KVM_CLOCK + config KVM_GUEST + bool "KVM Guest support" + select PARAVIRT +- depends on !X86_VOYAGER +- help +- This option enables various optimizations for running under the KVM +- hypervisor. ++ ---help--- ++ This option enables various optimizations for running under the KVM ++ hypervisor. + + source "arch/x86/lguest/Kconfig" + + config PARAVIRT + bool "Enable paravirtualization code" +- depends on !X86_VOYAGER +- help ++ ---help--- + This changes the kernel so it can modify itself when it is run + under a hypervisor, potentially improving performance significantly + over full virtualization. However, when run without a hypervisor +@@ -464,51 +512,51 @@ config PARAVIRT_CLOCK + endif + + config PARAVIRT_DEBUG +- bool "paravirt-ops debugging" +- depends on PARAVIRT && DEBUG_KERNEL +- help +- Enable to debug paravirt_ops internals. Specifically, BUG if +- a paravirt_op is missing when it is called. 
++ bool "paravirt-ops debugging" ++ depends on PARAVIRT && DEBUG_KERNEL ++ ---help--- ++ Enable to debug paravirt_ops internals. Specifically, BUG if ++ a paravirt_op is missing when it is called. + + config MEMTEST + bool "Memtest" +- help ++ ---help--- + This option adds a kernel parameter 'memtest', which allows memtest + to be set. +- memtest=0, mean disabled; -- default +- memtest=1, mean do 1 test pattern; +- ... +- memtest=4, mean do 4 test patterns. ++ memtest=0, mean disabled; -- default ++ memtest=1, mean do 1 test pattern; ++ ... ++ memtest=4, mean do 4 test patterns. + If you are unsure how to answer this question, answer N. + + config X86_SUMMIT_NUMA + def_bool y +- depends on X86_32 && NUMA && X86_GENERICARCH ++ depends on X86_32 && NUMA && X86_32_NON_STANDARD + + config X86_CYCLONE_TIMER + def_bool y +- depends on X86_GENERICARCH ++ depends on X86_32_NON_STANDARD + + source "arch/x86/Kconfig.cpu" + + config HPET_TIMER + def_bool X86_64 + prompt "HPET Timer Support" if X86_32 +- help +- Use the IA-PC HPET (High Precision Event Timer) to manage +- time in preference to the PIT and RTC, if a HPET is +- present. +- HPET is the next generation timer replacing legacy 8254s. +- The HPET provides a stable time base on SMP +- systems, unlike the TSC, but it is more expensive to access, +- as it is off-chip. You can find the HPET spec at +- . +- +- You can safely choose Y here. However, HPET will only be +- activated if the platform and the BIOS support this feature. +- Otherwise the 8254 will be used for timing services. ++ ---help--- ++ Use the IA-PC HPET (High Precision Event Timer) to manage ++ time in preference to the PIT and RTC, if a HPET is ++ present. ++ HPET is the next generation timer replacing legacy 8254s. ++ The HPET provides a stable time base on SMP ++ systems, unlike the TSC, but it is more expensive to access, ++ as it is off-chip. You can find the HPET spec at ++ . ++ ++ You can safely choose Y here. However, HPET will only be ++ activated if the platform and the BIOS support this feature. ++ Otherwise the 8254 will be used for timing services. + +- Choose N to continue using the legacy 8254 timer. ++ Choose N to continue using the legacy 8254 timer. + + config HPET_EMULATE_RTC + def_bool y +@@ -519,7 +567,7 @@ config HPET_EMULATE_RTC + config DMI + default y + bool "Enable DMI scanning" if EMBEDDED +- help ++ ---help--- + Enabled scanning of DMI to identify machine quirks. Say Y + here unless you have verified that your setup is not + affected by entries in the DMI blacklist. Required by PNP +@@ -531,7 +579,7 @@ config GART_IOMMU + select SWIOTLB + select AGP + depends on X86_64 && PCI +- help ++ ---help--- + Support for full DMA access of devices with 32bit memory access only + on systems with more than 3GB. This is usually needed for USB, + sound, many IDE/SATA chipsets and some other devices. +@@ -546,7 +594,7 @@ config CALGARY_IOMMU + bool "IBM Calgary IOMMU support" + select SWIOTLB + depends on X86_64 && PCI && EXPERIMENTAL +- help ++ ---help--- + Support for hardware IOMMUs in IBM's xSeries x366 and x460 + systems. Needed to run systems with more than 3GB of memory + properly with 32-bit PCI devices that do not support DAC +@@ -564,7 +612,7 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT + def_bool y + prompt "Should Calgary be enabled by default?" + depends on CALGARY_IOMMU +- help ++ ---help--- + Should Calgary be enabled by default? if you choose 'y', Calgary + will be used (if it exists). If you choose 'n', Calgary will not be + used even if it exists. 
If you choose 'n' and would like to use +@@ -576,7 +624,7 @@ config AMD_IOMMU + select SWIOTLB + select PCI_MSI + depends on X86_64 && PCI && ACPI +- help ++ ---help--- + With this option you can enable support for AMD IOMMU hardware in + your system. An IOMMU is a hardware component which provides + remapping of DMA memory accesses from devices. With an AMD IOMMU you +@@ -591,7 +639,7 @@ config AMD_IOMMU_STATS + bool "Export AMD IOMMU statistics to debugfs" + depends on AMD_IOMMU + select DEBUG_FS +- help ++ ---help--- + This option enables code in the AMD IOMMU driver to collect various + statistics about whats happening in the driver and exports that + information to userspace via debugfs. +@@ -600,7 +648,7 @@ config AMD_IOMMU_STATS + # need this always selected by IOMMU for the VIA workaround + config SWIOTLB + def_bool y if X86_64 +- help ++ ---help--- + Support for software bounce buffers used on x86-64 systems + which don't have a hardware IOMMU (e.g. the current generation + of Intel's x86-64 CPUs). Using this PCI devices which can only +@@ -615,10 +663,10 @@ config IOMMU_API + + config MAXSMP + bool "Configure Maximum number of SMP Processors and NUMA Nodes" +- depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL ++ depends on 0 && X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL + select CPUMASK_OFFSTACK + default n +- help ++ ---help--- + Configure maximum number of CPUS and NUMA Nodes for this architecture. + If unsure, say N. + +@@ -629,7 +677,7 @@ config NR_CPUS + default "4096" if MAXSMP + default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000) + default "8" if SMP +- help ++ ---help--- + This allows you to specify the maximum number of CPUs which this + kernel will support. The maximum supported value is 512 and the + minimum value which makes sense is 2. +@@ -640,7 +688,7 @@ config NR_CPUS + config SCHED_SMT + bool "SMT (Hyperthreading) scheduler support" + depends on X86_HT +- help ++ ---help--- + SMT scheduler support improves the CPU scheduler's decision making + when dealing with Intel Pentium 4 chips with HyperThreading at a + cost of slightly increased overhead in some places. If unsure say +@@ -650,7 +698,7 @@ config SCHED_MC + def_bool y + prompt "Multi-core scheduler support" + depends on X86_HT +- help ++ ---help--- + Multi-core scheduler support improves the CPU scheduler's decision + making when dealing with multi-core CPU chips at a cost of slightly + increased overhead in some places. If unsure say N here. +@@ -659,8 +707,8 @@ source "kernel/Kconfig.preempt" + + config X86_UP_APIC + bool "Local APIC support on uniprocessors" +- depends on X86_32 && !SMP && !(X86_VOYAGER || X86_GENERICARCH) +- help ++ depends on X86_32 && !SMP && !X86_32_NON_STANDARD ++ ---help--- + A local APIC (Advanced Programmable Interrupt Controller) is an + integrated interrupt controller in the CPU. If you have a single-CPU + system which has a processor with a local APIC, you can say Y here to +@@ -673,7 +721,7 @@ config X86_UP_APIC + config X86_UP_IOAPIC + bool "IO-APIC support on uniprocessors" + depends on X86_UP_APIC +- help ++ ---help--- + An IO-APIC (I/O Advanced Programmable Interrupt Controller) is an + SMP-capable replacement for PC-style interrupt controllers. Most + SMP systems and many recent uniprocessor systems have one. 
+@@ -684,11 +732,12 @@ config X86_UP_IOAPIC + + config X86_LOCAL_APIC + def_bool y +- depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH)) ++ depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC ++ select HAVE_PERF_COUNTERS if (!M386 && !M486) + + config X86_IO_APIC + def_bool y +- depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH)) ++ depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC + + config X86_VISWS_APIC + def_bool y +@@ -698,7 +747,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS + bool "Reroute for broken boot IRQs" + default n + depends on X86_IO_APIC +- help ++ ---help--- + This option enables a workaround that fixes a source of + spurious interrupts. This is recommended when threaded + interrupt handling is used on systems where the generation of +@@ -720,7 +769,6 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS + + config X86_MCE + bool "Machine Check Exception" +- depends on !X86_VOYAGER + ---help--- + Machine Check Exception support allows the processor to notify the + kernel if it detects a problem (e.g. overheating, component failure). +@@ -739,7 +787,7 @@ config X86_MCE_INTEL + def_bool y + prompt "Intel MCE features" + depends on X86_64 && X86_MCE && X86_LOCAL_APIC +- help ++ ---help--- + Additional support for intel specific MCE features such as + the thermal monitor. + +@@ -747,14 +795,19 @@ config X86_MCE_AMD + def_bool y + prompt "AMD MCE features" + depends on X86_64 && X86_MCE && X86_LOCAL_APIC +- help ++ ---help--- + Additional support for AMD specific MCE features such as + the DRAM Error Threshold. + ++config X86_MCE_THRESHOLD ++ depends on X86_MCE_AMD || X86_MCE_INTEL ++ bool ++ default y ++ + config X86_MCE_NONFATAL + tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" + depends on X86_32 && X86_MCE +- help ++ ---help--- + Enabling this feature starts a timer that triggers every 5 seconds which + will look at the machine check registers to see if anything happened. + Non-fatal problems automatically get corrected (but still logged). +@@ -767,7 +820,7 @@ config X86_MCE_NONFATAL + config X86_MCE_P4THERMAL + bool "check for P4 thermal throttling interrupt." + depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP) +- help ++ ---help--- + Enabling this feature will cause a message to be printed when the P4 + enters thermal throttling. + +@@ -775,11 +828,11 @@ config VM86 + bool "Enable VM86 support" if EMBEDDED + default y + depends on X86_32 +- help +- This option is required by programs like DOSEMU to run 16-bit legacy ++ ---help--- ++ This option is required by programs like DOSEMU to run 16-bit legacy + code on X86 processors. It also may be needed by software like +- XFree86 to initialize some video cards via BIOS. Disabling this +- option saves about 6k. ++ XFree86 to initialize some video cards via BIOS. Disabling this ++ option saves about 6k. + + config TOSHIBA + tristate "Toshiba Laptop support" +@@ -853,33 +906,33 @@ config MICROCODE + module will be called microcode. + + config MICROCODE_INTEL +- bool "Intel microcode patch loading support" +- depends on MICROCODE +- default MICROCODE +- select FW_LOADER +- --help--- +- This options enables microcode patch loading support for Intel +- processors. +- +- For latest news and information on obtaining all the required +- Intel ingredients for this driver, check: +- . 
++ bool "Intel microcode patch loading support" ++ depends on MICROCODE ++ default MICROCODE ++ select FW_LOADER ++ ---help--- ++ This options enables microcode patch loading support for Intel ++ processors. ++ ++ For latest news and information on obtaining all the required ++ Intel ingredients for this driver, check: ++ . + + config MICROCODE_AMD +- bool "AMD microcode patch loading support" +- depends on MICROCODE +- select FW_LOADER +- --help--- +- If you select this option, microcode patch loading support for AMD +- processors will be enabled. ++ bool "AMD microcode patch loading support" ++ depends on MICROCODE ++ select FW_LOADER ++ ---help--- ++ If you select this option, microcode patch loading support for AMD ++ processors will be enabled. + +- config MICROCODE_OLD_INTERFACE ++config MICROCODE_OLD_INTERFACE + def_bool y + depends on MICROCODE + + config X86_MSR + tristate "/dev/cpu/*/msr - Model-specific register support" +- help ++ ---help--- + This device gives privileged processes access to the x86 + Model-Specific Registers (MSRs). It is a character device with + major 202 and minors 0 to 31 for /dev/cpu/0/msr to /dev/cpu/31/msr. +@@ -888,12 +941,18 @@ config X86_MSR + + config X86_CPUID + tristate "/dev/cpu/*/cpuid - CPU information support" +- help ++ ---help--- + This device gives processes access to the x86 CPUID instruction to + be executed on a specific processor. It is a character device + with major 203 and minors 0 to 31 for /dev/cpu/0/cpuid to + /dev/cpu/31/cpuid. + ++config X86_CPU_DEBUG ++ tristate "/sys/kernel/debug/x86/cpu/* - CPU Debug support" ++ ---help--- ++ If you select this option, this will provide various x86 CPUs ++ information through debugfs. ++ + choice + prompt "High Memory Support" + default HIGHMEM4G if !X86_NUMAQ +@@ -940,7 +999,7 @@ config NOHIGHMEM + config HIGHMEM4G + bool "4GB" + depends on !X86_NUMAQ +- help ++ ---help--- + Select this if you have a 32-bit processor and between 1 and 4 + gigabytes of physical RAM. + +@@ -948,7 +1007,7 @@ config HIGHMEM64G + bool "64GB" + depends on !M386 && !M486 + select X86_PAE +- help ++ ---help--- + Select this if you have a 32-bit processor and more than 4 + gigabytes of physical RAM. + +@@ -959,7 +1018,7 @@ choice + prompt "Memory split" if EMBEDDED + default VMSPLIT_3G + depends on X86_32 +- help ++ ---help--- + Select the desired split between kernel and user memory. + + If the address range available to the kernel is less than the +@@ -1005,20 +1064,20 @@ config HIGHMEM + config X86_PAE + bool "PAE (Physical Address Extension) Support" + depends on X86_32 && !HIGHMEM4G +- help ++ ---help--- + PAE is required for NX support, and furthermore enables + larger swapspace support for non-overcommit purposes. It + has the cost of more pagetable lookup overhead, and also + consumes more pagetable space per process. + + config ARCH_PHYS_ADDR_T_64BIT +- def_bool X86_64 || X86_PAE ++ def_bool X86_64 || X86_PAE + + config DIRECT_GBPAGES + bool "Enable 1GB pages for kernel pagetables" if EMBEDDED + default y + depends on X86_64 +- help ++ ---help--- + Allow the kernel linear mapping to use 1GB pages on CPUs that + support it. This can improve the kernel's performance a tiny bit by + reducing TLB pressure. If in doubt, say "Y". 
+@@ -1028,9 +1087,8 @@ config NUMA + bool "Numa Memory Allocation and Scheduler Support" + depends on SMP + depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) +- default n if X86_PC + default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) +- help ++ ---help--- + Enable NUMA (Non Uniform Memory Access) support. + + The kernel will try to allocate memory used by a CPU on the +@@ -1053,19 +1111,19 @@ config K8_NUMA + def_bool y + prompt "Old style AMD Opteron NUMA detection" + depends on X86_64 && NUMA && PCI +- help +- Enable K8 NUMA node topology detection. You should say Y here if +- you have a multi processor AMD K8 system. This uses an old +- method to read the NUMA configuration directly from the builtin +- Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA +- instead, which also takes priority if both are compiled in. ++ ---help--- ++ Enable K8 NUMA node topology detection. You should say Y here if ++ you have a multi processor AMD K8 system. This uses an old ++ method to read the NUMA configuration directly from the builtin ++ Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA ++ instead, which also takes priority if both are compiled in. + + config X86_64_ACPI_NUMA + def_bool y + prompt "ACPI NUMA detection" + depends on X86_64 && NUMA && ACPI && PCI + select ACPI_NUMA +- help ++ ---help--- + Enable ACPI SRAT based node topology detection. + + # Some NUMA nodes have memory ranges that span +@@ -1080,24 +1138,24 @@ config NODES_SPAN_OTHER_NODES + config NUMA_EMU + bool "NUMA emulation" + depends on X86_64 && NUMA +- help ++ ---help--- + Enable NUMA emulation. A flat machine will be split + into virtual nodes when booted with "numa=fake=N", where N is the + number of nodes. This is only useful for debugging. + + config NODES_SHIFT + int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP +- range 1 9 if X86_64 ++ range 1 9 + default "9" if MAXSMP + default "6" if X86_64 + default "4" if X86_NUMAQ + default "3" + depends on NEED_MULTIPLE_NODES +- help ++ ---help--- + Specify the maximum number of NUMA Nodes available on the target + system. Increases memory reserved to accomodate various tables. + +-config HAVE_ARCH_BOOTMEM_NODE ++config HAVE_ARCH_BOOTMEM + def_bool y + depends on X86_32 && NUMA + +@@ -1131,7 +1189,7 @@ config ARCH_SPARSEMEM_DEFAULT + + config ARCH_SPARSEMEM_ENABLE + def_bool y +- depends on X86_64 || NUMA || (EXPERIMENTAL && X86_PC) || X86_GENERICARCH ++ depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD + select SPARSEMEM_STATIC if X86_32 + select SPARSEMEM_VMEMMAP_ENABLE if X86_64 + +@@ -1143,66 +1201,71 @@ config ARCH_MEMORY_PROBE + def_bool X86_64 + depends on MEMORY_HOTPLUG + ++config ILLEGAL_POINTER_VALUE ++ hex ++ default 0 if X86_32 ++ default 0xdead000000000000 if X86_64 ++ + source "mm/Kconfig" + + config HIGHPTE + bool "Allocate 3rd-level pagetables from highmem" + depends on X86_32 && (HIGHMEM4G || HIGHMEM64G) +- help ++ ---help--- + The VM uses one page table entry for each page of physical memory. + For systems with a lot of RAM, this can be wasteful of precious + low memory. Setting this option will put user-space page table + entries in high memory. + + config X86_CHECK_BIOS_CORRUPTION +- bool "Check for low memory corruption" +- help +- Periodically check for memory corruption in low memory, which +- is suspected to be caused by BIOS. Even when enabled in the +- configuration, it is disabled at runtime. 
Enable it by +- setting "memory_corruption_check=1" on the kernel command +- line. By default it scans the low 64k of memory every 60 +- seconds; see the memory_corruption_check_size and +- memory_corruption_check_period parameters in +- Documentation/kernel-parameters.txt to adjust this. +- +- When enabled with the default parameters, this option has +- almost no overhead, as it reserves a relatively small amount +- of memory and scans it infrequently. It both detects corruption +- and prevents it from affecting the running system. +- +- It is, however, intended as a diagnostic tool; if repeatable +- BIOS-originated corruption always affects the same memory, +- you can use memmap= to prevent the kernel from using that +- memory. ++ bool "Check for low memory corruption" ++ ---help--- ++ Periodically check for memory corruption in low memory, which ++ is suspected to be caused by BIOS. Even when enabled in the ++ configuration, it is disabled at runtime. Enable it by ++ setting "memory_corruption_check=1" on the kernel command ++ line. By default it scans the low 64k of memory every 60 ++ seconds; see the memory_corruption_check_size and ++ memory_corruption_check_period parameters in ++ Documentation/kernel-parameters.txt to adjust this. ++ ++ When enabled with the default parameters, this option has ++ almost no overhead, as it reserves a relatively small amount ++ of memory and scans it infrequently. It both detects corruption ++ and prevents it from affecting the running system. ++ ++ It is, however, intended as a diagnostic tool; if repeatable ++ BIOS-originated corruption always affects the same memory, ++ you can use memmap= to prevent the kernel from using that ++ memory. + + config X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK +- bool "Set the default setting of memory_corruption_check" ++ bool "Set the default setting of memory_corruption_check" + depends on X86_CHECK_BIOS_CORRUPTION + default y +- help +- Set whether the default state of memory_corruption_check is +- on or off. ++ ---help--- ++ Set whether the default state of memory_corruption_check is ++ on or off. + + config X86_RESERVE_LOW_64K +- bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen" ++ bool "Reserve low 64K of RAM on AMI/Phoenix BIOSen" + default y +- help +- Reserve the first 64K of physical RAM on BIOSes that are known +- to potentially corrupt that memory range. A numbers of BIOSes are +- known to utilize this area during suspend/resume, so it must not +- be used by the kernel. +- +- Set this to N if you are absolutely sure that you trust the BIOS +- to get all its memory reservations and usages right. +- +- If you have doubts about the BIOS (e.g. suspend/resume does not +- work or there's kernel crashes after certain hardware hotplug +- events) and it's not AMI or Phoenix, then you might want to enable +- X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical +- corruption patterns. ++ ---help--- ++ Reserve the first 64K of physical RAM on BIOSes that are known ++ to potentially corrupt that memory range. A numbers of BIOSes are ++ known to utilize this area during suspend/resume, so it must not ++ be used by the kernel. ++ ++ Set this to N if you are absolutely sure that you trust the BIOS ++ to get all its memory reservations and usages right. ++ ++ If you have doubts about the BIOS (e.g. 
suspend/resume does not ++ work or there's kernel crashes after certain hardware hotplug ++ events) and it's not AMI or Phoenix, then you might want to enable ++ X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check typical ++ corruption patterns. + +- Say Y if unsure. ++ Say Y if unsure. + + config MATH_EMULATION + bool +@@ -1268,7 +1331,7 @@ config MTRR_SANITIZER + def_bool y + prompt "MTRR cleanup support" + depends on MTRR +- help ++ ---help--- + Convert MTRR layout from continuous to discrete, so X drivers can + add writeback entries. + +@@ -1283,7 +1346,7 @@ config MTRR_SANITIZER_ENABLE_DEFAULT + range 0 1 + default "0" + depends on MTRR_SANITIZER +- help ++ ---help--- + Enable mtrr cleanup default value + + config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT +@@ -1291,7 +1354,7 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAU + range 0 7 + default "1" + depends on MTRR_SANITIZER +- help ++ ---help--- + mtrr cleanup spare entries default, it can be changed via + mtrr_spare_reg_nr=N on the kernel command line. + +@@ -1299,7 +1362,7 @@ config X86_PAT + bool + prompt "x86 PAT support" + depends on MTRR +- help ++ ---help--- + Use PAT attributes to setup page level cache control. + + PATs are the modern equivalents of MTRRs and are much more +@@ -1314,20 +1377,20 @@ config EFI + bool "EFI runtime service support" + depends on ACPI + ---help--- +- This enables the kernel to use EFI runtime services that are +- available (such as the EFI variable services). ++ This enables the kernel to use EFI runtime services that are ++ available (such as the EFI variable services). + +- This option is only useful on systems that have EFI firmware. +- In addition, you should use the latest ELILO loader available +- at in order to take advantage +- of EFI runtime services. However, even with this option, the +- resultant kernel should continue to boot on existing non-EFI +- platforms. ++ This option is only useful on systems that have EFI firmware. ++ In addition, you should use the latest ELILO loader available ++ at in order to take advantage ++ of EFI runtime services. However, even with this option, the ++ resultant kernel should continue to boot on existing non-EFI ++ platforms. + + config SECCOMP + def_bool y + prompt "Enable seccomp to safely compute untrusted bytecode" +- help ++ ---help--- + This kernel feature is useful for number crunching applications + that may need to compute untrusted bytecode during their + execution. By using pipes or other transports made available to +@@ -1340,13 +1403,16 @@ config SECCOMP + + If unsure, say Y. Only embedded should say N here. + ++config CC_STACKPROTECTOR_ALL ++ bool ++ + config CC_STACKPROTECTOR + bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" +- depends on X86_64 && EXPERIMENTAL && BROKEN +- help +- This option turns on the -fstack-protector GCC feature. This +- feature puts, at the beginning of critical functions, a canary +- value on the stack just before the return address, and validates ++ select CC_STACKPROTECTOR_ALL ++ ---help--- ++ This option turns on the -fstack-protector GCC feature. This ++ feature puts, at the beginning of functions, a canary value on ++ the stack just before the return address, and validates + the value just before actually returning. 
Stack based buffer + overflows (that need to overwrite this return address) now also + overwrite the canary, which gets detected and the attack is then +@@ -1354,22 +1420,14 @@ config CC_STACKPROTECTOR + + This feature requires gcc version 4.2 or above, or a distribution + gcc with the feature backported. Older versions are automatically +- detected and for those versions, this configuration option is ignored. +- +-config CC_STACKPROTECTOR_ALL +- bool "Use stack-protector for all functions" +- depends on CC_STACKPROTECTOR +- help +- Normally, GCC only inserts the canary value protection for +- functions that use large-ish on-stack buffers. By enabling +- this option, GCC will be asked to do this for ALL functions. ++ detected and for those versions, this configuration option is ++ ignored. (and a warning is printed during bootup) + + source kernel/Kconfig.hz + + config KEXEC + bool "kexec system call" +- depends on X86_BIOS_REBOOT +- help ++ ---help--- + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot +@@ -1386,7 +1444,7 @@ config KEXEC + config CRASH_DUMP + bool "kernel crash dumps" + depends on X86_64 || (X86_32 && HIGHMEM) +- help ++ ---help--- + Generate crash dump after being started by kexec. + This should be normally only set in special crash dump kernels + which are loaded in the main kernel with kexec-tools into +@@ -1400,8 +1458,8 @@ config CRASH_DUMP + config KEXEC_JUMP + bool "kexec jump (EXPERIMENTAL)" + depends on EXPERIMENTAL +- depends on KEXEC && HIBERNATION && X86_32 +- help ++ depends on KEXEC && HIBERNATION ++ ---help--- + Jump between original kernel and kexeced kernel and invoke + code in physical address mode via KEXEC + +@@ -1410,7 +1468,7 @@ config PHYSICAL_START + default "0x1000000" if X86_NUMAQ + default "0x200000" if X86_64 + default "0x100000" +- help ++ ---help--- + This gives the physical address where the kernel is loaded. + + If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then +@@ -1451,7 +1509,7 @@ config PHYSICAL_START + config RELOCATABLE + bool "Build a relocatable kernel (EXPERIMENTAL)" + depends on EXPERIMENTAL +- help ++ ---help--- + This builds a kernel image that retains relocation information + so it can be loaded someplace besides the default 1MB. + The relocations tend to make the kernel binary about 10% larger, +@@ -1471,7 +1529,7 @@ config PHYSICAL_ALIGN + default "0x100000" if X86_32 + default "0x200000" if X86_64 + range 0x2000 0x400000 +- help ++ ---help--- + This value puts the alignment restrictions on physical address + where kernel is loaded and run from. Kernel is compiled for an + address which meets above alignment restriction. +@@ -1492,7 +1550,7 @@ config PHYSICAL_ALIGN + + config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" +- depends on SMP && HOTPLUG && !X86_VOYAGER ++ depends on SMP && HOTPLUG + ---help--- + Say Y here to allow turning CPUs off and on. CPUs can be + controlled through /sys/devices/system/cpu. +@@ -1504,7 +1562,7 @@ config COMPAT_VDSO + def_bool y + prompt "Compat VDSO support" + depends on X86_32 || IA32_EMULATION +- help ++ ---help--- + Map the 32-bit VDSO to the predictable old-style address too. 
+ ---help--- + Say N here if you are running a sufficiently recent glibc +@@ -1516,7 +1574,7 @@ config COMPAT_VDSO + config CMDLINE_BOOL + bool "Built-in kernel command line" + default n +- help ++ ---help--- + Allow for specifying boot arguments to the kernel at + build time. On some systems (e.g. embedded ones), it is + necessary or convenient to provide some or all of the +@@ -1534,7 +1592,7 @@ config CMDLINE + string "Built-in kernel command string" + depends on CMDLINE_BOOL + default "" +- help ++ ---help--- + Enter arguments here that should be compiled into the kernel + image and used at boot time. If the boot loader provides a + command line at boot time, it is appended to this string to +@@ -1551,7 +1609,7 @@ config CMDLINE_OVERRIDE + bool "Built-in command line overrides boot loader arguments" + default n + depends on CMDLINE_BOOL +- help ++ ---help--- + Set this option to 'Y' to have the kernel ignore the boot loader + command line, and use ONLY the built-in command line. + +@@ -1572,8 +1630,11 @@ config HAVE_ARCH_EARLY_PFN_TO_NID + def_bool X86_64 + depends on NUMA + ++config HARDIRQS_SW_RESEND ++ bool ++ default y ++ + menu "Power management and ACPI options" +- depends on !X86_VOYAGER + + config ARCH_HIBERNATION_HEADER + def_bool y +@@ -1651,7 +1712,7 @@ if APM + + config APM_IGNORE_USER_SUSPEND + bool "Ignore USER SUSPEND" +- help ++ ---help--- + This option will ignore USER SUSPEND requests. On machines with a + compliant APM BIOS, you want to say N. However, on the NEC Versa M + series notebooks, it is necessary to say Y because of a BIOS bug. +@@ -1675,7 +1736,7 @@ config APM_DO_ENABLE + + config APM_CPU_IDLE + bool "Make CPU Idle calls when idle" +- help ++ ---help--- + Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. + On some machines, this can activate improved power savings, such as + a slowed CPU clock rate, when the machine is idle. These idle calls +@@ -1686,7 +1747,7 @@ config APM_CPU_IDLE + + config APM_DISPLAY_BLANK + bool "Enable console blanking using APM" +- help ++ ---help--- + Enable console blanking using the APM. Some laptops can use this to + turn off the LCD backlight when the screen blanker of the Linux + virtual console blanks the screen. Note that this is only used by +@@ -1699,7 +1760,7 @@ config APM_DISPLAY_BLANK + + config APM_ALLOW_INTS + bool "Allow interrupts during APM BIOS calls" +- help ++ ---help--- + Normally we disable external interrupts while we are making calls to + the APM BIOS as a measure to lessen the effects of a badly behaving + BIOS implementation. The BIOS should reenable interrupts if it +@@ -1724,7 +1785,7 @@ config PCI + bool "PCI support" + default y + select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC) +- help ++ ---help--- + Find out whether you have a PCI motherboard. PCI is the name of a + bus system, i.e. the way the CPU talks to the other stuff inside + your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or +@@ -1795,7 +1856,7 @@ config PCI_MMCONFIG + config DMAR + bool "Support for DMA Remapping Devices (EXPERIMENTAL)" + depends on X86_64 && PCI_MSI && ACPI && EXPERIMENTAL +- help ++ ---help--- + DMA remapping (DMAR) devices support enables independent address + translations for Direct Memory Access (DMA) from devices. 
+ These DMA remapping devices are reported via ACPI tables +@@ -1817,29 +1878,30 @@ config DMAR_GFX_WA + def_bool y + prompt "Support for Graphics workaround" + depends on DMAR +- help +- Current Graphics drivers tend to use physical address +- for DMA and avoid using DMA APIs. Setting this config +- option permits the IOMMU driver to set a unity map for +- all the OS-visible memory. Hence the driver can continue +- to use physical addresses for DMA. ++ ---help--- ++ Current Graphics drivers tend to use physical address ++ for DMA and avoid using DMA APIs. Setting this config ++ option permits the IOMMU driver to set a unity map for ++ all the OS-visible memory. Hence the driver can continue ++ to use physical addresses for DMA. + + config DMAR_FLOPPY_WA + def_bool y + depends on DMAR +- help +- Floppy disk drivers are know to bypass DMA API calls +- thereby failing to work when IOMMU is enabled. This +- workaround will setup a 1:1 mapping for the first +- 16M to make floppy (an ISA device) work. ++ ---help--- ++ Floppy disk drivers are know to bypass DMA API calls ++ thereby failing to work when IOMMU is enabled. This ++ workaround will setup a 1:1 mapping for the first ++ 16M to make floppy (an ISA device) work. + + config INTR_REMAP + bool "Support for Interrupt Remapping (EXPERIMENTAL)" + depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL +- help +- Supports Interrupt remapping for IO-APIC and MSI devices. +- To use x2apic mode in the CPU's which support x2APIC enhancements or +- to support platforms with CPU's having > 8 bit APIC ID, say Y. ++ select X86_X2APIC ++ ---help--- ++ Supports Interrupt remapping for IO-APIC and MSI devices. ++ To use x2apic mode in the CPU's which support x2APIC enhancements or ++ to support platforms with CPU's having > 8 bit APIC ID, say Y. + + source "drivers/pci/pcie/Kconfig" + +@@ -1853,8 +1915,7 @@ if X86_32 + + config ISA + bool "ISA support" +- depends on !X86_VOYAGER +- help ++ ---help--- + Find out whether you have ISA slots on your motherboard. ISA is the + name of a bus system, i.e. the way the CPU talks to the other stuff + inside your box. Other bus systems are PCI, EISA, MicroChannel +@@ -1880,9 +1941,8 @@ config EISA + source "drivers/eisa/Kconfig" + + config MCA +- bool "MCA support" if !X86_VOYAGER +- default y if X86_VOYAGER +- help ++ bool "MCA support" ++ ---help--- + MicroChannel Architecture is found in some IBM PS/2 machines and + laptops. It is a bus system similar to PCI or ISA. See + (and especially the web page given +@@ -1892,8 +1952,7 @@ source "drivers/mca/Kconfig" + + config SCx200 + tristate "NatSemi SCx200 support" +- depends on !X86_VOYAGER +- help ++ ---help--- + This provides basic support for National Semiconductor's + (now AMD's) Geode processors. The driver probes for the + PCI-IDs of several on-chip devices, so its a good dependency +@@ -1905,7 +1964,7 @@ config SCx200HR_TIMER + tristate "NatSemi SCx200 27MHz High-Resolution Timer Support" + depends on SCx200 && GENERIC_TIME + default y +- help ++ ---help--- + This driver provides a clocksource built upon the on-chip + 27MHz high-resolution timer. 
Its also a workaround for + NSC Geode SC-1100's buggy TSC, which loses time when the +@@ -1916,7 +1975,7 @@ config GEODE_MFGPT_TIMER + def_bool y + prompt "Geode Multi-Function General Purpose Timer (MFGPT) events" + depends on MGEODE_LX && GENERIC_TIME && GENERIC_CLOCKEVENTS +- help ++ ---help--- + This driver provides a clock event source based on the MFGPT + timer(s) in the CS5535 and CS5536 companion chip for the geode. + MFGPTs have a better resolution and max interval than the +@@ -1925,7 +1984,7 @@ config GEODE_MFGPT_TIMER + config OLPC + bool "One Laptop Per Child support" + default n +- help ++ ---help--- + Add support for detecting the unique features of the OLPC + XO hardware. + +@@ -1950,16 +2009,16 @@ config IA32_EMULATION + bool "IA32 Emulation" + depends on X86_64 + select COMPAT_BINFMT_ELF +- help ++ ---help--- + Include code to run 32-bit programs under a 64-bit kernel. You should + likely turn this on, unless you're 100% sure that you don't have any + 32-bit programs left. + + config IA32_AOUT +- tristate "IA32 a.out support" +- depends on IA32_EMULATION +- help +- Support old a.out binaries in the 32bit emulation. ++ tristate "IA32 a.out support" ++ depends on IA32_EMULATION ++ ---help--- ++ Support old a.out binaries in the 32bit emulation. + + config COMPAT + def_bool y +Index: linux-2.6-tip/arch/x86/Kconfig.cpu +=================================================================== +--- linux-2.6-tip.orig/arch/x86/Kconfig.cpu ++++ linux-2.6-tip/arch/x86/Kconfig.cpu +@@ -50,7 +50,7 @@ config M386 + config M486 + bool "486" + depends on X86_32 +- help ++ ---help--- + Select this for a 486 series processor, either Intel or one of the + compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX, + DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or +@@ -59,7 +59,7 @@ config M486 + config M586 + bool "586/K5/5x86/6x86/6x86MX" + depends on X86_32 +- help ++ ---help--- + Select this for an 586 or 686 series processor such as the AMD K5, + the Cyrix 5x86, 6x86 and 6x86MX. This choice does not + assume the RDTSC (Read Time Stamp Counter) instruction. +@@ -67,21 +67,21 @@ config M586 + config M586TSC + bool "Pentium-Classic" + depends on X86_32 +- help ++ ---help--- + Select this for a Pentium Classic processor with the RDTSC (Read + Time Stamp Counter) instruction for benchmarking. + + config M586MMX + bool "Pentium-MMX" + depends on X86_32 +- help ++ ---help--- + Select this for a Pentium with the MMX graphics/multimedia + extended instructions. + + config M686 + bool "Pentium-Pro" + depends on X86_32 +- help ++ ---help--- + Select this for Intel Pentium Pro chips. This enables the use of + Pentium Pro extended instructions, and disables the init-time guard + against the f00f bug found in earlier Pentiums. +@@ -89,7 +89,7 @@ config M686 + config MPENTIUMII + bool "Pentium-II/Celeron(pre-Coppermine)" + depends on X86_32 +- help ++ ---help--- + Select this for Intel chips based on the Pentium-II and + pre-Coppermine Celeron core. This option enables an unaligned + copy optimization, compiles the kernel with optimization flags +@@ -99,7 +99,7 @@ config MPENTIUMII + config MPENTIUMIII + bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon" + depends on X86_32 +- help ++ ---help--- + Select this for Intel chips based on the Pentium-III and + Celeron-Coppermine core. 
This option enables use of some + extended prefetch instructions in addition to the Pentium II +@@ -108,14 +108,14 @@ config MPENTIUMIII + config MPENTIUMM + bool "Pentium M" + depends on X86_32 +- help ++ ---help--- + Select this for Intel Pentium M (not Pentium-4 M) + notebook chips. + + config MPENTIUM4 + bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/older Xeon" + depends on X86_32 +- help ++ ---help--- + Select this for Intel Pentium 4 chips. This includes the + Pentium 4, Pentium D, P4-based Celeron and Xeon, and + Pentium-4 M (not Pentium M) chips. This option enables compile +@@ -151,7 +151,7 @@ config MPENTIUM4 + config MK6 + bool "K6/K6-II/K6-III" + depends on X86_32 +- help ++ ---help--- + Select this for an AMD K6-family processor. Enables use of + some extended instructions, and passes appropriate optimization + flags to GCC. +@@ -159,14 +159,14 @@ config MK6 + config MK7 + bool "Athlon/Duron/K7" + depends on X86_32 +- help ++ ---help--- + Select this for an AMD Athlon K7-family processor. Enables use of + some extended instructions, and passes appropriate optimization + flags to GCC. + + config MK8 + bool "Opteron/Athlon64/Hammer/K8" +- help ++ ---help--- + Select this for an AMD Opteron or Athlon64 Hammer-family processor. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. +@@ -174,7 +174,7 @@ config MK8 + config MCRUSOE + bool "Crusoe" + depends on X86_32 +- help ++ ---help--- + Select this for a Transmeta Crusoe processor. Treats the processor + like a 586 with TSC, and sets some GCC optimization flags (like a + Pentium Pro with no alignment requirements). +@@ -182,13 +182,13 @@ config MCRUSOE + config MEFFICEON + bool "Efficeon" + depends on X86_32 +- help ++ ---help--- + Select this for a Transmeta Efficeon processor. + + config MWINCHIPC6 + bool "Winchip-C6" + depends on X86_32 +- help ++ ---help--- + Select this for an IDT Winchip C6 chip. Linux and GCC + treat this chip as a 586TSC with some extended instructions + and alignment requirements. +@@ -196,7 +196,7 @@ config MWINCHIPC6 + config MWINCHIP3D + bool "Winchip-2/Winchip-2A/Winchip-3" + depends on X86_32 +- help ++ ---help--- + Select this for an IDT Winchip-2, 2A or 3. Linux and GCC + treat this chip as a 586TSC with some extended instructions + and alignment requirements. Also enable out of order memory +@@ -206,19 +206,19 @@ config MWINCHIP3D + config MGEODEGX1 + bool "GeodeGX1" + depends on X86_32 +- help ++ ---help--- + Select this for a Geode GX1 (Cyrix MediaGX) chip. + + config MGEODE_LX + bool "Geode GX/LX" + depends on X86_32 +- help ++ ---help--- + Select this for AMD Geode GX and LX processors. + + config MCYRIXIII + bool "CyrixIII/VIA-C3" + depends on X86_32 +- help ++ ---help--- + Select this for a Cyrix III or C3 chip. Presently Linux and GCC + treat this chip as a generic 586. Whilst the CPU is 686 class, + it lacks the cmov extension which gcc assumes is present when +@@ -230,7 +230,7 @@ config MCYRIXIII + config MVIAC3_2 + bool "VIA C3-2 (Nehemiah)" + depends on X86_32 +- help ++ ---help--- + Select this for a VIA C3 "Nehemiah". Selecting this enables usage + of SSE and tells gcc to treat the CPU as a 686. + Note, this kernel will not boot on older (pre model 9) C3s. +@@ -238,14 +238,14 @@ config MVIAC3_2 + config MVIAC7 + bool "VIA C7" + depends on X86_32 +- help ++ ---help--- + Select this for a VIA C7. Selecting this uses the correct cache + shift and tells gcc to treat the CPU as a 686. 
+ + config MPSC + bool "Intel P4 / older Netburst based Xeon" + depends on X86_64 +- help ++ ---help--- + Optimize for Intel Pentium 4, Pentium D and older Nocona/Dempsey + Xeon CPUs with Intel 64bit which is compatible with x86-64. + Note that the latest Xeons (Xeon 51xx and 53xx) are not based on the +@@ -255,7 +255,7 @@ config MPSC + + config MCORE2 + bool "Core 2/newer Xeon" +- help ++ ---help--- + + Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and + 53xx) CPUs. You can distinguish newer from older Xeons by the CPU +@@ -265,7 +265,7 @@ config MCORE2 + config GENERIC_CPU + bool "Generic-x86-64" + depends on X86_64 +- help ++ ---help--- + Generic x86-64 CPU. + Run equally well on all x86-64 CPUs. + +@@ -274,7 +274,7 @@ endchoice + config X86_GENERIC + bool "Generic x86 support" + depends on X86_32 +- help ++ ---help--- + Instead of just including optimizations for the selected + x86 variant (e.g. PII, Crusoe or Athlon), include some more + generic optimizations as well. This will make the kernel +@@ -294,25 +294,23 @@ config X86_CPU + # Define implied options from the CPU selection here + config X86_L1_CACHE_BYTES + int +- default "128" if GENERIC_CPU || MPSC +- default "64" if MK8 || MCORE2 +- depends on X86_64 ++ default "128" if MPSC ++ default "64" if GENERIC_CPU || MK8 || MCORE2 || X86_32 + + config X86_INTERNODE_CACHE_BYTES + int + default "4096" if X86_VSMP + default X86_L1_CACHE_BYTES if !X86_VSMP +- depends on X86_64 + + config X86_CMPXCHG + def_bool X86_64 || (X86_32 && !M386) + + config X86_L1_CACHE_SHIFT + int +- default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC ++ default "7" if MPENTIUM4 || MPSC + default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 + default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX +- default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 ++ default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU + + config X86_XADD + def_bool y +@@ -321,7 +319,7 @@ config X86_XADD + config X86_PPRO_FENCE + bool "PentiumPro memory ordering errata workaround" + depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 +- help ++ ---help--- + Old PentiumPro multiprocessor systems had errata that could cause + memory operations to violate the x86 ordering standard in rare cases. + Enabling this option will attempt to work around some (but not all) +@@ -414,14 +412,14 @@ config X86_DEBUGCTLMSR + + menuconfig PROCESSOR_SELECT + bool "Supported processor vendors" if EMBEDDED +- help ++ ---help--- + This lets you choose what x86 vendor support code your kernel + will include. 
+ + config CPU_SUP_INTEL + default y + bool "Support Intel processors" if PROCESSOR_SELECT +- help ++ ---help--- + This enables detection, tunings and quirks for Intel processors + + You need this enabled if you want your kernel to run on an +@@ -435,7 +433,7 @@ config CPU_SUP_CYRIX_32 + default y + bool "Support Cyrix processors" if PROCESSOR_SELECT + depends on !64BIT +- help ++ ---help--- + This enables detection, tunings and quirks for Cyrix processors + + You need this enabled if you want your kernel to run on a +@@ -448,7 +446,7 @@ config CPU_SUP_CYRIX_32 + config CPU_SUP_AMD + default y + bool "Support AMD processors" if PROCESSOR_SELECT +- help ++ ---help--- + This enables detection, tunings and quirks for AMD processors + + You need this enabled if you want your kernel to run on an +@@ -458,25 +456,10 @@ config CPU_SUP_AMD + + If unsure, say N. + +-config CPU_SUP_CENTAUR_32 +- default y +- bool "Support Centaur processors" if PROCESSOR_SELECT +- depends on !64BIT +- help +- This enables detection, tunings and quirks for Centaur processors +- +- You need this enabled if you want your kernel to run on a +- Centaur CPU. Disabling this option on other types of CPUs +- makes the kernel a tiny bit smaller. Disabling it on a Centaur +- CPU might render the kernel unbootable. +- +- If unsure, say N. +- +-config CPU_SUP_CENTAUR_64 ++config CPU_SUP_CENTAUR + default y + bool "Support Centaur processors" if PROCESSOR_SELECT +- depends on 64BIT +- help ++ ---help--- + This enables detection, tunings and quirks for Centaur processors + + You need this enabled if you want your kernel to run on a +@@ -490,7 +473,7 @@ config CPU_SUP_TRANSMETA_32 + default y + bool "Support Transmeta processors" if PROCESSOR_SELECT + depends on !64BIT +- help ++ ---help--- + This enables detection, tunings and quirks for Transmeta processors + + You need this enabled if you want your kernel to run on a +@@ -504,7 +487,7 @@ config CPU_SUP_UMC_32 + default y + bool "Support UMC processors" if PROCESSOR_SELECT + depends on !64BIT +- help ++ ---help--- + This enables detection, tunings and quirks for UMC processors + + You need this enabled if you want your kernel to run on a +@@ -523,8 +506,7 @@ config X86_PTRACE_BTS + bool "Branch Trace Store" + default y + depends on X86_DEBUGCTLMSR +- depends on BROKEN +- help ++ ---help--- + This adds a ptrace interface to the hardware's branch trace store. + + Debuggers may use it to collect an execution trace of the debugged +Index: linux-2.6-tip/arch/x86/Kconfig.debug +=================================================================== +--- linux-2.6-tip.orig/arch/x86/Kconfig.debug ++++ linux-2.6-tip/arch/x86/Kconfig.debug +@@ -7,7 +7,7 @@ source "lib/Kconfig.debug" + + config STRICT_DEVMEM + bool "Filter access to /dev/mem" +- help ++ ---help--- + If this option is disabled, you allow userspace (root) access to all + of memory, including kernel and userspace memory. Accidental + access to this is obviously disastrous, but specific access can +@@ -25,7 +25,7 @@ config STRICT_DEVMEM + config X86_VERBOSE_BOOTUP + bool "Enable verbose x86 bootup info messages" + default y +- help ++ ---help--- + Enables the informational output from the decompression stage + (e.g. bzImage) of the boot. If you disable this you will still + see errors. Disable this if you want silent bootup. 
+@@ -33,7 +33,7 @@ config X86_VERBOSE_BOOTUP + config EARLY_PRINTK + bool "Early printk" if EMBEDDED + default y +- help ++ ---help--- + Write kernel log output directly into the VGA buffer or to a serial + port. + +@@ -47,7 +47,7 @@ config EARLY_PRINTK_DBGP + bool "Early printk via EHCI debug port" + default n + depends on EARLY_PRINTK && PCI +- help ++ ---help--- + Write kernel log output directly into the EHCI debug port. + + This is useful for kernel debugging when your machine crashes very +@@ -59,14 +59,14 @@ config EARLY_PRINTK_DBGP + config DEBUG_STACKOVERFLOW + bool "Check for stack overflows" + depends on DEBUG_KERNEL +- help ++ ---help--- + This option will cause messages to be printed if free stack space + drops below a certain limit. + + config DEBUG_STACK_USAGE + bool "Stack utilization instrumentation" + depends on DEBUG_KERNEL +- help ++ ---help--- + Enables the display of the minimum amount of free stack which each + task has ever had available in the sysrq-T and sysrq-P debug output. + +@@ -75,7 +75,8 @@ config DEBUG_STACK_USAGE + config DEBUG_PAGEALLOC + bool "Debug page memory allocations" + depends on DEBUG_KERNEL +- help ++ depends on !KMEMCHECK ++ ---help--- + Unmap pages from the kernel linear mapping after free_pages(). + This results in a large slowdown, but helps to find certain types + of memory corruptions. +@@ -83,9 +84,10 @@ config DEBUG_PAGEALLOC + config DEBUG_PER_CPU_MAPS + bool "Debug access to per_cpu maps" + depends on DEBUG_KERNEL +- depends on X86_SMP ++ depends on SMP ++ depends on !PREEMPT_RT + default n +- help ++ ---help--- + Say Y to verify that the per_cpu map being accessed has + been setup. Adds a fair amount of code to kernel memory + and decreases performance. +@@ -96,7 +98,7 @@ config X86_PTDUMP + bool "Export kernel pagetable layout to userspace via debugfs" + depends on DEBUG_KERNEL + select DEBUG_FS +- help ++ ---help--- + Say Y here if you want to show the kernel pagetable layout in a + debugfs file. This information is only useful for kernel developers + who are working in architecture specific areas of the kernel. +@@ -108,7 +110,7 @@ config DEBUG_RODATA + bool "Write protect kernel read-only data structures" + default y + depends on DEBUG_KERNEL +- help ++ ---help--- + Mark the kernel read-only data as write-protected in the pagetables, + in order to catch accidental (and incorrect) writes to such const + data. This is recommended so that we can catch kernel bugs sooner. +@@ -117,7 +119,8 @@ config DEBUG_RODATA + config DEBUG_RODATA_TEST + bool "Testcase for the DEBUG_RODATA feature" + depends on DEBUG_RODATA +- help ++ default y ++ ---help--- + This option enables a testcase for the DEBUG_RODATA + feature as well as for the change_page_attr() infrastructure. + If in doubt, say "N" +@@ -125,7 +128,7 @@ config DEBUG_RODATA_TEST + config DEBUG_NX_TEST + tristate "Testcase for the NX non-executable stack feature" + depends on DEBUG_KERNEL && m +- help ++ ---help--- + This option enables a testcase for the CPU NX capability + and the software setup of this feature. + If in doubt, say "N" +@@ -133,7 +136,8 @@ config DEBUG_NX_TEST + config 4KSTACKS + bool "Use 4Kb for kernel stacks instead of 8Kb" + depends on X86_32 +- help ++ default y ++ ---help--- + If you say Y here the kernel will use a 4Kb stacksize for the + kernel stack attached to each process/thread. 
This facilitates + running more threads on a system and also reduces the pressure +@@ -144,7 +148,7 @@ config DOUBLEFAULT + default y + bool "Enable doublefault exception handler" if EMBEDDED + depends on X86_32 +- help ++ ---help--- + This option allows trapping of rare doublefault exceptions that + would otherwise cause a system to silently reboot. Disabling this + option saves about 4k and might cause you much additional grey +@@ -154,7 +158,7 @@ config IOMMU_DEBUG + bool "Enable IOMMU debugging" + depends on GART_IOMMU && DEBUG_KERNEL + depends on X86_64 +- help ++ ---help--- + Force the IOMMU to on even when you have less than 4GB of + memory and add debugging code. On overflow always panic. And + allow to enable IOMMU leak tracing. Can be disabled at boot +@@ -170,7 +174,7 @@ config IOMMU_LEAK + bool "IOMMU leak tracing" + depends on DEBUG_KERNEL + depends on IOMMU_DEBUG +- help ++ ---help--- + Add a simple leak tracer to the IOMMU code. This is useful when you + are debugging a buggy device driver that leaks IOMMU mappings. + +@@ -203,25 +207,25 @@ choice + + config IO_DELAY_0X80 + bool "port 0x80 based port-IO delay [recommended]" +- help ++ ---help--- + This is the traditional Linux IO delay used for in/out_p. + It is the most tested hence safest selection here. + + config IO_DELAY_0XED + bool "port 0xed based port-IO delay" +- help ++ ---help--- + Use port 0xed as the IO delay. This frees up port 0x80 which is + often used as a hardware-debug port. + + config IO_DELAY_UDELAY + bool "udelay based port-IO delay" +- help ++ ---help--- + Use udelay(2) as the IO delay method. This provides the delay + while not having any side-effect on the IO port space. + + config IO_DELAY_NONE + bool "no port-IO delay" +- help ++ ---help--- + No port-IO delay. Will break on old boxes that require port-IO + delay for certain operations. Should work on most new machines. + +@@ -255,18 +259,18 @@ config DEBUG_BOOT_PARAMS + bool "Debug boot parameters" + depends on DEBUG_KERNEL + depends on DEBUG_FS +- help ++ ---help--- + This option will cause struct boot_params to be exported via debugfs. + + config CPA_DEBUG + bool "CPA self-test code" + depends on DEBUG_KERNEL +- help ++ ---help--- + Do change_page_attr() self-tests every 30 seconds. + + config OPTIMIZE_INLINING + bool "Allow gcc to uninline functions marked 'inline'" +- help ++ ---help--- + This option determines if the kernel forces gcc to inline the functions + developers have marked 'inline'. Doing so takes away freedom from gcc to + do what it thinks is best, which is desirable for the gcc 3.x series of +@@ -279,4 +283,3 @@ config OPTIMIZE_INLINING + If unsure, say N. 
+ + endmenu +- +Index: linux-2.6-tip/arch/x86/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/x86/Makefile ++++ linux-2.6-tip/arch/x86/Makefile +@@ -70,14 +70,22 @@ else + # this works around some issues with generating unwind tables in older gccs + # newer gccs do it by default + KBUILD_CFLAGS += -maccumulate-outgoing-args ++endif + +- stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh +- stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \ +- "$(CC)" -fstack-protector ) +- stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \ +- "$(CC)" -fstack-protector-all ) ++ifdef CONFIG_CC_STACKPROTECTOR ++ cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh ++ ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC)),y) ++ stackp-y := -fstack-protector ++ stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += -fstack-protector-all ++ KBUILD_CFLAGS += $(stackp-y) ++ else ++ $(warning stack protector enabled but no compiler support) ++ endif ++endif + +- KBUILD_CFLAGS += $(stackp-y) ++# Don't unroll struct assignments with kmemcheck enabled ++ifeq ($(CONFIG_KMEMCHECK),y) ++ KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy) + endif + + # Stackpointer is addressed different for 32 bit and 64 bit x86 +@@ -102,29 +110,6 @@ KBUILD_CFLAGS += -fno-asynchronous-unwin + # prevent gcc from generating any FP code by mistake + KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) + +-### +-# Sub architecture support +-# fcore-y is linked before mcore-y files. +- +-# Default subarch .c files +-mcore-y := arch/x86/mach-default/ +- +-# Voyager subarch support +-mflags-$(CONFIG_X86_VOYAGER) := -Iarch/x86/include/asm/mach-voyager +-mcore-$(CONFIG_X86_VOYAGER) := arch/x86/mach-voyager/ +- +-# generic subarchitecture +-mflags-$(CONFIG_X86_GENERICARCH):= -Iarch/x86/include/asm/mach-generic +-fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ +-mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ +- +-# default subarch .h files +-mflags-y += -Iarch/x86/include/asm/mach-default +- +-# 64 bit does not support subarch support - clear sub arch variables +-fcore-$(CONFIG_X86_64) := +-mcore-$(CONFIG_X86_64) := +- + KBUILD_CFLAGS += $(mflags-y) + KBUILD_AFLAGS += $(mflags-y) + +@@ -150,9 +135,6 @@ core-$(CONFIG_LGUEST_GUEST) += arch/x86/ + core-y += arch/x86/kernel/ + core-y += arch/x86/mm/ + +-# Remaining sub architecture files +-core-y += $(mcore-y) +- + core-y += arch/x86/crypto/ + core-y += arch/x86/vdso/ + core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/ +@@ -176,34 +158,23 @@ endif + + boot := arch/x86/boot + +-PHONY += zImage bzImage compressed zlilo bzlilo \ +- zdisk bzdisk fdimage fdimage144 fdimage288 isoimage install ++BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage install ++ ++PHONY += bzImage $(BOOT_TARGETS) + + # Default kernel to build + all: bzImage + + # KBUILD_IMAGE specify target image being built +- KBUILD_IMAGE := $(boot)/bzImage +-zImage zlilo zdisk: KBUILD_IMAGE := $(boot)/zImage ++KBUILD_IMAGE := $(boot)/bzImage + +-zImage bzImage: vmlinux ++bzImage: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) + $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot + $(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@ + +-compressed: zImage +- +-zlilo bzlilo: vmlinux +- $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zlilo +- +-zdisk bzdisk: vmlinux +- $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) zdisk +- +-fdimage 
fdimage144 fdimage288 isoimage: vmlinux +- $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) $@ +- +-install: +- $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(KBUILD_IMAGE) install ++$(BOOT_TARGETS): vmlinux ++ $(Q)$(MAKE) $(build)=$(boot) $@ + + PHONY += vdso_install + vdso_install: +@@ -228,7 +199,3 @@ define archhelp + echo ' FDARGS="..." arguments for the booted kernel' + echo ' FDINITRD=file initrd for the booted kernel' + endef +- +-CLEAN_FILES += arch/x86/boot/fdimage \ +- arch/x86/boot/image.iso \ +- arch/x86/boot/mtools.conf +Index: linux-2.6-tip/arch/x86/boot/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/Makefile ++++ linux-2.6-tip/arch/x86/boot/Makefile +@@ -6,33 +6,30 @@ + # for more details. + # + # Copyright (C) 1994 by Linus Torvalds ++# Changed by many, many contributors over the years. + # + + # ROOT_DEV specifies the default root-device when making the image. + # This can be either FLOPPY, CURRENT, /dev/xxxx or empty, in which case + # the default of FLOPPY is used by 'build'. + +-ROOT_DEV := CURRENT ++ROOT_DEV := CURRENT + + # If you want to preset the SVGA mode, uncomment the next line and + # set SVGA_MODE to whatever number you want. + # Set it to -DSVGA_MODE=NORMAL_VGA if you just want the EGA/VGA mode. + # The number is the same as you would ordinarily press at bootup. + +-SVGA_MODE := -DSVGA_MODE=NORMAL_VGA ++SVGA_MODE := -DSVGA_MODE=NORMAL_VGA + +-# If you want the RAM disk device, define this to be the size in blocks. +- +-#RAMDISK := -DRAMDISK=512 +- +-targets := vmlinux.bin setup.bin setup.elf zImage bzImage ++targets := vmlinux.bin setup.bin setup.elf bzImage ++targets += fdimage fdimage144 fdimage288 image.iso mtools.conf + subdir- := compressed + + setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o + setup-y += header.o main.o mca.o memory.o pm.o pmjump.o + setup-y += printf.o string.o tty.o video.o video-mode.o version.o + setup-$(CONFIG_X86_APM_BOOT) += apm.o +-setup-$(CONFIG_X86_VOYAGER) += voyager.o + + # The link order of the video-*.o modules can matter. In particular, + # video-vga.o *must* be listed first, followed by video-vesa.o. 
+@@ -72,17 +69,13 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os + KBUILD_CFLAGS += $(call cc-option,-m32) + KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ + +-$(obj)/zImage: asflags-y := $(SVGA_MODE) $(RAMDISK) +-$(obj)/bzImage: ccflags-y := -D__BIG_KERNEL__ +-$(obj)/bzImage: asflags-y := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__ +-$(obj)/bzImage: BUILDFLAGS := -b ++$(obj)/bzImage: asflags-y := $(SVGA_MODE) + + quiet_cmd_image = BUILD $@ +-cmd_image = $(obj)/tools/build $(BUILDFLAGS) $(obj)/setup.bin \ +- $(obj)/vmlinux.bin $(ROOT_DEV) > $@ ++cmd_image = $(obj)/tools/build $(obj)/setup.bin $(obj)/vmlinux.bin \ ++ $(ROOT_DEV) > $@ + +-$(obj)/zImage $(obj)/bzImage: $(obj)/setup.bin \ +- $(obj)/vmlinux.bin $(obj)/tools/build FORCE ++$(obj)/bzImage: $(obj)/setup.bin $(obj)/vmlinux.bin $(obj)/tools/build FORCE + $(call if_changed,image) + @echo 'Kernel: $@ is ready' ' (#'`cat .version`')' + +@@ -117,9 +110,11 @@ $(obj)/setup.bin: $(obj)/setup.elf FORCE + $(obj)/compressed/vmlinux: FORCE + $(Q)$(MAKE) $(build)=$(obj)/compressed $@ + +-# Set this if you want to pass append arguments to the zdisk/fdimage/isoimage kernel ++# Set this if you want to pass append arguments to the ++# bzdisk/fdimage/isoimage kernel + FDARGS = +-# Set this if you want an initrd included with the zdisk/fdimage/isoimage kernel ++# Set this if you want an initrd included with the ++# bzdisk/fdimage/isoimage kernel + FDINITRD = + + image_cmdline = default linux $(FDARGS) $(if $(FDINITRD),initrd=initrd.img,) +@@ -128,7 +123,7 @@ $(obj)/mtools.conf: $(src)/mtools.conf.i + sed -e 's|@OBJ@|$(obj)|g' < $< > $@ + + # This requires write access to /dev/fd0 +-zdisk: $(BOOTIMAGE) $(obj)/mtools.conf ++bzdisk: $(obj)/bzImage $(obj)/mtools.conf + MTOOLSRC=$(obj)/mtools.conf mformat a: ; sync + syslinux /dev/fd0 ; sync + echo '$(image_cmdline)' | \ +@@ -136,10 +131,10 @@ zdisk: $(BOOTIMAGE) $(obj)/mtools.conf + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' a:initrd.img ; \ + fi +- MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) a:linux ; sync ++ MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage a:linux ; sync + + # These require being root or having syslinux 2.02 or higher installed +-fdimage fdimage144: $(BOOTIMAGE) $(obj)/mtools.conf ++fdimage fdimage144: $(obj)/bzImage $(obj)/mtools.conf + dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=1440 + MTOOLSRC=$(obj)/mtools.conf mformat v: ; sync + syslinux $(obj)/fdimage ; sync +@@ -148,9 +143,9 @@ fdimage fdimage144: $(BOOTIMAGE) $(obj)/ + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' v:initrd.img ; \ + fi +- MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) v:linux ; sync ++ MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage v:linux ; sync + +-fdimage288: $(BOOTIMAGE) $(obj)/mtools.conf ++fdimage288: $(obj)/bzImage $(obj)/mtools.conf + dd if=/dev/zero of=$(obj)/fdimage bs=1024 count=2880 + MTOOLSRC=$(obj)/mtools.conf mformat w: ; sync + syslinux $(obj)/fdimage ; sync +@@ -159,9 +154,9 @@ fdimage288: $(BOOTIMAGE) $(obj)/mtools.c + if [ -f '$(FDINITRD)' ] ; then \ + MTOOLSRC=$(obj)/mtools.conf mcopy '$(FDINITRD)' w:initrd.img ; \ + fi +- MTOOLSRC=$(obj)/mtools.conf mcopy $(BOOTIMAGE) w:linux ; sync ++ MTOOLSRC=$(obj)/mtools.conf mcopy $(obj)/bzImage w:linux ; sync + +-isoimage: $(BOOTIMAGE) ++isoimage: $(obj)/bzImage + -rm -rf $(obj)/isoimage + mkdir $(obj)/isoimage + for i in lib lib64 share end ; do \ +@@ -171,7 +166,7 @@ isoimage: $(BOOTIMAGE) + fi ; \ + if [ $$i = end ] ; then exit 1 ; fi ; \ + done +- cp $(BOOTIMAGE) 
$(obj)/isoimage/linux ++ cp $(obj)/bzImage $(obj)/isoimage/linux + echo '$(image_cmdline)' > $(obj)/isoimage/isolinux.cfg + if [ -f '$(FDINITRD)' ] ; then \ + cp '$(FDINITRD)' $(obj)/isoimage/initrd.img ; \ +@@ -182,12 +177,13 @@ isoimage: $(BOOTIMAGE) + isohybrid $(obj)/image.iso 2>/dev/null || true + rm -rf $(obj)/isoimage + +-zlilo: $(BOOTIMAGE) ++bzlilo: $(obj)/bzImage + if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi + if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi +- cat $(BOOTIMAGE) > $(INSTALL_PATH)/vmlinuz ++ cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz + cp System.map $(INSTALL_PATH)/ + if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi + + install: +- sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)" ++ sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \ ++ System.map "$(INSTALL_PATH)" +Index: linux-2.6-tip/arch/x86/boot/a20.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/a20.c ++++ linux-2.6-tip/arch/x86/boot/a20.c +@@ -2,6 +2,7 @@ + * + * Copyright (C) 1991, 1992 Linus Torvalds + * Copyright 2007-2008 rPath, Inc. - All Rights Reserved ++ * Copyright 2009 Intel Corporation + * + * This file is part of the Linux kernel, and is made available under + * the terms of the GNU General Public License version 2. +@@ -15,16 +16,23 @@ + #include "boot.h" + + #define MAX_8042_LOOPS 100000 ++#define MAX_8042_FF 32 + + static int empty_8042(void) + { + u8 status; + int loops = MAX_8042_LOOPS; ++ int ffs = MAX_8042_FF; + + while (loops--) { + io_delay(); + + status = inb(0x64); ++ if (status == 0xff) { ++ /* FF is a plausible, but very unlikely status */ ++ if (!--ffs) ++ return -1; /* Assume no KBC present */ ++ } + if (status & 1) { + /* Read and discard input data */ + io_delay(); +@@ -118,44 +126,37 @@ static void enable_a20_fast(void) + + int enable_a20(void) + { +-#if defined(CONFIG_X86_ELAN) +- /* Elan croaks if we try to touch the KBC */ +- enable_a20_fast(); +- while (!a20_test_long()) +- ; +- return 0; +-#elif defined(CONFIG_X86_VOYAGER) +- /* On Voyager, a20_test() is unsafe? */ +- enable_a20_kbc(); +- return 0; +-#else + int loops = A20_ENABLE_LOOPS; +- while (loops--) { +- /* First, check to see if A20 is already enabled +- (legacy free, etc.) */ +- if (a20_test_short()) +- return 0; +- +- /* Next, try the BIOS (INT 0x15, AX=0x2401) */ +- enable_a20_bios(); +- if (a20_test_short()) +- return 0; +- +- /* Try enabling A20 through the keyboard controller */ +- empty_8042(); +- if (a20_test_short()) +- return 0; /* BIOS worked, but with delayed reaction */ ++ int kbc_err; + +- enable_a20_kbc(); +- if (a20_test_long()) +- return 0; +- +- /* Finally, try enabling the "fast A20 gate" */ +- enable_a20_fast(); +- if (a20_test_long()) +- return 0; +- } +- +- return -1; +-#endif ++ while (loops--) { ++ /* First, check to see if A20 is already enabled ++ (legacy free, etc.) 
*/ ++ if (a20_test_short()) ++ return 0; ++ ++ /* Next, try the BIOS (INT 0x15, AX=0x2401) */ ++ enable_a20_bios(); ++ if (a20_test_short()) ++ return 0; ++ ++ /* Try enabling A20 through the keyboard controller */ ++ kbc_err = empty_8042(); ++ ++ if (a20_test_short()) ++ return 0; /* BIOS worked, but with delayed reaction */ ++ ++ if (!kbc_err) { ++ enable_a20_kbc(); ++ if (a20_test_long()) ++ return 0; ++ } ++ ++ /* Finally, try enabling the "fast A20 gate" */ ++ enable_a20_fast(); ++ if (a20_test_long()) ++ return 0; ++ } ++ ++ return -1; + } +Index: linux-2.6-tip/arch/x86/boot/boot.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/boot.h ++++ linux-2.6-tip/arch/x86/boot/boot.h +@@ -302,9 +302,6 @@ void probe_cards(int unsafe); + /* video-vesa.c */ + void vesa_store_edid(void); + +-/* voyager.c */ +-int query_voyager(void); +- + #endif /* __ASSEMBLY__ */ + + #endif /* BOOT_BOOT_H */ +Index: linux-2.6-tip/arch/x86/boot/compressed/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/compressed/Makefile ++++ linux-2.6-tip/arch/x86/boot/compressed/Makefile +@@ -4,7 +4,7 @@ + # create a compressed vmlinux image from the original vmlinux + # + +-targets := vmlinux vmlinux.bin vmlinux.bin.gz head_$(BITS).o misc.o piggy.o ++targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma head_$(BITS).o misc.o piggy.o + + KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 + KBUILD_CFLAGS += -fno-strict-aliasing -fPIC +@@ -47,18 +47,35 @@ ifeq ($(CONFIG_X86_32),y) + ifdef CONFIG_RELOCATABLE + $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE + $(call if_changed,gzip) ++$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin.all FORCE ++ $(call if_changed,bzip2) ++$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin.all FORCE ++ $(call if_changed,lzma) + else + $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) ++$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE ++ $(call if_changed,bzip2) ++$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE ++ $(call if_changed,lzma) + endif + LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T + + else ++ + $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE + $(call if_changed,gzip) ++$(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE ++ $(call if_changed,bzip2) ++$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE ++ $(call if_changed,lzma) + + LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T + endif + +-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE ++suffix_$(CONFIG_KERNEL_GZIP) = gz ++suffix_$(CONFIG_KERNEL_BZIP2) = bz2 ++suffix_$(CONFIG_KERNEL_LZMA) = lzma ++ ++$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix_y) FORCE + $(call if_changed,ld) +Index: linux-2.6-tip/arch/x86/boot/compressed/head_32.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/compressed/head_32.S ++++ linux-2.6-tip/arch/x86/boot/compressed/head_32.S +@@ -25,14 +25,12 @@ + + #include + #include +-#include ++#include + #include + #include + + .section ".text.head","ax",@progbits +- .globl startup_32 +- +-startup_32: ++ENTRY(startup_32) + cld + /* test KEEP_SEGMENTS flag to see if the bootloader is asking + * us to not reload segments */ +@@ -113,6 +111,8 @@ startup_32: + */ + leal relocated(%ebx), %eax + jmp *%eax ++ENDPROC(startup_32) ++ + .section ".text" + relocated: + +Index: linux-2.6-tip/arch/x86/boot/compressed/head_64.S 
+=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/compressed/head_64.S ++++ linux-2.6-tip/arch/x86/boot/compressed/head_64.S +@@ -26,8 +26,8 @@ + + #include + #include +-#include +-#include ++#include ++#include + #include + #include + #include +@@ -35,9 +35,7 @@ + + .section ".text.head" + .code32 +- .globl startup_32 +- +-startup_32: ++ENTRY(startup_32) + cld + /* test KEEP_SEGMENTS flag to see if the bootloader is asking + * us to not reload segments */ +@@ -176,6 +174,7 @@ startup_32: + + /* Jump from 32bit compatibility mode into 64bit mode. */ + lret ++ENDPROC(startup_32) + + no_longmode: + /* This isn't an x86-64 CPU so hang */ +@@ -295,7 +294,6 @@ relocated: + call decompress_kernel + popq %rsi + +- + /* + * Jump to the decompressed kernel. + */ +Index: linux-2.6-tip/arch/x86/boot/compressed/misc.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/compressed/misc.c ++++ linux-2.6-tip/arch/x86/boot/compressed/misc.c +@@ -116,71 +116,13 @@ + /* + * gzip declarations + */ +- +-#define OF(args) args + #define STATIC static + + #undef memset + #undef memcpy + #define memzero(s, n) memset((s), 0, (n)) + +-typedef unsigned char uch; +-typedef unsigned short ush; +-typedef unsigned long ulg; +- +-/* +- * Window size must be at least 32k, and a power of two. +- * We don't actually have a window just a huge output buffer, +- * so we report a 2G window size, as that should always be +- * larger than our output buffer: +- */ +-#define WSIZE 0x80000000 +- +-/* Input buffer: */ +-static unsigned char *inbuf; +- +-/* Sliding window buffer (and final output buffer): */ +-static unsigned char *window; + +-/* Valid bytes in inbuf: */ +-static unsigned insize; +- +-/* Index of next byte to be processed in inbuf: */ +-static unsigned inptr; +- +-/* Bytes in output buffer: */ +-static unsigned outcnt; +- +-/* gzip flag byte */ +-#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */ +-#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gz file */ +-#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ +-#define ORIG_NAM 0x08 /* bit 3 set: original file name present */ +-#define COMMENT 0x10 /* bit 4 set: file comment present */ +-#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */ +-#define RESERVED 0xC0 /* bit 6, 7: reserved */ +- +-#define get_byte() (inptr < insize ? 
inbuf[inptr++] : fill_inbuf()) +- +-/* Diagnostic functions */ +-#ifdef DEBUG +-# define Assert(cond, msg) do { if (!(cond)) error(msg); } while (0) +-# define Trace(x) do { fprintf x; } while (0) +-# define Tracev(x) do { if (verbose) fprintf x ; } while (0) +-# define Tracevv(x) do { if (verbose > 1) fprintf x ; } while (0) +-# define Tracec(c, x) do { if (verbose && (c)) fprintf x ; } while (0) +-# define Tracecv(c, x) do { if (verbose > 1 && (c)) fprintf x ; } while (0) +-#else +-# define Assert(cond, msg) +-# define Trace(x) +-# define Tracev(x) +-# define Tracevv(x) +-# define Tracec(c, x) +-# define Tracecv(c, x) +-#endif +- +-static int fill_inbuf(void); +-static void flush_window(void); + static void error(char *m); + + /* +@@ -189,13 +131,8 @@ static void error(char *m); + static struct boot_params *real_mode; /* Pointer to real-mode data */ + static int quiet; + +-extern unsigned char input_data[]; +-extern int input_len; +- +-static long bytes_out; +- + static void *memset(void *s, int c, unsigned n); +-static void *memcpy(void *dest, const void *src, unsigned n); ++void *memcpy(void *dest, const void *src, unsigned n); + + static void __putstr(int, const char *); + #define putstr(__x) __putstr(0, __x) +@@ -213,7 +150,17 @@ static char *vidmem; + static int vidport; + static int lines, cols; + +-#include "../../../../lib/inflate.c" ++#ifdef CONFIG_KERNEL_GZIP ++#include "../../../../lib/decompress_inflate.c" ++#endif ++ ++#ifdef CONFIG_KERNEL_BZIP2 ++#include "../../../../lib/decompress_bunzip2.c" ++#endif ++ ++#ifdef CONFIG_KERNEL_LZMA ++#include "../../../../lib/decompress_unlzma.c" ++#endif + + static void scroll(void) + { +@@ -282,7 +229,7 @@ static void *memset(void *s, int c, unsi + return s; + } + +-static void *memcpy(void *dest, const void *src, unsigned n) ++void *memcpy(void *dest, const void *src, unsigned n) + { + int i; + const char *s = src; +@@ -293,38 +240,6 @@ static void *memcpy(void *dest, const vo + return dest; + } + +-/* =========================================================================== +- * Fill the input buffer. This is called only when the buffer is empty +- * and at least one byte is really needed. +- */ +-static int fill_inbuf(void) +-{ +- error("ran out of input data"); +- return 0; +-} +- +-/* =========================================================================== +- * Write the output window window[0..outcnt-1] and update crc and bytes_out. +- * (Used for the decompressed data only.) +- */ +-static void flush_window(void) +-{ +- /* With my window equal to my output buffer +- * I only need to compute the crc here. +- */ +- unsigned long c = crc; /* temporary variable */ +- unsigned n; +- unsigned char *in, ch; +- +- in = window; +- for (n = 0; n < outcnt; n++) { +- ch = *in++; +- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); +- } +- crc = c; +- bytes_out += (unsigned long)outcnt; +- outcnt = 0; +-} + + static void error(char *x) + { +@@ -407,12 +322,8 @@ asmlinkage void decompress_kernel(void * + lines = real_mode->screen_info.orig_video_lines; + cols = real_mode->screen_info.orig_video_cols; + +- window = output; /* Output buffer (Normally at 1M) */ + free_mem_ptr = heap; /* Heap */ + free_mem_end_ptr = heap + BOOT_HEAP_SIZE; +- inbuf = input_data; /* Input buffer */ +- insize = input_len; +- inptr = 0; + + #ifdef CONFIG_X86_64 + if ((unsigned long)output & (__KERNEL_ALIGN - 1)) +@@ -430,10 +341,9 @@ asmlinkage void decompress_kernel(void * + #endif + #endif + +- makecrc(); + if (!quiet) + putstr("\nDecompressing Linux... 
"); +- gunzip(); ++ decompress(input_data, input_len, NULL, NULL, output, NULL, error); + parse_elf(output); + if (!quiet) + putstr("done.\nBooting the kernel.\n"); +Index: linux-2.6-tip/arch/x86/boot/copy.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/copy.S ++++ linux-2.6-tip/arch/x86/boot/copy.S +@@ -8,6 +8,8 @@ + * + * ----------------------------------------------------------------------- */ + ++#include ++ + /* + * Memory copy routines + */ +@@ -15,9 +17,7 @@ + .code16gcc + .text + +- .globl memcpy +- .type memcpy, @function +-memcpy: ++GLOBAL(memcpy) + pushw %si + pushw %di + movw %ax, %di +@@ -31,11 +31,9 @@ memcpy: + popw %di + popw %si + ret +- .size memcpy, .-memcpy ++ENDPROC(memcpy) + +- .globl memset +- .type memset, @function +-memset: ++GLOBAL(memset) + pushw %di + movw %ax, %di + movzbl %dl, %eax +@@ -48,52 +46,42 @@ memset: + rep; stosb + popw %di + ret +- .size memset, .-memset ++ENDPROC(memset) + +- .globl copy_from_fs +- .type copy_from_fs, @function +-copy_from_fs: ++GLOBAL(copy_from_fs) + pushw %ds + pushw %fs + popw %ds + call memcpy + popw %ds + ret +- .size copy_from_fs, .-copy_from_fs ++ENDPROC(copy_from_fs) + +- .globl copy_to_fs +- .type copy_to_fs, @function +-copy_to_fs: ++GLOBAL(copy_to_fs) + pushw %es + pushw %fs + popw %es + call memcpy + popw %es + ret +- .size copy_to_fs, .-copy_to_fs ++ENDPROC(copy_to_fs) + + #if 0 /* Not currently used, but can be enabled as needed */ +- +- .globl copy_from_gs +- .type copy_from_gs, @function +-copy_from_gs: ++GLOBAL(copy_from_gs) + pushw %ds + pushw %gs + popw %ds + call memcpy + popw %ds + ret +- .size copy_from_gs, .-copy_from_gs +- .globl copy_to_gs ++ENDPROC(copy_from_gs) + +- .type copy_to_gs, @function +-copy_to_gs: ++GLOBAL(copy_to_gs) + pushw %es + pushw %gs + popw %es + call memcpy + popw %es + ret +- .size copy_to_gs, .-copy_to_gs +- ++ENDPROC(copy_to_gs) + #endif +Index: linux-2.6-tip/arch/x86/boot/header.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/header.S ++++ linux-2.6-tip/arch/x86/boot/header.S +@@ -19,17 +19,13 @@ + #include + #include + #include +-#include ++#include + #include + #include "boot.h" + #include "offsets.h" + +-SETUPSECTS = 4 /* default nr of setup-sectors */ + BOOTSEG = 0x07C0 /* original address of boot-sector */ +-SYSSEG = DEF_SYSSEG /* system loaded at 0x10000 (65536) */ +-SYSSIZE = DEF_SYSSIZE /* system size: # of 16-byte clicks */ +- /* to be loaded */ +-ROOT_DEV = 0 /* ROOT_DEV is now written by "build" */ ++SYSSEG = 0x1000 /* historical load address >> 4 */ + + #ifndef SVGA_MODE + #define SVGA_MODE ASK_VGA +@@ -97,12 +93,12 @@ bugger_off_msg: + .section ".header", "a" + .globl hdr + hdr: +-setup_sects: .byte SETUPSECTS ++setup_sects: .byte 0 /* Filled in by build.c */ + root_flags: .word ROOT_RDONLY +-syssize: .long SYSSIZE +-ram_size: .word RAMDISK ++syssize: .long 0 /* Filled in by build.c */ ++ram_size: .word 0 /* Obsolete */ + vid_mode: .word SVGA_MODE +-root_dev: .word ROOT_DEV ++root_dev: .word 0 /* Filled in by build.c */ + boot_flag: .word 0xAA55 + + # offset 512, entry point +@@ -123,14 +119,15 @@ _start: + # or else old loadlin-1.5 will fail) + .globl realmode_swtch + realmode_swtch: .word 0, 0 # default_switch, SETUPSEG +-start_sys_seg: .word SYSSEG ++start_sys_seg: .word SYSSEG # obsolete and meaningless, but just ++ # in case something decided to "use" it + .word kernel_version-512 # pointing to kernel version string + # above section of header 
is compatible + # with loadlin-1.5 (header v1.5). Don't + # change it. + +-type_of_loader: .byte 0 # = 0, old one (LILO, Loadlin, +- # Bootlin, SYSLX, bootsect...) ++type_of_loader: .byte 0 # 0 means ancient bootloader, newer ++ # bootloaders know to change this. + # See Documentation/i386/boot.txt for + # assigned ids + +@@ -142,11 +139,7 @@ CAN_USE_HEAP = 0x80 # If set, the load + # space behind setup.S can be used for + # heap purposes. + # Only the loader knows what is free +-#ifndef __BIG_KERNEL__ +- .byte 0 +-#else + .byte LOADED_HIGH +-#endif + + setup_move_size: .word 0x8000 # size to move, when setup is not + # loaded at 0x90000. We will move setup +@@ -157,11 +150,7 @@ setup_move_size: .word 0x8000 # size t + + code32_start: # here loaders can put a different + # start address for 32-bit code. +-#ifndef __BIG_KERNEL__ +- .long 0x1000 # 0x1000 = default for zImage +-#else + .long 0x100000 # 0x100000 = default for big kernel +-#endif + + ramdisk_image: .long 0 # address of loaded ramdisk image + # Here the loader puts the 32-bit +Index: linux-2.6-tip/arch/x86/boot/main.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/main.c ++++ linux-2.6-tip/arch/x86/boot/main.c +@@ -149,11 +149,6 @@ void main(void) + /* Query MCA information */ + query_mca(); + +- /* Voyager */ +-#ifdef CONFIG_X86_VOYAGER +- query_voyager(); +-#endif +- + /* Query Intel SpeedStep (IST) information */ + query_ist(); + +Index: linux-2.6-tip/arch/x86/boot/pm.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/pm.c ++++ linux-2.6-tip/arch/x86/boot/pm.c +@@ -33,47 +33,6 @@ static void realmode_switch_hook(void) + } + + /* +- * A zImage kernel is loaded at 0x10000 but wants to run at 0x1000. +- * A bzImage kernel is loaded and runs at 0x100000. +- */ +-static void move_kernel_around(void) +-{ +- /* Note: rely on the compile-time option here rather than +- the LOADED_HIGH flag. The Qemu kernel loader unconditionally +- sets the loadflags to zero. */ +-#ifndef __BIG_KERNEL__ +- u16 dst_seg, src_seg; +- u32 syssize; +- +- dst_seg = 0x1000 >> 4; +- src_seg = 0x10000 >> 4; +- syssize = boot_params.hdr.syssize; /* Size in 16-byte paragraphs */ +- +- while (syssize) { +- int paras = (syssize >= 0x1000) ? 0x1000 : syssize; +- int dwords = paras << 2; +- +- asm volatile("pushw %%es ; " +- "pushw %%ds ; " +- "movw %1,%%es ; " +- "movw %2,%%ds ; " +- "xorw %%di,%%di ; " +- "xorw %%si,%%si ; " +- "rep;movsl ; " +- "popw %%ds ; " +- "popw %%es" +- : "+c" (dwords) +- : "r" (dst_seg), "r" (src_seg) +- : "esi", "edi"); +- +- syssize -= paras; +- dst_seg += paras; +- src_seg += paras; +- } +-#endif +-} +- +-/* + * Disable all interrupts at the legacy PIC. 
+ */ + static void mask_all_interrupts(void) +@@ -147,9 +106,6 @@ void go_to_protected_mode(void) + /* Hook before leaving real mode, also disables interrupts */ + realmode_switch_hook(); + +- /* Move the kernel/setup to their final resting places */ +- move_kernel_around(); +- + /* Enable the A20 gate */ + if (enable_a20()) { + puts("A20 gate not responding, unable to boot...\n"); +Index: linux-2.6-tip/arch/x86/boot/pmjump.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/pmjump.S ++++ linux-2.6-tip/arch/x86/boot/pmjump.S +@@ -15,18 +15,15 @@ + #include + #include + #include ++#include + + .text +- +- .globl protected_mode_jump +- .type protected_mode_jump, @function +- + .code16 + + /* + * void protected_mode_jump(u32 entrypoint, u32 bootparams); + */ +-protected_mode_jump: ++GLOBAL(protected_mode_jump) + movl %edx, %esi # Pointer to boot_params table + + xorl %ebx, %ebx +@@ -47,12 +44,11 @@ protected_mode_jump: + .byte 0x66, 0xea # ljmpl opcode + 2: .long in_pm32 # offset + .word __BOOT_CS # segment +- +- .size protected_mode_jump, .-protected_mode_jump ++ENDPROC(protected_mode_jump) + + .code32 +- .type in_pm32, @function +-in_pm32: ++ .section ".text32","ax" ++GLOBAL(in_pm32) + # Set up data segments for flat 32-bit mode + movl %ecx, %ds + movl %ecx, %es +@@ -78,5 +74,4 @@ in_pm32: + lldt %cx + + jmpl *%eax # Jump to the 32-bit entrypoint +- +- .size in_pm32, .-in_pm32 ++ENDPROC(in_pm32) +Index: linux-2.6-tip/arch/x86/boot/setup.ld +=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/setup.ld ++++ linux-2.6-tip/arch/x86/boot/setup.ld +@@ -17,7 +17,8 @@ SECTIONS + .header : { *(.header) } + .inittext : { *(.inittext) } + .initdata : { *(.initdata) } +- .text : { *(.text*) } ++ .text : { *(.text) } ++ .text32 : { *(.text32) } + + . = ALIGN(16); + .rodata : { *(.rodata*) } +Index: linux-2.6-tip/arch/x86/boot/tools/build.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/tools/build.c ++++ linux-2.6-tip/arch/x86/boot/tools/build.c +@@ -130,7 +130,7 @@ static void die(const char * str, ...) + + static void usage(void) + { +- die("Usage: build [-b] setup system [rootdev] [> image]"); ++ die("Usage: build setup system [rootdev] [> image]"); + } + + int main(int argc, char ** argv) +@@ -145,11 +145,6 @@ int main(int argc, char ** argv) + void *kernel; + u32 crc = 0xffffffffUL; + +- if (argc > 2 && !strcmp(argv[1], "-b")) +- { +- is_big_kernel = 1; +- argc--, argv++; +- } + if ((argc < 3) || (argc > 4)) + usage(); + if (argc > 3) { +@@ -216,8 +211,6 @@ int main(int argc, char ** argv) + die("Unable to mmap '%s': %m", argv[2]); + /* Number of 16-byte paragraphs, including space for a 4-byte CRC */ + sys_size = (sz + 15 + 4) / 16; +- if (!is_big_kernel && sys_size > DEF_SYSSIZE) +- die("System is too big. Try using bzImage or modules."); + + /* Patch the setup code with the appropriate size parameters */ + buf[0x1f1] = setup_sectors-1; +Index: linux-2.6-tip/arch/x86/boot/video-mode.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/video-mode.c ++++ linux-2.6-tip/arch/x86/boot/video-mode.c +@@ -147,7 +147,7 @@ static void vga_recalc_vertical(void) + int set_mode(u16 mode) + { + int rv; +- u16 real_mode; ++ u16 uninitialized_var(real_mode); + + /* Very special mode numbers... 
*/ + if (mode == VIDEO_CURRENT_MODE) +Index: linux-2.6-tip/arch/x86/boot/video-vga.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/video-vga.c ++++ linux-2.6-tip/arch/x86/boot/video-vga.c +@@ -129,41 +129,45 @@ u16 vga_crtc(void) + return (inb(0x3cc) & 1) ? 0x3d4 : 0x3b4; + } + +-static void vga_set_480_scanlines(int end) ++static void vga_set_480_scanlines(int lines) + { +- u16 crtc; +- u8 csel; ++ u16 crtc; /* CRTC base address */ ++ u8 csel; /* CRTC miscellaneous output register */ ++ u8 ovfw; /* CRTC overflow register */ ++ int end = lines-1; + + crtc = vga_crtc(); + ++ ovfw = 0x3c | ((end >> (8-1)) & 0x02) | ((end >> (9-6)) & 0x40); ++ + out_idx(0x0c, crtc, 0x11); /* Vertical sync end, unlock CR0-7 */ + out_idx(0x0b, crtc, 0x06); /* Vertical total */ +- out_idx(0x3e, crtc, 0x07); /* Vertical overflow */ ++ out_idx(ovfw, crtc, 0x07); /* Vertical overflow */ + out_idx(0xea, crtc, 0x10); /* Vertical sync start */ +- out_idx(end, crtc, 0x12); /* Vertical display end */ ++ out_idx(end, crtc, 0x12); /* Vertical display end */ + out_idx(0xe7, crtc, 0x15); /* Vertical blank start */ + out_idx(0x04, crtc, 0x16); /* Vertical blank end */ + csel = inb(0x3cc); + csel &= 0x0d; + csel |= 0xe2; +- outb(csel, 0x3cc); ++ outb(csel, 0x3c2); + } + + static void vga_set_80x30(void) + { +- vga_set_480_scanlines(0xdf); ++ vga_set_480_scanlines(30*16); + } + + static void vga_set_80x34(void) + { + vga_set_14font(); +- vga_set_480_scanlines(0xdb); ++ vga_set_480_scanlines(34*14); + } + + static void vga_set_80x60(void) + { + vga_set_8font(); +- vga_set_480_scanlines(0xdf); ++ vga_set_480_scanlines(60*8); + } + + static int vga_set_mode(struct mode_info *mode) +Index: linux-2.6-tip/arch/x86/boot/voyager.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/boot/voyager.c ++++ /dev/null +@@ -1,40 +0,0 @@ +-/* -*- linux-c -*- ------------------------------------------------------- * +- * +- * Copyright (C) 1991, 1992 Linus Torvalds +- * Copyright 2007 rPath, Inc. - All Rights Reserved +- * +- * This file is part of the Linux kernel, and is made available under +- * the terms of the GNU General Public License version 2. +- * +- * ----------------------------------------------------------------------- */ +- +-/* +- * Get the Voyager config information +- */ +- +-#include "boot.h" +- +-int query_voyager(void) +-{ +- u8 err; +- u16 es, di; +- /* Abuse the apm_bios_info area for this */ +- u8 *data_ptr = (u8 *)&boot_params.apm_bios_info; +- +- data_ptr[0] = 0xff; /* Flag on config not found(?) 
*/ +- +- asm("pushw %%es ; " +- "int $0x15 ; " +- "setc %0 ; " +- "movw %%es, %1 ; " +- "popw %%es" +- : "=q" (err), "=r" (es), "=D" (di) +- : "a" (0xffc0)); +- +- if (err) +- return -1; /* Not Voyager */ +- +- set_fs(es); +- copy_from_fs(data_ptr, di, 7); /* Table is 7 bytes apparently */ +- return 0; +-} +Index: linux-2.6-tip/arch/x86/configs/i386_defconfig +=================================================================== +--- linux-2.6-tip.orig/arch/x86/configs/i386_defconfig ++++ linux-2.6-tip/arch/x86/configs/i386_defconfig +@@ -1,14 +1,13 @@ + # + # Automatically generated make config: don't edit +-# Linux kernel version: 2.6.27-rc5 +-# Wed Sep 3 17:23:09 2008 ++# Linux kernel version: 2.6.29-rc4 ++# Tue Feb 24 15:50:58 2009 + # + # CONFIG_64BIT is not set + CONFIG_X86_32=y + # CONFIG_X86_64 is not set + CONFIG_X86=y + CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig" +-# CONFIG_GENERIC_LOCKBREAK is not set + CONFIG_GENERIC_TIME=y + CONFIG_GENERIC_CMOS_UPDATE=y + CONFIG_CLOCKSOURCE_WATCHDOG=y +@@ -24,16 +23,14 @@ CONFIG_GENERIC_ISA_DMA=y + CONFIG_GENERIC_IOMAP=y + CONFIG_GENERIC_BUG=y + CONFIG_GENERIC_HWEIGHT=y +-# CONFIG_GENERIC_GPIO is not set + CONFIG_ARCH_MAY_HAVE_PC_FDC=y + # CONFIG_RWSEM_GENERIC_SPINLOCK is not set + CONFIG_RWSEM_XCHGADD_ALGORITHM=y +-# CONFIG_ARCH_HAS_ILOG2_U32 is not set +-# CONFIG_ARCH_HAS_ILOG2_U64 is not set + CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y + CONFIG_GENERIC_CALIBRATE_DELAY=y + # CONFIG_GENERIC_TIME_VSYSCALL is not set + CONFIG_ARCH_HAS_CPU_RELAX=y ++CONFIG_ARCH_HAS_DEFAULT_IDLE=y + CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y + CONFIG_HAVE_SETUP_PER_CPU_AREA=y + # CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set +@@ -42,12 +39,12 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y + # CONFIG_ZONE_DMA32 is not set + CONFIG_ARCH_POPULATES_NODE_MAP=y + # CONFIG_AUDIT_ARCH is not set +-CONFIG_ARCH_SUPPORTS_AOUT=y + CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y + CONFIG_GENERIC_HARDIRQS=y + CONFIG_GENERIC_IRQ_PROBE=y + CONFIG_GENERIC_PENDING_IRQ=y + CONFIG_X86_SMP=y ++CONFIG_USE_GENERIC_SMP_HELPERS=y + CONFIG_X86_32_SMP=y + CONFIG_X86_HT=y + CONFIG_X86_BIOS_REBOOT=y +@@ -76,30 +73,44 @@ CONFIG_TASK_IO_ACCOUNTING=y + CONFIG_AUDIT=y + CONFIG_AUDITSYSCALL=y + CONFIG_AUDIT_TREE=y ++ ++# ++# RCU Subsystem ++# ++# CONFIG_CLASSIC_RCU is not set ++CONFIG_TREE_RCU=y ++# CONFIG_PREEMPT_RCU is not set ++# CONFIG_RCU_TRACE is not set ++CONFIG_RCU_FANOUT=32 ++# CONFIG_RCU_FANOUT_EXACT is not set ++# CONFIG_TREE_RCU_TRACE is not set ++# CONFIG_PREEMPT_RCU_TRACE is not set + # CONFIG_IKCONFIG is not set + CONFIG_LOG_BUF_SHIFT=18 +-CONFIG_CGROUPS=y +-# CONFIG_CGROUP_DEBUG is not set +-CONFIG_CGROUP_NS=y +-# CONFIG_CGROUP_DEVICE is not set +-CONFIG_CPUSETS=y + CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y + CONFIG_GROUP_SCHED=y + CONFIG_FAIR_GROUP_SCHED=y + # CONFIG_RT_GROUP_SCHED is not set + # CONFIG_USER_SCHED is not set + CONFIG_CGROUP_SCHED=y ++CONFIG_CGROUPS=y ++# CONFIG_CGROUP_DEBUG is not set ++CONFIG_CGROUP_NS=y ++CONFIG_CGROUP_FREEZER=y ++# CONFIG_CGROUP_DEVICE is not set ++CONFIG_CPUSETS=y ++CONFIG_PROC_PID_CPUSET=y + CONFIG_CGROUP_CPUACCT=y + CONFIG_RESOURCE_COUNTERS=y + # CONFIG_CGROUP_MEM_RES_CTLR is not set + # CONFIG_SYSFS_DEPRECATED_V2 is not set +-CONFIG_PROC_PID_CPUSET=y + CONFIG_RELAY=y + CONFIG_NAMESPACES=y + CONFIG_UTS_NS=y + CONFIG_IPC_NS=y + CONFIG_USER_NS=y + CONFIG_PID_NS=y ++CONFIG_NET_NS=y + CONFIG_BLK_DEV_INITRD=y + CONFIG_INITRAMFS_SOURCE="" + CONFIG_CC_OPTIMIZE_FOR_SIZE=y +@@ -124,12 +135,15 @@ CONFIG_SIGNALFD=y + CONFIG_TIMERFD=y + CONFIG_EVENTFD=y + CONFIG_SHMEM=y ++CONFIG_AIO=y + 
CONFIG_VM_EVENT_COUNTERS=y ++CONFIG_PCI_QUIRKS=y + CONFIG_SLUB_DEBUG=y + # CONFIG_SLAB is not set + CONFIG_SLUB=y + # CONFIG_SLOB is not set + CONFIG_PROFILING=y ++CONFIG_TRACEPOINTS=y + CONFIG_MARKERS=y + # CONFIG_OPROFILE is not set + CONFIG_HAVE_OPROFILE=y +@@ -139,15 +153,10 @@ CONFIG_KRETPROBES=y + CONFIG_HAVE_IOREMAP_PROT=y + CONFIG_HAVE_KPROBES=y + CONFIG_HAVE_KRETPROBES=y +-# CONFIG_HAVE_ARCH_TRACEHOOK is not set +-# CONFIG_HAVE_DMA_ATTRS is not set +-CONFIG_USE_GENERIC_SMP_HELPERS=y +-# CONFIG_HAVE_CLK is not set +-CONFIG_PROC_PAGE_MONITOR=y ++CONFIG_HAVE_ARCH_TRACEHOOK=y + CONFIG_HAVE_GENERIC_DMA_COHERENT=y + CONFIG_SLABINFO=y + CONFIG_RT_MUTEXES=y +-# CONFIG_TINY_SHMEM is not set + CONFIG_BASE_SMALL=0 + CONFIG_MODULES=y + # CONFIG_MODULE_FORCE_LOAD is not set +@@ -155,12 +164,10 @@ CONFIG_MODULE_UNLOAD=y + CONFIG_MODULE_FORCE_UNLOAD=y + # CONFIG_MODVERSIONS is not set + # CONFIG_MODULE_SRCVERSION_ALL is not set +-CONFIG_KMOD=y + CONFIG_STOP_MACHINE=y + CONFIG_BLOCK=y + # CONFIG_LBD is not set + CONFIG_BLK_DEV_IO_TRACE=y +-# CONFIG_LSF is not set + CONFIG_BLK_DEV_BSG=y + # CONFIG_BLK_DEV_INTEGRITY is not set + +@@ -176,7 +183,7 @@ CONFIG_IOSCHED_CFQ=y + CONFIG_DEFAULT_CFQ=y + # CONFIG_DEFAULT_NOOP is not set + CONFIG_DEFAULT_IOSCHED="cfq" +-CONFIG_CLASSIC_RCU=y ++CONFIG_FREEZER=y + + # + # Processor type and features +@@ -186,15 +193,14 @@ CONFIG_NO_HZ=y + CONFIG_HIGH_RES_TIMERS=y + CONFIG_GENERIC_CLOCKEVENTS_BUILD=y + CONFIG_SMP=y ++CONFIG_SPARSE_IRQ=y + CONFIG_X86_FIND_SMP_CONFIG=y + CONFIG_X86_MPPARSE=y +-CONFIG_X86_PC=y + # CONFIG_X86_ELAN is not set +-# CONFIG_X86_VOYAGER is not set + # CONFIG_X86_GENERICARCH is not set + # CONFIG_X86_VSMP is not set + # CONFIG_X86_RDC321X is not set +-CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y ++CONFIG_SCHED_OMIT_FRAME_POINTER=y + # CONFIG_PARAVIRT_GUEST is not set + # CONFIG_MEMTEST is not set + # CONFIG_M386 is not set +@@ -238,10 +244,19 @@ CONFIG_X86_TSC=y + CONFIG_X86_CMOV=y + CONFIG_X86_MINIMUM_CPU_FAMILY=4 + CONFIG_X86_DEBUGCTLMSR=y ++CONFIG_CPU_SUP_INTEL=y ++CONFIG_CPU_SUP_CYRIX_32=y ++CONFIG_CPU_SUP_AMD=y ++CONFIG_CPU_SUP_CENTAUR_32=y ++CONFIG_CPU_SUP_TRANSMETA_32=y ++CONFIG_CPU_SUP_UMC_32=y ++CONFIG_X86_DS=y ++CONFIG_X86_PTRACE_BTS=y + CONFIG_HPET_TIMER=y + CONFIG_HPET_EMULATE_RTC=y + CONFIG_DMI=y + # CONFIG_IOMMU_HELPER is not set ++# CONFIG_IOMMU_API is not set + CONFIG_NR_CPUS=64 + CONFIG_SCHED_SMT=y + CONFIG_SCHED_MC=y +@@ -250,12 +265,17 @@ CONFIG_PREEMPT_VOLUNTARY=y + # CONFIG_PREEMPT is not set + CONFIG_X86_LOCAL_APIC=y + CONFIG_X86_IO_APIC=y +-# CONFIG_X86_MCE is not set ++CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y ++CONFIG_X86_MCE=y ++CONFIG_X86_MCE_NONFATAL=y ++CONFIG_X86_MCE_P4THERMAL=y + CONFIG_VM86=y + # CONFIG_TOSHIBA is not set + # CONFIG_I8K is not set + CONFIG_X86_REBOOTFIXUPS=y + CONFIG_MICROCODE=y ++CONFIG_MICROCODE_INTEL=y ++CONFIG_MICROCODE_AMD=y + CONFIG_MICROCODE_OLD_INTERFACE=y + CONFIG_X86_MSR=y + CONFIG_X86_CPUID=y +@@ -264,6 +284,7 @@ CONFIG_HIGHMEM4G=y + # CONFIG_HIGHMEM64G is not set + CONFIG_PAGE_OFFSET=0xC0000000 + CONFIG_HIGHMEM=y ++# CONFIG_ARCH_PHYS_ADDR_T_64BIT is not set + CONFIG_ARCH_FLATMEM_ENABLE=y + CONFIG_ARCH_SPARSEMEM_ENABLE=y + CONFIG_ARCH_SELECT_MEMORY_MODEL=y +@@ -274,14 +295,17 @@ CONFIG_FLATMEM_MANUAL=y + CONFIG_FLATMEM=y + CONFIG_FLAT_NODE_MEM_MAP=y + CONFIG_SPARSEMEM_STATIC=y +-# CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set + CONFIG_PAGEFLAGS_EXTENDED=y + CONFIG_SPLIT_PTLOCK_CPUS=4 +-CONFIG_RESOURCES_64BIT=y ++# CONFIG_PHYS_ADDR_T_64BIT is not set + CONFIG_ZONE_DMA_FLAG=1 + 
CONFIG_BOUNCE=y + CONFIG_VIRT_TO_BUS=y ++CONFIG_UNEVICTABLE_LRU=y + CONFIG_HIGHPTE=y ++CONFIG_X86_CHECK_BIOS_CORRUPTION=y ++CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y ++CONFIG_X86_RESERVE_LOW_64K=y + # CONFIG_MATH_EMULATION is not set + CONFIG_MTRR=y + # CONFIG_MTRR_SANITIZER is not set +@@ -302,10 +326,11 @@ CONFIG_PHYSICAL_START=0x1000000 + CONFIG_PHYSICAL_ALIGN=0x200000 + CONFIG_HOTPLUG_CPU=y + # CONFIG_COMPAT_VDSO is not set ++# CONFIG_CMDLINE_BOOL is not set + CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y + + # +-# Power management options ++# Power management and ACPI options + # + CONFIG_PM=y + CONFIG_PM_DEBUG=y +@@ -331,19 +356,13 @@ CONFIG_ACPI_BATTERY=y + CONFIG_ACPI_BUTTON=y + CONFIG_ACPI_FAN=y + CONFIG_ACPI_DOCK=y +-# CONFIG_ACPI_BAY is not set + CONFIG_ACPI_PROCESSOR=y + CONFIG_ACPI_HOTPLUG_CPU=y + CONFIG_ACPI_THERMAL=y +-# CONFIG_ACPI_WMI is not set +-# CONFIG_ACPI_ASUS is not set +-# CONFIG_ACPI_TOSHIBA is not set + # CONFIG_ACPI_CUSTOM_DSDT is not set + CONFIG_ACPI_BLACKLIST_YEAR=0 + # CONFIG_ACPI_DEBUG is not set +-CONFIG_ACPI_EC=y + # CONFIG_ACPI_PCI_SLOT is not set +-CONFIG_ACPI_POWER=y + CONFIG_ACPI_SYSTEM=y + CONFIG_X86_PM_TIMER=y + CONFIG_ACPI_CONTAINER=y +@@ -388,7 +407,6 @@ CONFIG_X86_ACPI_CPUFREQ=y + # + # shared options + # +-# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set + # CONFIG_X86_SPEEDSTEP_LIB is not set + CONFIG_CPU_IDLE=y + CONFIG_CPU_IDLE_GOV_LADDER=y +@@ -415,6 +433,7 @@ CONFIG_ARCH_SUPPORTS_MSI=y + CONFIG_PCI_MSI=y + # CONFIG_PCI_LEGACY is not set + # CONFIG_PCI_DEBUG is not set ++# CONFIG_PCI_STUB is not set + CONFIG_HT_IRQ=y + CONFIG_ISA_DMA_API=y + # CONFIG_ISA is not set +@@ -452,13 +471,17 @@ CONFIG_HOTPLUG_PCI=y + # Executable file formats / Emulations + # + CONFIG_BINFMT_ELF=y ++CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y ++CONFIG_HAVE_AOUT=y + # CONFIG_BINFMT_AOUT is not set + CONFIG_BINFMT_MISC=y ++CONFIG_HAVE_ATOMIC_IOMAP=y + CONFIG_NET=y + + # + # Networking options + # ++CONFIG_COMPAT_NET_DEV_OPS=y + CONFIG_PACKET=y + CONFIG_PACKET_MMAP=y + CONFIG_UNIX=y +@@ -519,7 +542,6 @@ CONFIG_DEFAULT_CUBIC=y + # CONFIG_DEFAULT_RENO is not set + CONFIG_DEFAULT_TCP_CONG="cubic" + CONFIG_TCP_MD5SIG=y +-# CONFIG_IP_VS is not set + CONFIG_IPV6=y + # CONFIG_IPV6_PRIVACY is not set + # CONFIG_IPV6_ROUTER_PREF is not set +@@ -557,19 +579,21 @@ CONFIG_NF_CONNTRACK_IRC=y + CONFIG_NF_CONNTRACK_SIP=y + CONFIG_NF_CT_NETLINK=y + CONFIG_NETFILTER_XTABLES=y ++CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y + CONFIG_NETFILTER_XT_TARGET_MARK=y + CONFIG_NETFILTER_XT_TARGET_NFLOG=y + CONFIG_NETFILTER_XT_TARGET_SECMARK=y +-CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y + CONFIG_NETFILTER_XT_TARGET_TCPMSS=y + CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y + CONFIG_NETFILTER_XT_MATCH_MARK=y + CONFIG_NETFILTER_XT_MATCH_POLICY=y + CONFIG_NETFILTER_XT_MATCH_STATE=y ++# CONFIG_IP_VS is not set + + # + # IP: Netfilter Configuration + # ++CONFIG_NF_DEFRAG_IPV4=y + CONFIG_NF_CONNTRACK_IPV4=y + CONFIG_NF_CONNTRACK_PROC_COMPAT=y + CONFIG_IP_NF_IPTABLES=y +@@ -595,8 +619,8 @@ CONFIG_IP_NF_MANGLE=y + CONFIG_NF_CONNTRACK_IPV6=y + CONFIG_IP6_NF_IPTABLES=y + CONFIG_IP6_NF_MATCH_IPV6HEADER=y +-CONFIG_IP6_NF_FILTER=y + CONFIG_IP6_NF_TARGET_LOG=y ++CONFIG_IP6_NF_FILTER=y + CONFIG_IP6_NF_TARGET_REJECT=y + CONFIG_IP6_NF_MANGLE=y + # CONFIG_IP_DCCP is not set +@@ -604,6 +628,7 @@ CONFIG_IP6_NF_MANGLE=y + # CONFIG_TIPC is not set + # CONFIG_ATM is not set + # CONFIG_BRIDGE is not set ++# CONFIG_NET_DSA is not set + # CONFIG_VLAN_8021Q is not set + # CONFIG_DECNET is not set + CONFIG_LLC=y +@@ -623,6 +648,7 @@ 
CONFIG_NET_SCHED=y + # CONFIG_NET_SCH_HTB is not set + # CONFIG_NET_SCH_HFSC is not set + # CONFIG_NET_SCH_PRIO is not set ++# CONFIG_NET_SCH_MULTIQ is not set + # CONFIG_NET_SCH_RED is not set + # CONFIG_NET_SCH_SFQ is not set + # CONFIG_NET_SCH_TEQL is not set +@@ -630,6 +656,7 @@ CONFIG_NET_SCHED=y + # CONFIG_NET_SCH_GRED is not set + # CONFIG_NET_SCH_DSMARK is not set + # CONFIG_NET_SCH_NETEM is not set ++# CONFIG_NET_SCH_DRR is not set + # CONFIG_NET_SCH_INGRESS is not set + + # +@@ -644,6 +671,7 @@ CONFIG_NET_CLS=y + # CONFIG_NET_CLS_RSVP is not set + # CONFIG_NET_CLS_RSVP6 is not set + # CONFIG_NET_CLS_FLOW is not set ++# CONFIG_NET_CLS_CGROUP is not set + CONFIG_NET_EMATCH=y + CONFIG_NET_EMATCH_STACK=32 + # CONFIG_NET_EMATCH_CMP is not set +@@ -659,7 +687,9 @@ CONFIG_NET_CLS_ACT=y + # CONFIG_NET_ACT_NAT is not set + # CONFIG_NET_ACT_PEDIT is not set + # CONFIG_NET_ACT_SIMP is not set ++# CONFIG_NET_ACT_SKBEDIT is not set + CONFIG_NET_SCH_FIFO=y ++# CONFIG_DCB is not set + + # + # Network testing +@@ -676,29 +706,33 @@ CONFIG_HAMRADIO=y + # CONFIG_IRDA is not set + # CONFIG_BT is not set + # CONFIG_AF_RXRPC is not set ++# CONFIG_PHONET is not set + CONFIG_FIB_RULES=y +- +-# +-# Wireless +-# ++CONFIG_WIRELESS=y + CONFIG_CFG80211=y ++# CONFIG_CFG80211_REG_DEBUG is not set + CONFIG_NL80211=y ++CONFIG_WIRELESS_OLD_REGULATORY=y + CONFIG_WIRELESS_EXT=y + CONFIG_WIRELESS_EXT_SYSFS=y ++# CONFIG_LIB80211 is not set + CONFIG_MAC80211=y + + # + # Rate control algorithm selection + # +-CONFIG_MAC80211_RC_PID=y +-CONFIG_MAC80211_RC_DEFAULT_PID=y +-CONFIG_MAC80211_RC_DEFAULT="pid" ++CONFIG_MAC80211_RC_MINSTREL=y ++# CONFIG_MAC80211_RC_DEFAULT_PID is not set ++CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y ++CONFIG_MAC80211_RC_DEFAULT="minstrel" + # CONFIG_MAC80211_MESH is not set + CONFIG_MAC80211_LEDS=y + # CONFIG_MAC80211_DEBUGFS is not set + # CONFIG_MAC80211_DEBUG_MENU is not set +-# CONFIG_IEEE80211 is not set +-# CONFIG_RFKILL is not set ++# CONFIG_WIMAX is not set ++CONFIG_RFKILL=y ++# CONFIG_RFKILL_INPUT is not set ++CONFIG_RFKILL_LEDS=y + # CONFIG_NET_9P is not set + + # +@@ -722,7 +756,7 @@ CONFIG_PROC_EVENTS=y + # CONFIG_MTD is not set + # CONFIG_PARPORT is not set + CONFIG_PNP=y +-# CONFIG_PNP_DEBUG is not set ++CONFIG_PNP_DEBUG_MESSAGES=y + + # + # Protocols +@@ -750,20 +784,19 @@ CONFIG_BLK_DEV_RAM_SIZE=16384 + CONFIG_MISC_DEVICES=y + # CONFIG_IBM_ASM is not set + # CONFIG_PHANTOM is not set +-# CONFIG_EEPROM_93CX6 is not set + # CONFIG_SGI_IOC4 is not set + # CONFIG_TIFM_CORE is not set +-# CONFIG_ACER_WMI is not set +-# CONFIG_ASUS_LAPTOP is not set +-# CONFIG_FUJITSU_LAPTOP is not set +-# CONFIG_TC1100_WMI is not set +-# CONFIG_MSI_LAPTOP is not set +-# CONFIG_COMPAL_LAPTOP is not set +-# CONFIG_SONY_LAPTOP is not set +-# CONFIG_THINKPAD_ACPI is not set +-# CONFIG_INTEL_MENLOW is not set ++# CONFIG_ICS932S401 is not set + # CONFIG_ENCLOSURE_SERVICES is not set + # CONFIG_HP_ILO is not set ++# CONFIG_C2PORT is not set ++ ++# ++# EEPROM support ++# ++# CONFIG_EEPROM_AT24 is not set ++# CONFIG_EEPROM_LEGACY is not set ++# CONFIG_EEPROM_93CX6 is not set + CONFIG_HAVE_IDE=y + # CONFIG_IDE is not set + +@@ -802,7 +835,7 @@ CONFIG_SCSI_WAIT_SCAN=m + # + CONFIG_SCSI_SPI_ATTRS=y + # CONFIG_SCSI_FC_ATTRS is not set +-CONFIG_SCSI_ISCSI_ATTRS=y ++# CONFIG_SCSI_ISCSI_ATTRS is not set + # CONFIG_SCSI_SAS_ATTRS is not set + # CONFIG_SCSI_SAS_LIBSAS is not set + # CONFIG_SCSI_SRP_ATTRS is not set +@@ -875,6 +908,7 @@ CONFIG_PATA_OLDPIIX=y + CONFIG_PATA_SCH=y + CONFIG_MD=y + CONFIG_BLK_DEV_MD=y 
++CONFIG_MD_AUTODETECT=y + # CONFIG_MD_LINEAR is not set + # CONFIG_MD_RAID0 is not set + # CONFIG_MD_RAID1 is not set +@@ -930,6 +964,9 @@ CONFIG_PHYLIB=y + # CONFIG_BROADCOM_PHY is not set + # CONFIG_ICPLUS_PHY is not set + # CONFIG_REALTEK_PHY is not set ++# CONFIG_NATIONAL_PHY is not set ++# CONFIG_STE10XP is not set ++# CONFIG_LSI_ET1011C_PHY is not set + # CONFIG_FIXED_PHY is not set + # CONFIG_MDIO_BITBANG is not set + CONFIG_NET_ETHERNET=y +@@ -953,6 +990,9 @@ CONFIG_NET_TULIP=y + # CONFIG_IBM_NEW_EMAC_RGMII is not set + # CONFIG_IBM_NEW_EMAC_TAH is not set + # CONFIG_IBM_NEW_EMAC_EMAC4 is not set ++# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set ++# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set ++# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set + CONFIG_NET_PCI=y + # CONFIG_PCNET32 is not set + # CONFIG_AMD8111_ETH is not set +@@ -960,7 +1000,6 @@ CONFIG_NET_PCI=y + # CONFIG_B44 is not set + CONFIG_FORCEDETH=y + # CONFIG_FORCEDETH_NAPI is not set +-# CONFIG_EEPRO100 is not set + CONFIG_E100=y + # CONFIG_FEALNX is not set + # CONFIG_NATSEMI is not set +@@ -974,15 +1013,16 @@ CONFIG_8139TOO=y + # CONFIG_R6040 is not set + # CONFIG_SIS900 is not set + # CONFIG_EPIC100 is not set ++# CONFIG_SMSC9420 is not set + # CONFIG_SUNDANCE is not set + # CONFIG_TLAN is not set + # CONFIG_VIA_RHINE is not set + # CONFIG_SC92031 is not set ++# CONFIG_ATL2 is not set + CONFIG_NETDEV_1000=y + # CONFIG_ACENIC is not set + # CONFIG_DL2K is not set + CONFIG_E1000=y +-# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set + CONFIG_E1000E=y + # CONFIG_IP1000 is not set + # CONFIG_IGB is not set +@@ -1000,18 +1040,23 @@ CONFIG_BNX2=y + # CONFIG_QLA3XXX is not set + # CONFIG_ATL1 is not set + # CONFIG_ATL1E is not set ++# CONFIG_JME is not set + CONFIG_NETDEV_10000=y + # CONFIG_CHELSIO_T1 is not set ++CONFIG_CHELSIO_T3_DEPENDS=y + # CONFIG_CHELSIO_T3 is not set ++# CONFIG_ENIC is not set + # CONFIG_IXGBE is not set + # CONFIG_IXGB is not set + # CONFIG_S2IO is not set + # CONFIG_MYRI10GE is not set + # CONFIG_NETXEN_NIC is not set + # CONFIG_NIU is not set ++# CONFIG_MLX4_EN is not set + # CONFIG_MLX4_CORE is not set + # CONFIG_TEHUTI is not set + # CONFIG_BNX2X is not set ++# CONFIG_QLGE is not set + # CONFIG_SFC is not set + CONFIG_TR=y + # CONFIG_IBMOL is not set +@@ -1025,9 +1070,8 @@ CONFIG_TR=y + # CONFIG_WLAN_PRE80211 is not set + CONFIG_WLAN_80211=y + # CONFIG_PCMCIA_RAYCS is not set +-# CONFIG_IPW2100 is not set +-# CONFIG_IPW2200 is not set + # CONFIG_LIBERTAS is not set ++# CONFIG_LIBERTAS_THINFIRM is not set + # CONFIG_AIRO is not set + # CONFIG_HERMES is not set + # CONFIG_ATMEL is not set +@@ -1044,6 +1088,8 @@ CONFIG_WLAN_80211=y + CONFIG_ATH5K=y + # CONFIG_ATH5K_DEBUG is not set + # CONFIG_ATH9K is not set ++# CONFIG_IPW2100 is not set ++# CONFIG_IPW2200 is not set + # CONFIG_IWLCORE is not set + # CONFIG_IWLWIFI_LEDS is not set + # CONFIG_IWLAGN is not set +@@ -1055,6 +1101,10 @@ CONFIG_ATH5K=y + # CONFIG_RT2X00 is not set + + # ++# Enable WiMAX (Networking options) to see the WiMAX drivers ++# ++ ++# + # USB Network Adapters + # + # CONFIG_USB_CATC is not set +@@ -1062,6 +1112,7 @@ CONFIG_ATH5K=y + # CONFIG_USB_PEGASUS is not set + # CONFIG_USB_RTL8150 is not set + # CONFIG_USB_USBNET is not set ++# CONFIG_USB_HSO is not set + CONFIG_NET_PCMCIA=y + # CONFIG_PCMCIA_3C589 is not set + # CONFIG_PCMCIA_3C574 is not set +@@ -1123,6 +1174,7 @@ CONFIG_MOUSE_PS2_LOGIPS2PP=y + CONFIG_MOUSE_PS2_SYNAPTICS=y + CONFIG_MOUSE_PS2_LIFEBOOK=y + CONFIG_MOUSE_PS2_TRACKPOINT=y ++# CONFIG_MOUSE_PS2_ELANTECH is not 
set + # CONFIG_MOUSE_PS2_TOUCHKIT is not set + # CONFIG_MOUSE_SERIAL is not set + # CONFIG_MOUSE_APPLETOUCH is not set +@@ -1160,15 +1212,16 @@ CONFIG_INPUT_TOUCHSCREEN=y + # CONFIG_TOUCHSCREEN_FUJITSU is not set + # CONFIG_TOUCHSCREEN_GUNZE is not set + # CONFIG_TOUCHSCREEN_ELO is not set ++# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set + # CONFIG_TOUCHSCREEN_MTOUCH is not set + # CONFIG_TOUCHSCREEN_INEXIO is not set + # CONFIG_TOUCHSCREEN_MK712 is not set + # CONFIG_TOUCHSCREEN_PENMOUNT is not set + # CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set + # CONFIG_TOUCHSCREEN_TOUCHWIN is not set +-# CONFIG_TOUCHSCREEN_UCB1400 is not set + # CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set + # CONFIG_TOUCHSCREEN_TOUCHIT213 is not set ++# CONFIG_TOUCHSCREEN_TSC2007 is not set + CONFIG_INPUT_MISC=y + # CONFIG_INPUT_PCSPKR is not set + # CONFIG_INPUT_APANEL is not set +@@ -1179,6 +1232,7 @@ CONFIG_INPUT_MISC=y + # CONFIG_INPUT_KEYSPAN_REMOTE is not set + # CONFIG_INPUT_POWERMATE is not set + # CONFIG_INPUT_YEALINK is not set ++# CONFIG_INPUT_CM109 is not set + # CONFIG_INPUT_UINPUT is not set + + # +@@ -1245,6 +1299,7 @@ CONFIG_SERIAL_CORE=y + CONFIG_SERIAL_CORE_CONSOLE=y + # CONFIG_SERIAL_JSM is not set + CONFIG_UNIX98_PTYS=y ++# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set + # CONFIG_LEGACY_PTYS is not set + # CONFIG_IPMI_HANDLER is not set + CONFIG_HW_RANDOM=y +@@ -1279,6 +1334,7 @@ CONFIG_I2C=y + CONFIG_I2C_BOARDINFO=y + # CONFIG_I2C_CHARDEV is not set + CONFIG_I2C_HELPER_AUTO=y ++CONFIG_I2C_ALGOBIT=y + + # + # I2C Hardware Bus support +@@ -1331,8 +1387,6 @@ CONFIG_I2C_I801=y + # Miscellaneous I2C Chip support + # + # CONFIG_DS1682 is not set +-# CONFIG_EEPROM_AT24 is not set +-# CONFIG_EEPROM_LEGACY is not set + # CONFIG_SENSORS_PCF8574 is not set + # CONFIG_PCF8575 is not set + # CONFIG_SENSORS_PCA9539 is not set +@@ -1351,8 +1405,78 @@ CONFIG_POWER_SUPPLY=y + # CONFIG_POWER_SUPPLY_DEBUG is not set + # CONFIG_PDA_POWER is not set + # CONFIG_BATTERY_DS2760 is not set +-# CONFIG_HWMON is not set ++# CONFIG_BATTERY_BQ27x00 is not set ++CONFIG_HWMON=y ++# CONFIG_HWMON_VID is not set ++# CONFIG_SENSORS_ABITUGURU is not set ++# CONFIG_SENSORS_ABITUGURU3 is not set ++# CONFIG_SENSORS_AD7414 is not set ++# CONFIG_SENSORS_AD7418 is not set ++# CONFIG_SENSORS_ADM1021 is not set ++# CONFIG_SENSORS_ADM1025 is not set ++# CONFIG_SENSORS_ADM1026 is not set ++# CONFIG_SENSORS_ADM1029 is not set ++# CONFIG_SENSORS_ADM1031 is not set ++# CONFIG_SENSORS_ADM9240 is not set ++# CONFIG_SENSORS_ADT7462 is not set ++# CONFIG_SENSORS_ADT7470 is not set ++# CONFIG_SENSORS_ADT7473 is not set ++# CONFIG_SENSORS_ADT7475 is not set ++# CONFIG_SENSORS_K8TEMP is not set ++# CONFIG_SENSORS_ASB100 is not set ++# CONFIG_SENSORS_ATXP1 is not set ++# CONFIG_SENSORS_DS1621 is not set ++# CONFIG_SENSORS_I5K_AMB is not set ++# CONFIG_SENSORS_F71805F is not set ++# CONFIG_SENSORS_F71882FG is not set ++# CONFIG_SENSORS_F75375S is not set ++# CONFIG_SENSORS_FSCHER is not set ++# CONFIG_SENSORS_FSCPOS is not set ++# CONFIG_SENSORS_FSCHMD is not set ++# CONFIG_SENSORS_GL518SM is not set ++# CONFIG_SENSORS_GL520SM is not set ++# CONFIG_SENSORS_CORETEMP is not set ++# CONFIG_SENSORS_IT87 is not set ++# CONFIG_SENSORS_LM63 is not set ++# CONFIG_SENSORS_LM75 is not set ++# CONFIG_SENSORS_LM77 is not set ++# CONFIG_SENSORS_LM78 is not set ++# CONFIG_SENSORS_LM80 is not set ++# CONFIG_SENSORS_LM83 is not set ++# CONFIG_SENSORS_LM85 is not set ++# CONFIG_SENSORS_LM87 is not set ++# CONFIG_SENSORS_LM90 is not set ++# CONFIG_SENSORS_LM92 is not set ++# 
CONFIG_SENSORS_LM93 is not set ++# CONFIG_SENSORS_LTC4245 is not set ++# CONFIG_SENSORS_MAX1619 is not set ++# CONFIG_SENSORS_MAX6650 is not set ++# CONFIG_SENSORS_PC87360 is not set ++# CONFIG_SENSORS_PC87427 is not set ++# CONFIG_SENSORS_SIS5595 is not set ++# CONFIG_SENSORS_DME1737 is not set ++# CONFIG_SENSORS_SMSC47M1 is not set ++# CONFIG_SENSORS_SMSC47M192 is not set ++# CONFIG_SENSORS_SMSC47B397 is not set ++# CONFIG_SENSORS_ADS7828 is not set ++# CONFIG_SENSORS_THMC50 is not set ++# CONFIG_SENSORS_VIA686A is not set ++# CONFIG_SENSORS_VT1211 is not set ++# CONFIG_SENSORS_VT8231 is not set ++# CONFIG_SENSORS_W83781D is not set ++# CONFIG_SENSORS_W83791D is not set ++# CONFIG_SENSORS_W83792D is not set ++# CONFIG_SENSORS_W83793 is not set ++# CONFIG_SENSORS_W83L785TS is not set ++# CONFIG_SENSORS_W83L786NG is not set ++# CONFIG_SENSORS_W83627HF is not set ++# CONFIG_SENSORS_W83627EHF is not set ++# CONFIG_SENSORS_HDAPS is not set ++# CONFIG_SENSORS_LIS3LV02D is not set ++# CONFIG_SENSORS_APPLESMC is not set ++# CONFIG_HWMON_DEBUG_CHIP is not set + CONFIG_THERMAL=y ++# CONFIG_THERMAL_HWMON is not set + CONFIG_WATCHDOG=y + # CONFIG_WATCHDOG_NOWAYOUT is not set + +@@ -1372,6 +1496,7 @@ CONFIG_WATCHDOG=y + # CONFIG_I6300ESB_WDT is not set + # CONFIG_ITCO_WDT is not set + # CONFIG_IT8712F_WDT is not set ++# CONFIG_IT87_WDT is not set + # CONFIG_HP_WATCHDOG is not set + # CONFIG_SC1200_WDT is not set + # CONFIG_PC87413_WDT is not set +@@ -1379,9 +1504,11 @@ CONFIG_WATCHDOG=y + # CONFIG_SBC8360_WDT is not set + # CONFIG_SBC7240_WDT is not set + # CONFIG_CPU5_WDT is not set ++# CONFIG_SMSC_SCH311X_WDT is not set + # CONFIG_SMSC37B787_WDT is not set + # CONFIG_W83627HF_WDT is not set + # CONFIG_W83697HF_WDT is not set ++# CONFIG_W83697UG_WDT is not set + # CONFIG_W83877F_WDT is not set + # CONFIG_W83977F_WDT is not set + # CONFIG_MACHZ_WDT is not set +@@ -1397,11 +1524,11 @@ CONFIG_WATCHDOG=y + # USB-based Watchdog Cards + # + # CONFIG_USBPCWATCHDOG is not set ++CONFIG_SSB_POSSIBLE=y + + # + # Sonics Silicon Backplane + # +-CONFIG_SSB_POSSIBLE=y + # CONFIG_SSB is not set + + # +@@ -1410,7 +1537,13 @@ CONFIG_SSB_POSSIBLE=y + # CONFIG_MFD_CORE is not set + # CONFIG_MFD_SM501 is not set + # CONFIG_HTC_PASIC3 is not set ++# CONFIG_TWL4030_CORE is not set + # CONFIG_MFD_TMIO is not set ++# CONFIG_PMIC_DA903X is not set ++# CONFIG_MFD_WM8400 is not set ++# CONFIG_MFD_WM8350_I2C is not set ++# CONFIG_MFD_PCF50633 is not set ++# CONFIG_REGULATOR is not set + + # + # Multimedia devices +@@ -1450,6 +1583,7 @@ CONFIG_DRM=y + # CONFIG_DRM_I810 is not set + # CONFIG_DRM_I830 is not set + CONFIG_DRM_I915=y ++# CONFIG_DRM_I915_KMS is not set + # CONFIG_DRM_MGA is not set + # CONFIG_DRM_SIS is not set + # CONFIG_DRM_VIA is not set +@@ -1459,6 +1593,7 @@ CONFIG_DRM_I915=y + CONFIG_FB=y + # CONFIG_FIRMWARE_EDID is not set + # CONFIG_FB_DDC is not set ++# CONFIG_FB_BOOT_VESA_SUPPORT is not set + CONFIG_FB_CFB_FILLRECT=y + CONFIG_FB_CFB_COPYAREA=y + CONFIG_FB_CFB_IMAGEBLIT=y +@@ -1487,7 +1622,6 @@ CONFIG_FB_TILEBLITTING=y + # CONFIG_FB_UVESA is not set + # CONFIG_FB_VESA is not set + CONFIG_FB_EFI=y +-# CONFIG_FB_IMAC is not set + # CONFIG_FB_N411 is not set + # CONFIG_FB_HGA is not set + # CONFIG_FB_S1D13XXX is not set +@@ -1503,6 +1637,7 @@ CONFIG_FB_EFI=y + # CONFIG_FB_S3 is not set + # CONFIG_FB_SAVAGE is not set + # CONFIG_FB_SIS is not set ++# CONFIG_FB_VIA is not set + # CONFIG_FB_NEOMAGIC is not set + # CONFIG_FB_KYRO is not set + # CONFIG_FB_3DFX is not set +@@ -1515,12 +1650,15 @@ CONFIG_FB_EFI=y + # 
CONFIG_FB_CARMINE is not set + # CONFIG_FB_GEODE is not set + # CONFIG_FB_VIRTUAL is not set ++# CONFIG_FB_METRONOME is not set ++# CONFIG_FB_MB862XX is not set + CONFIG_BACKLIGHT_LCD_SUPPORT=y + # CONFIG_LCD_CLASS_DEVICE is not set + CONFIG_BACKLIGHT_CLASS_DEVICE=y +-# CONFIG_BACKLIGHT_CORGI is not set ++CONFIG_BACKLIGHT_GENERIC=y + # CONFIG_BACKLIGHT_PROGEAR is not set + # CONFIG_BACKLIGHT_MBP_NVIDIA is not set ++# CONFIG_BACKLIGHT_SAHARA is not set + + # + # Display device support +@@ -1540,10 +1678,12 @@ CONFIG_LOGO=y + # CONFIG_LOGO_LINUX_VGA16 is not set + CONFIG_LOGO_LINUX_CLUT224=y + CONFIG_SOUND=y ++CONFIG_SOUND_OSS_CORE=y + CONFIG_SND=y + CONFIG_SND_TIMER=y + CONFIG_SND_PCM=y + CONFIG_SND_HWDEP=y ++CONFIG_SND_JACK=y + CONFIG_SND_SEQUENCER=y + CONFIG_SND_SEQ_DUMMY=y + CONFIG_SND_OSSEMUL=y +@@ -1551,6 +1691,8 @@ CONFIG_SND_MIXER_OSS=y + CONFIG_SND_PCM_OSS=y + CONFIG_SND_PCM_OSS_PLUGINS=y + CONFIG_SND_SEQUENCER_OSS=y ++CONFIG_SND_HRTIMER=y ++CONFIG_SND_SEQ_HRTIMER_DEFAULT=y + CONFIG_SND_DYNAMIC_MINORS=y + CONFIG_SND_SUPPORT_OLD_API=y + CONFIG_SND_VERBOSE_PROCFS=y +@@ -1605,11 +1747,16 @@ CONFIG_SND_PCI=y + # CONFIG_SND_FM801 is not set + CONFIG_SND_HDA_INTEL=y + CONFIG_SND_HDA_HWDEP=y ++# CONFIG_SND_HDA_RECONFIG is not set ++# CONFIG_SND_HDA_INPUT_BEEP is not set + CONFIG_SND_HDA_CODEC_REALTEK=y + CONFIG_SND_HDA_CODEC_ANALOG=y + CONFIG_SND_HDA_CODEC_SIGMATEL=y + CONFIG_SND_HDA_CODEC_VIA=y + CONFIG_SND_HDA_CODEC_ATIHDMI=y ++CONFIG_SND_HDA_CODEC_NVHDMI=y ++CONFIG_SND_HDA_CODEC_INTELHDMI=y ++CONFIG_SND_HDA_ELD=y + CONFIG_SND_HDA_CODEC_CONEXANT=y + CONFIG_SND_HDA_CODEC_CMEDIA=y + CONFIG_SND_HDA_CODEC_SI3054=y +@@ -1643,6 +1790,7 @@ CONFIG_SND_USB=y + # CONFIG_SND_USB_AUDIO is not set + # CONFIG_SND_USB_USX2Y is not set + # CONFIG_SND_USB_CAIAQ is not set ++# CONFIG_SND_USB_US122L is not set + CONFIG_SND_PCMCIA=y + # CONFIG_SND_VXPOCKET is not set + # CONFIG_SND_PDAUDIOCF is not set +@@ -1657,15 +1805,37 @@ CONFIG_HIDRAW=y + # USB Input Devices + # + CONFIG_USB_HID=y +-CONFIG_USB_HIDINPUT_POWERBOOK=y +-CONFIG_HID_FF=y + CONFIG_HID_PID=y ++CONFIG_USB_HIDDEV=y ++ ++# ++# Special HID drivers ++# ++CONFIG_HID_COMPAT=y ++CONFIG_HID_A4TECH=y ++CONFIG_HID_APPLE=y ++CONFIG_HID_BELKIN=y ++CONFIG_HID_CHERRY=y ++CONFIG_HID_CHICONY=y ++CONFIG_HID_CYPRESS=y ++CONFIG_HID_EZKEY=y ++CONFIG_HID_GYRATION=y ++CONFIG_HID_LOGITECH=y + CONFIG_LOGITECH_FF=y + # CONFIG_LOGIRUMBLEPAD2_FF is not set ++CONFIG_HID_MICROSOFT=y ++CONFIG_HID_MONTEREY=y ++CONFIG_HID_NTRIG=y ++CONFIG_HID_PANTHERLORD=y + CONFIG_PANTHERLORD_FF=y ++CONFIG_HID_PETALYNX=y ++CONFIG_HID_SAMSUNG=y ++CONFIG_HID_SONY=y ++CONFIG_HID_SUNPLUS=y ++# CONFIG_GREENASIA_FF is not set ++CONFIG_HID_TOPSEED=y + CONFIG_THRUSTMASTER_FF=y + CONFIG_ZEROPLUS_FF=y +-CONFIG_USB_HIDDEV=y + CONFIG_USB_SUPPORT=y + CONFIG_USB_ARCH_HAS_HCD=y + CONFIG_USB_ARCH_HAS_OHCI=y +@@ -1683,6 +1853,8 @@ CONFIG_USB_DEVICEFS=y + CONFIG_USB_SUSPEND=y + # CONFIG_USB_OTG is not set + CONFIG_USB_MON=y ++# CONFIG_USB_WUSB is not set ++# CONFIG_USB_WUSB_CBAF is not set + + # + # USB Host Controller Drivers +@@ -1691,6 +1863,7 @@ CONFIG_USB_MON=y + CONFIG_USB_EHCI_HCD=y + # CONFIG_USB_EHCI_ROOT_HUB_TT is not set + # CONFIG_USB_EHCI_TT_NEWSCHED is not set ++# CONFIG_USB_OXU210HP_HCD is not set + # CONFIG_USB_ISP116X_HCD is not set + # CONFIG_USB_ISP1760_HCD is not set + CONFIG_USB_OHCI_HCD=y +@@ -1700,6 +1873,8 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y + CONFIG_USB_UHCI_HCD=y + # CONFIG_USB_SL811_HCD is not set + # CONFIG_USB_R8A66597_HCD is not set ++# CONFIG_USB_WHCI_HCD is not set ++# 
CONFIG_USB_HWA_HCD is not set + + # + # USB Device Class drivers +@@ -1707,20 +1882,20 @@ CONFIG_USB_UHCI_HCD=y + # CONFIG_USB_ACM is not set + CONFIG_USB_PRINTER=y + # CONFIG_USB_WDM is not set ++# CONFIG_USB_TMC is not set + + # +-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' ++# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; + # + + # +-# may also be needed; see USB_STORAGE Help for more information ++# see USB_STORAGE Help for more information + # + CONFIG_USB_STORAGE=y + # CONFIG_USB_STORAGE_DEBUG is not set + # CONFIG_USB_STORAGE_DATAFAB is not set + # CONFIG_USB_STORAGE_FREECOM is not set + # CONFIG_USB_STORAGE_ISD200 is not set +-# CONFIG_USB_STORAGE_DPCM is not set + # CONFIG_USB_STORAGE_USBAT is not set + # CONFIG_USB_STORAGE_SDDR09 is not set + # CONFIG_USB_STORAGE_SDDR55 is not set +@@ -1728,7 +1903,6 @@ CONFIG_USB_STORAGE=y + # CONFIG_USB_STORAGE_ALAUDA is not set + # CONFIG_USB_STORAGE_ONETOUCH is not set + # CONFIG_USB_STORAGE_KARMA is not set +-# CONFIG_USB_STORAGE_SIERRA is not set + # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set + CONFIG_USB_LIBUSUAL=y + +@@ -1749,6 +1923,7 @@ CONFIG_USB_LIBUSUAL=y + # CONFIG_USB_EMI62 is not set + # CONFIG_USB_EMI26 is not set + # CONFIG_USB_ADUTUX is not set ++# CONFIG_USB_SEVSEG is not set + # CONFIG_USB_RIO500 is not set + # CONFIG_USB_LEGOTOWER is not set + # CONFIG_USB_LCD is not set +@@ -1766,7 +1941,13 @@ CONFIG_USB_LIBUSUAL=y + # CONFIG_USB_IOWARRIOR is not set + # CONFIG_USB_TEST is not set + # CONFIG_USB_ISIGHTFW is not set ++# CONFIG_USB_VST is not set + # CONFIG_USB_GADGET is not set ++ ++# ++# OTG and related infrastructure ++# ++# CONFIG_UWB is not set + # CONFIG_MMC is not set + # CONFIG_MEMSTICK is not set + CONFIG_NEW_LEDS=y +@@ -1775,6 +1956,7 @@ CONFIG_LEDS_CLASS=y + # + # LED drivers + # ++# CONFIG_LEDS_ALIX2 is not set + # CONFIG_LEDS_PCA9532 is not set + # CONFIG_LEDS_CLEVO_MAIL is not set + # CONFIG_LEDS_PCA955X is not set +@@ -1785,6 +1967,7 @@ CONFIG_LEDS_CLASS=y + CONFIG_LEDS_TRIGGERS=y + # CONFIG_LEDS_TRIGGER_TIMER is not set + # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set ++# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set + # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set + # CONFIG_ACCESSIBILITY is not set + # CONFIG_INFINIBAND is not set +@@ -1824,6 +2007,7 @@ CONFIG_RTC_INTF_DEV=y + # CONFIG_RTC_DRV_M41T80 is not set + # CONFIG_RTC_DRV_S35390A is not set + # CONFIG_RTC_DRV_FM3130 is not set ++# CONFIG_RTC_DRV_RX8581 is not set + + # + # SPI RTC drivers +@@ -1833,12 +2017,15 @@ CONFIG_RTC_INTF_DEV=y + # Platform RTC drivers + # + CONFIG_RTC_DRV_CMOS=y ++# CONFIG_RTC_DRV_DS1286 is not set + # CONFIG_RTC_DRV_DS1511 is not set + # CONFIG_RTC_DRV_DS1553 is not set + # CONFIG_RTC_DRV_DS1742 is not set + # CONFIG_RTC_DRV_STK17TA8 is not set + # CONFIG_RTC_DRV_M48T86 is not set ++# CONFIG_RTC_DRV_M48T35 is not set + # CONFIG_RTC_DRV_M48T59 is not set ++# CONFIG_RTC_DRV_BQ4802 is not set + # CONFIG_RTC_DRV_V3020 is not set + + # +@@ -1851,6 +2038,22 @@ CONFIG_DMADEVICES=y + # + # CONFIG_INTEL_IOATDMA is not set + # CONFIG_UIO is not set ++# CONFIG_STAGING is not set ++CONFIG_X86_PLATFORM_DEVICES=y ++# CONFIG_ACER_WMI is not set ++# CONFIG_ASUS_LAPTOP is not set ++# CONFIG_FUJITSU_LAPTOP is not set ++# CONFIG_TC1100_WMI is not set ++# CONFIG_MSI_LAPTOP is not set ++# CONFIG_PANASONIC_LAPTOP is not set ++# CONFIG_COMPAL_LAPTOP is not set ++# CONFIG_SONY_LAPTOP is not set ++# CONFIG_THINKPAD_ACPI is not set ++# CONFIG_INTEL_MENLOW is not set ++CONFIG_EEEPC_LAPTOP=y ++# CONFIG_ACPI_WMI is not set ++# 
CONFIG_ACPI_ASUS is not set ++# CONFIG_ACPI_TOSHIBA is not set + + # + # Firmware Drivers +@@ -1861,8 +2064,7 @@ CONFIG_EFI_VARS=y + # CONFIG_DELL_RBU is not set + # CONFIG_DCDBAS is not set + CONFIG_DMIID=y +-CONFIG_ISCSI_IBFT_FIND=y +-CONFIG_ISCSI_IBFT=y ++# CONFIG_ISCSI_IBFT_FIND is not set + + # + # File systems +@@ -1872,21 +2074,24 @@ CONFIG_EXT3_FS=y + CONFIG_EXT3_FS_XATTR=y + CONFIG_EXT3_FS_POSIX_ACL=y + CONFIG_EXT3_FS_SECURITY=y +-# CONFIG_EXT4DEV_FS is not set ++# CONFIG_EXT4_FS is not set + CONFIG_JBD=y + # CONFIG_JBD_DEBUG is not set + CONFIG_FS_MBCACHE=y + # CONFIG_REISERFS_FS is not set + # CONFIG_JFS_FS is not set + CONFIG_FS_POSIX_ACL=y ++CONFIG_FILE_LOCKING=y + # CONFIG_XFS_FS is not set + # CONFIG_OCFS2_FS is not set ++# CONFIG_BTRFS_FS is not set + CONFIG_DNOTIFY=y + CONFIG_INOTIFY=y + CONFIG_INOTIFY_USER=y + CONFIG_QUOTA=y + CONFIG_QUOTA_NETLINK_INTERFACE=y + # CONFIG_PRINT_QUOTA_WARNING is not set ++CONFIG_QUOTA_TREE=y + # CONFIG_QFMT_V1 is not set + CONFIG_QFMT_V2=y + CONFIG_QUOTACTL=y +@@ -1920,16 +2125,14 @@ CONFIG_PROC_FS=y + CONFIG_PROC_KCORE=y + CONFIG_PROC_VMCORE=y + CONFIG_PROC_SYSCTL=y ++CONFIG_PROC_PAGE_MONITOR=y + CONFIG_SYSFS=y + CONFIG_TMPFS=y + CONFIG_TMPFS_POSIX_ACL=y + CONFIG_HUGETLBFS=y + CONFIG_HUGETLB_PAGE=y + # CONFIG_CONFIGFS_FS is not set +- +-# +-# Miscellaneous filesystems +-# ++CONFIG_MISC_FILESYSTEMS=y + # CONFIG_ADFS_FS is not set + # CONFIG_AFFS_FS is not set + # CONFIG_ECRYPT_FS is not set +@@ -1939,6 +2142,7 @@ CONFIG_HUGETLB_PAGE=y + # CONFIG_BFS_FS is not set + # CONFIG_EFS_FS is not set + # CONFIG_CRAMFS is not set ++# CONFIG_SQUASHFS is not set + # CONFIG_VXFS_FS is not set + # CONFIG_MINIX_FS is not set + # CONFIG_OMFS_FS is not set +@@ -1960,6 +2164,7 @@ CONFIG_NFS_ACL_SUPPORT=y + CONFIG_NFS_COMMON=y + CONFIG_SUNRPC=y + CONFIG_SUNRPC_GSS=y ++# CONFIG_SUNRPC_REGISTER_V4 is not set + CONFIG_RPCSEC_GSS_KRB5=y + # CONFIG_RPCSEC_GSS_SPKM3 is not set + # CONFIG_SMB_FS is not set +@@ -2036,7 +2241,7 @@ CONFIG_NLS_UTF8=y + # + CONFIG_TRACE_IRQFLAGS_SUPPORT=y + CONFIG_PRINTK_TIME=y +-CONFIG_ENABLE_WARN_DEPRECATED=y ++# CONFIG_ENABLE_WARN_DEPRECATED is not set + CONFIG_ENABLE_MUST_CHECK=y + CONFIG_FRAME_WARN=2048 + CONFIG_MAGIC_SYSRQ=y +@@ -2066,33 +2271,54 @@ CONFIG_TIMER_STATS=y + CONFIG_DEBUG_BUGVERBOSE=y + # CONFIG_DEBUG_INFO is not set + # CONFIG_DEBUG_VM is not set ++# CONFIG_DEBUG_VIRTUAL is not set + # CONFIG_DEBUG_WRITECOUNT is not set + CONFIG_DEBUG_MEMORY_INIT=y + # CONFIG_DEBUG_LIST is not set + # CONFIG_DEBUG_SG is not set ++# CONFIG_DEBUG_NOTIFIERS is not set ++CONFIG_ARCH_WANT_FRAME_POINTERS=y + CONFIG_FRAME_POINTER=y + # CONFIG_BOOT_PRINTK_DELAY is not set + # CONFIG_RCU_TORTURE_TEST is not set ++# CONFIG_RCU_CPU_STALL_DETECTOR is not set + # CONFIG_KPROBES_SANITY_TEST is not set + # CONFIG_BACKTRACE_SELF_TEST is not set ++# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set + # CONFIG_LKDTM is not set + # CONFIG_FAULT_INJECTION is not set + # CONFIG_LATENCYTOP is not set + CONFIG_SYSCTL_SYSCALL_CHECK=y +-CONFIG_HAVE_FTRACE=y ++CONFIG_USER_STACKTRACE_SUPPORT=y ++CONFIG_HAVE_FUNCTION_TRACER=y ++CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y ++CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y + CONFIG_HAVE_DYNAMIC_FTRACE=y +-# CONFIG_FTRACE is not set ++CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y ++CONFIG_HAVE_HW_BRANCH_TRACER=y ++ ++# ++# Tracers ++# ++# CONFIG_FUNCTION_TRACER is not set + # CONFIG_IRQSOFF_TRACER is not set + # CONFIG_SYSPROF_TRACER is not set + # CONFIG_SCHED_TRACER is not set + # CONFIG_CONTEXT_SWITCH_TRACER is not set ++# CONFIG_BOOT_TRACER is not 
set ++# CONFIG_TRACE_BRANCH_PROFILING is not set ++# CONFIG_POWER_TRACER is not set ++# CONFIG_STACK_TRACER is not set ++# CONFIG_HW_BRANCH_TRACER is not set + CONFIG_PROVIDE_OHCI1394_DMA_INIT=y ++# CONFIG_DYNAMIC_PRINTK_DEBUG is not set + # CONFIG_SAMPLES is not set + CONFIG_HAVE_ARCH_KGDB=y + # CONFIG_KGDB is not set + # CONFIG_STRICT_DEVMEM is not set + CONFIG_X86_VERBOSE_BOOTUP=y + CONFIG_EARLY_PRINTK=y ++CONFIG_EARLY_PRINTK_DBGP=y + CONFIG_DEBUG_STACKOVERFLOW=y + CONFIG_DEBUG_STACK_USAGE=y + # CONFIG_DEBUG_PAGEALLOC is not set +@@ -2123,8 +2349,10 @@ CONFIG_OPTIMIZE_INLINING=y + CONFIG_KEYS=y + CONFIG_KEYS_DEBUG_PROC_KEYS=y + CONFIG_SECURITY=y ++# CONFIG_SECURITYFS is not set + CONFIG_SECURITY_NETWORK=y + # CONFIG_SECURITY_NETWORK_XFRM is not set ++# CONFIG_SECURITY_PATH is not set + CONFIG_SECURITY_FILE_CAPABILITIES=y + # CONFIG_SECURITY_ROOTPLUG is not set + CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 +@@ -2135,7 +2363,6 @@ CONFIG_SECURITY_SELINUX_DISABLE=y + CONFIG_SECURITY_SELINUX_DEVELOP=y + CONFIG_SECURITY_SELINUX_AVC_STATS=y + CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +-# CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT is not set + # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set + # CONFIG_SECURITY_SMACK is not set + CONFIG_CRYPTO=y +@@ -2143,11 +2370,18 @@ CONFIG_CRYPTO=y + # + # Crypto core or helper + # ++# CONFIG_CRYPTO_FIPS is not set + CONFIG_CRYPTO_ALGAPI=y ++CONFIG_CRYPTO_ALGAPI2=y + CONFIG_CRYPTO_AEAD=y ++CONFIG_CRYPTO_AEAD2=y + CONFIG_CRYPTO_BLKCIPHER=y ++CONFIG_CRYPTO_BLKCIPHER2=y + CONFIG_CRYPTO_HASH=y ++CONFIG_CRYPTO_HASH2=y ++CONFIG_CRYPTO_RNG2=y + CONFIG_CRYPTO_MANAGER=y ++CONFIG_CRYPTO_MANAGER2=y + # CONFIG_CRYPTO_GF128MUL is not set + # CONFIG_CRYPTO_NULL is not set + # CONFIG_CRYPTO_CRYPTD is not set +@@ -2182,6 +2416,7 @@ CONFIG_CRYPTO_HMAC=y + # Digest + # + # CONFIG_CRYPTO_CRC32C is not set ++# CONFIG_CRYPTO_CRC32C_INTEL is not set + # CONFIG_CRYPTO_MD4 is not set + CONFIG_CRYPTO_MD5=y + # CONFIG_CRYPTO_MICHAEL_MIC is not set +@@ -2222,6 +2457,11 @@ CONFIG_CRYPTO_DES=y + # + # CONFIG_CRYPTO_DEFLATE is not set + # CONFIG_CRYPTO_LZO is not set ++ ++# ++# Random Number Generation ++# ++# CONFIG_CRYPTO_ANSI_CPRNG is not set + CONFIG_CRYPTO_HW=y + # CONFIG_CRYPTO_DEV_PADLOCK is not set + # CONFIG_CRYPTO_DEV_GEODE is not set +@@ -2239,6 +2479,7 @@ CONFIG_VIRTUALIZATION=y + CONFIG_BITREVERSE=y + CONFIG_GENERIC_FIND_FIRST_BIT=y + CONFIG_GENERIC_FIND_NEXT_BIT=y ++CONFIG_GENERIC_FIND_LAST_BIT=y + # CONFIG_CRC_CCITT is not set + # CONFIG_CRC16 is not set + CONFIG_CRC_T10DIF=y +Index: linux-2.6-tip/arch/x86/configs/x86_64_defconfig +=================================================================== +--- linux-2.6-tip.orig/arch/x86/configs/x86_64_defconfig ++++ linux-2.6-tip/arch/x86/configs/x86_64_defconfig +@@ -1,14 +1,13 @@ + # + # Automatically generated make config: don't edit +-# Linux kernel version: 2.6.27-rc5 +-# Wed Sep 3 17:13:39 2008 ++# Linux kernel version: 2.6.29-rc4 ++# Tue Feb 24 15:44:16 2009 + # + CONFIG_64BIT=y + # CONFIG_X86_32 is not set + CONFIG_X86_64=y + CONFIG_X86=y + CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig" +-# CONFIG_GENERIC_LOCKBREAK is not set + CONFIG_GENERIC_TIME=y + CONFIG_GENERIC_CMOS_UPDATE=y + CONFIG_CLOCKSOURCE_WATCHDOG=y +@@ -23,17 +22,16 @@ CONFIG_ZONE_DMA=y + CONFIG_GENERIC_ISA_DMA=y + CONFIG_GENERIC_IOMAP=y + CONFIG_GENERIC_BUG=y ++CONFIG_GENERIC_BUG_RELATIVE_POINTERS=y + CONFIG_GENERIC_HWEIGHT=y +-# CONFIG_GENERIC_GPIO is not set + CONFIG_ARCH_MAY_HAVE_PC_FDC=y + CONFIG_RWSEM_GENERIC_SPINLOCK=y + # 
CONFIG_RWSEM_XCHGADD_ALGORITHM is not set +-# CONFIG_ARCH_HAS_ILOG2_U32 is not set +-# CONFIG_ARCH_HAS_ILOG2_U64 is not set + CONFIG_ARCH_HAS_CPU_IDLE_WAIT=y + CONFIG_GENERIC_CALIBRATE_DELAY=y + CONFIG_GENERIC_TIME_VSYSCALL=y + CONFIG_ARCH_HAS_CPU_RELAX=y ++CONFIG_ARCH_HAS_DEFAULT_IDLE=y + CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y + CONFIG_HAVE_SETUP_PER_CPU_AREA=y + CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y +@@ -42,12 +40,12 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y + CONFIG_ZONE_DMA32=y + CONFIG_ARCH_POPULATES_NODE_MAP=y + CONFIG_AUDIT_ARCH=y +-CONFIG_ARCH_SUPPORTS_AOUT=y + CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y + CONFIG_GENERIC_HARDIRQS=y + CONFIG_GENERIC_IRQ_PROBE=y + CONFIG_GENERIC_PENDING_IRQ=y + CONFIG_X86_SMP=y ++CONFIG_USE_GENERIC_SMP_HELPERS=y + CONFIG_X86_64_SMP=y + CONFIG_X86_HT=y + CONFIG_X86_BIOS_REBOOT=y +@@ -76,30 +74,44 @@ CONFIG_TASK_IO_ACCOUNTING=y + CONFIG_AUDIT=y + CONFIG_AUDITSYSCALL=y + CONFIG_AUDIT_TREE=y ++ ++# ++# RCU Subsystem ++# ++# CONFIG_CLASSIC_RCU is not set ++CONFIG_TREE_RCU=y ++# CONFIG_PREEMPT_RCU is not set ++# CONFIG_RCU_TRACE is not set ++CONFIG_RCU_FANOUT=64 ++# CONFIG_RCU_FANOUT_EXACT is not set ++# CONFIG_TREE_RCU_TRACE is not set ++# CONFIG_PREEMPT_RCU_TRACE is not set + # CONFIG_IKCONFIG is not set + CONFIG_LOG_BUF_SHIFT=18 +-CONFIG_CGROUPS=y +-# CONFIG_CGROUP_DEBUG is not set +-CONFIG_CGROUP_NS=y +-# CONFIG_CGROUP_DEVICE is not set +-CONFIG_CPUSETS=y + CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y + CONFIG_GROUP_SCHED=y + CONFIG_FAIR_GROUP_SCHED=y + # CONFIG_RT_GROUP_SCHED is not set + # CONFIG_USER_SCHED is not set + CONFIG_CGROUP_SCHED=y ++CONFIG_CGROUPS=y ++# CONFIG_CGROUP_DEBUG is not set ++CONFIG_CGROUP_NS=y ++CONFIG_CGROUP_FREEZER=y ++# CONFIG_CGROUP_DEVICE is not set ++CONFIG_CPUSETS=y ++CONFIG_PROC_PID_CPUSET=y + CONFIG_CGROUP_CPUACCT=y + CONFIG_RESOURCE_COUNTERS=y + # CONFIG_CGROUP_MEM_RES_CTLR is not set + # CONFIG_SYSFS_DEPRECATED_V2 is not set +-CONFIG_PROC_PID_CPUSET=y + CONFIG_RELAY=y + CONFIG_NAMESPACES=y + CONFIG_UTS_NS=y + CONFIG_IPC_NS=y + CONFIG_USER_NS=y + CONFIG_PID_NS=y ++CONFIG_NET_NS=y + CONFIG_BLK_DEV_INITRD=y + CONFIG_INITRAMFS_SOURCE="" + CONFIG_CC_OPTIMIZE_FOR_SIZE=y +@@ -124,12 +136,15 @@ CONFIG_SIGNALFD=y + CONFIG_TIMERFD=y + CONFIG_EVENTFD=y + CONFIG_SHMEM=y ++CONFIG_AIO=y + CONFIG_VM_EVENT_COUNTERS=y ++CONFIG_PCI_QUIRKS=y + CONFIG_SLUB_DEBUG=y + # CONFIG_SLAB is not set + CONFIG_SLUB=y + # CONFIG_SLOB is not set + CONFIG_PROFILING=y ++CONFIG_TRACEPOINTS=y + CONFIG_MARKERS=y + # CONFIG_OPROFILE is not set + CONFIG_HAVE_OPROFILE=y +@@ -139,15 +154,10 @@ CONFIG_KRETPROBES=y + CONFIG_HAVE_IOREMAP_PROT=y + CONFIG_HAVE_KPROBES=y + CONFIG_HAVE_KRETPROBES=y +-# CONFIG_HAVE_ARCH_TRACEHOOK is not set +-# CONFIG_HAVE_DMA_ATTRS is not set +-CONFIG_USE_GENERIC_SMP_HELPERS=y +-# CONFIG_HAVE_CLK is not set +-CONFIG_PROC_PAGE_MONITOR=y ++CONFIG_HAVE_ARCH_TRACEHOOK=y + # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set + CONFIG_SLABINFO=y + CONFIG_RT_MUTEXES=y +-# CONFIG_TINY_SHMEM is not set + CONFIG_BASE_SMALL=0 + CONFIG_MODULES=y + # CONFIG_MODULE_FORCE_LOAD is not set +@@ -155,7 +165,6 @@ CONFIG_MODULE_UNLOAD=y + CONFIG_MODULE_FORCE_UNLOAD=y + # CONFIG_MODVERSIONS is not set + # CONFIG_MODULE_SRCVERSION_ALL is not set +-CONFIG_KMOD=y + CONFIG_STOP_MACHINE=y + CONFIG_BLOCK=y + CONFIG_BLK_DEV_IO_TRACE=y +@@ -175,7 +184,7 @@ CONFIG_IOSCHED_CFQ=y + CONFIG_DEFAULT_CFQ=y + # CONFIG_DEFAULT_NOOP is not set + CONFIG_DEFAULT_IOSCHED="cfq" +-CONFIG_CLASSIC_RCU=y ++CONFIG_FREEZER=y + + # + # Processor type and features +@@ -185,13 +194,14 @@ CONFIG_NO_HZ=y + 
CONFIG_HIGH_RES_TIMERS=y + CONFIG_GENERIC_CLOCKEVENTS_BUILD=y + CONFIG_SMP=y ++CONFIG_SPARSE_IRQ=y ++# CONFIG_NUMA_MIGRATE_IRQ_DESC is not set + CONFIG_X86_FIND_SMP_CONFIG=y + CONFIG_X86_MPPARSE=y +-CONFIG_X86_PC=y + # CONFIG_X86_ELAN is not set +-# CONFIG_X86_VOYAGER is not set + # CONFIG_X86_GENERICARCH is not set + # CONFIG_X86_VSMP is not set ++CONFIG_SCHED_OMIT_FRAME_POINTER=y + # CONFIG_PARAVIRT_GUEST is not set + # CONFIG_MEMTEST is not set + # CONFIG_M386 is not set +@@ -230,6 +240,11 @@ CONFIG_X86_CMPXCHG64=y + CONFIG_X86_CMOV=y + CONFIG_X86_MINIMUM_CPU_FAMILY=64 + CONFIG_X86_DEBUGCTLMSR=y ++CONFIG_CPU_SUP_INTEL=y ++CONFIG_CPU_SUP_AMD=y ++CONFIG_CPU_SUP_CENTAUR_64=y ++CONFIG_X86_DS=y ++CONFIG_X86_PTRACE_BTS=y + CONFIG_HPET_TIMER=y + CONFIG_HPET_EMULATE_RTC=y + CONFIG_DMI=y +@@ -237,8 +252,11 @@ CONFIG_GART_IOMMU=y + CONFIG_CALGARY_IOMMU=y + CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y + CONFIG_AMD_IOMMU=y ++CONFIG_AMD_IOMMU_STATS=y + CONFIG_SWIOTLB=y + CONFIG_IOMMU_HELPER=y ++CONFIG_IOMMU_API=y ++# CONFIG_MAXSMP is not set + CONFIG_NR_CPUS=64 + CONFIG_SCHED_SMT=y + CONFIG_SCHED_MC=y +@@ -247,12 +265,19 @@ CONFIG_PREEMPT_VOLUNTARY=y + # CONFIG_PREEMPT is not set + CONFIG_X86_LOCAL_APIC=y + CONFIG_X86_IO_APIC=y +-# CONFIG_X86_MCE is not set ++CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y ++CONFIG_X86_MCE=y ++CONFIG_X86_MCE_INTEL=y ++CONFIG_X86_MCE_AMD=y + # CONFIG_I8K is not set + CONFIG_MICROCODE=y ++CONFIG_MICROCODE_INTEL=y ++CONFIG_MICROCODE_AMD=y + CONFIG_MICROCODE_OLD_INTERFACE=y + CONFIG_X86_MSR=y + CONFIG_X86_CPUID=y ++CONFIG_ARCH_PHYS_ADDR_T_64BIT=y ++CONFIG_DIRECT_GBPAGES=y + CONFIG_NUMA=y + CONFIG_K8_NUMA=y + CONFIG_X86_64_ACPI_NUMA=y +@@ -269,7 +294,6 @@ CONFIG_SPARSEMEM_MANUAL=y + CONFIG_SPARSEMEM=y + CONFIG_NEED_MULTIPLE_NODES=y + CONFIG_HAVE_MEMORY_PRESENT=y +-# CONFIG_SPARSEMEM_STATIC is not set + CONFIG_SPARSEMEM_EXTREME=y + CONFIG_SPARSEMEM_VMEMMAP_ENABLE=y + CONFIG_SPARSEMEM_VMEMMAP=y +@@ -280,10 +304,14 @@ CONFIG_SPARSEMEM_VMEMMAP=y + CONFIG_PAGEFLAGS_EXTENDED=y + CONFIG_SPLIT_PTLOCK_CPUS=4 + CONFIG_MIGRATION=y +-CONFIG_RESOURCES_64BIT=y ++CONFIG_PHYS_ADDR_T_64BIT=y + CONFIG_ZONE_DMA_FLAG=1 + CONFIG_BOUNCE=y + CONFIG_VIRT_TO_BUS=y ++CONFIG_UNEVICTABLE_LRU=y ++CONFIG_X86_CHECK_BIOS_CORRUPTION=y ++CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y ++CONFIG_X86_RESERVE_LOW_64K=y + CONFIG_MTRR=y + # CONFIG_MTRR_SANITIZER is not set + CONFIG_X86_PAT=y +@@ -302,11 +330,12 @@ CONFIG_PHYSICAL_START=0x1000000 + CONFIG_PHYSICAL_ALIGN=0x200000 + CONFIG_HOTPLUG_CPU=y + # CONFIG_COMPAT_VDSO is not set ++# CONFIG_CMDLINE_BOOL is not set + CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y + CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y + + # +-# Power management options ++# Power management and ACPI options + # + CONFIG_ARCH_HIBERNATION_HEADER=y + CONFIG_PM=y +@@ -333,20 +362,14 @@ CONFIG_ACPI_BATTERY=y + CONFIG_ACPI_BUTTON=y + CONFIG_ACPI_FAN=y + CONFIG_ACPI_DOCK=y +-# CONFIG_ACPI_BAY is not set + CONFIG_ACPI_PROCESSOR=y + CONFIG_ACPI_HOTPLUG_CPU=y + CONFIG_ACPI_THERMAL=y + CONFIG_ACPI_NUMA=y +-# CONFIG_ACPI_WMI is not set +-# CONFIG_ACPI_ASUS is not set +-# CONFIG_ACPI_TOSHIBA is not set + # CONFIG_ACPI_CUSTOM_DSDT is not set + CONFIG_ACPI_BLACKLIST_YEAR=0 + # CONFIG_ACPI_DEBUG is not set +-CONFIG_ACPI_EC=y + # CONFIG_ACPI_PCI_SLOT is not set +-CONFIG_ACPI_POWER=y + CONFIG_ACPI_SYSTEM=y + CONFIG_X86_PM_TIMER=y + CONFIG_ACPI_CONTAINER=y +@@ -381,13 +404,17 @@ CONFIG_X86_ACPI_CPUFREQ=y + # + # shared options + # +-# CONFIG_X86_ACPI_CPUFREQ_PROC_INTF is not set + # CONFIG_X86_SPEEDSTEP_LIB is not set + 
CONFIG_CPU_IDLE=y + CONFIG_CPU_IDLE_GOV_LADDER=y + CONFIG_CPU_IDLE_GOV_MENU=y + + # ++# Memory power savings ++# ++# CONFIG_I7300_IDLE is not set ++ ++# + # Bus options (PCI etc.) + # + CONFIG_PCI=y +@@ -395,8 +422,10 @@ CONFIG_PCI_DIRECT=y + CONFIG_PCI_MMCONFIG=y + CONFIG_PCI_DOMAINS=y + CONFIG_DMAR=y ++# CONFIG_DMAR_DEFAULT_ON is not set + CONFIG_DMAR_GFX_WA=y + CONFIG_DMAR_FLOPPY_WA=y ++# CONFIG_INTR_REMAP is not set + CONFIG_PCIEPORTBUS=y + # CONFIG_HOTPLUG_PCI_PCIE is not set + CONFIG_PCIEAER=y +@@ -405,6 +434,7 @@ CONFIG_ARCH_SUPPORTS_MSI=y + CONFIG_PCI_MSI=y + # CONFIG_PCI_LEGACY is not set + # CONFIG_PCI_DEBUG is not set ++# CONFIG_PCI_STUB is not set + CONFIG_HT_IRQ=y + CONFIG_ISA_DMA_API=y + CONFIG_K8_NB=y +@@ -438,6 +468,8 @@ CONFIG_HOTPLUG_PCI=y + # + CONFIG_BINFMT_ELF=y + CONFIG_COMPAT_BINFMT_ELF=y ++CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS=y ++# CONFIG_HAVE_AOUT is not set + CONFIG_BINFMT_MISC=y + CONFIG_IA32_EMULATION=y + # CONFIG_IA32_AOUT is not set +@@ -449,6 +481,7 @@ CONFIG_NET=y + # + # Networking options + # ++CONFIG_COMPAT_NET_DEV_OPS=y + CONFIG_PACKET=y + CONFIG_PACKET_MMAP=y + CONFIG_UNIX=y +@@ -509,7 +542,6 @@ CONFIG_DEFAULT_CUBIC=y + # CONFIG_DEFAULT_RENO is not set + CONFIG_DEFAULT_TCP_CONG="cubic" + CONFIG_TCP_MD5SIG=y +-# CONFIG_IP_VS is not set + CONFIG_IPV6=y + # CONFIG_IPV6_PRIVACY is not set + # CONFIG_IPV6_ROUTER_PREF is not set +@@ -547,19 +579,21 @@ CONFIG_NF_CONNTRACK_IRC=y + CONFIG_NF_CONNTRACK_SIP=y + CONFIG_NF_CT_NETLINK=y + CONFIG_NETFILTER_XTABLES=y ++CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y + CONFIG_NETFILTER_XT_TARGET_MARK=y + CONFIG_NETFILTER_XT_TARGET_NFLOG=y + CONFIG_NETFILTER_XT_TARGET_SECMARK=y +-CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=y + CONFIG_NETFILTER_XT_TARGET_TCPMSS=y + CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y + CONFIG_NETFILTER_XT_MATCH_MARK=y + CONFIG_NETFILTER_XT_MATCH_POLICY=y + CONFIG_NETFILTER_XT_MATCH_STATE=y ++# CONFIG_IP_VS is not set + + # + # IP: Netfilter Configuration + # ++CONFIG_NF_DEFRAG_IPV4=y + CONFIG_NF_CONNTRACK_IPV4=y + CONFIG_NF_CONNTRACK_PROC_COMPAT=y + CONFIG_IP_NF_IPTABLES=y +@@ -585,8 +619,8 @@ CONFIG_IP_NF_MANGLE=y + CONFIG_NF_CONNTRACK_IPV6=y + CONFIG_IP6_NF_IPTABLES=y + CONFIG_IP6_NF_MATCH_IPV6HEADER=y +-CONFIG_IP6_NF_FILTER=y + CONFIG_IP6_NF_TARGET_LOG=y ++CONFIG_IP6_NF_FILTER=y + CONFIG_IP6_NF_TARGET_REJECT=y + CONFIG_IP6_NF_MANGLE=y + # CONFIG_IP_DCCP is not set +@@ -594,6 +628,7 @@ CONFIG_IP6_NF_MANGLE=y + # CONFIG_TIPC is not set + # CONFIG_ATM is not set + # CONFIG_BRIDGE is not set ++# CONFIG_NET_DSA is not set + # CONFIG_VLAN_8021Q is not set + # CONFIG_DECNET is not set + CONFIG_LLC=y +@@ -613,6 +648,7 @@ CONFIG_NET_SCHED=y + # CONFIG_NET_SCH_HTB is not set + # CONFIG_NET_SCH_HFSC is not set + # CONFIG_NET_SCH_PRIO is not set ++# CONFIG_NET_SCH_MULTIQ is not set + # CONFIG_NET_SCH_RED is not set + # CONFIG_NET_SCH_SFQ is not set + # CONFIG_NET_SCH_TEQL is not set +@@ -620,6 +656,7 @@ CONFIG_NET_SCHED=y + # CONFIG_NET_SCH_GRED is not set + # CONFIG_NET_SCH_DSMARK is not set + # CONFIG_NET_SCH_NETEM is not set ++# CONFIG_NET_SCH_DRR is not set + # CONFIG_NET_SCH_INGRESS is not set + + # +@@ -634,6 +671,7 @@ CONFIG_NET_CLS=y + # CONFIG_NET_CLS_RSVP is not set + # CONFIG_NET_CLS_RSVP6 is not set + # CONFIG_NET_CLS_FLOW is not set ++# CONFIG_NET_CLS_CGROUP is not set + CONFIG_NET_EMATCH=y + CONFIG_NET_EMATCH_STACK=32 + # CONFIG_NET_EMATCH_CMP is not set +@@ -649,7 +687,9 @@ CONFIG_NET_CLS_ACT=y + # CONFIG_NET_ACT_NAT is not set + # CONFIG_NET_ACT_PEDIT is not set + # CONFIG_NET_ACT_SIMP is not set ++# 
CONFIG_NET_ACT_SKBEDIT is not set + CONFIG_NET_SCH_FIFO=y ++# CONFIG_DCB is not set + + # + # Network testing +@@ -666,29 +706,33 @@ CONFIG_HAMRADIO=y + # CONFIG_IRDA is not set + # CONFIG_BT is not set + # CONFIG_AF_RXRPC is not set ++# CONFIG_PHONET is not set + CONFIG_FIB_RULES=y +- +-# +-# Wireless +-# ++CONFIG_WIRELESS=y + CONFIG_CFG80211=y ++# CONFIG_CFG80211_REG_DEBUG is not set + CONFIG_NL80211=y ++CONFIG_WIRELESS_OLD_REGULATORY=y + CONFIG_WIRELESS_EXT=y + CONFIG_WIRELESS_EXT_SYSFS=y ++# CONFIG_LIB80211 is not set + CONFIG_MAC80211=y + + # + # Rate control algorithm selection + # +-CONFIG_MAC80211_RC_PID=y +-CONFIG_MAC80211_RC_DEFAULT_PID=y +-CONFIG_MAC80211_RC_DEFAULT="pid" ++CONFIG_MAC80211_RC_MINSTREL=y ++# CONFIG_MAC80211_RC_DEFAULT_PID is not set ++CONFIG_MAC80211_RC_DEFAULT_MINSTREL=y ++CONFIG_MAC80211_RC_DEFAULT="minstrel" + # CONFIG_MAC80211_MESH is not set + CONFIG_MAC80211_LEDS=y + # CONFIG_MAC80211_DEBUGFS is not set + # CONFIG_MAC80211_DEBUG_MENU is not set +-# CONFIG_IEEE80211 is not set +-# CONFIG_RFKILL is not set ++# CONFIG_WIMAX is not set ++CONFIG_RFKILL=y ++# CONFIG_RFKILL_INPUT is not set ++CONFIG_RFKILL_LEDS=y + # CONFIG_NET_9P is not set + + # +@@ -712,7 +756,7 @@ CONFIG_PROC_EVENTS=y + # CONFIG_MTD is not set + # CONFIG_PARPORT is not set + CONFIG_PNP=y +-# CONFIG_PNP_DEBUG is not set ++CONFIG_PNP_DEBUG_MESSAGES=y + + # + # Protocols +@@ -740,21 +784,21 @@ CONFIG_BLK_DEV_RAM_SIZE=16384 + CONFIG_MISC_DEVICES=y + # CONFIG_IBM_ASM is not set + # CONFIG_PHANTOM is not set +-# CONFIG_EEPROM_93CX6 is not set + # CONFIG_SGI_IOC4 is not set + # CONFIG_TIFM_CORE is not set +-# CONFIG_ACER_WMI is not set +-# CONFIG_ASUS_LAPTOP is not set +-# CONFIG_FUJITSU_LAPTOP is not set +-# CONFIG_MSI_LAPTOP is not set +-# CONFIG_COMPAL_LAPTOP is not set +-# CONFIG_SONY_LAPTOP is not set +-# CONFIG_THINKPAD_ACPI is not set +-# CONFIG_INTEL_MENLOW is not set ++# CONFIG_ICS932S401 is not set + # CONFIG_ENCLOSURE_SERVICES is not set + # CONFIG_SGI_XP is not set + # CONFIG_HP_ILO is not set + # CONFIG_SGI_GRU is not set ++# CONFIG_C2PORT is not set ++ ++# ++# EEPROM support ++# ++# CONFIG_EEPROM_AT24 is not set ++# CONFIG_EEPROM_LEGACY is not set ++# CONFIG_EEPROM_93CX6 is not set + CONFIG_HAVE_IDE=y + # CONFIG_IDE is not set + +@@ -793,7 +837,7 @@ CONFIG_SCSI_WAIT_SCAN=m + # + CONFIG_SCSI_SPI_ATTRS=y + # CONFIG_SCSI_FC_ATTRS is not set +-CONFIG_SCSI_ISCSI_ATTRS=y ++# CONFIG_SCSI_ISCSI_ATTRS is not set + # CONFIG_SCSI_SAS_ATTRS is not set + # CONFIG_SCSI_SAS_LIBSAS is not set + # CONFIG_SCSI_SRP_ATTRS is not set +@@ -864,6 +908,7 @@ CONFIG_PATA_OLDPIIX=y + CONFIG_PATA_SCH=y + CONFIG_MD=y + CONFIG_BLK_DEV_MD=y ++CONFIG_MD_AUTODETECT=y + # CONFIG_MD_LINEAR is not set + # CONFIG_MD_RAID0 is not set + # CONFIG_MD_RAID1 is not set +@@ -919,6 +964,9 @@ CONFIG_PHYLIB=y + # CONFIG_BROADCOM_PHY is not set + # CONFIG_ICPLUS_PHY is not set + # CONFIG_REALTEK_PHY is not set ++# CONFIG_NATIONAL_PHY is not set ++# CONFIG_STE10XP is not set ++# CONFIG_LSI_ET1011C_PHY is not set + # CONFIG_FIXED_PHY is not set + # CONFIG_MDIO_BITBANG is not set + CONFIG_NET_ETHERNET=y +@@ -942,6 +990,9 @@ CONFIG_NET_TULIP=y + # CONFIG_IBM_NEW_EMAC_RGMII is not set + # CONFIG_IBM_NEW_EMAC_TAH is not set + # CONFIG_IBM_NEW_EMAC_EMAC4 is not set ++# CONFIG_IBM_NEW_EMAC_NO_FLOW_CTRL is not set ++# CONFIG_IBM_NEW_EMAC_MAL_CLR_ICINTSTAT is not set ++# CONFIG_IBM_NEW_EMAC_MAL_COMMON_ERR is not set + CONFIG_NET_PCI=y + # CONFIG_PCNET32 is not set + # CONFIG_AMD8111_ETH is not set +@@ -949,7 +1000,6 @@ CONFIG_NET_PCI=y + # 
CONFIG_B44 is not set + CONFIG_FORCEDETH=y + # CONFIG_FORCEDETH_NAPI is not set +-# CONFIG_EEPRO100 is not set + CONFIG_E100=y + # CONFIG_FEALNX is not set + # CONFIG_NATSEMI is not set +@@ -963,15 +1013,16 @@ CONFIG_8139TOO_PIO=y + # CONFIG_R6040 is not set + # CONFIG_SIS900 is not set + # CONFIG_EPIC100 is not set ++# CONFIG_SMSC9420 is not set + # CONFIG_SUNDANCE is not set + # CONFIG_TLAN is not set + # CONFIG_VIA_RHINE is not set + # CONFIG_SC92031 is not set ++# CONFIG_ATL2 is not set + CONFIG_NETDEV_1000=y + # CONFIG_ACENIC is not set + # CONFIG_DL2K is not set + CONFIG_E1000=y +-# CONFIG_E1000_DISABLE_PACKET_SPLIT is not set + # CONFIG_E1000E is not set + # CONFIG_IP1000 is not set + # CONFIG_IGB is not set +@@ -989,18 +1040,23 @@ CONFIG_TIGON3=y + # CONFIG_QLA3XXX is not set + # CONFIG_ATL1 is not set + # CONFIG_ATL1E is not set ++# CONFIG_JME is not set + CONFIG_NETDEV_10000=y + # CONFIG_CHELSIO_T1 is not set ++CONFIG_CHELSIO_T3_DEPENDS=y + # CONFIG_CHELSIO_T3 is not set ++# CONFIG_ENIC is not set + # CONFIG_IXGBE is not set + # CONFIG_IXGB is not set + # CONFIG_S2IO is not set + # CONFIG_MYRI10GE is not set + # CONFIG_NETXEN_NIC is not set + # CONFIG_NIU is not set ++# CONFIG_MLX4_EN is not set + # CONFIG_MLX4_CORE is not set + # CONFIG_TEHUTI is not set + # CONFIG_BNX2X is not set ++# CONFIG_QLGE is not set + # CONFIG_SFC is not set + CONFIG_TR=y + # CONFIG_IBMOL is not set +@@ -1013,9 +1069,8 @@ CONFIG_TR=y + # CONFIG_WLAN_PRE80211 is not set + CONFIG_WLAN_80211=y + # CONFIG_PCMCIA_RAYCS is not set +-# CONFIG_IPW2100 is not set +-# CONFIG_IPW2200 is not set + # CONFIG_LIBERTAS is not set ++# CONFIG_LIBERTAS_THINFIRM is not set + # CONFIG_AIRO is not set + # CONFIG_HERMES is not set + # CONFIG_ATMEL is not set +@@ -1032,6 +1087,8 @@ CONFIG_WLAN_80211=y + CONFIG_ATH5K=y + # CONFIG_ATH5K_DEBUG is not set + # CONFIG_ATH9K is not set ++# CONFIG_IPW2100 is not set ++# CONFIG_IPW2200 is not set + # CONFIG_IWLCORE is not set + # CONFIG_IWLWIFI_LEDS is not set + # CONFIG_IWLAGN is not set +@@ -1043,6 +1100,10 @@ CONFIG_ATH5K=y + # CONFIG_RT2X00 is not set + + # ++# Enable WiMAX (Networking options) to see the WiMAX drivers ++# ++ ++# + # USB Network Adapters + # + # CONFIG_USB_CATC is not set +@@ -1050,6 +1111,7 @@ CONFIG_ATH5K=y + # CONFIG_USB_PEGASUS is not set + # CONFIG_USB_RTL8150 is not set + # CONFIG_USB_USBNET is not set ++# CONFIG_USB_HSO is not set + CONFIG_NET_PCMCIA=y + # CONFIG_PCMCIA_3C589 is not set + # CONFIG_PCMCIA_3C574 is not set +@@ -1059,6 +1121,7 @@ CONFIG_NET_PCMCIA=y + # CONFIG_PCMCIA_SMC91C92 is not set + # CONFIG_PCMCIA_XIRC2PS is not set + # CONFIG_PCMCIA_AXNET is not set ++# CONFIG_PCMCIA_IBMTR is not set + # CONFIG_WAN is not set + CONFIG_FDDI=y + # CONFIG_DEFXX is not set +@@ -1110,6 +1173,7 @@ CONFIG_MOUSE_PS2_LOGIPS2PP=y + CONFIG_MOUSE_PS2_SYNAPTICS=y + CONFIG_MOUSE_PS2_LIFEBOOK=y + CONFIG_MOUSE_PS2_TRACKPOINT=y ++# CONFIG_MOUSE_PS2_ELANTECH is not set + # CONFIG_MOUSE_PS2_TOUCHKIT is not set + # CONFIG_MOUSE_SERIAL is not set + # CONFIG_MOUSE_APPLETOUCH is not set +@@ -1147,15 +1211,16 @@ CONFIG_INPUT_TOUCHSCREEN=y + # CONFIG_TOUCHSCREEN_FUJITSU is not set + # CONFIG_TOUCHSCREEN_GUNZE is not set + # CONFIG_TOUCHSCREEN_ELO is not set ++# CONFIG_TOUCHSCREEN_WACOM_W8001 is not set + # CONFIG_TOUCHSCREEN_MTOUCH is not set + # CONFIG_TOUCHSCREEN_INEXIO is not set + # CONFIG_TOUCHSCREEN_MK712 is not set + # CONFIG_TOUCHSCREEN_PENMOUNT is not set + # CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set + # CONFIG_TOUCHSCREEN_TOUCHWIN is not set +-# 
CONFIG_TOUCHSCREEN_UCB1400 is not set + # CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set + # CONFIG_TOUCHSCREEN_TOUCHIT213 is not set ++# CONFIG_TOUCHSCREEN_TSC2007 is not set + CONFIG_INPUT_MISC=y + # CONFIG_INPUT_PCSPKR is not set + # CONFIG_INPUT_APANEL is not set +@@ -1165,6 +1230,7 @@ CONFIG_INPUT_MISC=y + # CONFIG_INPUT_KEYSPAN_REMOTE is not set + # CONFIG_INPUT_POWERMATE is not set + # CONFIG_INPUT_YEALINK is not set ++# CONFIG_INPUT_CM109 is not set + # CONFIG_INPUT_UINPUT is not set + + # +@@ -1231,6 +1297,7 @@ CONFIG_SERIAL_CORE=y + CONFIG_SERIAL_CORE_CONSOLE=y + # CONFIG_SERIAL_JSM is not set + CONFIG_UNIX98_PTYS=y ++# CONFIG_DEVPTS_MULTIPLE_INSTANCES is not set + # CONFIG_LEGACY_PTYS is not set + # CONFIG_IPMI_HANDLER is not set + CONFIG_HW_RANDOM=y +@@ -1260,6 +1327,7 @@ CONFIG_I2C=y + CONFIG_I2C_BOARDINFO=y + # CONFIG_I2C_CHARDEV is not set + CONFIG_I2C_HELPER_AUTO=y ++CONFIG_I2C_ALGOBIT=y + + # + # I2C Hardware Bus support +@@ -1311,8 +1379,6 @@ CONFIG_I2C_I801=y + # Miscellaneous I2C Chip support + # + # CONFIG_DS1682 is not set +-# CONFIG_EEPROM_AT24 is not set +-# CONFIG_EEPROM_LEGACY is not set + # CONFIG_SENSORS_PCF8574 is not set + # CONFIG_PCF8575 is not set + # CONFIG_SENSORS_PCA9539 is not set +@@ -1331,8 +1397,78 @@ CONFIG_POWER_SUPPLY=y + # CONFIG_POWER_SUPPLY_DEBUG is not set + # CONFIG_PDA_POWER is not set + # CONFIG_BATTERY_DS2760 is not set +-# CONFIG_HWMON is not set ++# CONFIG_BATTERY_BQ27x00 is not set ++CONFIG_HWMON=y ++# CONFIG_HWMON_VID is not set ++# CONFIG_SENSORS_ABITUGURU is not set ++# CONFIG_SENSORS_ABITUGURU3 is not set ++# CONFIG_SENSORS_AD7414 is not set ++# CONFIG_SENSORS_AD7418 is not set ++# CONFIG_SENSORS_ADM1021 is not set ++# CONFIG_SENSORS_ADM1025 is not set ++# CONFIG_SENSORS_ADM1026 is not set ++# CONFIG_SENSORS_ADM1029 is not set ++# CONFIG_SENSORS_ADM1031 is not set ++# CONFIG_SENSORS_ADM9240 is not set ++# CONFIG_SENSORS_ADT7462 is not set ++# CONFIG_SENSORS_ADT7470 is not set ++# CONFIG_SENSORS_ADT7473 is not set ++# CONFIG_SENSORS_ADT7475 is not set ++# CONFIG_SENSORS_K8TEMP is not set ++# CONFIG_SENSORS_ASB100 is not set ++# CONFIG_SENSORS_ATXP1 is not set ++# CONFIG_SENSORS_DS1621 is not set ++# CONFIG_SENSORS_I5K_AMB is not set ++# CONFIG_SENSORS_F71805F is not set ++# CONFIG_SENSORS_F71882FG is not set ++# CONFIG_SENSORS_F75375S is not set ++# CONFIG_SENSORS_FSCHER is not set ++# CONFIG_SENSORS_FSCPOS is not set ++# CONFIG_SENSORS_FSCHMD is not set ++# CONFIG_SENSORS_GL518SM is not set ++# CONFIG_SENSORS_GL520SM is not set ++# CONFIG_SENSORS_CORETEMP is not set ++# CONFIG_SENSORS_IT87 is not set ++# CONFIG_SENSORS_LM63 is not set ++# CONFIG_SENSORS_LM75 is not set ++# CONFIG_SENSORS_LM77 is not set ++# CONFIG_SENSORS_LM78 is not set ++# CONFIG_SENSORS_LM80 is not set ++# CONFIG_SENSORS_LM83 is not set ++# CONFIG_SENSORS_LM85 is not set ++# CONFIG_SENSORS_LM87 is not set ++# CONFIG_SENSORS_LM90 is not set ++# CONFIG_SENSORS_LM92 is not set ++# CONFIG_SENSORS_LM93 is not set ++# CONFIG_SENSORS_LTC4245 is not set ++# CONFIG_SENSORS_MAX1619 is not set ++# CONFIG_SENSORS_MAX6650 is not set ++# CONFIG_SENSORS_PC87360 is not set ++# CONFIG_SENSORS_PC87427 is not set ++# CONFIG_SENSORS_SIS5595 is not set ++# CONFIG_SENSORS_DME1737 is not set ++# CONFIG_SENSORS_SMSC47M1 is not set ++# CONFIG_SENSORS_SMSC47M192 is not set ++# CONFIG_SENSORS_SMSC47B397 is not set ++# CONFIG_SENSORS_ADS7828 is not set ++# CONFIG_SENSORS_THMC50 is not set ++# CONFIG_SENSORS_VIA686A is not set ++# CONFIG_SENSORS_VT1211 is not set ++# CONFIG_SENSORS_VT8231 is 
not set ++# CONFIG_SENSORS_W83781D is not set ++# CONFIG_SENSORS_W83791D is not set ++# CONFIG_SENSORS_W83792D is not set ++# CONFIG_SENSORS_W83793 is not set ++# CONFIG_SENSORS_W83L785TS is not set ++# CONFIG_SENSORS_W83L786NG is not set ++# CONFIG_SENSORS_W83627HF is not set ++# CONFIG_SENSORS_W83627EHF is not set ++# CONFIG_SENSORS_HDAPS is not set ++# CONFIG_SENSORS_LIS3LV02D is not set ++# CONFIG_SENSORS_APPLESMC is not set ++# CONFIG_HWMON_DEBUG_CHIP is not set + CONFIG_THERMAL=y ++# CONFIG_THERMAL_HWMON is not set + CONFIG_WATCHDOG=y + # CONFIG_WATCHDOG_NOWAYOUT is not set + +@@ -1352,15 +1488,18 @@ CONFIG_WATCHDOG=y + # CONFIG_I6300ESB_WDT is not set + # CONFIG_ITCO_WDT is not set + # CONFIG_IT8712F_WDT is not set ++# CONFIG_IT87_WDT is not set + # CONFIG_HP_WATCHDOG is not set + # CONFIG_SC1200_WDT is not set + # CONFIG_PC87413_WDT is not set + # CONFIG_60XX_WDT is not set + # CONFIG_SBC8360_WDT is not set + # CONFIG_CPU5_WDT is not set ++# CONFIG_SMSC_SCH311X_WDT is not set + # CONFIG_SMSC37B787_WDT is not set + # CONFIG_W83627HF_WDT is not set + # CONFIG_W83697HF_WDT is not set ++# CONFIG_W83697UG_WDT is not set + # CONFIG_W83877F_WDT is not set + # CONFIG_W83977F_WDT is not set + # CONFIG_MACHZ_WDT is not set +@@ -1376,11 +1515,11 @@ CONFIG_WATCHDOG=y + # USB-based Watchdog Cards + # + # CONFIG_USBPCWATCHDOG is not set ++CONFIG_SSB_POSSIBLE=y + + # + # Sonics Silicon Backplane + # +-CONFIG_SSB_POSSIBLE=y + # CONFIG_SSB is not set + + # +@@ -1389,7 +1528,13 @@ CONFIG_SSB_POSSIBLE=y + # CONFIG_MFD_CORE is not set + # CONFIG_MFD_SM501 is not set + # CONFIG_HTC_PASIC3 is not set ++# CONFIG_TWL4030_CORE is not set + # CONFIG_MFD_TMIO is not set ++# CONFIG_PMIC_DA903X is not set ++# CONFIG_MFD_WM8400 is not set ++# CONFIG_MFD_WM8350_I2C is not set ++# CONFIG_MFD_PCF50633 is not set ++# CONFIG_REGULATOR is not set + + # + # Multimedia devices +@@ -1423,6 +1568,7 @@ CONFIG_DRM=y + # CONFIG_DRM_I810 is not set + # CONFIG_DRM_I830 is not set + CONFIG_DRM_I915=y ++CONFIG_DRM_I915_KMS=y + # CONFIG_DRM_MGA is not set + # CONFIG_DRM_SIS is not set + # CONFIG_DRM_VIA is not set +@@ -1432,6 +1578,7 @@ CONFIG_DRM_I915=y + CONFIG_FB=y + # CONFIG_FIRMWARE_EDID is not set + # CONFIG_FB_DDC is not set ++# CONFIG_FB_BOOT_VESA_SUPPORT is not set + CONFIG_FB_CFB_FILLRECT=y + CONFIG_FB_CFB_COPYAREA=y + CONFIG_FB_CFB_IMAGEBLIT=y +@@ -1460,7 +1607,6 @@ CONFIG_FB_TILEBLITTING=y + # CONFIG_FB_UVESA is not set + # CONFIG_FB_VESA is not set + CONFIG_FB_EFI=y +-# CONFIG_FB_IMAC is not set + # CONFIG_FB_N411 is not set + # CONFIG_FB_HGA is not set + # CONFIG_FB_S1D13XXX is not set +@@ -1475,6 +1621,7 @@ CONFIG_FB_EFI=y + # CONFIG_FB_S3 is not set + # CONFIG_FB_SAVAGE is not set + # CONFIG_FB_SIS is not set ++# CONFIG_FB_VIA is not set + # CONFIG_FB_NEOMAGIC is not set + # CONFIG_FB_KYRO is not set + # CONFIG_FB_3DFX is not set +@@ -1486,12 +1633,15 @@ CONFIG_FB_EFI=y + # CONFIG_FB_CARMINE is not set + # CONFIG_FB_GEODE is not set + # CONFIG_FB_VIRTUAL is not set ++# CONFIG_FB_METRONOME is not set ++# CONFIG_FB_MB862XX is not set + CONFIG_BACKLIGHT_LCD_SUPPORT=y + # CONFIG_LCD_CLASS_DEVICE is not set + CONFIG_BACKLIGHT_CLASS_DEVICE=y +-# CONFIG_BACKLIGHT_CORGI is not set ++CONFIG_BACKLIGHT_GENERIC=y + # CONFIG_BACKLIGHT_PROGEAR is not set + # CONFIG_BACKLIGHT_MBP_NVIDIA is not set ++# CONFIG_BACKLIGHT_SAHARA is not set + + # + # Display device support +@@ -1511,10 +1661,12 @@ CONFIG_LOGO=y + # CONFIG_LOGO_LINUX_VGA16 is not set + CONFIG_LOGO_LINUX_CLUT224=y + CONFIG_SOUND=y ++CONFIG_SOUND_OSS_CORE=y + 
CONFIG_SND=y + CONFIG_SND_TIMER=y + CONFIG_SND_PCM=y + CONFIG_SND_HWDEP=y ++CONFIG_SND_JACK=y + CONFIG_SND_SEQUENCER=y + CONFIG_SND_SEQ_DUMMY=y + CONFIG_SND_OSSEMUL=y +@@ -1522,6 +1674,8 @@ CONFIG_SND_MIXER_OSS=y + CONFIG_SND_PCM_OSS=y + CONFIG_SND_PCM_OSS_PLUGINS=y + CONFIG_SND_SEQUENCER_OSS=y ++CONFIG_SND_HRTIMER=y ++CONFIG_SND_SEQ_HRTIMER_DEFAULT=y + CONFIG_SND_DYNAMIC_MINORS=y + CONFIG_SND_SUPPORT_OLD_API=y + CONFIG_SND_VERBOSE_PROCFS=y +@@ -1575,11 +1729,16 @@ CONFIG_SND_PCI=y + # CONFIG_SND_FM801 is not set + CONFIG_SND_HDA_INTEL=y + CONFIG_SND_HDA_HWDEP=y ++# CONFIG_SND_HDA_RECONFIG is not set ++# CONFIG_SND_HDA_INPUT_BEEP is not set + CONFIG_SND_HDA_CODEC_REALTEK=y + CONFIG_SND_HDA_CODEC_ANALOG=y + CONFIG_SND_HDA_CODEC_SIGMATEL=y + CONFIG_SND_HDA_CODEC_VIA=y + CONFIG_SND_HDA_CODEC_ATIHDMI=y ++CONFIG_SND_HDA_CODEC_NVHDMI=y ++CONFIG_SND_HDA_CODEC_INTELHDMI=y ++CONFIG_SND_HDA_ELD=y + CONFIG_SND_HDA_CODEC_CONEXANT=y + CONFIG_SND_HDA_CODEC_CMEDIA=y + CONFIG_SND_HDA_CODEC_SI3054=y +@@ -1612,6 +1771,7 @@ CONFIG_SND_USB=y + # CONFIG_SND_USB_AUDIO is not set + # CONFIG_SND_USB_USX2Y is not set + # CONFIG_SND_USB_CAIAQ is not set ++# CONFIG_SND_USB_US122L is not set + CONFIG_SND_PCMCIA=y + # CONFIG_SND_VXPOCKET is not set + # CONFIG_SND_PDAUDIOCF is not set +@@ -1626,15 +1786,37 @@ CONFIG_HIDRAW=y + # USB Input Devices + # + CONFIG_USB_HID=y +-CONFIG_USB_HIDINPUT_POWERBOOK=y +-CONFIG_HID_FF=y + CONFIG_HID_PID=y ++CONFIG_USB_HIDDEV=y ++ ++# ++# Special HID drivers ++# ++CONFIG_HID_COMPAT=y ++CONFIG_HID_A4TECH=y ++CONFIG_HID_APPLE=y ++CONFIG_HID_BELKIN=y ++CONFIG_HID_CHERRY=y ++CONFIG_HID_CHICONY=y ++CONFIG_HID_CYPRESS=y ++CONFIG_HID_EZKEY=y ++CONFIG_HID_GYRATION=y ++CONFIG_HID_LOGITECH=y + CONFIG_LOGITECH_FF=y + # CONFIG_LOGIRUMBLEPAD2_FF is not set ++CONFIG_HID_MICROSOFT=y ++CONFIG_HID_MONTEREY=y ++CONFIG_HID_NTRIG=y ++CONFIG_HID_PANTHERLORD=y + CONFIG_PANTHERLORD_FF=y ++CONFIG_HID_PETALYNX=y ++CONFIG_HID_SAMSUNG=y ++CONFIG_HID_SONY=y ++CONFIG_HID_SUNPLUS=y ++# CONFIG_GREENASIA_FF is not set ++CONFIG_HID_TOPSEED=y + CONFIG_THRUSTMASTER_FF=y + CONFIG_ZEROPLUS_FF=y +-CONFIG_USB_HIDDEV=y + CONFIG_USB_SUPPORT=y + CONFIG_USB_ARCH_HAS_HCD=y + CONFIG_USB_ARCH_HAS_OHCI=y +@@ -1652,6 +1834,8 @@ CONFIG_USB_DEVICEFS=y + CONFIG_USB_SUSPEND=y + # CONFIG_USB_OTG is not set + CONFIG_USB_MON=y ++# CONFIG_USB_WUSB is not set ++# CONFIG_USB_WUSB_CBAF is not set + + # + # USB Host Controller Drivers +@@ -1660,6 +1844,7 @@ CONFIG_USB_MON=y + CONFIG_USB_EHCI_HCD=y + # CONFIG_USB_EHCI_ROOT_HUB_TT is not set + # CONFIG_USB_EHCI_TT_NEWSCHED is not set ++# CONFIG_USB_OXU210HP_HCD is not set + # CONFIG_USB_ISP116X_HCD is not set + # CONFIG_USB_ISP1760_HCD is not set + CONFIG_USB_OHCI_HCD=y +@@ -1669,6 +1854,8 @@ CONFIG_USB_OHCI_LITTLE_ENDIAN=y + CONFIG_USB_UHCI_HCD=y + # CONFIG_USB_SL811_HCD is not set + # CONFIG_USB_R8A66597_HCD is not set ++# CONFIG_USB_WHCI_HCD is not set ++# CONFIG_USB_HWA_HCD is not set + + # + # USB Device Class drivers +@@ -1676,20 +1863,20 @@ CONFIG_USB_UHCI_HCD=y + # CONFIG_USB_ACM is not set + CONFIG_USB_PRINTER=y + # CONFIG_USB_WDM is not set ++# CONFIG_USB_TMC is not set + + # +-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' ++# NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; + # + + # +-# may also be needed; see USB_STORAGE Help for more information ++# see USB_STORAGE Help for more information + # + CONFIG_USB_STORAGE=y + # CONFIG_USB_STORAGE_DEBUG is not set + # CONFIG_USB_STORAGE_DATAFAB is not set + # CONFIG_USB_STORAGE_FREECOM is not set + # 
CONFIG_USB_STORAGE_ISD200 is not set +-# CONFIG_USB_STORAGE_DPCM is not set + # CONFIG_USB_STORAGE_USBAT is not set + # CONFIG_USB_STORAGE_SDDR09 is not set + # CONFIG_USB_STORAGE_SDDR55 is not set +@@ -1697,7 +1884,6 @@ CONFIG_USB_STORAGE=y + # CONFIG_USB_STORAGE_ALAUDA is not set + # CONFIG_USB_STORAGE_ONETOUCH is not set + # CONFIG_USB_STORAGE_KARMA is not set +-# CONFIG_USB_STORAGE_SIERRA is not set + # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set + CONFIG_USB_LIBUSUAL=y + +@@ -1718,6 +1904,7 @@ CONFIG_USB_LIBUSUAL=y + # CONFIG_USB_EMI62 is not set + # CONFIG_USB_EMI26 is not set + # CONFIG_USB_ADUTUX is not set ++# CONFIG_USB_SEVSEG is not set + # CONFIG_USB_RIO500 is not set + # CONFIG_USB_LEGOTOWER is not set + # CONFIG_USB_LCD is not set +@@ -1735,7 +1922,13 @@ CONFIG_USB_LIBUSUAL=y + # CONFIG_USB_IOWARRIOR is not set + # CONFIG_USB_TEST is not set + # CONFIG_USB_ISIGHTFW is not set ++# CONFIG_USB_VST is not set + # CONFIG_USB_GADGET is not set ++ ++# ++# OTG and related infrastructure ++# ++# CONFIG_UWB is not set + # CONFIG_MMC is not set + # CONFIG_MEMSTICK is not set + CONFIG_NEW_LEDS=y +@@ -1744,6 +1937,7 @@ CONFIG_LEDS_CLASS=y + # + # LED drivers + # ++# CONFIG_LEDS_ALIX2 is not set + # CONFIG_LEDS_PCA9532 is not set + # CONFIG_LEDS_CLEVO_MAIL is not set + # CONFIG_LEDS_PCA955X is not set +@@ -1754,6 +1948,7 @@ CONFIG_LEDS_CLASS=y + CONFIG_LEDS_TRIGGERS=y + # CONFIG_LEDS_TRIGGER_TIMER is not set + # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set ++# CONFIG_LEDS_TRIGGER_BACKLIGHT is not set + # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set + # CONFIG_ACCESSIBILITY is not set + # CONFIG_INFINIBAND is not set +@@ -1793,6 +1988,7 @@ CONFIG_RTC_INTF_DEV=y + # CONFIG_RTC_DRV_M41T80 is not set + # CONFIG_RTC_DRV_S35390A is not set + # CONFIG_RTC_DRV_FM3130 is not set ++# CONFIG_RTC_DRV_RX8581 is not set + + # + # SPI RTC drivers +@@ -1802,12 +1998,15 @@ CONFIG_RTC_INTF_DEV=y + # Platform RTC drivers + # + CONFIG_RTC_DRV_CMOS=y ++# CONFIG_RTC_DRV_DS1286 is not set + # CONFIG_RTC_DRV_DS1511 is not set + # CONFIG_RTC_DRV_DS1553 is not set + # CONFIG_RTC_DRV_DS1742 is not set + # CONFIG_RTC_DRV_STK17TA8 is not set + # CONFIG_RTC_DRV_M48T86 is not set ++# CONFIG_RTC_DRV_M48T35 is not set + # CONFIG_RTC_DRV_M48T59 is not set ++# CONFIG_RTC_DRV_BQ4802 is not set + # CONFIG_RTC_DRV_V3020 is not set + + # +@@ -1820,6 +2019,21 @@ CONFIG_DMADEVICES=y + # + # CONFIG_INTEL_IOATDMA is not set + # CONFIG_UIO is not set ++# CONFIG_STAGING is not set ++CONFIG_X86_PLATFORM_DEVICES=y ++# CONFIG_ACER_WMI is not set ++# CONFIG_ASUS_LAPTOP is not set ++# CONFIG_FUJITSU_LAPTOP is not set ++# CONFIG_MSI_LAPTOP is not set ++# CONFIG_PANASONIC_LAPTOP is not set ++# CONFIG_COMPAL_LAPTOP is not set ++# CONFIG_SONY_LAPTOP is not set ++# CONFIG_THINKPAD_ACPI is not set ++# CONFIG_INTEL_MENLOW is not set ++CONFIG_EEEPC_LAPTOP=y ++# CONFIG_ACPI_WMI is not set ++# CONFIG_ACPI_ASUS is not set ++# CONFIG_ACPI_TOSHIBA is not set + + # + # Firmware Drivers +@@ -1830,8 +2044,7 @@ CONFIG_EFI_VARS=y + # CONFIG_DELL_RBU is not set + # CONFIG_DCDBAS is not set + CONFIG_DMIID=y +-CONFIG_ISCSI_IBFT_FIND=y +-CONFIG_ISCSI_IBFT=y ++# CONFIG_ISCSI_IBFT_FIND is not set + + # + # File systems +@@ -1841,22 +2054,25 @@ CONFIG_EXT3_FS=y + CONFIG_EXT3_FS_XATTR=y + CONFIG_EXT3_FS_POSIX_ACL=y + CONFIG_EXT3_FS_SECURITY=y +-# CONFIG_EXT4DEV_FS is not set ++# CONFIG_EXT4_FS is not set + CONFIG_JBD=y + # CONFIG_JBD_DEBUG is not set + CONFIG_FS_MBCACHE=y + # CONFIG_REISERFS_FS is not set + # CONFIG_JFS_FS is not set + CONFIG_FS_POSIX_ACL=y 
++CONFIG_FILE_LOCKING=y + # CONFIG_XFS_FS is not set + # CONFIG_GFS2_FS is not set + # CONFIG_OCFS2_FS is not set ++# CONFIG_BTRFS_FS is not set + CONFIG_DNOTIFY=y + CONFIG_INOTIFY=y + CONFIG_INOTIFY_USER=y + CONFIG_QUOTA=y + CONFIG_QUOTA_NETLINK_INTERFACE=y + # CONFIG_PRINT_QUOTA_WARNING is not set ++CONFIG_QUOTA_TREE=y + # CONFIG_QFMT_V1 is not set + CONFIG_QFMT_V2=y + CONFIG_QUOTACTL=y +@@ -1890,16 +2106,14 @@ CONFIG_PROC_FS=y + CONFIG_PROC_KCORE=y + CONFIG_PROC_VMCORE=y + CONFIG_PROC_SYSCTL=y ++CONFIG_PROC_PAGE_MONITOR=y + CONFIG_SYSFS=y + CONFIG_TMPFS=y + CONFIG_TMPFS_POSIX_ACL=y + CONFIG_HUGETLBFS=y + CONFIG_HUGETLB_PAGE=y + # CONFIG_CONFIGFS_FS is not set +- +-# +-# Miscellaneous filesystems +-# ++CONFIG_MISC_FILESYSTEMS=y + # CONFIG_ADFS_FS is not set + # CONFIG_AFFS_FS is not set + # CONFIG_ECRYPT_FS is not set +@@ -1909,6 +2123,7 @@ CONFIG_HUGETLB_PAGE=y + # CONFIG_BFS_FS is not set + # CONFIG_EFS_FS is not set + # CONFIG_CRAMFS is not set ++# CONFIG_SQUASHFS is not set + # CONFIG_VXFS_FS is not set + # CONFIG_MINIX_FS is not set + # CONFIG_OMFS_FS is not set +@@ -1930,6 +2145,7 @@ CONFIG_NFS_ACL_SUPPORT=y + CONFIG_NFS_COMMON=y + CONFIG_SUNRPC=y + CONFIG_SUNRPC_GSS=y ++# CONFIG_SUNRPC_REGISTER_V4 is not set + CONFIG_RPCSEC_GSS_KRB5=y + # CONFIG_RPCSEC_GSS_SPKM3 is not set + # CONFIG_SMB_FS is not set +@@ -2006,7 +2222,7 @@ CONFIG_NLS_UTF8=y + # + CONFIG_TRACE_IRQFLAGS_SUPPORT=y + CONFIG_PRINTK_TIME=y +-CONFIG_ENABLE_WARN_DEPRECATED=y ++# CONFIG_ENABLE_WARN_DEPRECATED is not set + CONFIG_ENABLE_MUST_CHECK=y + CONFIG_FRAME_WARN=2048 + CONFIG_MAGIC_SYSRQ=y +@@ -2035,40 +2251,60 @@ CONFIG_TIMER_STATS=y + CONFIG_DEBUG_BUGVERBOSE=y + # CONFIG_DEBUG_INFO is not set + # CONFIG_DEBUG_VM is not set ++# CONFIG_DEBUG_VIRTUAL is not set + # CONFIG_DEBUG_WRITECOUNT is not set + CONFIG_DEBUG_MEMORY_INIT=y + # CONFIG_DEBUG_LIST is not set + # CONFIG_DEBUG_SG is not set ++# CONFIG_DEBUG_NOTIFIERS is not set ++CONFIG_ARCH_WANT_FRAME_POINTERS=y + CONFIG_FRAME_POINTER=y + # CONFIG_BOOT_PRINTK_DELAY is not set + # CONFIG_RCU_TORTURE_TEST is not set ++# CONFIG_RCU_CPU_STALL_DETECTOR is not set + # CONFIG_KPROBES_SANITY_TEST is not set + # CONFIG_BACKTRACE_SELF_TEST is not set ++# CONFIG_DEBUG_BLOCK_EXT_DEVT is not set + # CONFIG_LKDTM is not set + # CONFIG_FAULT_INJECTION is not set + # CONFIG_LATENCYTOP is not set + CONFIG_SYSCTL_SYSCALL_CHECK=y +-CONFIG_HAVE_FTRACE=y ++CONFIG_USER_STACKTRACE_SUPPORT=y ++CONFIG_HAVE_FUNCTION_TRACER=y ++CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y ++CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y + CONFIG_HAVE_DYNAMIC_FTRACE=y +-# CONFIG_FTRACE is not set ++CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y ++CONFIG_HAVE_HW_BRANCH_TRACER=y ++ ++# ++# Tracers ++# ++# CONFIG_FUNCTION_TRACER is not set + # CONFIG_IRQSOFF_TRACER is not set + # CONFIG_SYSPROF_TRACER is not set + # CONFIG_SCHED_TRACER is not set + # CONFIG_CONTEXT_SWITCH_TRACER is not set ++# CONFIG_BOOT_TRACER is not set ++# CONFIG_TRACE_BRANCH_PROFILING is not set ++# CONFIG_POWER_TRACER is not set ++# CONFIG_STACK_TRACER is not set ++# CONFIG_HW_BRANCH_TRACER is not set + CONFIG_PROVIDE_OHCI1394_DMA_INIT=y ++# CONFIG_DYNAMIC_PRINTK_DEBUG is not set + # CONFIG_SAMPLES is not set + CONFIG_HAVE_ARCH_KGDB=y + # CONFIG_KGDB is not set + # CONFIG_STRICT_DEVMEM is not set + CONFIG_X86_VERBOSE_BOOTUP=y + CONFIG_EARLY_PRINTK=y ++CONFIG_EARLY_PRINTK_DBGP=y + CONFIG_DEBUG_STACKOVERFLOW=y + CONFIG_DEBUG_STACK_USAGE=y + # CONFIG_DEBUG_PAGEALLOC is not set + # CONFIG_DEBUG_PER_CPU_MAPS is not set + # CONFIG_X86_PTDUMP is not set + 
CONFIG_DEBUG_RODATA=y +-# CONFIG_DIRECT_GBPAGES is not set + # CONFIG_DEBUG_RODATA_TEST is not set + CONFIG_DEBUG_NX_TEST=m + # CONFIG_IOMMU_DEBUG is not set +@@ -2092,8 +2328,10 @@ CONFIG_OPTIMIZE_INLINING=y + CONFIG_KEYS=y + CONFIG_KEYS_DEBUG_PROC_KEYS=y + CONFIG_SECURITY=y ++# CONFIG_SECURITYFS is not set + CONFIG_SECURITY_NETWORK=y + # CONFIG_SECURITY_NETWORK_XFRM is not set ++# CONFIG_SECURITY_PATH is not set + CONFIG_SECURITY_FILE_CAPABILITIES=y + # CONFIG_SECURITY_ROOTPLUG is not set + CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 +@@ -2104,7 +2342,6 @@ CONFIG_SECURITY_SELINUX_DISABLE=y + CONFIG_SECURITY_SELINUX_DEVELOP=y + CONFIG_SECURITY_SELINUX_AVC_STATS=y + CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 +-# CONFIG_SECURITY_SELINUX_ENABLE_SECMARK_DEFAULT is not set + # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set + # CONFIG_SECURITY_SMACK is not set + CONFIG_CRYPTO=y +@@ -2112,11 +2349,18 @@ CONFIG_CRYPTO=y + # + # Crypto core or helper + # ++# CONFIG_CRYPTO_FIPS is not set + CONFIG_CRYPTO_ALGAPI=y ++CONFIG_CRYPTO_ALGAPI2=y + CONFIG_CRYPTO_AEAD=y ++CONFIG_CRYPTO_AEAD2=y + CONFIG_CRYPTO_BLKCIPHER=y ++CONFIG_CRYPTO_BLKCIPHER2=y + CONFIG_CRYPTO_HASH=y ++CONFIG_CRYPTO_HASH2=y ++CONFIG_CRYPTO_RNG2=y + CONFIG_CRYPTO_MANAGER=y ++CONFIG_CRYPTO_MANAGER2=y + # CONFIG_CRYPTO_GF128MUL is not set + # CONFIG_CRYPTO_NULL is not set + # CONFIG_CRYPTO_CRYPTD is not set +@@ -2151,6 +2395,7 @@ CONFIG_CRYPTO_HMAC=y + # Digest + # + # CONFIG_CRYPTO_CRC32C is not set ++# CONFIG_CRYPTO_CRC32C_INTEL is not set + # CONFIG_CRYPTO_MD4 is not set + CONFIG_CRYPTO_MD5=y + # CONFIG_CRYPTO_MICHAEL_MIC is not set +@@ -2191,6 +2436,11 @@ CONFIG_CRYPTO_DES=y + # + # CONFIG_CRYPTO_DEFLATE is not set + # CONFIG_CRYPTO_LZO is not set ++ ++# ++# Random Number Generation ++# ++# CONFIG_CRYPTO_ANSI_CPRNG is not set + CONFIG_CRYPTO_HW=y + # CONFIG_CRYPTO_DEV_HIFN_795X is not set + CONFIG_HAVE_KVM=y +@@ -2205,6 +2455,7 @@ CONFIG_VIRTUALIZATION=y + CONFIG_BITREVERSE=y + CONFIG_GENERIC_FIND_FIRST_BIT=y + CONFIG_GENERIC_FIND_NEXT_BIT=y ++CONFIG_GENERIC_FIND_LAST_BIT=y + # CONFIG_CRC_CCITT is not set + # CONFIG_CRC16 is not set + CONFIG_CRC_T10DIF=y +Index: linux-2.6-tip/arch/x86/ia32/ia32_signal.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/ia32/ia32_signal.c ++++ linux-2.6-tip/arch/x86/ia32/ia32_signal.c +@@ -33,8 +33,6 @@ + #include + #include + +-#define DEBUG_SIG 0 +- + #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) + + #define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ +@@ -46,78 +44,83 @@ void signal_fault(struct pt_regs *regs, + + int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) + { +- int err; ++ int err = 0; + + if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) + return -EFAULT; + +- /* If you change siginfo_t structure, please make sure that +- this code is fixed accordingly. +- It should never copy any pad contained in the structure +- to avoid security leaks, but must copy the generic +- 3 ints plus the relevant union member. 
*/ +- err = __put_user(from->si_signo, &to->si_signo); +- err |= __put_user(from->si_errno, &to->si_errno); +- err |= __put_user((short)from->si_code, &to->si_code); +- +- if (from->si_code < 0) { +- err |= __put_user(from->si_pid, &to->si_pid); +- err |= __put_user(from->si_uid, &to->si_uid); +- err |= __put_user(ptr_to_compat(from->si_ptr), &to->si_ptr); +- } else { +- /* +- * First 32bits of unions are always present: +- * si_pid === si_band === si_tid === si_addr(LS half) +- */ +- err |= __put_user(from->_sifields._pad[0], +- &to->_sifields._pad[0]); +- switch (from->si_code >> 16) { +- case __SI_FAULT >> 16: +- break; +- case __SI_CHLD >> 16: +- err |= __put_user(from->si_utime, &to->si_utime); +- err |= __put_user(from->si_stime, &to->si_stime); +- err |= __put_user(from->si_status, &to->si_status); +- /* FALL THROUGH */ +- default: +- case __SI_KILL >> 16: +- err |= __put_user(from->si_uid, &to->si_uid); +- break; +- case __SI_POLL >> 16: +- err |= __put_user(from->si_fd, &to->si_fd); +- break; +- case __SI_TIMER >> 16: +- err |= __put_user(from->si_overrun, &to->si_overrun); +- err |= __put_user(ptr_to_compat(from->si_ptr), +- &to->si_ptr); +- break; +- /* This is not generated by the kernel as of now. */ +- case __SI_RT >> 16: +- case __SI_MESGQ >> 16: +- err |= __put_user(from->si_uid, &to->si_uid); +- err |= __put_user(from->si_int, &to->si_int); +- break; ++ put_user_try { ++ /* If you change siginfo_t structure, please make sure that ++ this code is fixed accordingly. ++ It should never copy any pad contained in the structure ++ to avoid security leaks, but must copy the generic ++ 3 ints plus the relevant union member. */ ++ put_user_ex(from->si_signo, &to->si_signo); ++ put_user_ex(from->si_errno, &to->si_errno); ++ put_user_ex((short)from->si_code, &to->si_code); ++ ++ if (from->si_code < 0) { ++ put_user_ex(from->si_pid, &to->si_pid); ++ put_user_ex(from->si_uid, &to->si_uid); ++ put_user_ex(ptr_to_compat(from->si_ptr), &to->si_ptr); ++ } else { ++ /* ++ * First 32bits of unions are always present: ++ * si_pid === si_band === si_tid === si_addr(LS half) ++ */ ++ put_user_ex(from->_sifields._pad[0], ++ &to->_sifields._pad[0]); ++ switch (from->si_code >> 16) { ++ case __SI_FAULT >> 16: ++ break; ++ case __SI_CHLD >> 16: ++ put_user_ex(from->si_utime, &to->si_utime); ++ put_user_ex(from->si_stime, &to->si_stime); ++ put_user_ex(from->si_status, &to->si_status); ++ /* FALL THROUGH */ ++ default: ++ case __SI_KILL >> 16: ++ put_user_ex(from->si_uid, &to->si_uid); ++ break; ++ case __SI_POLL >> 16: ++ put_user_ex(from->si_fd, &to->si_fd); ++ break; ++ case __SI_TIMER >> 16: ++ put_user_ex(from->si_overrun, &to->si_overrun); ++ put_user_ex(ptr_to_compat(from->si_ptr), ++ &to->si_ptr); ++ break; ++ /* This is not generated by the kernel as of now. 
*/ ++ case __SI_RT >> 16: ++ case __SI_MESGQ >> 16: ++ put_user_ex(from->si_uid, &to->si_uid); ++ put_user_ex(from->si_int, &to->si_int); ++ break; ++ } + } +- } ++ } put_user_catch(err); ++ + return err; + } + + int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) + { +- int err; ++ int err = 0; + u32 ptr32; + + if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t))) + return -EFAULT; + +- err = __get_user(to->si_signo, &from->si_signo); +- err |= __get_user(to->si_errno, &from->si_errno); +- err |= __get_user(to->si_code, &from->si_code); +- +- err |= __get_user(to->si_pid, &from->si_pid); +- err |= __get_user(to->si_uid, &from->si_uid); +- err |= __get_user(ptr32, &from->si_ptr); +- to->si_ptr = compat_ptr(ptr32); ++ get_user_try { ++ get_user_ex(to->si_signo, &from->si_signo); ++ get_user_ex(to->si_errno, &from->si_errno); ++ get_user_ex(to->si_code, &from->si_code); ++ ++ get_user_ex(to->si_pid, &from->si_pid); ++ get_user_ex(to->si_uid, &from->si_uid); ++ get_user_ex(ptr32, &from->si_ptr); ++ to->si_ptr = compat_ptr(ptr32); ++ } get_user_catch(err); + + return err; + } +@@ -142,17 +145,23 @@ asmlinkage long sys32_sigaltstack(const + struct pt_regs *regs) + { + stack_t uss, uoss; +- int ret; ++ int ret, err = 0; + mm_segment_t seg; + + if (uss_ptr) { + u32 ptr; + + memset(&uss, 0, sizeof(stack_t)); +- if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t)) || +- __get_user(ptr, &uss_ptr->ss_sp) || +- __get_user(uss.ss_flags, &uss_ptr->ss_flags) || +- __get_user(uss.ss_size, &uss_ptr->ss_size)) ++ if (!access_ok(VERIFY_READ, uss_ptr, sizeof(stack_ia32_t))) ++ return -EFAULT; ++ ++ get_user_try { ++ get_user_ex(ptr, &uss_ptr->ss_sp); ++ get_user_ex(uss.ss_flags, &uss_ptr->ss_flags); ++ get_user_ex(uss.ss_size, &uss_ptr->ss_size); ++ } get_user_catch(err); ++ ++ if (err) + return -EFAULT; + uss.ss_sp = compat_ptr(ptr); + } +@@ -161,10 +170,16 @@ asmlinkage long sys32_sigaltstack(const + ret = do_sigaltstack(uss_ptr ? &uss : NULL, &uoss, regs->sp); + set_fs(seg); + if (ret >= 0 && uoss_ptr) { +- if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t)) || +- __put_user(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp) || +- __put_user(uoss.ss_flags, &uoss_ptr->ss_flags) || +- __put_user(uoss.ss_size, &uoss_ptr->ss_size)) ++ if (!access_ok(VERIFY_WRITE, uoss_ptr, sizeof(stack_ia32_t))) ++ return -EFAULT; ++ ++ put_user_try { ++ put_user_ex(ptr_to_compat(uoss.ss_sp), &uoss_ptr->ss_sp); ++ put_user_ex(uoss.ss_flags, &uoss_ptr->ss_flags); ++ put_user_ex(uoss.ss_size, &uoss_ptr->ss_size); ++ } put_user_catch(err); ++ ++ if (err) + ret = -EFAULT; + } + return ret; +@@ -173,75 +188,78 @@ asmlinkage long sys32_sigaltstack(const + /* + * Do a signal return; undo the signal stack. 
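The hunks above all make the same transformation: instead of accumulating each __put_user()/__get_user() result into err with |=, the accesses are grouped under one put_user_try/get_user_try block after a single access_ok() check, and any fault is collected once by put_user_catch(err)/get_user_catch(err). A minimal sketch of the put_user side, assuming only the try/catch macros this series adds to <asm/uaccess.h>; demo_reply and demo_copy_reply are hypothetical names used for illustration, not code from the patch:

struct demo_reply {
	u32 status;
	u32 flags;
};

static int demo_copy_reply(struct demo_reply __user *to, u32 status, u32 flags)
{
	int err = 0;

	if (!access_ok(VERIFY_WRITE, to, sizeof(*to)))
		return -EFAULT;

	put_user_try {
		put_user_ex(status, &to->status);	/* no per-store return value */
		put_user_ex(flags, &to->flags);
	} put_user_catch(err);				/* err becomes -EFAULT if any store faulted */

	return err;
}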
+ */ ++#define loadsegment_gs(v) load_gs_index(v) ++#define loadsegment_fs(v) loadsegment(fs, v) ++#define loadsegment_ds(v) loadsegment(ds, v) ++#define loadsegment_es(v) loadsegment(es, v) ++ ++#define get_user_seg(seg) ({ unsigned int v; savesegment(seg, v); v; }) ++#define set_user_seg(seg, v) loadsegment_##seg(v) ++ + #define COPY(x) { \ +- err |= __get_user(regs->x, &sc->x); \ ++ get_user_ex(regs->x, &sc->x); \ + } + +-#define COPY_SEG_CPL3(seg) { \ +- unsigned short tmp; \ +- err |= __get_user(tmp, &sc->seg); \ +- regs->seg = tmp | 3; \ +-} ++#define GET_SEG(seg) ({ \ ++ unsigned short tmp; \ ++ get_user_ex(tmp, &sc->seg); \ ++ tmp; \ ++}) ++ ++#define COPY_SEG_CPL3(seg) do { \ ++ regs->seg = GET_SEG(seg) | 3; \ ++} while (0) + + #define RELOAD_SEG(seg) { \ +- unsigned int cur, pre; \ +- err |= __get_user(pre, &sc->seg); \ +- savesegment(seg, cur); \ ++ unsigned int pre = GET_SEG(seg); \ ++ unsigned int cur = get_user_seg(seg); \ + pre |= 3; \ + if (pre != cur) \ +- loadsegment(seg, pre); \ ++ set_user_seg(seg, pre); \ + } + + static int ia32_restore_sigcontext(struct pt_regs *regs, + struct sigcontext_ia32 __user *sc, + unsigned int *pax) + { +- unsigned int tmpflags, gs, oldgs, err = 0; ++ unsigned int tmpflags, err = 0; + void __user *buf; + u32 tmp; + + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + +-#if DEBUG_SIG +- printk(KERN_DEBUG "SIG restore_sigcontext: " +- "sc=%p err(%x) eip(%x) cs(%x) flg(%x)\n", +- sc, sc->err, sc->ip, sc->cs, sc->flags); +-#endif +- +- /* +- * Reload fs and gs if they have changed in the signal +- * handler. This does not handle long fs/gs base changes in +- * the handler, but does not clobber them at least in the +- * normal case. +- */ +- err |= __get_user(gs, &sc->gs); +- gs |= 3; +- savesegment(gs, oldgs); +- if (gs != oldgs) +- load_gs_index(gs); +- +- RELOAD_SEG(fs); +- RELOAD_SEG(ds); +- RELOAD_SEG(es); +- +- COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); +- COPY(dx); COPY(cx); COPY(ip); +- /* Don't touch extended registers */ +- +- COPY_SEG_CPL3(cs); +- COPY_SEG_CPL3(ss); +- +- err |= __get_user(tmpflags, &sc->flags); +- regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); +- /* disable syscall checks */ +- regs->orig_ax = -1; +- +- err |= __get_user(tmp, &sc->fpstate); +- buf = compat_ptr(tmp); +- err |= restore_i387_xstate_ia32(buf); ++ get_user_try { ++ /* ++ * Reload fs and gs if they have changed in the signal ++ * handler. This does not handle long fs/gs base changes in ++ * the handler, but does not clobber them at least in the ++ * normal case. 
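The rewritten RELOAD_SEG() above is built from get_user_seg(), which reads the live selector via savesegment(), and set_user_seg(), which reloads it through the matching loadsegment_*() wrapper, and it only touches the register when the saved value (with RPL forced to 3) actually differs. The same logic written out long-hand for one segment, as an illustration only; demo_reload_ds is a hypothetical helper, not part of the patch:

static void demo_reload_ds(unsigned int pre)
{
	unsigned int cur;

	savesegment(ds, cur);		/* current %ds selector */
	pre |= 3;			/* force user RPL, as RELOAD_SEG() does */
	if (pre != cur)
		loadsegment(ds, pre);	/* reload only if it changed */
}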
++ */ ++ RELOAD_SEG(gs); ++ RELOAD_SEG(fs); ++ RELOAD_SEG(ds); ++ RELOAD_SEG(es); ++ ++ COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); ++ COPY(dx); COPY(cx); COPY(ip); ++ /* Don't touch extended registers */ ++ ++ COPY_SEG_CPL3(cs); ++ COPY_SEG_CPL3(ss); ++ ++ get_user_ex(tmpflags, &sc->flags); ++ regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); ++ /* disable syscall checks */ ++ regs->orig_ax = -1; ++ ++ get_user_ex(tmp, &sc->fpstate); ++ buf = compat_ptr(tmp); ++ err |= restore_i387_xstate_ia32(buf); ++ ++ get_user_ex(*pax, &sc->ax); ++ } get_user_catch(err); + +- err |= __get_user(*pax, &sc->ax); + return err; + } + +@@ -317,38 +335,36 @@ static int ia32_setup_sigcontext(struct + void __user *fpstate, + struct pt_regs *regs, unsigned int mask) + { +- int tmp, err = 0; ++ int err = 0; + +- savesegment(gs, tmp); +- err |= __put_user(tmp, (unsigned int __user *)&sc->gs); +- savesegment(fs, tmp); +- err |= __put_user(tmp, (unsigned int __user *)&sc->fs); +- savesegment(ds, tmp); +- err |= __put_user(tmp, (unsigned int __user *)&sc->ds); +- savesegment(es, tmp); +- err |= __put_user(tmp, (unsigned int __user *)&sc->es); +- +- err |= __put_user(regs->di, &sc->di); +- err |= __put_user(regs->si, &sc->si); +- err |= __put_user(regs->bp, &sc->bp); +- err |= __put_user(regs->sp, &sc->sp); +- err |= __put_user(regs->bx, &sc->bx); +- err |= __put_user(regs->dx, &sc->dx); +- err |= __put_user(regs->cx, &sc->cx); +- err |= __put_user(regs->ax, &sc->ax); +- err |= __put_user(current->thread.trap_no, &sc->trapno); +- err |= __put_user(current->thread.error_code, &sc->err); +- err |= __put_user(regs->ip, &sc->ip); +- err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); +- err |= __put_user(regs->flags, &sc->flags); +- err |= __put_user(regs->sp, &sc->sp_at_signal); +- err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); +- +- err |= __put_user(ptr_to_compat(fpstate), &sc->fpstate); +- +- /* non-iBCS2 extensions.. */ +- err |= __put_user(mask, &sc->oldmask); +- err |= __put_user(current->thread.cr2, &sc->cr2); ++ put_user_try { ++ put_user_ex(get_user_seg(gs), (unsigned int __user *)&sc->gs); ++ put_user_ex(get_user_seg(fs), (unsigned int __user *)&sc->fs); ++ put_user_ex(get_user_seg(ds), (unsigned int __user *)&sc->ds); ++ put_user_ex(get_user_seg(es), (unsigned int __user *)&sc->es); ++ ++ put_user_ex(regs->di, &sc->di); ++ put_user_ex(regs->si, &sc->si); ++ put_user_ex(regs->bp, &sc->bp); ++ put_user_ex(regs->sp, &sc->sp); ++ put_user_ex(regs->bx, &sc->bx); ++ put_user_ex(regs->dx, &sc->dx); ++ put_user_ex(regs->cx, &sc->cx); ++ put_user_ex(regs->ax, &sc->ax); ++ put_user_ex(current->thread.trap_no, &sc->trapno); ++ put_user_ex(current->thread.error_code, &sc->err); ++ put_user_ex(regs->ip, &sc->ip); ++ put_user_ex(regs->cs, (unsigned int __user *)&sc->cs); ++ put_user_ex(regs->flags, &sc->flags); ++ put_user_ex(regs->sp, &sc->sp_at_signal); ++ put_user_ex(regs->ss, (unsigned int __user *)&sc->ss); ++ ++ put_user_ex(ptr_to_compat(fpstate), &sc->fpstate); ++ ++ /* non-iBCS2 extensions.. */ ++ put_user_ex(mask, &sc->oldmask); ++ put_user_ex(current->thread.cr2, &sc->cr2); ++ } put_user_catch(err); + + return err; + } +@@ -437,13 +453,17 @@ int ia32_setup_frame(int sig, struct k_s + else + restorer = &frame->retcode; + } +- err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); + +- /* +- * These are actually not used anymore, but left because some +- * gdb versions depend on them as a marker. 
+- */ +- err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode); ++ put_user_try { ++ put_user_ex(ptr_to_compat(restorer), &frame->pretcode); ++ ++ /* ++ * These are actually not used anymore, but left because some ++ * gdb versions depend on them as a marker. ++ */ ++ put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); ++ } put_user_catch(err); ++ + if (err) + return -EFAULT; + +@@ -462,11 +482,6 @@ int ia32_setup_frame(int sig, struct k_s + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; + +-#if DEBUG_SIG +- printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", +- current->comm, current->pid, frame, regs->ip, frame->pretcode); +-#endif +- + return 0; + } + +@@ -496,41 +511,40 @@ int ia32_setup_rt_frame(int sig, struct + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return -EFAULT; + +- err |= __put_user(sig, &frame->sig); +- err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo); +- err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc); +- err |= copy_siginfo_to_user32(&frame->info, info); +- if (err) +- return -EFAULT; ++ put_user_try { ++ put_user_ex(sig, &frame->sig); ++ put_user_ex(ptr_to_compat(&frame->info), &frame->pinfo); ++ put_user_ex(ptr_to_compat(&frame->uc), &frame->puc); ++ err |= copy_siginfo_to_user32(&frame->info, info); ++ ++ /* Create the ucontext. */ ++ if (cpu_has_xsave) ++ put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); ++ else ++ put_user_ex(0, &frame->uc.uc_flags); ++ put_user_ex(0, &frame->uc.uc_link); ++ put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); ++ put_user_ex(sas_ss_flags(regs->sp), ++ &frame->uc.uc_stack.ss_flags); ++ put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); ++ err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, ++ regs, set->sig[0]); ++ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); + +- /* Create the ucontext. */ +- if (cpu_has_xsave) +- err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); +- else +- err |= __put_user(0, &frame->uc.uc_flags); +- err |= __put_user(0, &frame->uc.uc_link); +- err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); +- err |= __put_user(sas_ss_flags(regs->sp), +- &frame->uc.uc_stack.ss_flags); +- err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); +- err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate, +- regs, set->sig[0]); +- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); +- if (err) +- return -EFAULT; ++ if (ka->sa.sa_flags & SA_RESTORER) ++ restorer = ka->sa.sa_restorer; ++ else ++ restorer = VDSO32_SYMBOL(current->mm->context.vdso, ++ rt_sigreturn); ++ put_user_ex(ptr_to_compat(restorer), &frame->pretcode); ++ ++ /* ++ * Not actually used anymore, but left because some gdb ++ * versions need it. ++ */ ++ put_user_ex(*((u64 *)&code), (u64 *)frame->retcode); ++ } put_user_catch(err); + +- if (ka->sa.sa_flags & SA_RESTORER) +- restorer = ka->sa.sa_restorer; +- else +- restorer = VDSO32_SYMBOL(current->mm->context.vdso, +- rt_sigreturn); +- err |= __put_user(ptr_to_compat(restorer), &frame->pretcode); +- +- /* +- * Not actually used anymore, but left because some gdb +- * versions need it. 
+- */ +- err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode); + if (err) + return -EFAULT; + +@@ -549,10 +563,5 @@ int ia32_setup_rt_frame(int sig, struct + regs->cs = __USER32_CS; + regs->ss = __USER32_DS; + +-#if DEBUG_SIG +- printk(KERN_DEBUG "SIG deliver (%s:%d): sp=%p pc=%lx ra=%u\n", +- current->comm, current->pid, frame, regs->ip, frame->pretcode); +-#endif +- + return 0; + } +Index: linux-2.6-tip/arch/x86/ia32/ia32entry.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/ia32/ia32entry.S ++++ linux-2.6-tip/arch/x86/ia32/ia32entry.S +@@ -112,8 +112,8 @@ ENTRY(ia32_sysenter_target) + CFI_DEF_CFA rsp,0 + CFI_REGISTER rsp,rbp + SWAPGS_UNSAFE_STACK +- movq %gs:pda_kernelstack, %rsp +- addq $(PDA_STACKOFFSET),%rsp ++ movq PER_CPU_VAR(kernel_stack), %rsp ++ addq $(KERNEL_STACK_OFFSET),%rsp + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs, here we enable it straight after entry: +@@ -273,13 +273,13 @@ ENDPROC(ia32_sysenter_target) + ENTRY(ia32_cstar_target) + CFI_STARTPROC32 simple + CFI_SIGNAL_FRAME +- CFI_DEF_CFA rsp,PDA_STACKOFFSET ++ CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET + CFI_REGISTER rip,rcx + /*CFI_REGISTER rflags,r11*/ + SWAPGS_UNSAFE_STACK + movl %esp,%r8d + CFI_REGISTER rsp,r8 +- movq %gs:pda_kernelstack,%rsp ++ movq PER_CPU_VAR(kernel_stack),%rsp + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs and here we enable it straight after entry: +@@ -825,7 +825,11 @@ ia32_sys_call_table: + .quad compat_sys_signalfd4 + .quad sys_eventfd2 + .quad sys_epoll_create1 +- .quad sys_dup3 /* 330 */ ++ .quad sys_dup3 /* 330 */ + .quad sys_pipe2 + .quad sys_inotify_init1 ++ .quad quiet_ni_syscall /* preadv */ ++ .quad quiet_ni_syscall /* pwritev */ ++ .quad compat_sys_rt_tgsigqueueinfo /* 335 */ ++ .quad sys_perf_counter_open + ia32_syscall_end: +Index: linux-2.6-tip/arch/x86/include/asm/a.out-core.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/a.out-core.h ++++ linux-2.6-tip/arch/x86/include/asm/a.out-core.h +@@ -55,7 +55,7 @@ static inline void aout_dump_thread(stru + dump->regs.ds = (u16)regs->ds; + dump->regs.es = (u16)regs->es; + dump->regs.fs = (u16)regs->fs; +- savesegment(gs, dump->regs.gs); ++ dump->regs.gs = get_user_gs(regs); + dump->regs.orig_ax = regs->orig_ax; + dump->regs.ip = regs->ip; + dump->regs.cs = (u16)regs->cs; +Index: linux-2.6-tip/arch/x86/include/asm/acpi.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/acpi.h ++++ linux-2.6-tip/arch/x86/include/asm/acpi.h +@@ -50,8 +50,8 @@ + + #define ACPI_ASM_MACROS + #define BREAKPOINT3 +-#define ACPI_DISABLE_IRQS() local_irq_disable() +-#define ACPI_ENABLE_IRQS() local_irq_enable() ++#define ACPI_DISABLE_IRQS() local_irq_disable_nort() ++#define ACPI_ENABLE_IRQS() local_irq_enable_nort() + #define ACPI_FLUSH_CPU_CACHE() wbinvd() + + int __acpi_acquire_global_lock(unsigned int *lock); +@@ -102,9 +102,6 @@ static inline void disable_acpi(void) + acpi_noirq = 1; + } + +-/* Fixmap pages to reserve for ACPI boot-time tables (see fixmap.h) */ +-#define FIX_ACPI_PAGES 4 +- + extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); + + static inline void acpi_noirq_set(void) { acpi_noirq = 1; } +Index: linux-2.6-tip/arch/x86/include/asm/apic.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/apic.h ++++ 
linux-2.6-tip/arch/x86/include/asm/apic.h +@@ -1,15 +1,18 @@ + #ifndef _ASM_X86_APIC_H + #define _ASM_X86_APIC_H + +-#include ++#include + #include ++#include + + #include +-#include +-#include ++#include + #include ++#include ++#include ++#include ++#include + #include +-#include + #include + + #define ARCH_APICTIMER_STOPS_ON_C3 1 +@@ -33,7 +36,13 @@ + } while (0) + + ++#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) + extern void generic_apic_probe(void); ++#else ++static inline void generic_apic_probe(void) ++{ ++} ++#endif + + #ifdef CONFIG_X86_LOCAL_APIC + +@@ -41,6 +50,21 @@ extern unsigned int apic_verbosity; + extern int local_apic_timer_c2_ok; + + extern int disable_apic; ++ ++#ifdef CONFIG_SMP ++extern void __inquire_remote_apic(int apicid); ++#else /* CONFIG_SMP */ ++static inline void __inquire_remote_apic(int apicid) ++{ ++} ++#endif /* CONFIG_SMP */ ++ ++static inline void default_inquire_remote_apic(int apicid) ++{ ++ if (apic_verbosity >= APIC_DEBUG) ++ __inquire_remote_apic(apicid); ++} ++ + /* + * Basic functions accessing APICs. + */ +@@ -51,7 +75,14 @@ extern int disable_apic; + #define setup_secondary_clock setup_secondary_APIC_clock + #endif + ++#ifdef CONFIG_X86_VSMP + extern int is_vsmp_box(void); ++#else ++static inline int is_vsmp_box(void) ++{ ++ return 0; ++} ++#endif + extern void xapic_wait_icr_idle(void); + extern u32 safe_xapic_wait_icr_idle(void); + extern void xapic_icr_write(u32, u32); +@@ -71,6 +102,22 @@ static inline u32 native_apic_mem_read(u + return *((volatile u32 *)(APIC_BASE + reg)); + } + ++extern void native_apic_wait_icr_idle(void); ++extern u32 native_safe_apic_wait_icr_idle(void); ++extern void native_apic_icr_write(u32 low, u32 id); ++extern u64 native_apic_icr_read(void); ++ ++#ifdef CONFIG_X86_X2APIC ++/* ++ * Make previous memory operations globally visible before ++ * sending the IPI through x2apic wrmsr. We need a serializing instruction or ++ * mfence for this. 
++ */ ++static inline void x2apic_wrmsr_fence(void) ++{ ++ asm volatile("mfence" : : : "memory"); ++} ++ + static inline void native_apic_msr_write(u32 reg, u32 v) + { + if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR || +@@ -91,8 +138,32 @@ static inline u32 native_apic_msr_read(u + return low; + } + +-#ifndef CONFIG_X86_32 +-extern int x2apic; ++static inline void native_x2apic_wait_icr_idle(void) ++{ ++ /* no need to wait for icr idle in x2apic */ ++ return; ++} ++ ++static inline u32 native_safe_x2apic_wait_icr_idle(void) ++{ ++ /* no need to wait for icr idle in x2apic */ ++ return 0; ++} ++ ++static inline void native_x2apic_icr_write(u32 low, u32 id) ++{ ++ wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); ++} ++ ++static inline u64 native_x2apic_icr_read(void) ++{ ++ unsigned long val; ++ ++ rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); ++ return val; ++} ++ ++extern int x2apic, x2apic_phys; + extern void check_x2apic(void); + extern void enable_x2apic(void); + extern void enable_IR_x2apic(void); +@@ -110,30 +181,27 @@ static inline int x2apic_enabled(void) + return 0; + } + #else +-#define x2apic_enabled() 0 +-#endif +- +-struct apic_ops { +- u32 (*read)(u32 reg); +- void (*write)(u32 reg, u32 v); +- u64 (*icr_read)(void); +- void (*icr_write)(u32 low, u32 high); +- void (*wait_icr_idle)(void); +- u32 (*safe_wait_icr_idle)(void); +-}; ++static inline void check_x2apic(void) ++{ ++} ++static inline void enable_x2apic(void) ++{ ++} ++static inline void enable_IR_x2apic(void) ++{ ++} ++static inline int x2apic_enabled(void) ++{ ++ return 0; ++} + +-extern struct apic_ops *apic_ops; ++#define x2apic 0 + +-#define apic_read (apic_ops->read) +-#define apic_write (apic_ops->write) +-#define apic_icr_read (apic_ops->icr_read) +-#define apic_icr_write (apic_ops->icr_write) +-#define apic_wait_icr_idle (apic_ops->wait_icr_idle) +-#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle) ++#endif + + extern int get_physical_broadcast(void); + +-#ifdef CONFIG_X86_64 ++#ifdef CONFIG_X86_X2APIC + static inline void ack_x2APIC_irq(void) + { + /* Docs say use 0 for future compatibility */ +@@ -141,18 +209,6 @@ static inline void ack_x2APIC_irq(void) + } + #endif + +- +-static inline void ack_APIC_irq(void) +-{ +- /* +- * ack_APIC_irq() actually gets compiled as a single instruction +- * ... yummie. +- */ +- +- /* Docs say use 0 for future compatibility */ +- apic_write(APIC_EOI, 0); +-} +- + extern int lapic_get_maxlvt(void); + extern void clear_local_APIC(void); + extern void connect_bsp_APIC(void); +@@ -196,4 +252,329 @@ static inline void disable_local_APIC(vo + + #endif /* !CONFIG_X86_LOCAL_APIC */ + ++#ifdef CONFIG_X86_64 ++#define SET_APIC_ID(x) (apic->set_apic_id(x)) ++#else ++ ++#endif ++ ++/* ++ * Copyright 2004 James Cleverdon, IBM. ++ * Subject to the GNU Public License, v.2 ++ * ++ * Generic APIC sub-arch data struct. ++ * ++ * Hacked for x86-64 by James Cleverdon from i386 architecture code by ++ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and ++ * James Cleverdon. 
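With x2apic the ICR is a single 64-bit MSR: native_x2apic_icr_write() above packs the destination id into the high half and issues one wrmsr, and x2apic_wrmsr_fence() supplies the store fence that its comment asks for before the IPI write. A sketch of how an x2apic IPI path strings these together; demo_x2apic_send_ipi is a hypothetical wrapper, and APIC_DM_FIXED / APIC_DEST_PHYSICAL are assumed to come from <asm/apicdef.h>:

static void demo_x2apic_send_ipi(unsigned int apicid, unsigned int vector)
{
	unsigned long cfg = APIC_DM_FIXED | APIC_DEST_PHYSICAL | vector;

	x2apic_wrmsr_fence();			/* order earlier stores before the IPI */
	native_x2apic_icr_write(cfg, apicid);	/* one wrmsr: id in bits 63:32, cfg below */
}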
++ */ ++struct apic { ++ char *name; ++ ++ int (*probe)(void); ++ int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); ++ int (*apic_id_registered)(void); ++ ++ u32 irq_delivery_mode; ++ u32 irq_dest_mode; ++ ++ const struct cpumask *(*target_cpus)(void); ++ ++ int disable_esr; ++ ++ int dest_logical; ++ unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); ++ unsigned long (*check_apicid_present)(int apicid); ++ ++ void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); ++ void (*init_apic_ldr)(void); ++ ++ physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); ++ ++ void (*setup_apic_routing)(void); ++ int (*multi_timer_check)(int apic, int irq); ++ int (*apicid_to_node)(int logical_apicid); ++ int (*cpu_to_logical_apicid)(int cpu); ++ int (*cpu_present_to_apicid)(int mps_cpu); ++ physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); ++ void (*setup_portio_remap)(void); ++ int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); ++ void (*enable_apic_mode)(void); ++ int (*phys_pkg_id)(int cpuid_apic, int index_msb); ++ ++ /* ++ * When one of the next two hooks returns 1 the apic ++ * is switched to this. Essentially they are additional ++ * probe functions: ++ */ ++ int (*mps_oem_check)(struct mpc_table *mpc, char *oem, char *productid); ++ ++ unsigned int (*get_apic_id)(unsigned long x); ++ unsigned long (*set_apic_id)(unsigned int id); ++ unsigned long apic_id_mask; ++ ++ unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); ++ unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, ++ const struct cpumask *andmask); ++ ++ /* ipi */ ++ void (*send_IPI_mask)(const struct cpumask *mask, int vector); ++ void (*send_IPI_mask_allbutself)(const struct cpumask *mask, ++ int vector); ++ void (*send_IPI_allbutself)(int vector); ++ void (*send_IPI_all)(int vector); ++ void (*send_IPI_self)(int vector); ++ ++ /* wakeup_secondary_cpu */ ++ int (*wakeup_secondary_cpu)(int apicid, unsigned long start_eip); ++ ++ int trampoline_phys_low; ++ int trampoline_phys_high; ++ ++ void (*wait_for_init_deassert)(atomic_t *deassert); ++ void (*smp_callin_clear_local_apic)(void); ++ void (*inquire_remote_apic)(int apicid); ++ ++ /* apic ops */ ++ u32 (*read)(u32 reg); ++ void (*write)(u32 reg, u32 v); ++ u64 (*icr_read)(void); ++ void (*icr_write)(u32 low, u32 high); ++ void (*wait_icr_idle)(void); ++ u32 (*safe_wait_icr_idle)(void); ++}; ++ ++/* ++ * Pointer to the local APIC driver in use on this system (there's ++ * always just one such driver in use - the kernel decides via an ++ * early probing process which one it picks - and then sticks to it): ++ */ ++extern struct apic *apic; ++ ++/* ++ * APIC functionality to boot other CPUs - only used on SMP: ++ */ ++#ifdef CONFIG_SMP ++extern atomic_t init_deasserted; ++extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); ++#endif ++ ++static inline u32 apic_read(u32 reg) ++{ ++ return apic->read(reg); ++} ++ ++static inline void apic_write(u32 reg, u32 val) ++{ ++ apic->write(reg, val); ++} ++ ++static inline u64 apic_icr_read(void) ++{ ++ return apic->icr_read(); ++} ++ ++static inline void apic_icr_write(u32 low, u32 high) ++{ ++ apic->icr_write(low, high); ++} ++ ++static inline void apic_wait_icr_idle(void) ++{ ++ apic->wait_icr_idle(); ++} ++ ++static inline u32 safe_apic_wait_icr_idle(void) ++{ ++ return apic->safe_wait_icr_idle(); ++} ++ ++ ++static inline void ack_APIC_irq(void) ++{ ++#ifdef CONFIG_X86_LOCAL_APIC ++ /* ++ * ack_APIC_irq() actually gets 
compiled as a single instruction ++ * ... yummie. ++ */ ++ ++ /* Docs say use 0 for future compatibility */ ++ apic_write(APIC_EOI, 0); ++#endif ++} ++ ++static inline unsigned default_get_apic_id(unsigned long x) ++{ ++ unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); ++ ++ if (APIC_XAPIC(ver)) ++ return (x >> 24) & 0xFF; ++ else ++ return (x >> 24) & 0x0F; ++} ++ ++/* ++ * Warm reset vector default position: ++ */ ++#define DEFAULT_TRAMPOLINE_PHYS_LOW 0x467 ++#define DEFAULT_TRAMPOLINE_PHYS_HIGH 0x469 ++ ++#ifdef CONFIG_X86_64 ++extern struct apic apic_flat; ++extern struct apic apic_physflat; ++extern struct apic apic_x2apic_cluster; ++extern struct apic apic_x2apic_phys; ++extern int default_acpi_madt_oem_check(char *, char *); ++ ++extern void apic_send_IPI_self(int vector); ++ ++extern struct apic apic_x2apic_uv_x; ++DECLARE_PER_CPU(int, x2apic_extra_bits); ++ ++extern int default_cpu_present_to_apicid(int mps_cpu); ++extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); ++#endif ++ ++static inline void default_wait_for_init_deassert(atomic_t *deassert) ++{ ++ while (!atomic_read(deassert)) ++ cpu_relax(); ++ return; ++} ++ ++extern void generic_bigsmp_probe(void); ++ ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++ ++#include ++ ++#define APIC_DFR_VALUE (APIC_DFR_FLAT) ++ ++static inline const struct cpumask *default_target_cpus(void) ++{ ++#ifdef CONFIG_SMP ++ return cpu_online_mask; ++#else ++ return cpumask_of(0); ++#endif ++} ++ ++DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid); ++ ++ ++static inline unsigned int read_apic_id(void) ++{ ++ unsigned int reg; ++ ++ reg = apic_read(APIC_ID); ++ ++ return apic->get_apic_id(reg); ++} ++ ++extern void default_setup_apic_routing(void); ++ ++#ifdef CONFIG_X86_32 ++/* ++ * Set up the logical destination ID. ++ * ++ * Intel recommends to set DFR, LDR and TPR before enabling ++ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel ++ * document number 292116). So here it goes... 
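Everything that used to be split between genapic and apic_ops is now one struct apic ops table, and the global apic pointer chosen at probe time is the only way the rest of the kernel reaches it; apic_read(), read_apic_id() and friends above are thin wrappers over that pointer. A hypothetical, heavily trimmed driver instance, shown only to illustrate the shape; a real driver such as apic_flat fills in every field and is selected by the early probe code:

static struct apic apic_demo = {
	.name			= "demo",
	.irq_delivery_mode	= dest_Fixed,
	.irq_dest_mode		= 0,			/* physical destinations */
	.read			= native_apic_mem_read,
	.write			= native_apic_mem_write,
	.icr_read		= native_apic_icr_read,
	.icr_write		= native_apic_icr_write,
	.wait_icr_idle		= native_apic_wait_icr_idle,
	.safe_wait_icr_idle	= native_safe_apic_wait_icr_idle,
};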
++ */ ++extern void default_init_apic_ldr(void); ++ ++static inline int default_apic_id_registered(void) ++{ ++ return physid_isset(read_apic_id(), phys_cpu_present_map); ++} ++ ++static inline int default_phys_pkg_id(int cpuid_apic, int index_msb) ++{ ++ return cpuid_apic >> index_msb; ++} ++ ++extern int default_apicid_to_node(int logical_apicid); ++ ++#endif ++ ++static inline unsigned int ++default_cpu_mask_to_apicid(const struct cpumask *cpumask) ++{ ++ return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; ++} ++ ++static inline unsigned int ++default_cpu_mask_to_apicid_and(const struct cpumask *cpumask, ++ const struct cpumask *andmask) ++{ ++ unsigned long mask1 = cpumask_bits(cpumask)[0]; ++ unsigned long mask2 = cpumask_bits(andmask)[0]; ++ unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; ++ ++ return (unsigned int)(mask1 & mask2 & mask3); ++} ++ ++static inline unsigned long default_check_apicid_used(physid_mask_t bitmap, int apicid) ++{ ++ return physid_isset(apicid, bitmap); ++} ++ ++static inline unsigned long default_check_apicid_present(int bit) ++{ ++ return physid_isset(bit, phys_cpu_present_map); ++} ++ ++static inline physid_mask_t default_ioapic_phys_id_map(physid_mask_t phys_map) ++{ ++ return phys_map; ++} ++ ++/* Mapping from cpu number to logical apicid */ ++static inline int default_cpu_to_logical_apicid(int cpu) ++{ ++ return 1 << cpu; ++} ++ ++static inline int __default_cpu_present_to_apicid(int mps_cpu) ++{ ++ if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) ++ return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); ++ else ++ return BAD_APICID; ++} ++ ++static inline int ++__default_check_phys_apicid_present(int boot_cpu_physical_apicid) ++{ ++ return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); ++} ++ ++#ifdef CONFIG_X86_32 ++static inline int default_cpu_present_to_apicid(int mps_cpu) ++{ ++ return __default_cpu_present_to_apicid(mps_cpu); ++} ++ ++static inline int ++default_check_phys_apicid_present(int boot_cpu_physical_apicid) ++{ ++ return __default_check_phys_apicid_present(boot_cpu_physical_apicid); ++} ++#else ++extern int default_cpu_present_to_apicid(int mps_cpu); ++extern int default_check_phys_apicid_present(int boot_cpu_physical_apicid); ++#endif ++ ++static inline physid_mask_t default_apicid_to_cpu_present(int phys_apicid) ++{ ++ return physid_mask_of_physid(phys_apicid); ++} ++ ++#endif /* CONFIG_X86_LOCAL_APIC */ ++ ++#ifdef CONFIG_X86_32 ++extern u8 cpu_2_logical_apicid[NR_CPUS]; ++#endif ++ + #endif /* _ASM_X86_APIC_H */ +Index: linux-2.6-tip/arch/x86/include/asm/apicdef.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/apicdef.h ++++ linux-2.6-tip/arch/x86/include/asm/apicdef.h +@@ -53,6 +53,7 @@ + #define APIC_ESR_SENDILL 0x00020 + #define APIC_ESR_RECVILL 0x00040 + #define APIC_ESR_ILLREGA 0x00080 ++#define APIC_LVTCMCI 0x2f0 + #define APIC_ICR 0x300 + #define APIC_DEST_SELF 0x40000 + #define APIC_DEST_ALLINC 0x80000 +Index: linux-2.6-tip/arch/x86/include/asm/apicnum.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/apicnum.h +@@ -0,0 +1,12 @@ ++#ifndef _ASM_X86_APICNUM_H ++#define _ASM_X86_APICNUM_H ++ ++/* define MAX_IO_APICS */ ++#ifdef CONFIG_X86_32 ++# define MAX_IO_APICS 64 ++#else ++# define MAX_IO_APICS 128 ++# define MAX_LOCAL_APIC 32768 ++#endif ++ ++#endif /* _ASM_X86_APICNUM_H */ +Index: linux-2.6-tip/arch/x86/include/asm/apm.h 
+=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/apm.h +@@ -0,0 +1,73 @@ ++/* ++ * Machine specific APM BIOS functions for generic. ++ * Split out from apm.c by Osamu Tomita ++ */ ++ ++#ifndef _ASM_X86_MACH_DEFAULT_APM_H ++#define _ASM_X86_MACH_DEFAULT_APM_H ++ ++#ifdef APM_ZERO_SEGS ++# define APM_DO_ZERO_SEGS \ ++ "pushl %%ds\n\t" \ ++ "pushl %%es\n\t" \ ++ "xorl %%edx, %%edx\n\t" \ ++ "mov %%dx, %%ds\n\t" \ ++ "mov %%dx, %%es\n\t" \ ++ "mov %%dx, %%fs\n\t" \ ++ "mov %%dx, %%gs\n\t" ++# define APM_DO_POP_SEGS \ ++ "popl %%es\n\t" \ ++ "popl %%ds\n\t" ++#else ++# define APM_DO_ZERO_SEGS ++# define APM_DO_POP_SEGS ++#endif ++ ++static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, ++ u32 *eax, u32 *ebx, u32 *ecx, ++ u32 *edx, u32 *esi) ++{ ++ /* ++ * N.B. We do NOT need a cld after the BIOS call ++ * because we always save and restore the flags. ++ */ ++ __asm__ __volatile__(APM_DO_ZERO_SEGS ++ "pushl %%edi\n\t" ++ "pushl %%ebp\n\t" ++ "lcall *%%cs:apm_bios_entry\n\t" ++ "setc %%al\n\t" ++ "popl %%ebp\n\t" ++ "popl %%edi\n\t" ++ APM_DO_POP_SEGS ++ : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx), ++ "=S" (*esi) ++ : "a" (func), "b" (ebx_in), "c" (ecx_in) ++ : "memory", "cc"); ++} ++ ++static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, ++ u32 ecx_in, u32 *eax) ++{ ++ int cx, dx, si; ++ u8 error; ++ ++ /* ++ * N.B. We do NOT need a cld after the BIOS call ++ * because we always save and restore the flags. ++ */ ++ __asm__ __volatile__(APM_DO_ZERO_SEGS ++ "pushl %%edi\n\t" ++ "pushl %%ebp\n\t" ++ "lcall *%%cs:apm_bios_entry\n\t" ++ "setc %%bl\n\t" ++ "popl %%ebp\n\t" ++ "popl %%edi\n\t" ++ APM_DO_POP_SEGS ++ : "=a" (*eax), "=b" (error), "=c" (cx), "=d" (dx), ++ "=S" (si) ++ : "a" (func), "b" (ebx_in), "c" (ecx_in) ++ : "memory", "cc"); ++ return error; ++} ++ ++#endif /* _ASM_X86_MACH_DEFAULT_APM_H */ +Index: linux-2.6-tip/arch/x86/include/asm/arch_hooks.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/arch_hooks.h ++++ /dev/null +@@ -1,26 +0,0 @@ +-#ifndef _ASM_X86_ARCH_HOOKS_H +-#define _ASM_X86_ARCH_HOOKS_H +- +-#include +- +-/* +- * linux/include/asm/arch_hooks.h +- * +- * define the architecture specific hooks +- */ +- +-/* these aren't arch hooks, they are generic routines +- * that can be used by the hooks */ +-extern void init_ISA_irqs(void); +-extern irqreturn_t timer_interrupt(int irq, void *dev_id); +- +-/* these are the defined hooks */ +-extern void intr_init_hook(void); +-extern void pre_intr_init_hook(void); +-extern void pre_setup_arch_hook(void); +-extern void trap_init_hook(void); +-extern void pre_time_init_hook(void); +-extern void time_init_hook(void); +-extern void mca_nmi_hook(void); +- +-#endif /* _ASM_X86_ARCH_HOOKS_H */ +Index: linux-2.6-tip/arch/x86/include/asm/atomic_32.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/atomic_32.h ++++ linux-2.6-tip/arch/x86/include/asm/atomic_32.h +@@ -180,10 +180,10 @@ static inline int atomic_add_return(int + + #ifdef CONFIG_M386 + no_xadd: /* Legacy 386 processor */ +- local_irq_save(flags); ++ raw_local_irq_save(flags); + __i = atomic_read(v); + atomic_set(v, i + __i); +- local_irq_restore(flags); ++ raw_local_irq_restore(flags); + return i + __i; + #endif + } +@@ -247,5 +247,223 @@ static inline int atomic_add_unless(atom + #define smp_mb__before_atomic_inc() barrier() + #define 
smp_mb__after_atomic_inc() barrier() + ++/* An 64bit atomic type */ ++ ++typedef struct { ++ unsigned long long counter; ++} atomic64_t; ++ ++#define ATOMIC64_INIT(val) { (val) } ++ ++/** ++ * atomic64_read - read atomic64 variable ++ * @v: pointer of type atomic64_t ++ * ++ * Atomically reads the value of @v. ++ * Doesn't imply a read memory barrier. ++ */ ++#define __atomic64_read(ptr) ((ptr)->counter) ++ ++static inline unsigned long long ++cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new) ++{ ++ asm volatile( ++ ++ LOCK_PREFIX "cmpxchg8b (%[ptr])\n" ++ ++ : "=A" (old) ++ ++ : [ptr] "D" (ptr), ++ "A" (old), ++ "b" (ll_low(new)), ++ "c" (ll_high(new)) ++ ++ : "memory"); ++ ++ return old; ++} ++ ++static inline unsigned long long ++atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val, ++ unsigned long long new_val) ++{ ++ return cmpxchg8b(&ptr->counter, old_val, new_val); ++} ++ ++/** ++ * atomic64_set - set atomic64 variable ++ * @ptr: pointer to type atomic64_t ++ * @new_val: value to assign ++ * ++ * Atomically sets the value of @ptr to @new_val. ++ */ ++static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val) ++{ ++ unsigned long long old_val; ++ ++ do { ++ old_val = atomic_read(ptr); ++ } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); ++} ++ ++/** ++ * atomic64_read - read atomic64 variable ++ * @ptr: pointer to type atomic64_t ++ * ++ * Atomically reads the value of @ptr and returns it. ++ */ ++static inline unsigned long long atomic64_read(atomic64_t *ptr) ++{ ++ unsigned long long curr_val; ++ ++ do { ++ curr_val = __atomic64_read(ptr); ++ } while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val); ++ ++ return curr_val; ++} ++ ++/** ++ * atomic64_add_return - add and return ++ * @delta: integer value to add ++ * @ptr: pointer to type atomic64_t ++ * ++ * Atomically adds @delta to @ptr and returns @delta + *@ptr ++ */ ++static inline unsigned long long ++atomic64_add_return(unsigned long long delta, atomic64_t *ptr) ++{ ++ unsigned long long old_val, new_val; ++ ++ do { ++ old_val = atomic_read(ptr); ++ new_val = old_val + delta; ++ ++ } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); ++ ++ return new_val; ++} ++ ++static inline long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr) ++{ ++ return atomic64_add_return(-delta, ptr); ++} ++ ++static inline long atomic64_inc_return(atomic64_t *ptr) ++{ ++ return atomic64_add_return(1, ptr); ++} ++ ++static inline long atomic64_dec_return(atomic64_t *ptr) ++{ ++ return atomic64_sub_return(1, ptr); ++} ++ ++/** ++ * atomic64_add - add integer to atomic64 variable ++ * @delta: integer value to add ++ * @ptr: pointer to type atomic64_t ++ * ++ * Atomically adds @delta to @ptr. ++ */ ++static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr) ++{ ++ atomic64_add_return(delta, ptr); ++} ++ ++/** ++ * atomic64_sub - subtract the atomic64 variable ++ * @delta: integer value to subtract ++ * @ptr: pointer to type atomic64_t ++ * ++ * Atomically subtracts @delta from @ptr. ++ */ ++static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr) ++{ ++ atomic64_add(-delta, ptr); ++} ++ ++/** ++ * atomic64_sub_and_test - subtract value from variable and test result ++ * @delta: integer value to subtract ++ * @ptr: pointer to type atomic64_t ++ * ++ * Atomically subtracts @delta from @ptr and returns ++ * true if the result is zero, or false for all ++ * other cases. 
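The atomic64_t type being added here gives 32-bit x86 a lock-free 64-bit counter by routing every operation through a cmpxchg8b retry loop. A short usage sketch built only on the helpers defined above; demo_bytes and the two functions are illustrative, not part of the patch:

static atomic64_t demo_bytes = ATOMIC64_INIT(0);

static void demo_account(unsigned long long len)
{
	atomic64_add(len, &demo_bytes);		/* cmpxchg8b loop under the hood */
}

static unsigned long long demo_snapshot(void)
{
	return atomic64_read(&demo_bytes);	/* coherent 64-bit read, no lock */
}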
++ */ ++static inline int ++atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr) ++{ ++ unsigned long long old_val = atomic64_sub_return(delta, ptr); ++ ++ return old_val == 0; ++} ++ ++/** ++ * atomic64_inc - increment atomic64 variable ++ * @ptr: pointer to type atomic64_t ++ * ++ * Atomically increments @ptr by 1. ++ */ ++static inline void atomic64_inc(atomic64_t *ptr) ++{ ++ atomic64_add(1, ptr); ++} ++ ++/** ++ * atomic64_dec - decrement atomic64 variable ++ * @ptr: pointer to type atomic64_t ++ * ++ * Atomically decrements @ptr by 1. ++ */ ++static inline void atomic64_dec(atomic64_t *ptr) ++{ ++ atomic64_sub(1, ptr); ++} ++ ++/** ++ * atomic64_dec_and_test - decrement and test ++ * @ptr: pointer to type atomic64_t ++ * ++ * Atomically decrements @ptr by 1 and ++ * returns true if the result is 0, or false for all other ++ * cases. ++ */ ++static inline int atomic64_dec_and_test(atomic64_t *ptr) ++{ ++ return atomic64_sub_and_test(1, ptr); ++} ++ ++/** ++ * atomic64_inc_and_test - increment and test ++ * @ptr: pointer to type atomic64_t ++ * ++ * Atomically increments @ptr by 1 ++ * and returns true if the result is zero, or false for all ++ * other cases. ++ */ ++static inline int atomic64_inc_and_test(atomic64_t *ptr) ++{ ++ return atomic64_sub_and_test(-1, ptr); ++} ++ ++/** ++ * atomic64_add_negative - add and test if negative ++ * @delta: integer value to add ++ * @ptr: pointer to type atomic64_t ++ * ++ * Atomically adds @delta to @ptr and returns true ++ * if the result is negative, or false when ++ * result is greater than or equal to zero. ++ */ ++static inline int ++atomic64_add_negative(unsigned long long delta, atomic64_t *ptr) ++{ ++ long long old_val = atomic64_add_return(delta, ptr); ++ ++ return old_val < 0; ++} ++ + #include + #endif /* _ASM_X86_ATOMIC_32_H */ +Index: linux-2.6-tip/arch/x86/include/asm/bigsmp/apic.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/bigsmp/apic.h ++++ /dev/null +@@ -1,155 +0,0 @@ +-#ifndef __ASM_MACH_APIC_H +-#define __ASM_MACH_APIC_H +- +-#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) +-#define esr_disable (1) +- +-static inline int apic_id_registered(void) +-{ +- return (1); +-} +- +-static inline const cpumask_t *target_cpus(void) +-{ +-#ifdef CONFIG_SMP +- return &cpu_online_map; +-#else +- return &cpumask_of_cpu(0); +-#endif +-} +- +-#undef APIC_DEST_LOGICAL +-#define APIC_DEST_LOGICAL 0 +-#define APIC_DFR_VALUE (APIC_DFR_FLAT) +-#define INT_DELIVERY_MODE (dest_Fixed) +-#define INT_DEST_MODE (0) /* phys delivery to target proc */ +-#define NO_BALANCE_IRQ (0) +- +-static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +-{ +- return (0); +-} +- +-static inline unsigned long check_apicid_present(int bit) +-{ +- return (1); +-} +- +-static inline unsigned long calculate_ldr(int cpu) +-{ +- unsigned long val, id; +- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; +- id = xapic_phys_to_log_apicid(cpu); +- val |= SET_APIC_LOGICAL_ID(id); +- return val; +-} +- +-/* +- * Set up the logical destination ID. +- * +- * Intel recommends to set DFR, LDR and TPR before enabling +- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel +- * document number 292116). So here it goes... 
+- */ +-static inline void init_apic_ldr(void) +-{ +- unsigned long val; +- int cpu = smp_processor_id(); +- +- apic_write(APIC_DFR, APIC_DFR_VALUE); +- val = calculate_ldr(cpu); +- apic_write(APIC_LDR, val); +-} +- +-static inline void setup_apic_routing(void) +-{ +- printk("Enabling APIC mode: %s. Using %d I/O APICs\n", +- "Physflat", nr_ioapics); +-} +- +-static inline int multi_timer_check(int apic, int irq) +-{ +- return (0); +-} +- +-static inline int apicid_to_node(int logical_apicid) +-{ +- return apicid_2_node[hard_smp_processor_id()]; +-} +- +-static inline int cpu_present_to_apicid(int mps_cpu) +-{ +- if (mps_cpu < nr_cpu_ids) +- return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); +- +- return BAD_APICID; +-} +- +-static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) +-{ +- return physid_mask_of_physid(phys_apicid); +-} +- +-extern u8 cpu_2_logical_apicid[]; +-/* Mapping from cpu number to logical apicid */ +-static inline int cpu_to_logical_apicid(int cpu) +-{ +- if (cpu >= nr_cpu_ids) +- return BAD_APICID; +- return cpu_physical_id(cpu); +-} +- +-static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +-{ +- /* For clustered we don't have a good way to do this yet - hack */ +- return physids_promote(0xFFL); +-} +- +-static inline void setup_portio_remap(void) +-{ +-} +- +-static inline void enable_apic_mode(void) +-{ +-} +- +-static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +-{ +- return (1); +-} +- +-/* As we are using single CPU as destination, pick only one CPU here */ +-static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +-{ +- int cpu; +- int apicid; +- +- cpu = first_cpu(*cpumask); +- apicid = cpu_to_logical_apicid(cpu); +- return apicid; +-} +- +-static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, +- const struct cpumask *andmask) +-{ +- int cpu; +- +- /* +- * We're using fixed IRQ delivery, can only return one phys APIC ID. +- * May as well be the first. 
+- */ +- for_each_cpu_and(cpu, cpumask, andmask) +- if (cpumask_test_cpu(cpu, cpu_online_mask)) +- break; +- if (cpu < nr_cpu_ids) +- return cpu_to_logical_apicid(cpu); +- +- return BAD_APICID; +-} +- +-static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +-{ +- return cpuid_apic >> index_msb; +-} +- +-#endif /* __ASM_MACH_APIC_H */ +Index: linux-2.6-tip/arch/x86/include/asm/bigsmp/apicdef.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/bigsmp/apicdef.h ++++ /dev/null +@@ -1,13 +0,0 @@ +-#ifndef __ASM_MACH_APICDEF_H +-#define __ASM_MACH_APICDEF_H +- +-#define APIC_ID_MASK (0xFF<<24) +- +-static inline unsigned get_apic_id(unsigned long x) +-{ +- return (((x)>>24)&0xFF); +-} +- +-#define GET_APIC_ID(x) get_apic_id(x) +- +-#endif +Index: linux-2.6-tip/arch/x86/include/asm/bigsmp/ipi.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/bigsmp/ipi.h ++++ /dev/null +@@ -1,22 +0,0 @@ +-#ifndef __ASM_MACH_IPI_H +-#define __ASM_MACH_IPI_H +- +-void send_IPI_mask_sequence(const struct cpumask *mask, int vector); +-void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +- +-static inline void send_IPI_mask(const struct cpumask *mask, int vector) +-{ +- send_IPI_mask_sequence(mask, vector); +-} +- +-static inline void send_IPI_allbutself(int vector) +-{ +- send_IPI_mask_allbutself(cpu_online_mask, vector); +-} +- +-static inline void send_IPI_all(int vector) +-{ +- send_IPI_mask(cpu_online_mask, vector); +-} +- +-#endif /* __ASM_MACH_IPI_H */ +Index: linux-2.6-tip/arch/x86/include/asm/boot.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/boot.h ++++ linux-2.6-tip/arch/x86/include/asm/boot.h +@@ -1,26 +1,36 @@ + #ifndef _ASM_X86_BOOT_H + #define _ASM_X86_BOOT_H + +-/* Don't touch these, unless you really know what you're doing. */ +-#define DEF_SYSSEG 0x1000 +-#define DEF_SYSSIZE 0x7F00 +- + /* Internal svga startup constants */ + #define NORMAL_VGA 0xffff /* 80x25 mode */ + #define EXTENDED_VGA 0xfffe /* 80x50 mode */ + #define ASK_VGA 0xfffd /* ask for it at bootup */ + ++#ifdef __KERNEL__ ++ + /* Physical address where kernel should be loaded. */ + #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ + + (CONFIG_PHYSICAL_ALIGN - 1)) \ + & ~(CONFIG_PHYSICAL_ALIGN - 1)) + ++#ifdef CONFIG_KERNEL_BZIP2 ++#define BOOT_HEAP_SIZE 0x400000 ++#else /* !CONFIG_KERNEL_BZIP2 */ ++ + #ifdef CONFIG_X86_64 + #define BOOT_HEAP_SIZE 0x7000 +-#define BOOT_STACK_SIZE 0x4000 + #else + #define BOOT_HEAP_SIZE 0x4000 ++#endif ++ ++#endif /* !CONFIG_KERNEL_BZIP2 */ ++ ++#ifdef CONFIG_X86_64 ++#define BOOT_STACK_SIZE 0x4000 ++#else + #define BOOT_STACK_SIZE 0x1000 + #endif + ++#endif /* __KERNEL__ */ ++ + #endif /* _ASM_X86_BOOT_H */ +Index: linux-2.6-tip/arch/x86/include/asm/cacheflush.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/cacheflush.h ++++ linux-2.6-tip/arch/x86/include/asm/cacheflush.h +@@ -5,24 +5,43 @@ + #include + + /* Caches aren't brain-dead on the intel. 
*/ +-#define flush_cache_all() do { } while (0) +-#define flush_cache_mm(mm) do { } while (0) +-#define flush_cache_dup_mm(mm) do { } while (0) +-#define flush_cache_range(vma, start, end) do { } while (0) +-#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +-#define flush_dcache_page(page) do { } while (0) +-#define flush_dcache_mmap_lock(mapping) do { } while (0) +-#define flush_dcache_mmap_unlock(mapping) do { } while (0) +-#define flush_icache_range(start, end) do { } while (0) +-#define flush_icache_page(vma, pg) do { } while (0) +-#define flush_icache_user_range(vma, pg, adr, len) do { } while (0) +-#define flush_cache_vmap(start, end) do { } while (0) +-#define flush_cache_vunmap(start, end) do { } while (0) +- +-#define copy_to_user_page(vma, page, vaddr, dst, src, len) \ +- memcpy((dst), (src), (len)) +-#define copy_from_user_page(vma, page, vaddr, dst, src, len) \ +- memcpy((dst), (src), (len)) ++static inline void flush_cache_all(void) { } ++static inline void flush_cache_mm(struct mm_struct *mm) { } ++static inline void flush_cache_dup_mm(struct mm_struct *mm) { } ++static inline void flush_cache_range(struct vm_area_struct *vma, ++ unsigned long start, unsigned long end) { } ++static inline void flush_cache_page(struct vm_area_struct *vma, ++ unsigned long vmaddr, unsigned long pfn) { } ++static inline void flush_dcache_page(struct page *page) { } ++static inline void flush_dcache_mmap_lock(struct address_space *mapping) { } ++static inline void flush_dcache_mmap_unlock(struct address_space *mapping) { } ++static inline void flush_icache_range(unsigned long start, ++ unsigned long end) { } ++static inline void flush_icache_page(struct vm_area_struct *vma, ++ struct page *page) { } ++static inline void flush_icache_user_range(struct vm_area_struct *vma, ++ struct page *page, ++ unsigned long addr, ++ unsigned long len) { } ++static inline void flush_cache_vmap(unsigned long start, unsigned long end) { } ++static inline void flush_cache_vunmap(unsigned long start, ++ unsigned long end) { } ++ ++static inline void copy_to_user_page(struct vm_area_struct *vma, ++ struct page *page, unsigned long vaddr, ++ void *dst, const void *src, ++ unsigned long len) ++{ ++ memcpy(dst, src, len); ++} ++ ++static inline void copy_from_user_page(struct vm_area_struct *vma, ++ struct page *page, unsigned long vaddr, ++ void *dst, const void *src, ++ unsigned long len) ++{ ++ memcpy(dst, src, len); ++} + + #define PG_non_WB PG_arch_1 + PAGEFLAG(NonWB, non_WB) +@@ -71,6 +90,9 @@ int set_memory_4k(unsigned long addr, in + int set_memory_array_uc(unsigned long *addr, int addrinarray); + int set_memory_array_wb(unsigned long *addr, int addrinarray); + ++int set_pages_array_uc(struct page **pages, int addrinarray); ++int set_pages_array_wb(struct page **pages, int addrinarray); ++ + /* + * For legacy compatibility with the old APIs, a few functions + * are provided that work on a "struct page". 
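The cacheflush.h hunk above swaps the empty do { } while (0) macros for empty static inline functions; the generated code is identical, but the inline versions still type-check their arguments instead of silently accepting anything. A generic before/after illustration (the demo_* names are hypothetical):

#define demo_flush_macro(page)	do { } while (0)	/* accepts any argument at all */

static inline void demo_flush_inline(struct page *page)	/* argument must be a struct page * */
{
}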
+@@ -104,6 +126,11 @@ void clflush_cache_range(void *addr, uns + #ifdef CONFIG_DEBUG_RODATA + void mark_rodata_ro(void); + extern const int rodata_test_data; ++void set_kernel_text_rw(void); ++void set_kernel_text_ro(void); ++#else ++static inline void set_kernel_text_rw(void) { } ++static inline void set_kernel_text_ro(void) { } + #endif + + #ifdef CONFIG_DEBUG_RODATA_TEST +Index: linux-2.6-tip/arch/x86/include/asm/calling.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/calling.h ++++ linux-2.6-tip/arch/x86/include/asm/calling.h +@@ -1,5 +1,55 @@ + /* +- * Some macros to handle stack frames in assembly. ++ ++ x86 function call convention, 64-bit: ++ ------------------------------------- ++ arguments | callee-saved | extra caller-saved | return ++ [callee-clobbered] | | [callee-clobbered] | ++ --------------------------------------------------------------------------- ++ rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11 | rax, rdx [**] ++ ++ ( rsp is obviously invariant across normal function calls. (gcc can 'merge' ++ functions when it sees tail-call optimization possibilities) rflags is ++ clobbered. Leftover arguments are passed over the stack frame.) ++ ++ [*] In the frame-pointers case rbp is fixed to the stack frame. ++ ++ [**] for struct return values wider than 64 bits the return convention is a ++ bit more complex: up to 128 bits width we return small structures ++ straight in rax, rdx. For structures larger than that (3 words or ++ larger) the caller puts a pointer to an on-stack return struct ++ [allocated in the caller's stack frame] into the first argument - i.e. ++ into rdi. All other arguments shift up by one in this case. ++ Fortunately this case is rare in the kernel. ++ ++For 32-bit we have the following conventions - kernel is built with ++-mregparm=3 and -freg-struct-return: ++ ++ x86 function calling convention, 32-bit: ++ ---------------------------------------- ++ arguments | callee-saved | extra caller-saved | return ++ [callee-clobbered] | | [callee-clobbered] | ++ ------------------------------------------------------------------------- ++ eax edx ecx | ebx edi esi ebp [*] | | eax, edx [**] ++ ++ ( here too esp is obviously invariant across normal function calls. eflags ++ is clobbered. Leftover arguments are passed over the stack frame. ) ++ ++ [*] In the frame-pointers case ebp is fixed to the stack frame. ++ ++ [**] We build with -freg-struct-return, which on 32-bit means similar ++ semantics as on 64-bit: edx can be used for a second return value ++ (i.e. covering integer and structure sizes up to 64 bits) - after that ++ it gets more complex and more expensive: 3-word or larger struct returns ++ get done in the caller's frame and the pointer to the return struct goes ++ into regparm0, i.e. eax - the other arguments shift up and the ++ function's register parameters degenerate to regparm=2 in essence. ++ ++*/ ++ ++ ++/* ++ * 64-bit system call stack frame layout defines and helpers, ++ * for assembly code: + */ + + #define R15 0 +@@ -9,7 +59,7 @@ + #define RBP 32 + #define RBX 40 + +-/* arguments: interrupts/non tracing syscalls only save upto here*/ ++/* arguments: interrupts/non tracing syscalls only save up to here: */ + #define R11 48 + #define R10 56 + #define R9 64 +@@ -22,7 +72,7 @@ + #define ORIG_RAX 120 /* + error_code */ + /* end of arguments */ + +-/* cpu exception frame or undefined in case of fast syscall. 
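The block comment added to calling.h above documents the register conventions the kernel is built with; a concrete reading of it, as an illustration only and not code from the patch:

static long demo_sum3(long a, long b, long c)
{
	/*
	 * 64-bit: a in %rdi, b in %rsi, c in %rdx, result in %rax;
	 * %rbx, %rbp, %r12-%r15 must be preserved, %r10/%r11 may be clobbered.
	 * 32-bit (-mregparm=3): a in %eax, b in %edx, c in %ecx, result in %eax;
	 * %ebx, %edi, %esi, %ebp are preserved.
	 */
	return a + b + c;
}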
*/ ++/* cpu exception frame or undefined in case of fast syscall: */ + #define RIP 128 + #define CS 136 + #define EFLAGS 144 +Index: linux-2.6-tip/arch/x86/include/asm/cpu.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/cpu.h ++++ linux-2.6-tip/arch/x86/include/asm/cpu.h +@@ -7,6 +7,20 @@ + #include + #include + ++#ifdef CONFIG_SMP ++ ++extern void prefill_possible_map(void); ++ ++#else /* CONFIG_SMP */ ++ ++static inline void prefill_possible_map(void) {} ++ ++#define cpu_physical_id(cpu) boot_cpu_physical_apicid ++#define safe_smp_processor_id() 0 ++#define stack_smp_processor_id() 0 ++ ++#endif /* CONFIG_SMP */ ++ + struct x86_cpu { + struct cpu cpu; + }; +@@ -17,4 +31,7 @@ extern void arch_unregister_cpu(int); + #endif + + DECLARE_PER_CPU(int, cpu_state); ++ ++extern unsigned int boot_cpu_id; ++ + #endif /* _ASM_X86_CPU_H */ +Index: linux-2.6-tip/arch/x86/include/asm/cpu_debug.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/cpu_debug.h +@@ -0,0 +1,226 @@ ++#ifndef _ASM_X86_CPU_DEBUG_H ++#define _ASM_X86_CPU_DEBUG_H ++ ++/* ++ * CPU x86 architecture debug ++ * ++ * Copyright(C) 2009 Jaswinder Singh Rajput ++ */ ++ ++/* Register flags */ ++enum cpu_debug_bit { ++/* Model Specific Registers (MSRs) */ ++ CPU_MC_BIT, /* Machine Check */ ++ CPU_MONITOR_BIT, /* Monitor */ ++ CPU_TIME_BIT, /* Time */ ++ CPU_PMC_BIT, /* Performance Monitor */ ++ CPU_PLATFORM_BIT, /* Platform */ ++ CPU_APIC_BIT, /* APIC */ ++ CPU_POWERON_BIT, /* Power-on */ ++ CPU_CONTROL_BIT, /* Control */ ++ CPU_FEATURES_BIT, /* Features control */ ++ CPU_LBRANCH_BIT, /* Last Branch */ ++ CPU_BIOS_BIT, /* BIOS */ ++ CPU_FREQ_BIT, /* Frequency */ ++ CPU_MTTR_BIT, /* MTRR */ ++ CPU_PERF_BIT, /* Performance */ ++ CPU_CACHE_BIT, /* Cache */ ++ CPU_SYSENTER_BIT, /* Sysenter */ ++ CPU_THERM_BIT, /* Thermal */ ++ CPU_MISC_BIT, /* Miscellaneous */ ++ CPU_DEBUG_BIT, /* Debug */ ++ CPU_PAT_BIT, /* PAT */ ++ CPU_VMX_BIT, /* VMX */ ++ CPU_CALL_BIT, /* System Call */ ++ CPU_BASE_BIT, /* BASE Address */ ++ CPU_VER_BIT, /* Version ID */ ++ CPU_CONF_BIT, /* Configuration */ ++ CPU_SMM_BIT, /* System mgmt mode */ ++ CPU_SVM_BIT, /*Secure Virtual Machine*/ ++ CPU_OSVM_BIT, /* OS-Visible Workaround*/ ++/* Standard Registers */ ++ CPU_TSS_BIT, /* Task Stack Segment */ ++ CPU_CR_BIT, /* Control Registers */ ++ CPU_DT_BIT, /* Descriptor Table */ ++/* End of Registers flags */ ++ CPU_REG_ALL_BIT, /* Select all Registers */ ++}; ++ ++#define CPU_REG_ALL (~0) /* Select all Registers */ ++ ++#define CPU_MC (1 << CPU_MC_BIT) ++#define CPU_MONITOR (1 << CPU_MONITOR_BIT) ++#define CPU_TIME (1 << CPU_TIME_BIT) ++#define CPU_PMC (1 << CPU_PMC_BIT) ++#define CPU_PLATFORM (1 << CPU_PLATFORM_BIT) ++#define CPU_APIC (1 << CPU_APIC_BIT) ++#define CPU_POWERON (1 << CPU_POWERON_BIT) ++#define CPU_CONTROL (1 << CPU_CONTROL_BIT) ++#define CPU_FEATURES (1 << CPU_FEATURES_BIT) ++#define CPU_LBRANCH (1 << CPU_LBRANCH_BIT) ++#define CPU_BIOS (1 << CPU_BIOS_BIT) ++#define CPU_FREQ (1 << CPU_FREQ_BIT) ++#define CPU_MTRR (1 << CPU_MTTR_BIT) ++#define CPU_PERF (1 << CPU_PERF_BIT) ++#define CPU_CACHE (1 << CPU_CACHE_BIT) ++#define CPU_SYSENTER (1 << CPU_SYSENTER_BIT) ++#define CPU_THERM (1 << CPU_THERM_BIT) ++#define CPU_MISC (1 << CPU_MISC_BIT) ++#define CPU_DEBUG (1 << CPU_DEBUG_BIT) ++#define CPU_PAT (1 << CPU_PAT_BIT) ++#define CPU_VMX (1 << CPU_VMX_BIT) ++#define CPU_CALL (1 << CPU_CALL_BIT) ++#define CPU_BASE (1 << 
CPU_BASE_BIT) ++#define CPU_VER (1 << CPU_VER_BIT) ++#define CPU_CONF (1 << CPU_CONF_BIT) ++#define CPU_SMM (1 << CPU_SMM_BIT) ++#define CPU_SVM (1 << CPU_SVM_BIT) ++#define CPU_OSVM (1 << CPU_OSVM_BIT) ++#define CPU_TSS (1 << CPU_TSS_BIT) ++#define CPU_CR (1 << CPU_CR_BIT) ++#define CPU_DT (1 << CPU_DT_BIT) ++ ++/* Register file flags */ ++enum cpu_file_bit { ++ CPU_INDEX_BIT, /* index */ ++ CPU_VALUE_BIT, /* value */ ++}; ++ ++#define CPU_FILE_VALUE (1 << CPU_VALUE_BIT) ++ ++/* ++ * DisplayFamily_DisplayModel Processor Families/Processor Number Series ++ * -------------------------- ------------------------------------------ ++ * 05_01, 05_02, 05_04 Pentium, Pentium with MMX ++ * ++ * 06_01 Pentium Pro ++ * 06_03, 06_05 Pentium II Xeon, Pentium II ++ * 06_07, 06_08, 06_0A, 06_0B Pentium III Xeon, Pentum III ++ * ++ * 06_09, 060D Pentium M ++ * ++ * 06_0E Core Duo, Core Solo ++ * ++ * 06_0F Xeon 3000, 3200, 5100, 5300, 7300 series, ++ * Core 2 Quad, Core 2 Extreme, Core 2 Duo, ++ * Pentium dual-core ++ * 06_17 Xeon 5200, 5400 series, Core 2 Quad Q9650 ++ * ++ * 06_1C Atom ++ * ++ * 0F_00, 0F_01, 0F_02 Xeon, Xeon MP, Pentium 4 ++ * 0F_03, 0F_04 Xeon, Xeon MP, Pentium 4, Pentium D ++ * ++ * 0F_06 Xeon 7100, 5000 Series, Xeon MP, ++ * Pentium 4, Pentium D ++ */ ++ ++/* Register processors bits */ ++enum cpu_processor_bit { ++ CPU_NONE, ++/* Intel */ ++ CPU_INTEL_PENTIUM_BIT, ++ CPU_INTEL_P6_BIT, ++ CPU_INTEL_PENTIUM_M_BIT, ++ CPU_INTEL_CORE_BIT, ++ CPU_INTEL_CORE2_BIT, ++ CPU_INTEL_ATOM_BIT, ++ CPU_INTEL_XEON_P4_BIT, ++ CPU_INTEL_XEON_MP_BIT, ++/* AMD */ ++ CPU_AMD_K6_BIT, ++ CPU_AMD_K7_BIT, ++ CPU_AMD_K8_BIT, ++ CPU_AMD_0F_BIT, ++ CPU_AMD_10_BIT, ++ CPU_AMD_11_BIT, ++}; ++ ++#define CPU_INTEL_PENTIUM (1 << CPU_INTEL_PENTIUM_BIT) ++#define CPU_INTEL_P6 (1 << CPU_INTEL_P6_BIT) ++#define CPU_INTEL_PENTIUM_M (1 << CPU_INTEL_PENTIUM_M_BIT) ++#define CPU_INTEL_CORE (1 << CPU_INTEL_CORE_BIT) ++#define CPU_INTEL_CORE2 (1 << CPU_INTEL_CORE2_BIT) ++#define CPU_INTEL_ATOM (1 << CPU_INTEL_ATOM_BIT) ++#define CPU_INTEL_XEON_P4 (1 << CPU_INTEL_XEON_P4_BIT) ++#define CPU_INTEL_XEON_MP (1 << CPU_INTEL_XEON_MP_BIT) ++ ++#define CPU_INTEL_PX (CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M) ++#define CPU_INTEL_COREX (CPU_INTEL_CORE | CPU_INTEL_CORE2) ++#define CPU_INTEL_XEON (CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP) ++#define CPU_CO_AT (CPU_INTEL_CORE | CPU_INTEL_ATOM) ++#define CPU_C2_AT (CPU_INTEL_CORE2 | CPU_INTEL_ATOM) ++#define CPU_CX_AT (CPU_INTEL_COREX | CPU_INTEL_ATOM) ++#define CPU_CX_XE (CPU_INTEL_COREX | CPU_INTEL_XEON) ++#define CPU_P6_XE (CPU_INTEL_P6 | CPU_INTEL_XEON) ++#define CPU_PM_CO_AT (CPU_INTEL_PENTIUM_M | CPU_CO_AT) ++#define CPU_C2_AT_XE (CPU_C2_AT | CPU_INTEL_XEON) ++#define CPU_CX_AT_XE (CPU_CX_AT | CPU_INTEL_XEON) ++#define CPU_P6_CX_AT (CPU_INTEL_P6 | CPU_CX_AT) ++#define CPU_P6_CX_XE (CPU_P6_XE | CPU_INTEL_COREX) ++#define CPU_P6_CX_AT_XE (CPU_INTEL_P6 | CPU_CX_AT_XE) ++#define CPU_PM_CX_AT_XE (CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE) ++#define CPU_PM_CX_AT (CPU_INTEL_PENTIUM_M | CPU_CX_AT) ++#define CPU_PM_CX_XE (CPU_INTEL_PENTIUM_M | CPU_CX_XE) ++#define CPU_PX_CX_AT (CPU_INTEL_PX | CPU_CX_AT) ++#define CPU_PX_CX_AT_XE (CPU_INTEL_PX | CPU_CX_AT_XE) ++ ++/* Select all supported Intel CPUs */ ++#define CPU_INTEL_ALL (CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE) ++ ++#define CPU_AMD_K6 (1 << CPU_AMD_K6_BIT) ++#define CPU_AMD_K7 (1 << CPU_AMD_K7_BIT) ++#define CPU_AMD_K8 (1 << CPU_AMD_K8_BIT) ++#define CPU_AMD_0F (1 << CPU_AMD_0F_BIT) ++#define CPU_AMD_10 (1 << CPU_AMD_10_BIT) ++#define CPU_AMD_11 (1 << 
CPU_AMD_11_BIT) ++ ++#define CPU_K10_PLUS (CPU_AMD_10 | CPU_AMD_11) ++#define CPU_K0F_PLUS (CPU_AMD_0F | CPU_K10_PLUS) ++#define CPU_K8_PLUS (CPU_AMD_K8 | CPU_K0F_PLUS) ++#define CPU_K7_PLUS (CPU_AMD_K7 | CPU_K8_PLUS) ++ ++/* Select all supported AMD CPUs */ ++#define CPU_AMD_ALL (CPU_AMD_K6 | CPU_K7_PLUS) ++ ++/* Select all supported CPUs */ ++#define CPU_ALL (CPU_INTEL_ALL | CPU_AMD_ALL) ++ ++#define MAX_CPU_FILES 512 ++ ++struct cpu_private { ++ unsigned cpu; ++ unsigned type; ++ unsigned reg; ++ unsigned file; ++}; ++ ++struct cpu_debug_base { ++ char *name; /* Register name */ ++ unsigned flag; /* Register flag */ ++ unsigned write; /* Register write flag */ ++}; ++ ++/* ++ * Currently it looks similar to cpu_debug_base but once we add more files ++ * cpu_file_base will go in different direction ++ */ ++struct cpu_file_base { ++ char *name; /* Register file name */ ++ unsigned flag; /* Register file flag */ ++ unsigned write; /* Register write flag */ ++}; ++ ++struct cpu_cpuX_base { ++ struct dentry *dentry; /* Register dentry */ ++ int init; /* Register index file */ ++}; ++ ++struct cpu_debug_range { ++ unsigned min; /* Register range min */ ++ unsigned max; /* Register range max */ ++ unsigned flag; /* Supported flags */ ++ unsigned model; /* Supported models */ ++}; ++ ++#endif /* _ASM_X86_CPU_DEBUG_H */ +Index: linux-2.6-tip/arch/x86/include/asm/cpumask.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/cpumask.h +@@ -0,0 +1,14 @@ ++#ifndef _ASM_X86_CPUMASK_H ++#define _ASM_X86_CPUMASK_H ++#ifndef __ASSEMBLY__ ++#include ++ ++extern cpumask_var_t cpu_callin_mask; ++extern cpumask_var_t cpu_callout_mask; ++extern cpumask_var_t cpu_initialized_mask; ++extern cpumask_var_t cpu_sibling_setup_mask; ++ ++extern void setup_cpu_local_masks(void); ++ ++#endif /* __ASSEMBLY__ */ ++#endif /* _ASM_X86_CPUMASK_H */ +Index: linux-2.6-tip/arch/x86/include/asm/current.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/current.h ++++ linux-2.6-tip/arch/x86/include/asm/current.h +@@ -1,39 +1,21 @@ + #ifndef _ASM_X86_CURRENT_H + #define _ASM_X86_CURRENT_H + +-#ifdef CONFIG_X86_32 + #include + #include + ++#ifndef __ASSEMBLY__ + struct task_struct; + + DECLARE_PER_CPU(struct task_struct *, current_task); +-static __always_inline struct task_struct *get_current(void) +-{ +- return x86_read_percpu(current_task); +-} +- +-#else /* X86_32 */ +- +-#ifndef __ASSEMBLY__ +-#include +- +-struct task_struct; + + static __always_inline struct task_struct *get_current(void) + { +- return read_pda(pcurrent); ++ return percpu_read(current_task); + } + +-#else /* __ASSEMBLY__ */ +- +-#include +-#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg ++#define current get_current() + + #endif /* __ASSEMBLY__ */ + +-#endif /* X86_32 */ +- +-#define current get_current() +- + #endif /* _ASM_X86_CURRENT_H */ +Index: linux-2.6-tip/arch/x86/include/asm/desc.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/desc.h ++++ linux-2.6-tip/arch/x86/include/asm/desc.h +@@ -91,7 +91,6 @@ static inline int desc_empty(const void + #define store_gdt(dtr) native_store_gdt(dtr) + #define store_idt(dtr) native_store_idt(dtr) + #define store_tr(tr) (tr = native_store_tr()) +-#define store_ldt(ldt) asm("sldt %0":"=m" (ldt)) + + #define load_TLS(t, cpu) native_load_tls(t, cpu) + #define set_ldt native_set_ldt +@@ -112,6 +111,8 @@ static 
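Because the cpu_debug.h selectors above are plain bitmasks, register groups and processor families combine and test with ordinary bit operations. A hedged sketch of that composition (the show_register_group() helper is invented; the CPU_* names come from the header above):

    /* illustrative only: testing the CPU_* selectors from cpu_debug.h */
    #include <asm/cpu_debug.h>

    static int show_register_group(unsigned reg_flag, unsigned cpu_mask)
    {
            /* CPU_CX_AT_XE covers Core, Core2, Atom and both Xeon families */
            if (!(cpu_mask & CPU_CX_AT_XE))
                    return 0;          /* processor not covered by this group */

            /* show only the Machine Check and APIC register groups */
            return (reg_flag & (CPU_MC | CPU_APIC)) != 0;
    }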
inline void paravirt_free_ldt(str + } + #endif /* CONFIG_PARAVIRT */ + ++#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt)) ++ + static inline void native_write_idt_entry(gate_desc *idt, int entry, + const gate_desc *gate) + { +Index: linux-2.6-tip/arch/x86/include/asm/device.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/device.h ++++ linux-2.6-tip/arch/x86/include/asm/device.h +@@ -6,7 +6,7 @@ struct dev_archdata { + void *acpi_handle; + #endif + #ifdef CONFIG_X86_64 +-struct dma_mapping_ops *dma_ops; ++struct dma_map_ops *dma_ops; + #endif + #ifdef CONFIG_DMAR + void *iommu; /* hook for IOMMU specific extension */ +Index: linux-2.6-tip/arch/x86/include/asm/dma-mapping.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/dma-mapping.h ++++ linux-2.6-tip/arch/x86/include/asm/dma-mapping.h +@@ -6,7 +6,10 @@ + * Documentation/DMA-API.txt for documentation. + */ + ++#include + #include ++#include ++#include + #include + #include + #include +@@ -16,47 +19,9 @@ extern int iommu_merge; + extern struct device x86_dma_fallback_dev; + extern int panic_on_overflow; + +-struct dma_mapping_ops { +- int (*mapping_error)(struct device *dev, +- dma_addr_t dma_addr); +- void* (*alloc_coherent)(struct device *dev, size_t size, +- dma_addr_t *dma_handle, gfp_t gfp); +- void (*free_coherent)(struct device *dev, size_t size, +- void *vaddr, dma_addr_t dma_handle); +- dma_addr_t (*map_single)(struct device *hwdev, phys_addr_t ptr, +- size_t size, int direction); +- void (*unmap_single)(struct device *dev, dma_addr_t addr, +- size_t size, int direction); +- void (*sync_single_for_cpu)(struct device *hwdev, +- dma_addr_t dma_handle, size_t size, +- int direction); +- void (*sync_single_for_device)(struct device *hwdev, +- dma_addr_t dma_handle, size_t size, +- int direction); +- void (*sync_single_range_for_cpu)(struct device *hwdev, +- dma_addr_t dma_handle, unsigned long offset, +- size_t size, int direction); +- void (*sync_single_range_for_device)(struct device *hwdev, +- dma_addr_t dma_handle, unsigned long offset, +- size_t size, int direction); +- void (*sync_sg_for_cpu)(struct device *hwdev, +- struct scatterlist *sg, int nelems, +- int direction); +- void (*sync_sg_for_device)(struct device *hwdev, +- struct scatterlist *sg, int nelems, +- int direction); +- int (*map_sg)(struct device *hwdev, struct scatterlist *sg, +- int nents, int direction); +- void (*unmap_sg)(struct device *hwdev, +- struct scatterlist *sg, int nents, +- int direction); +- int (*dma_supported)(struct device *hwdev, u64 mask); +- int is_phys; +-}; ++extern struct dma_map_ops *dma_ops; + +-extern struct dma_mapping_ops *dma_ops; +- +-static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) ++static inline struct dma_map_ops *get_dma_ops(struct device *dev) + { + #ifdef CONFIG_X86_32 + return dma_ops; +@@ -71,7 +36,7 @@ static inline struct dma_mapping_ops *ge + /* Make sure we keep the same behaviour */ + static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) + { +- struct dma_mapping_ops *ops = get_dma_ops(dev); ++ struct dma_map_ops *ops = get_dma_ops(dev); + if (ops->mapping_error) + return ops->mapping_error(dev, dma_addr); + +@@ -90,137 +55,174 @@ extern void *dma_generic_alloc_coherent( + + static inline dma_addr_t + dma_map_single(struct device *hwdev, void *ptr, size_t size, +- int direction) ++ enum dma_data_direction dir) + { +- struct dma_mapping_ops *ops = 
get_dma_ops(hwdev); ++ struct dma_map_ops *ops = get_dma_ops(hwdev); ++ dma_addr_t addr; + +- BUG_ON(!valid_dma_direction(direction)); +- return ops->map_single(hwdev, virt_to_phys(ptr), size, direction); ++ kmemcheck_mark_initialized(ptr, size); ++ BUG_ON(!valid_dma_direction(dir)); ++ addr = ops->map_page(hwdev, virt_to_page(ptr), ++ (unsigned long)ptr & ~PAGE_MASK, size, ++ dir, NULL); ++ debug_dma_map_page(hwdev, virt_to_page(ptr), ++ (unsigned long)ptr & ~PAGE_MASK, size, ++ dir, addr, true); ++ return addr; + } + + static inline void + dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, +- int direction) ++ enum dma_data_direction dir) + { +- struct dma_mapping_ops *ops = get_dma_ops(dev); ++ struct dma_map_ops *ops = get_dma_ops(dev); + +- BUG_ON(!valid_dma_direction(direction)); +- if (ops->unmap_single) +- ops->unmap_single(dev, addr, size, direction); ++ BUG_ON(!valid_dma_direction(dir)); ++ if (ops->unmap_page) ++ ops->unmap_page(dev, addr, size, dir, NULL); ++ debug_dma_unmap_page(dev, addr, size, dir, true); + } + + static inline int + dma_map_sg(struct device *hwdev, struct scatterlist *sg, +- int nents, int direction) ++ int nents, enum dma_data_direction dir) + { +- struct dma_mapping_ops *ops = get_dma_ops(hwdev); ++ struct dma_map_ops *ops = get_dma_ops(hwdev); ++ int ents; ++ ++ struct scatterlist *s; ++ int i; ++ ++ for_each_sg(sg, s, nents, i) ++ kmemcheck_mark_initialized(sg_virt(s), s->length); ++ BUG_ON(!valid_dma_direction(dir)); ++ ents = ops->map_sg(hwdev, sg, nents, dir, NULL); ++ debug_dma_map_sg(hwdev, sg, nents, ents, dir); + +- BUG_ON(!valid_dma_direction(direction)); +- return ops->map_sg(hwdev, sg, nents, direction); ++ return ents; + } + + static inline void + dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, +- int direction) ++ enum dma_data_direction dir) + { +- struct dma_mapping_ops *ops = get_dma_ops(hwdev); ++ struct dma_map_ops *ops = get_dma_ops(hwdev); + +- BUG_ON(!valid_dma_direction(direction)); ++ BUG_ON(!valid_dma_direction(dir)); ++ debug_dma_unmap_sg(hwdev, sg, nents, dir); + if (ops->unmap_sg) +- ops->unmap_sg(hwdev, sg, nents, direction); ++ ops->unmap_sg(hwdev, sg, nents, dir, NULL); + } + + static inline void + dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, +- size_t size, int direction) ++ size_t size, enum dma_data_direction dir) + { +- struct dma_mapping_ops *ops = get_dma_ops(hwdev); ++ struct dma_map_ops *ops = get_dma_ops(hwdev); + +- BUG_ON(!valid_dma_direction(direction)); ++ BUG_ON(!valid_dma_direction(dir)); + if (ops->sync_single_for_cpu) +- ops->sync_single_for_cpu(hwdev, dma_handle, size, direction); ++ ops->sync_single_for_cpu(hwdev, dma_handle, size, dir); ++ debug_dma_sync_single_for_cpu(hwdev, dma_handle, size, dir); + flush_write_buffers(); + } + + static inline void + dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, +- size_t size, int direction) ++ size_t size, enum dma_data_direction dir) + { +- struct dma_mapping_ops *ops = get_dma_ops(hwdev); ++ struct dma_map_ops *ops = get_dma_ops(hwdev); + +- BUG_ON(!valid_dma_direction(direction)); ++ BUG_ON(!valid_dma_direction(dir)); + if (ops->sync_single_for_device) +- ops->sync_single_for_device(hwdev, dma_handle, size, direction); ++ ops->sync_single_for_device(hwdev, dma_handle, size, dir); ++ debug_dma_sync_single_for_device(hwdev, dma_handle, size, dir); + flush_write_buffers(); + } + + static inline void + dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, +- 
unsigned long offset, size_t size, int direction) ++ unsigned long offset, size_t size, ++ enum dma_data_direction dir) + { +- struct dma_mapping_ops *ops = get_dma_ops(hwdev); ++ struct dma_map_ops *ops = get_dma_ops(hwdev); + +- BUG_ON(!valid_dma_direction(direction)); ++ BUG_ON(!valid_dma_direction(dir)); + if (ops->sync_single_range_for_cpu) + ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, +- size, direction); ++ size, dir); ++ debug_dma_sync_single_range_for_cpu(hwdev, dma_handle, ++ offset, size, dir); + flush_write_buffers(); + } + + static inline void + dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, + unsigned long offset, size_t size, +- int direction) ++ enum dma_data_direction dir) + { +- struct dma_mapping_ops *ops = get_dma_ops(hwdev); ++ struct dma_map_ops *ops = get_dma_ops(hwdev); + +- BUG_ON(!valid_dma_direction(direction)); ++ BUG_ON(!valid_dma_direction(dir)); + if (ops->sync_single_range_for_device) + ops->sync_single_range_for_device(hwdev, dma_handle, +- offset, size, direction); ++ offset, size, dir); ++ debug_dma_sync_single_range_for_device(hwdev, dma_handle, ++ offset, size, dir); + flush_write_buffers(); + } + + static inline void + dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, +- int nelems, int direction) ++ int nelems, enum dma_data_direction dir) + { +- struct dma_mapping_ops *ops = get_dma_ops(hwdev); ++ struct dma_map_ops *ops = get_dma_ops(hwdev); + +- BUG_ON(!valid_dma_direction(direction)); ++ BUG_ON(!valid_dma_direction(dir)); + if (ops->sync_sg_for_cpu) +- ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); ++ ops->sync_sg_for_cpu(hwdev, sg, nelems, dir); ++ debug_dma_sync_sg_for_cpu(hwdev, sg, nelems, dir); + flush_write_buffers(); + } + + static inline void + dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, +- int nelems, int direction) ++ int nelems, enum dma_data_direction dir) + { +- struct dma_mapping_ops *ops = get_dma_ops(hwdev); ++ struct dma_map_ops *ops = get_dma_ops(hwdev); + +- BUG_ON(!valid_dma_direction(direction)); ++ BUG_ON(!valid_dma_direction(dir)); + if (ops->sync_sg_for_device) +- ops->sync_sg_for_device(hwdev, sg, nelems, direction); ++ ops->sync_sg_for_device(hwdev, sg, nelems, dir); ++ debug_dma_sync_sg_for_device(hwdev, sg, nelems, dir); + + flush_write_buffers(); + } + + static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, + size_t offset, size_t size, +- int direction) ++ enum dma_data_direction dir) + { +- struct dma_mapping_ops *ops = get_dma_ops(dev); ++ struct dma_map_ops *ops = get_dma_ops(dev); ++ dma_addr_t addr; + +- BUG_ON(!valid_dma_direction(direction)); +- return ops->map_single(dev, page_to_phys(page) + offset, +- size, direction); ++ kmemcheck_mark_initialized(page_address(page) + offset, size); ++ BUG_ON(!valid_dma_direction(dir)); ++ addr = ops->map_page(dev, page, offset, size, dir, NULL); ++ debug_dma_map_page(dev, page, offset, size, dir, addr, false); ++ ++ return addr; + } + + static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, +- size_t size, int direction) ++ size_t size, enum dma_data_direction dir) + { +- dma_unmap_single(dev, addr, size, direction); ++ struct dma_map_ops *ops = get_dma_ops(dev); ++ ++ BUG_ON(!valid_dma_direction(dir)); ++ if (ops->unmap_page) ++ ops->unmap_page(dev, addr, size, dir, NULL); ++ debug_dma_unmap_page(dev, addr, size, dir, false); + } + + static inline void +@@ -266,7 +268,7 @@ static inline void * + dma_alloc_coherent(struct device *dev, size_t 
size, dma_addr_t *dma_handle, + gfp_t gfp) + { +- struct dma_mapping_ops *ops = get_dma_ops(dev); ++ struct dma_map_ops *ops = get_dma_ops(dev); + void *memory; + + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); +@@ -285,20 +287,24 @@ dma_alloc_coherent(struct device *dev, s + if (!ops->alloc_coherent) + return NULL; + +- return ops->alloc_coherent(dev, size, dma_handle, +- dma_alloc_coherent_gfp_flags(dev, gfp)); ++ memory = ops->alloc_coherent(dev, size, dma_handle, ++ dma_alloc_coherent_gfp_flags(dev, gfp)); ++ debug_dma_alloc_coherent(dev, size, *dma_handle, memory); ++ ++ return memory; + } + + static inline void dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t bus) + { +- struct dma_mapping_ops *ops = get_dma_ops(dev); ++ struct dma_map_ops *ops = get_dma_ops(dev); + + WARN_ON(irqs_disabled()); /* for portability */ + + if (dma_release_from_coherent(dev, get_order(size), vaddr)) + return; + ++ debug_dma_free_coherent(dev, size, vaddr, bus); + if (ops->free_coherent) + ops->free_coherent(dev, size, vaddr, bus); + } +Index: linux-2.6-tip/arch/x86/include/asm/dmi.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/dmi.h ++++ linux-2.6-tip/arch/x86/include/asm/dmi.h +@@ -1,22 +1,15 @@ + #ifndef _ASM_X86_DMI_H + #define _ASM_X86_DMI_H + +-#include +- +-#define DMI_MAX_DATA 2048 ++#include ++#include + +-extern int dmi_alloc_index; +-extern char dmi_alloc_data[DMI_MAX_DATA]; ++#include ++#include + +-/* This is so early that there is no good way to allocate dynamic memory. +- Allocate data in an BSS array. */ +-static inline void *dmi_alloc(unsigned len) ++static __always_inline __init void *dmi_alloc(unsigned len) + { +- int idx = dmi_alloc_index; +- if ((dmi_alloc_index + len) > DMI_MAX_DATA) +- return NULL; +- dmi_alloc_index += len; +- return dmi_alloc_data + idx; ++ return extend_brk(len, sizeof(int)); + } + + /* Use early IO mappings for DMI because it's initialized early */ +Index: linux-2.6-tip/arch/x86/include/asm/do_timer.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/do_timer.h +@@ -0,0 +1,16 @@ ++/* defines for inline arch setup functions */ ++#include ++ ++#include ++#include ++ ++/** ++ * do_timer_interrupt_hook - hook into timer tick ++ * ++ * Call the pit clock event handler. 
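The dma-mapping.h rework above replaces the old dma_mapping_ops table with the generic struct dma_map_ops, switches the direction argument to enum dma_data_direction, and funnels dma_map_single() through ->map_page() with dma-debug hooks. Driver-side usage keeps its familiar shape; a hedged sketch of a typical streaming mapping (the function, device pointer and buffer are placeholders):

    /* sketch of streaming DMA usage with the dma_data_direction API above */
    #include <linux/dma-mapping.h>
    #include <linux/errno.h>

    static int send_one_buffer(struct device *dev, void *buf, size_t len)
    {
            dma_addr_t handle;

            /* map for device reads; the page/offset split happens inside */
            handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
            if (dma_mapping_error(dev, handle))
                    return -ENOMEM;

            /* ... hand 'handle' to the hardware and wait for completion ... */

            dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
            return 0;
    }

The kmemcheck_mark_initialized() and debug_dma_*() calls introduced above run inside these wrappers, so callers get the extra checking without any changes of their own.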
see asm/i8253.h ++ **/ ++ ++static inline void do_timer_interrupt_hook(void) ++{ ++ global_clock_event->event_handler(global_clock_event); ++} +Index: linux-2.6-tip/arch/x86/include/asm/e820.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/e820.h ++++ linux-2.6-tip/arch/x86/include/asm/e820.h +@@ -72,7 +72,7 @@ extern int e820_all_mapped(u64 start, u6 + extern void e820_add_region(u64 start, u64 size, int type); + extern void e820_print_map(char *who); + extern int +-sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, int *pnr_map); ++sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, u32 *pnr_map); + extern u64 e820_update_range(u64 start, u64 size, unsigned old_type, + unsigned new_type); + extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type, +Index: linux-2.6-tip/arch/x86/include/asm/elf.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/elf.h ++++ linux-2.6-tip/arch/x86/include/asm/elf.h +@@ -112,7 +112,7 @@ extern unsigned int vdso_enabled; + * now struct_user_regs, they are different) + */ + +-#define ELF_CORE_COPY_REGS(pr_reg, regs) \ ++#define ELF_CORE_COPY_REGS_COMMON(pr_reg, regs) \ + do { \ + pr_reg[0] = regs->bx; \ + pr_reg[1] = regs->cx; \ +@@ -124,7 +124,6 @@ do { \ + pr_reg[7] = regs->ds & 0xffff; \ + pr_reg[8] = regs->es & 0xffff; \ + pr_reg[9] = regs->fs & 0xffff; \ +- savesegment(gs, pr_reg[10]); \ + pr_reg[11] = regs->orig_ax; \ + pr_reg[12] = regs->ip; \ + pr_reg[13] = regs->cs & 0xffff; \ +@@ -133,6 +132,18 @@ do { \ + pr_reg[16] = regs->ss & 0xffff; \ + } while (0); + ++#define ELF_CORE_COPY_REGS(pr_reg, regs) \ ++do { \ ++ ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ ++ pr_reg[10] = get_user_gs(regs); \ ++} while (0); ++ ++#define ELF_CORE_COPY_KERNEL_REGS(pr_reg, regs) \ ++do { \ ++ ELF_CORE_COPY_REGS_COMMON(pr_reg, regs);\ ++ savesegment(gs, pr_reg[10]); \ ++} while (0); ++ + #define ELF_PLATFORM (utsname()->machine) + #define set_personality_64bit() do { } while (0) + +Index: linux-2.6-tip/arch/x86/include/asm/entry_arch.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/entry_arch.h +@@ -0,0 +1,59 @@ ++/* ++ * This file is designed to contain the BUILD_INTERRUPT specifications for ++ * all of the extra named interrupt vectors used by the architecture. 
++ * Usually this is the Inter Process Interrupts (IPIs) ++ */ ++ ++/* ++ * The following vectors are part of the Linux architecture, there ++ * is no hardware IRQ pin equivalent for them, they are triggered ++ * through the ICC by us (IPIs) ++ */ ++#ifdef CONFIG_SMP ++BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) ++BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) ++BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) ++BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) ++ ++BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0, ++ smp_invalidate_interrupt) ++BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1, ++ smp_invalidate_interrupt) ++BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2, ++ smp_invalidate_interrupt) ++BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3, ++ smp_invalidate_interrupt) ++BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4, ++ smp_invalidate_interrupt) ++BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5, ++ smp_invalidate_interrupt) ++BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6, ++ smp_invalidate_interrupt) ++BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7, ++ smp_invalidate_interrupt) ++#endif ++ ++BUILD_INTERRUPT(generic_interrupt, GENERIC_INTERRUPT_VECTOR) ++ ++/* ++ * every pentium local APIC has two 'local interrupts', with a ++ * soft-definable vector attached to both interrupts, one of ++ * which is a timer interrupt, the other one is error counter ++ * overflow. Linux uses the local APIC timer interrupt to get ++ * a much simpler SMP time architecture: ++ */ ++#ifdef CONFIG_X86_LOCAL_APIC ++ ++BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) ++BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) ++BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) ++ ++#ifdef CONFIG_PERF_COUNTERS ++BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) ++#endif ++ ++#ifdef CONFIG_X86_MCE_P4THERMAL ++BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) ++#endif ++ ++#endif +Index: linux-2.6-tip/arch/x86/include/asm/es7000/apic.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/es7000/apic.h ++++ /dev/null +@@ -1,242 +0,0 @@ +-#ifndef __ASM_ES7000_APIC_H +-#define __ASM_ES7000_APIC_H +- +-#include +- +-#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) +-#define esr_disable (1) +- +-static inline int apic_id_registered(void) +-{ +- return (1); +-} +- +-static inline const cpumask_t *target_cpus_cluster(void) +-{ +- return &CPU_MASK_ALL; +-} +- +-static inline const cpumask_t *target_cpus(void) +-{ +- return &cpumask_of_cpu(smp_processor_id()); +-} +- +-#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) +-#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) +-#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ +-#define NO_BALANCE_IRQ_CLUSTER (1) +- +-#define APIC_DFR_VALUE (APIC_DFR_FLAT) +-#define INT_DELIVERY_MODE (dest_Fixed) +-#define INT_DEST_MODE (0) /* phys delivery to target procs */ +-#define NO_BALANCE_IRQ (0) +-#undef APIC_DEST_LOGICAL +-#define APIC_DEST_LOGICAL 0x0 +- +-static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +-{ +- return 0; +-} +-static inline unsigned long check_apicid_present(int bit) +-{ +- return physid_isset(bit, phys_cpu_present_map); +-} +- +-#define 
apicid_cluster(apicid) (apicid & 0xF0) +- +-static inline unsigned long calculate_ldr(int cpu) +-{ +- unsigned long id; +- id = xapic_phys_to_log_apicid(cpu); +- return (SET_APIC_LOGICAL_ID(id)); +-} +- +-/* +- * Set up the logical destination ID. +- * +- * Intel recommends to set DFR, LdR and TPR before enabling +- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel +- * document number 292116). So here it goes... +- */ +-static inline void init_apic_ldr_cluster(void) +-{ +- unsigned long val; +- int cpu = smp_processor_id(); +- +- apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER); +- val = calculate_ldr(cpu); +- apic_write(APIC_LDR, val); +-} +- +-static inline void init_apic_ldr(void) +-{ +- unsigned long val; +- int cpu = smp_processor_id(); +- +- apic_write(APIC_DFR, APIC_DFR_VALUE); +- val = calculate_ldr(cpu); +- apic_write(APIC_LDR, val); +-} +- +-extern int apic_version [MAX_APICS]; +-static inline void setup_apic_routing(void) +-{ +- int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); +- printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", +- (apic_version[apic] == 0x14) ? +- "Physical Cluster" : "Logical Cluster", +- nr_ioapics, cpus_addr(*target_cpus())[0]); +-} +- +-static inline int multi_timer_check(int apic, int irq) +-{ +- return 0; +-} +- +-static inline int apicid_to_node(int logical_apicid) +-{ +- return 0; +-} +- +- +-static inline int cpu_present_to_apicid(int mps_cpu) +-{ +- if (!mps_cpu) +- return boot_cpu_physical_apicid; +- else if (mps_cpu < nr_cpu_ids) +- return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); +- else +- return BAD_APICID; +-} +- +-static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) +-{ +- static int id = 0; +- physid_mask_t mask; +- mask = physid_mask_of_physid(id); +- ++id; +- return mask; +-} +- +-extern u8 cpu_2_logical_apicid[]; +-/* Mapping from cpu number to logical apicid */ +-static inline int cpu_to_logical_apicid(int cpu) +-{ +-#ifdef CONFIG_SMP +- if (cpu >= nr_cpu_ids) +- return BAD_APICID; +- return (int)cpu_2_logical_apicid[cpu]; +-#else +- return logical_smp_processor_id(); +-#endif +-} +- +-static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +-{ +- /* For clustered we don't have a good way to do this yet - hack */ +- return physids_promote(0xff); +-} +- +- +-static inline void setup_portio_remap(void) +-{ +-} +- +-extern unsigned int boot_cpu_physical_apicid; +-static inline int check_phys_apicid_present(int cpu_physical_apicid) +-{ +- boot_cpu_physical_apicid = read_apic_id(); +- return (1); +-} +- +-static inline unsigned int +-cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) +-{ +- int num_bits_set; +- int cpus_found = 0; +- int cpu; +- int apicid; +- +- num_bits_set = cpumask_weight(cpumask); +- /* Return id to all */ +- if (num_bits_set == nr_cpu_ids) +- return 0xFF; +- /* +- * The cpus in the mask must all be on the apic cluster. If are not +- * on the same apicid cluster return default value of TARGET_CPUS. 
+- */ +- cpu = cpumask_first(cpumask); +- apicid = cpu_to_logical_apicid(cpu); +- while (cpus_found < num_bits_set) { +- if (cpumask_test_cpu(cpu, cpumask)) { +- int new_apicid = cpu_to_logical_apicid(cpu); +- if (apicid_cluster(apicid) != +- apicid_cluster(new_apicid)){ +- printk ("%s: Not a valid mask!\n", __func__); +- return 0xFF; +- } +- apicid = new_apicid; +- cpus_found++; +- } +- cpu++; +- } +- return apicid; +-} +- +-static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +-{ +- int num_bits_set; +- int cpus_found = 0; +- int cpu; +- int apicid; +- +- num_bits_set = cpus_weight(*cpumask); +- /* Return id to all */ +- if (num_bits_set == nr_cpu_ids) +- return cpu_to_logical_apicid(0); +- /* +- * The cpus in the mask must all be on the apic cluster. If are not +- * on the same apicid cluster return default value of TARGET_CPUS. +- */ +- cpu = first_cpu(*cpumask); +- apicid = cpu_to_logical_apicid(cpu); +- while (cpus_found < num_bits_set) { +- if (cpu_isset(cpu, *cpumask)) { +- int new_apicid = cpu_to_logical_apicid(cpu); +- if (apicid_cluster(apicid) != +- apicid_cluster(new_apicid)){ +- printk ("%s: Not a valid mask!\n", __func__); +- return cpu_to_logical_apicid(0); +- } +- apicid = new_apicid; +- cpus_found++; +- } +- cpu++; +- } +- return apicid; +-} +- +- +-static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, +- const struct cpumask *andmask) +-{ +- int apicid = cpu_to_logical_apicid(0); +- cpumask_var_t cpumask; +- +- if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) +- return apicid; +- +- cpumask_and(cpumask, inmask, andmask); +- cpumask_and(cpumask, cpumask, cpu_online_mask); +- apicid = cpu_mask_to_apicid(cpumask); +- +- free_cpumask_var(cpumask); +- return apicid; +-} +- +-static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +-{ +- return cpuid_apic >> index_msb; +-} +- +-#endif /* __ASM_ES7000_APIC_H */ +Index: linux-2.6-tip/arch/x86/include/asm/es7000/apicdef.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/es7000/apicdef.h ++++ /dev/null +@@ -1,13 +0,0 @@ +-#ifndef __ASM_ES7000_APICDEF_H +-#define __ASM_ES7000_APICDEF_H +- +-#define APIC_ID_MASK (0xFF<<24) +- +-static inline unsigned get_apic_id(unsigned long x) +-{ +- return (((x)>>24)&0xFF); +-} +- +-#define GET_APIC_ID(x) get_apic_id(x) +- +-#endif +Index: linux-2.6-tip/arch/x86/include/asm/es7000/ipi.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/es7000/ipi.h ++++ /dev/null +@@ -1,22 +0,0 @@ +-#ifndef __ASM_ES7000_IPI_H +-#define __ASM_ES7000_IPI_H +- +-void send_IPI_mask_sequence(const struct cpumask *mask, int vector); +-void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +- +-static inline void send_IPI_mask(const struct cpumask *mask, int vector) +-{ +- send_IPI_mask_sequence(mask, vector); +-} +- +-static inline void send_IPI_allbutself(int vector) +-{ +- send_IPI_mask_allbutself(cpu_online_mask, vector); +-} +- +-static inline void send_IPI_all(int vector) +-{ +- send_IPI_mask(cpu_online_mask, vector); +-} +- +-#endif /* __ASM_ES7000_IPI_H */ +Index: linux-2.6-tip/arch/x86/include/asm/es7000/mpparse.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/es7000/mpparse.h ++++ /dev/null +@@ -1,29 +0,0 @@ +-#ifndef __ASM_ES7000_MPPARSE_H +-#define __ASM_ES7000_MPPARSE_H +- +-#include +- +-extern int parse_unisys_oem (char *oemptr); +-extern int 
find_unisys_acpi_oem_table(unsigned long *oem_addr); +-extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); +-extern void setup_unisys(void); +- +-#ifndef CONFIG_X86_GENERICARCH +-extern int acpi_madt_oem_check(char *oem_id, char *oem_table_id); +-extern int mps_oem_check(struct mpc_table *mpc, char *oem, char *productid); +-#endif +- +-#ifdef CONFIG_ACPI +- +-static inline int es7000_check_dsdt(void) +-{ +- struct acpi_table_header header; +- +- if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && +- !strncmp(header.oem_id, "UNISYS", 6)) +- return 1; +- return 0; +-} +-#endif +- +-#endif /* __ASM_MACH_MPPARSE_H */ +Index: linux-2.6-tip/arch/x86/include/asm/es7000/wakecpu.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/es7000/wakecpu.h ++++ /dev/null +@@ -1,37 +0,0 @@ +-#ifndef __ASM_ES7000_WAKECPU_H +-#define __ASM_ES7000_WAKECPU_H +- +-#define TRAMPOLINE_PHYS_LOW 0x467 +-#define TRAMPOLINE_PHYS_HIGH 0x469 +- +-static inline void wait_for_init_deassert(atomic_t *deassert) +-{ +-#ifndef CONFIG_ES7000_CLUSTERED_APIC +- while (!atomic_read(deassert)) +- cpu_relax(); +-#endif +- return; +-} +- +-/* Nothing to do for most platforms, since cleared by the INIT cycle */ +-static inline void smp_callin_clear_local_apic(void) +-{ +-} +- +-static inline void store_NMI_vector(unsigned short *high, unsigned short *low) +-{ +-} +- +-static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) +-{ +-} +- +-extern void __inquire_remote_apic(int apicid); +- +-static inline void inquire_remote_apic(int apicid) +-{ +- if (apic_verbosity >= APIC_DEBUG) +- __inquire_remote_apic(apicid); +-} +- +-#endif /* __ASM_MACH_WAKECPU_H */ +Index: linux-2.6-tip/arch/x86/include/asm/fixmap.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/fixmap.h ++++ linux-2.6-tip/arch/x86/include/asm/fixmap.h +@@ -1,11 +1,147 @@ ++/* ++ * fixmap.h: compile-time virtual memory allocation ++ * ++ * This file is subject to the terms and conditions of the GNU General Public ++ * License. See the file "COPYING" in the main directory of this archive ++ * for more details. ++ * ++ * Copyright (C) 1998 Ingo Molnar ++ * ++ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 ++ * x86_32 and x86_64 integration by Gustavo F. Padovan, February 2009 ++ */ ++ + #ifndef _ASM_X86_FIXMAP_H + #define _ASM_X86_FIXMAP_H + ++#ifndef __ASSEMBLY__ ++#include ++#include ++#include ++#include ++#ifdef CONFIG_X86_32 ++#include ++#include ++#else ++#include ++#endif ++ ++/* ++ * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall ++ * uses fixmaps that relies on FIXADDR_TOP for proper address calculation. ++ * Because of this, FIXADDR_TOP x86 integration was left as later work. ++ */ ++#ifdef CONFIG_X86_32 ++/* used by vmalloc.c, vsyscall.lds.S. ++ * ++ * Leave one empty page between vmalloc'ed areas and ++ * the start of the fixmap. ++ */ ++extern unsigned long __FIXADDR_TOP; ++#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) ++ ++#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) ++#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) ++#else ++#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) ++ ++/* Only covers 32bit vsyscalls currently. Need another set for 64bit. 
*/ ++#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) ++#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) ++#endif ++ ++ ++/* ++ * Here we define all the compile-time 'special' virtual ++ * addresses. The point is to have a constant address at ++ * compile time, but to set the physical address only ++ * in the boot process. ++ * for x86_32: We allocate these special addresses ++ * from the end of virtual memory (0xfffff000) backwards. ++ * Also this lets us do fail-safe vmalloc(), we ++ * can guarantee that these special addresses and ++ * vmalloc()-ed addresses never overlap. ++ * ++ * These 'compile-time allocated' memory buffers are ++ * fixed-size 4k pages (or larger if used with an increment ++ * higher than 1). Use set_fixmap(idx,phys) to associate ++ * physical memory with fixmap indices. ++ * ++ * TLB entries of such buffers will not be flushed across ++ * task switches. ++ */ ++enum fixed_addresses { + #ifdef CONFIG_X86_32 +-# include "fixmap_32.h" ++ FIX_HOLE, ++ FIX_VDSO, + #else +-# include "fixmap_64.h" ++ VSYSCALL_LAST_PAGE, ++ VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE ++ + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, ++ VSYSCALL_HPET, + #endif ++ FIX_DBGP_BASE, ++ FIX_EARLYCON_MEM_BASE, ++#ifdef CONFIG_X86_LOCAL_APIC ++ FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ ++#endif ++#ifdef CONFIG_X86_IO_APIC ++ FIX_IO_APIC_BASE_0, ++ FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, ++#endif ++#ifdef CONFIG_X86_VISWS_APIC ++ FIX_CO_CPU, /* Cobalt timer */ ++ FIX_CO_APIC, /* Cobalt APIC Redirection Table */ ++ FIX_LI_PCIA, /* Lithium PCI Bridge A */ ++ FIX_LI_PCIB, /* Lithium PCI Bridge B */ ++#endif ++#ifdef CONFIG_X86_F00F_BUG ++ FIX_F00F_IDT, /* Virtual mapping for IDT */ ++#endif ++#ifdef CONFIG_X86_CYCLONE_TIMER ++ FIX_CYCLONE_TIMER, /*cyclone timer register*/ ++#endif ++#ifdef CONFIG_X86_32 ++ FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ ++ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, ++#ifdef CONFIG_PCI_MMCONFIG ++ FIX_PCIE_MCFG, ++#endif ++#endif ++#ifdef CONFIG_PARAVIRT ++ FIX_PARAVIRT_BOOTMAP, ++#endif ++ FIX_TEXT_POKE0, /* reserve 2 pages for text_poke() */ ++ FIX_TEXT_POKE1, ++ __end_of_permanent_fixed_addresses, ++#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT ++ FIX_OHCI1394_BASE, ++#endif ++ /* ++ * 256 temporary boot-time mappings, used by early_ioremap(), ++ * before ioremap() is functional. 
++ * ++ * We round it up to the next 256 pages boundary so that we ++ * can have a single pgd entry and a single pte table: ++ */ ++#define NR_FIX_BTMAPS 64 ++#define FIX_BTMAPS_SLOTS 4 ++ FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - ++ (__end_of_permanent_fixed_addresses & 255), ++ FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, ++#ifdef CONFIG_X86_32 ++ FIX_WP_TEST, ++#endif ++ __end_of_fixed_addresses ++}; ++ ++ ++extern void reserve_top_address(unsigned long reserve); ++ ++#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) ++#define FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) ++#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) ++#define FIXADDR_BOOT_START (FIXADDR_TOP - FIXADDR_BOOT_SIZE) + + extern int fixmaps_set; + +@@ -15,11 +151,11 @@ extern pte_t *pkmap_page_table; + + void __native_set_fixmap(enum fixed_addresses idx, pte_t pte); + void native_set_fixmap(enum fixed_addresses idx, +- unsigned long phys, pgprot_t flags); ++ phys_addr_t phys, pgprot_t flags); + + #ifndef CONFIG_PARAVIRT + static inline void __set_fixmap(enum fixed_addresses idx, +- unsigned long phys, pgprot_t flags) ++ phys_addr_t phys, pgprot_t flags) + { + native_set_fixmap(idx, phys, flags); + } +@@ -69,4 +205,5 @@ static inline unsigned long virt_to_fix( + BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); + return __virt_to_fix(vaddr); + } ++#endif /* !__ASSEMBLY__ */ + #endif /* _ASM_X86_FIXMAP_H */ +Index: linux-2.6-tip/arch/x86/include/asm/fixmap_32.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/fixmap_32.h ++++ /dev/null +@@ -1,119 +0,0 @@ +-/* +- * fixmap.h: compile-time virtual memory allocation +- * +- * This file is subject to the terms and conditions of the GNU General Public +- * License. See the file "COPYING" in the main directory of this archive +- * for more details. +- * +- * Copyright (C) 1998 Ingo Molnar +- * +- * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 +- */ +- +-#ifndef _ASM_X86_FIXMAP_32_H +-#define _ASM_X86_FIXMAP_32_H +- +- +-/* used by vmalloc.c, vsyscall.lds.S. +- * +- * Leave one empty page between vmalloc'ed areas and +- * the start of the fixmap. +- */ +-extern unsigned long __FIXADDR_TOP; +-#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) +-#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) +- +-#ifndef __ASSEMBLY__ +-#include +-#include +-#include +-#include +-#include +-#include +- +-/* +- * Here we define all the compile-time 'special' virtual +- * addresses. The point is to have a constant address at +- * compile time, but to set the physical address only +- * in the boot process. We allocate these special addresses +- * from the end of virtual memory (0xfffff000) backwards. +- * Also this lets us do fail-safe vmalloc(), we +- * can guarantee that these special addresses and +- * vmalloc()-ed addresses never overlap. +- * +- * these 'compile-time allocated' memory buffers are +- * fixed-size 4k pages. (or larger if used with an increment +- * highger than 1) use fixmap_set(idx,phys) to associate +- * physical memory with fixmap indices. +- * +- * TLB entries of such buffers will not be flushed across +- * task switches. 
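The unified fixmap.h above keeps the long-standing contract spelled out in its comment: each enum fixed_addresses slot is a compile-time constant virtual address, set_fixmap(idx, phys) binds a physical page to it, and fix_to_virt(idx) returns that address. A hedged sketch of the idiom (the helper is invented, and FIX_EARLYCON_MEM_BASE is borrowed purely as an example slot):

    /* illustrative use of the fixmap API declared above */
    #include <linux/types.h>
    #include <asm/fixmap.h>

    static void __iomem *map_some_mmio(phys_addr_t phys)
    {
            /* bind 'phys' to a permanent slot; the virtual address is a
             * compile-time constant, so no allocation happens here
             */
            set_fixmap(FIX_EARLYCON_MEM_BASE, phys);
            return (void __iomem *)fix_to_virt(FIX_EARLYCON_MEM_BASE);
    }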
+- */ +-enum fixed_addresses { +- FIX_HOLE, +- FIX_VDSO, +- FIX_DBGP_BASE, +- FIX_EARLYCON_MEM_BASE, +-#ifdef CONFIG_X86_LOCAL_APIC +- FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ +-#endif +-#ifdef CONFIG_X86_IO_APIC +- FIX_IO_APIC_BASE_0, +- FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1, +-#endif +-#ifdef CONFIG_X86_VISWS_APIC +- FIX_CO_CPU, /* Cobalt timer */ +- FIX_CO_APIC, /* Cobalt APIC Redirection Table */ +- FIX_LI_PCIA, /* Lithium PCI Bridge A */ +- FIX_LI_PCIB, /* Lithium PCI Bridge B */ +-#endif +-#ifdef CONFIG_X86_F00F_BUG +- FIX_F00F_IDT, /* Virtual mapping for IDT */ +-#endif +-#ifdef CONFIG_X86_CYCLONE_TIMER +- FIX_CYCLONE_TIMER, /*cyclone timer register*/ +-#endif +- FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ +- FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, +-#ifdef CONFIG_PCI_MMCONFIG +- FIX_PCIE_MCFG, +-#endif +-#ifdef CONFIG_PARAVIRT +- FIX_PARAVIRT_BOOTMAP, +-#endif +- __end_of_permanent_fixed_addresses, +- /* +- * 256 temporary boot-time mappings, used by early_ioremap(), +- * before ioremap() is functional. +- * +- * We round it up to the next 256 pages boundary so that we +- * can have a single pgd entry and a single pte table: +- */ +-#define NR_FIX_BTMAPS 64 +-#define FIX_BTMAPS_SLOTS 4 +- FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - +- (__end_of_permanent_fixed_addresses & 255), +- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, +- FIX_WP_TEST, +-#ifdef CONFIG_ACPI +- FIX_ACPI_BEGIN, +- FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, +-#endif +-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT +- FIX_OHCI1394_BASE, +-#endif +- __end_of_fixed_addresses +-}; +- +-extern void reserve_top_address(unsigned long reserve); +- +- +-#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) +- +-#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) +-#define __FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +-#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) +-#define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE) +- +-#endif /* !__ASSEMBLY__ */ +-#endif /* _ASM_X86_FIXMAP_32_H */ +Index: linux-2.6-tip/arch/x86/include/asm/fixmap_64.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/fixmap_64.h ++++ /dev/null +@@ -1,79 +0,0 @@ +-/* +- * fixmap.h: compile-time virtual memory allocation +- * +- * This file is subject to the terms and conditions of the GNU General Public +- * License. See the file "COPYING" in the main directory of this archive +- * for more details. +- * +- * Copyright (C) 1998 Ingo Molnar +- */ +- +-#ifndef _ASM_X86_FIXMAP_64_H +-#define _ASM_X86_FIXMAP_64_H +- +-#include +-#include +-#include +-#include +-#include +- +-/* +- * Here we define all the compile-time 'special' virtual +- * addresses. The point is to have a constant address at +- * compile time, but to set the physical address only +- * in the boot process. +- * +- * These 'compile-time allocated' memory buffers are +- * fixed-size 4k pages (or larger if used with an increment +- * higher than 1). Use set_fixmap(idx,phys) to associate +- * physical memory with fixmap indices. +- * +- * TLB entries of such buffers will not be flushed across +- * task switches. 
+- */ +- +-enum fixed_addresses { +- VSYSCALL_LAST_PAGE, +- VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE +- + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1, +- VSYSCALL_HPET, +- FIX_DBGP_BASE, +- FIX_EARLYCON_MEM_BASE, +- FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ +- FIX_IO_APIC_BASE_0, +- FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, +-#ifdef CONFIG_PARAVIRT +- FIX_PARAVIRT_BOOTMAP, +-#endif +- __end_of_permanent_fixed_addresses, +-#ifdef CONFIG_ACPI +- FIX_ACPI_BEGIN, +- FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1, +-#endif +-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT +- FIX_OHCI1394_BASE, +-#endif +- /* +- * 256 temporary boot-time mappings, used by early_ioremap(), +- * before ioremap() is functional. +- * +- * We round it up to the next 256 pages boundary so that we +- * can have a single pgd entry and a single pte table: +- */ +-#define NR_FIX_BTMAPS 64 +-#define FIX_BTMAPS_SLOTS 4 +- FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - +- (__end_of_permanent_fixed_addresses & 255), +- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, +- __end_of_fixed_addresses +-}; +- +-#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) +-#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +- +-/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */ +-#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) +-#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) +- +-#endif /* _ASM_X86_FIXMAP_64_H */ +Index: linux-2.6-tip/arch/x86/include/asm/ftrace.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/ftrace.h ++++ linux-2.6-tip/arch/x86/include/asm/ftrace.h +@@ -28,6 +28,13 @@ + + #endif + ++/* FIXME: I don't want to stay hardcoded */ ++#ifdef CONFIG_X86_64 ++# define FTRACE_SYSCALL_MAX 296 ++#else ++# define FTRACE_SYSCALL_MAX 333 ++#endif ++ + #ifdef CONFIG_FUNCTION_TRACER + #define MCOUNT_ADDR ((long)(mcount)) + #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ +@@ -55,29 +62,4 @@ struct dyn_arch_ftrace { + #endif /* __ASSEMBLY__ */ + #endif /* CONFIG_FUNCTION_TRACER */ + +-#ifdef CONFIG_FUNCTION_GRAPH_TRACER +- +-#ifndef __ASSEMBLY__ +- +-/* +- * Stack of return addresses for functions +- * of a thread. +- * Used in struct thread_info +- */ +-struct ftrace_ret_stack { +- unsigned long ret; +- unsigned long func; +- unsigned long long calltime; +-}; +- +-/* +- * Primary handler of a function return. +- * It relays on ftrace_return_to_handler. +- * Defined in entry_32/64.S +- */ +-extern void return_to_handler(void); +- +-#endif /* __ASSEMBLY__ */ +-#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +- + #endif /* _ASM_X86_FTRACE_H */ +Index: linux-2.6-tip/arch/x86/include/asm/genapic.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/genapic.h ++++ linux-2.6-tip/arch/x86/include/asm/genapic.h +@@ -1,5 +1 @@ +-#ifdef CONFIG_X86_32 +-# include "genapic_32.h" +-#else +-# include "genapic_64.h" +-#endif ++#include +Index: linux-2.6-tip/arch/x86/include/asm/genapic_32.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/genapic_32.h ++++ /dev/null +@@ -1,148 +0,0 @@ +-#ifndef _ASM_X86_GENAPIC_32_H +-#define _ASM_X86_GENAPIC_32_H +- +-#include +-#include +- +-/* +- * Generic APIC driver interface. 
+- * +- * An straight forward mapping of the APIC related parts of the +- * x86 subarchitecture interface to a dynamic object. +- * +- * This is used by the "generic" x86 subarchitecture. +- * +- * Copyright 2003 Andi Kleen, SuSE Labs. +- */ +- +-struct mpc_bus; +-struct mpc_table; +-struct mpc_cpu; +- +-struct genapic { +- char *name; +- int (*probe)(void); +- +- int (*apic_id_registered)(void); +- const struct cpumask *(*target_cpus)(void); +- int int_delivery_mode; +- int int_dest_mode; +- int ESR_DISABLE; +- int apic_destination_logical; +- unsigned long (*check_apicid_used)(physid_mask_t bitmap, int apicid); +- unsigned long (*check_apicid_present)(int apicid); +- int no_balance_irq; +- int no_ioapic_check; +- void (*init_apic_ldr)(void); +- physid_mask_t (*ioapic_phys_id_map)(physid_mask_t map); +- +- void (*setup_apic_routing)(void); +- int (*multi_timer_check)(int apic, int irq); +- int (*apicid_to_node)(int logical_apicid); +- int (*cpu_to_logical_apicid)(int cpu); +- int (*cpu_present_to_apicid)(int mps_cpu); +- physid_mask_t (*apicid_to_cpu_present)(int phys_apicid); +- void (*setup_portio_remap)(void); +- int (*check_phys_apicid_present)(int boot_cpu_physical_apicid); +- void (*enable_apic_mode)(void); +- u32 (*phys_pkg_id)(u32 cpuid_apic, int index_msb); +- +- /* mpparse */ +- /* When one of the next two hooks returns 1 the genapic +- is switched to this. Essentially they are additional probe +- functions. */ +- int (*mps_oem_check)(struct mpc_table *mpc, char *oem, +- char *productid); +- int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); +- +- unsigned (*get_apic_id)(unsigned long x); +- unsigned long apic_id_mask; +- unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); +- unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, +- const struct cpumask *andmask); +- void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); +- +-#ifdef CONFIG_SMP +- /* ipi */ +- void (*send_IPI_mask)(const struct cpumask *mask, int vector); +- void (*send_IPI_mask_allbutself)(const struct cpumask *mask, +- int vector); +- void (*send_IPI_allbutself)(int vector); +- void (*send_IPI_all)(int vector); +-#endif +- int (*wakeup_cpu)(int apicid, unsigned long start_eip); +- int trampoline_phys_low; +- int trampoline_phys_high; +- void (*wait_for_init_deassert)(atomic_t *deassert); +- void (*smp_callin_clear_local_apic)(void); +- void (*store_NMI_vector)(unsigned short *high, unsigned short *low); +- void (*restore_NMI_vector)(unsigned short *high, unsigned short *low); +- void (*inquire_remote_apic)(int apicid); +-}; +- +-#define APICFUNC(x) .x = x, +- +-/* More functions could be probably marked IPIFUNC and save some space +- in UP GENERICARCH kernels, but I don't have the nerve right now +- to untangle this mess. 
-AK */ +-#ifdef CONFIG_SMP +-#define IPIFUNC(x) APICFUNC(x) +-#else +-#define IPIFUNC(x) +-#endif +- +-#define APIC_INIT(aname, aprobe) \ +-{ \ +- .name = aname, \ +- .probe = aprobe, \ +- .int_delivery_mode = INT_DELIVERY_MODE, \ +- .int_dest_mode = INT_DEST_MODE, \ +- .no_balance_irq = NO_BALANCE_IRQ, \ +- .ESR_DISABLE = esr_disable, \ +- .apic_destination_logical = APIC_DEST_LOGICAL, \ +- APICFUNC(apic_id_registered) \ +- APICFUNC(target_cpus) \ +- APICFUNC(check_apicid_used) \ +- APICFUNC(check_apicid_present) \ +- APICFUNC(init_apic_ldr) \ +- APICFUNC(ioapic_phys_id_map) \ +- APICFUNC(setup_apic_routing) \ +- APICFUNC(multi_timer_check) \ +- APICFUNC(apicid_to_node) \ +- APICFUNC(cpu_to_logical_apicid) \ +- APICFUNC(cpu_present_to_apicid) \ +- APICFUNC(apicid_to_cpu_present) \ +- APICFUNC(setup_portio_remap) \ +- APICFUNC(check_phys_apicid_present) \ +- APICFUNC(mps_oem_check) \ +- APICFUNC(get_apic_id) \ +- .apic_id_mask = APIC_ID_MASK, \ +- APICFUNC(cpu_mask_to_apicid) \ +- APICFUNC(cpu_mask_to_apicid_and) \ +- APICFUNC(vector_allocation_domain) \ +- APICFUNC(acpi_madt_oem_check) \ +- IPIFUNC(send_IPI_mask) \ +- IPIFUNC(send_IPI_allbutself) \ +- IPIFUNC(send_IPI_all) \ +- APICFUNC(enable_apic_mode) \ +- APICFUNC(phys_pkg_id) \ +- .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \ +- .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \ +- APICFUNC(wait_for_init_deassert) \ +- APICFUNC(smp_callin_clear_local_apic) \ +- APICFUNC(store_NMI_vector) \ +- APICFUNC(restore_NMI_vector) \ +- APICFUNC(inquire_remote_apic) \ +-} +- +-extern struct genapic *genapic; +-extern void es7000_update_genapic_to_cluster(void); +- +-enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; +-#define get_uv_system_type() UV_NONE +-#define is_uv_system() 0 +-#define uv_wakeup_secondary(a, b) 1 +-#define uv_system_init() do {} while (0) +- +- +-#endif /* _ASM_X86_GENAPIC_32_H */ +Index: linux-2.6-tip/arch/x86/include/asm/genapic_64.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/genapic_64.h ++++ /dev/null +@@ -1,66 +0,0 @@ +-#ifndef _ASM_X86_GENAPIC_64_H +-#define _ASM_X86_GENAPIC_64_H +- +-#include +- +-/* +- * Copyright 2004 James Cleverdon, IBM. +- * Subject to the GNU Public License, v.2 +- * +- * Generic APIC sub-arch data struct. +- * +- * Hacked for x86-64 by James Cleverdon from i386 architecture code by +- * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and +- * James Cleverdon. 
+- */ +- +-struct genapic { +- char *name; +- int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id); +- u32 int_delivery_mode; +- u32 int_dest_mode; +- int (*apic_id_registered)(void); +- const struct cpumask *(*target_cpus)(void); +- void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); +- void (*init_apic_ldr)(void); +- /* ipi */ +- void (*send_IPI_mask)(const struct cpumask *mask, int vector); +- void (*send_IPI_mask_allbutself)(const struct cpumask *mask, +- int vector); +- void (*send_IPI_allbutself)(int vector); +- void (*send_IPI_all)(int vector); +- void (*send_IPI_self)(int vector); +- /* */ +- unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); +- unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, +- const struct cpumask *andmask); +- unsigned int (*phys_pkg_id)(int index_msb); +- unsigned int (*get_apic_id)(unsigned long x); +- unsigned long (*set_apic_id)(unsigned int id); +- unsigned long apic_id_mask; +- /* wakeup_secondary_cpu */ +- int (*wakeup_cpu)(int apicid, unsigned long start_eip); +-}; +- +-extern struct genapic *genapic; +- +-extern struct genapic apic_flat; +-extern struct genapic apic_physflat; +-extern struct genapic apic_x2apic_cluster; +-extern struct genapic apic_x2apic_phys; +-extern int acpi_madt_oem_check(char *, char *); +- +-extern void apic_send_IPI_self(int vector); +-enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; +-extern enum uv_system_type get_uv_system_type(void); +-extern int is_uv_system(void); +- +-extern struct genapic apic_x2apic_uv_x; +-DECLARE_PER_CPU(int, x2apic_extra_bits); +-extern void uv_cpu_init(void); +-extern void uv_system_init(void); +-extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip); +- +-extern void setup_apic_routing(void); +- +-#endif /* _ASM_X86_GENAPIC_64_H */ +Index: linux-2.6-tip/arch/x86/include/asm/hardirq.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/hardirq.h ++++ linux-2.6-tip/arch/x86/include/asm/hardirq.h +@@ -1,11 +1,54 @@ +-#ifdef CONFIG_X86_32 +-# include "hardirq_32.h" +-#else +-# include "hardirq_64.h" ++#ifndef _ASM_X86_HARDIRQ_H ++#define _ASM_X86_HARDIRQ_H ++ ++#include ++#include ++ ++typedef struct { ++ unsigned int __softirq_pending; ++ unsigned int __nmi_count; /* arch dependent */ ++ unsigned int irq0_irqs; ++#ifdef CONFIG_X86_LOCAL_APIC ++ unsigned int apic_timer_irqs; /* arch dependent */ ++ unsigned int irq_spurious_count; ++#endif ++ unsigned int generic_irqs; /* arch dependent */ ++ unsigned int apic_perf_irqs; ++#ifdef CONFIG_SMP ++ unsigned int irq_resched_count; ++ unsigned int irq_call_count; ++ unsigned int irq_tlb_count; ++#endif ++#ifdef CONFIG_X86_MCE ++ unsigned int irq_thermal_count; ++# ifdef CONFIG_X86_64 ++ unsigned int irq_threshold_count; ++# endif + #endif ++} ____cacheline_aligned irq_cpustat_t; ++ ++DECLARE_PER_CPU(irq_cpustat_t, irq_stat); ++ ++/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ ++#define MAX_HARDIRQS_PER_CPU NR_VECTORS ++ ++#define __ARCH_IRQ_STAT ++ ++#define inc_irq_stat(member) percpu_add(irq_stat.member, 1) ++ ++#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending) ++ ++#define __ARCH_SET_SOFTIRQ_PENDING ++ ++#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x)) ++#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x)) ++ ++extern void ack_bad_irq(unsigned int irq); + + extern u64 arch_irq_stat_cpu(unsigned int 
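hardirq.h above merges the old 32-bit and 64-bit variants into a single per-CPU irq_cpustat_t, with inc_irq_stat() built on percpu_add() rather than the x86-64 PDA. A hedged sketch of how a handler accounts itself with these helpers (the handler itself is invented; the accessor and field names come from the header above):

    /* sketch: accounting an interrupt in the unified per-CPU irq_cpustat_t */
    #include <linux/interrupt.h>
    #include <asm/hardirq.h>

    static irqreturn_t demo_timer_isr(int irq, void *dev_id)
    {
            /* expands to percpu_add(irq_stat.irq0_irqs, 1) per the header above */
            inc_irq_stat(irq0_irqs);

            /* ... actual device handling would go here ... */
            return IRQ_HANDLED;
    }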
cpu); + #define arch_irq_stat_cpu arch_irq_stat_cpu + + extern u64 arch_irq_stat(void); + #define arch_irq_stat arch_irq_stat ++ ++#endif /* _ASM_X86_HARDIRQ_H */ +Index: linux-2.6-tip/arch/x86/include/asm/hardirq_32.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/hardirq_32.h ++++ /dev/null +@@ -1,30 +0,0 @@ +-#ifndef _ASM_X86_HARDIRQ_32_H +-#define _ASM_X86_HARDIRQ_32_H +- +-#include +-#include +- +-typedef struct { +- unsigned int __softirq_pending; +- unsigned long idle_timestamp; +- unsigned int __nmi_count; /* arch dependent */ +- unsigned int apic_timer_irqs; /* arch dependent */ +- unsigned int irq0_irqs; +- unsigned int irq_resched_count; +- unsigned int irq_call_count; +- unsigned int irq_tlb_count; +- unsigned int irq_thermal_count; +- unsigned int irq_spurious_count; +-} ____cacheline_aligned irq_cpustat_t; +- +-DECLARE_PER_CPU(irq_cpustat_t, irq_stat); +- +-#define __ARCH_IRQ_STAT +-#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) +- +-#define inc_irq_stat(member) (__get_cpu_var(irq_stat).member++) +- +-void ack_bad_irq(unsigned int irq); +-#include +- +-#endif /* _ASM_X86_HARDIRQ_32_H */ +Index: linux-2.6-tip/arch/x86/include/asm/hardirq_64.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/hardirq_64.h ++++ /dev/null +@@ -1,25 +0,0 @@ +-#ifndef _ASM_X86_HARDIRQ_64_H +-#define _ASM_X86_HARDIRQ_64_H +- +-#include +-#include +-#include +-#include +- +-/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ +-#define MAX_HARDIRQS_PER_CPU NR_VECTORS +- +-#define __ARCH_IRQ_STAT 1 +- +-#define inc_irq_stat(member) add_pda(member, 1) +- +-#define local_softirq_pending() read_pda(__softirq_pending) +- +-#define __ARCH_SET_SOFTIRQ_PENDING 1 +- +-#define set_softirq_pending(x) write_pda(__softirq_pending, (x)) +-#define or_softirq_pending(x) or_pda(__softirq_pending, (x)) +- +-extern void ack_bad_irq(unsigned int irq); +- +-#endif /* _ASM_X86_HARDIRQ_64_H */ +Index: linux-2.6-tip/arch/x86/include/asm/highmem.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/highmem.h ++++ linux-2.6-tip/arch/x86/include/asm/highmem.h +@@ -58,15 +58,28 @@ extern void *kmap_high(struct page *page + extern void kunmap_high(struct page *page); + + void *kmap(struct page *page); ++extern void kunmap_virt(void *ptr); ++extern struct page *kmap_to_page(void *ptr); ++void kunmap(struct page *page); ++ ++void *__kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); ++void *__kmap_atomic(struct page *page, enum km_type type); ++void *__kmap_atomic_direct(struct page *page, enum km_type type); ++void __kunmap_atomic(void *kvaddr, enum km_type type); ++void *__kmap_atomic_pfn(unsigned long pfn, enum km_type type); ++struct page *__kmap_atomic_to_page(void *ptr); ++ + void kunmap(struct page *page); + void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot); + void *kmap_atomic(struct page *page, enum km_type type); + void kunmap_atomic(void *kvaddr, enum km_type type); + void *kmap_atomic_pfn(unsigned long pfn, enum km_type type); ++void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); + struct page *kmap_atomic_to_page(void *ptr); + + #ifndef CONFIG_PARAVIRT +-#define kmap_atomic_pte(page, type) kmap_atomic(page, type) ++#define kmap_atomic_pte(page, type) kmap_atomic(page, type) ++#define kmap_atomic_pte_direct(page, type) 
kmap_atomic_direct(page, type) + #endif + + #define flush_cache_kmaps() do { } while (0) +@@ -74,6 +87,27 @@ struct page *kmap_atomic_to_page(void *p + extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn, + unsigned long end_pfn); + ++/* ++ * on PREEMPT_RT kmap_atomic() is a wrapper that uses kmap(): ++ */ ++#ifdef CONFIG_PREEMPT_RT ++# define kmap_atomic_prot(page, type, prot) ({ pagefault_disable(); kmap(page); }) ++# define kmap_atomic(page, type) ({ pagefault_disable(); kmap(page); }) ++# define kmap_atomic_pfn(pfn, type) kmap(pfn_to_page(pfn)) ++# define kunmap_atomic(kvaddr, type) do { pagefault_enable(); kunmap_virt(kvaddr); } while(0) ++# define kmap_atomic_to_page(kvaddr) kmap_to_page(kvaddr) ++# define kmap_atomic_direct(page, type) __kmap_atomic_direct(page, type) ++# define kunmap_atomic_direct(kvaddr, type) __kunmap_atomic(kvaddr, type) ++#else ++# define kmap_atomic_prot(page, type, prot) __kmap_atomic_prot(page, type, prot) ++# define kmap_atomic(page, type) __kmap_atomic(page, type) ++# define kmap_atomic_pfn(pfn, type) __kmap_atomic_pfn(pfn, type) ++# define kunmap_atomic(kvaddr, type) __kunmap_atomic(kvaddr, type) ++# define kmap_atomic_to_page(kvaddr) __kmap_atomic_to_page(kvaddr) ++# define kmap_atomic_direct(page, type) __kmap_atomic(page, type) ++# define kunmap_atomic_direct(kvaddr, type) __kunmap_atomic(kvaddr, type) ++#endif ++ + #endif /* __KERNEL__ */ + + #endif /* _ASM_X86_HIGHMEM_H */ +Index: linux-2.6-tip/arch/x86/include/asm/hw_irq.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/hw_irq.h ++++ linux-2.6-tip/arch/x86/include/asm/hw_irq.h +@@ -25,11 +25,12 @@ + #include + #include + +-#define platform_legacy_irq(irq) ((irq) < 16) +- + /* Interrupt handlers registered during init_IRQ */ + extern void apic_timer_interrupt(void); ++extern void generic_interrupt(void); + extern void error_interrupt(void); ++extern void perf_counter_interrupt(void); ++ + extern void spurious_interrupt(void); + extern void thermal_interrupt(void); + extern void reschedule_interrupt(void); +@@ -58,7 +59,7 @@ extern void make_8259A_irq(unsigned int + extern void init_8259A(int aeoi); + + /* IOAPIC */ +-#define IO_APIC_IRQ(x) (((x) >= 16) || ((1<<(x)) & io_apic_irqs)) ++#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) + extern unsigned long io_apic_irqs; + + extern void init_VISWS_APIC_irqs(void); +@@ -67,15 +68,7 @@ extern void disable_IO_APIC(void); + extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); + extern void setup_ioapic_dest(void); + +-#ifdef CONFIG_X86_64 + extern void enable_IO_APIC(void); +-#endif +- +-/* IPI functions */ +-#ifdef CONFIG_X86_32 +-extern void send_IPI_self(int vector); +-#endif +-extern void send_IPI(int dest, int vector); + + /* Statistics */ + extern atomic_t irq_err_count; +@@ -84,21 +77,11 @@ extern atomic_t irq_mis_count; + /* EISA */ + extern void eisa_set_level_irq(unsigned int irq); + +-/* Voyager functions */ +-extern asmlinkage void vic_cpi_interrupt(void); +-extern asmlinkage void vic_sys_interrupt(void); +-extern asmlinkage void vic_cmn_interrupt(void); +-extern asmlinkage void qic_timer_interrupt(void); +-extern asmlinkage void qic_invalidate_interrupt(void); +-extern asmlinkage void qic_reschedule_interrupt(void); +-extern asmlinkage void qic_enable_irq_interrupt(void); +-extern asmlinkage void qic_call_function_interrupt(void); +- + /* SMP */ + extern void smp_apic_timer_interrupt(struct pt_regs *); + extern 
void smp_spurious_interrupt(struct pt_regs *); + extern void smp_error_interrupt(struct pt_regs *); +-#ifdef CONFIG_X86_SMP ++#ifdef CONFIG_SMP + extern void smp_reschedule_interrupt(struct pt_regs *); + extern void smp_call_function_interrupt(struct pt_regs *); + extern void smp_call_function_single_interrupt(struct pt_regs *); +Index: linux-2.6-tip/arch/x86/include/asm/i8259.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/i8259.h ++++ linux-2.6-tip/arch/x86/include/asm/i8259.h +@@ -24,7 +24,7 @@ extern unsigned int cached_irq_mask; + #define SLAVE_ICW4_DEFAULT 0x01 + #define PIC_ICW4_AEOI 2 + +-extern spinlock_t i8259A_lock; ++extern raw_spinlock_t i8259A_lock; + + extern void init_8259A(int auto_eoi); + extern void enable_8259A_irq(unsigned int irq); +@@ -60,4 +60,8 @@ extern struct irq_chip i8259A_chip; + extern void mask_8259A(void); + extern void unmask_8259A(void); + ++#ifdef CONFIG_X86_32 ++extern void init_ISA_irqs(void); ++#endif ++ + #endif /* _ASM_X86_I8259_H */ +Index: linux-2.6-tip/arch/x86/include/asm/init.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/init.h +@@ -0,0 +1,18 @@ ++#ifndef _ASM_X86_INIT_32_H ++#define _ASM_X86_INIT_32_H ++ ++#ifdef CONFIG_X86_32 ++extern void __init early_ioremap_page_table_range_init(void); ++#endif ++ ++extern unsigned long __init ++kernel_physical_mapping_init(unsigned long start, ++ unsigned long end, ++ unsigned long page_size_mask); ++ ++ ++extern unsigned long __initdata e820_table_start; ++extern unsigned long __meminitdata e820_table_end; ++extern unsigned long __meminitdata e820_table_top; ++ ++#endif /* _ASM_X86_INIT_32_H */ +Index: linux-2.6-tip/arch/x86/include/asm/intel_arch_perfmon.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/intel_arch_perfmon.h ++++ /dev/null +@@ -1,31 +0,0 @@ +-#ifndef _ASM_X86_INTEL_ARCH_PERFMON_H +-#define _ASM_X86_INTEL_ARCH_PERFMON_H +- +-#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 +-#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 +- +-#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 +-#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 +- +-#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +-#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) +-#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) +-#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) +- +-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c) +-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) +-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0) +-#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ +- (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) +- +-union cpuid10_eax { +- struct { +- unsigned int version_id:8; +- unsigned int num_counters:8; +- unsigned int bit_width:8; +- unsigned int mask_length:8; +- } split; +- unsigned int full; +-}; +- +-#endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */ +Index: linux-2.6-tip/arch/x86/include/asm/io.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/io.h ++++ linux-2.6-tip/arch/x86/include/asm/io.h +@@ -5,6 +5,7 @@ + + #include + #include ++#include + + #define build_mmio_read(name, size, type, reg, barrier) \ + static inline type name(const volatile void __iomem *addr) \ +@@ -80,6 +81,98 @@ static inline void writeq(__u64 val, vol + #define readq readq + #define writeq writeq + ++/** ++ * virt_to_phys - map virtual addresses to physical ++ * @address: address to remap ++ * ++ 
* The returned physical address is the physical (CPU) mapping for ++ * the memory address given. It is only valid to use this function on ++ * addresses directly mapped or allocated via kmalloc. ++ * ++ * This function does not give bus mappings for DMA transfers. In ++ * almost all conceivable cases a device driver should not be using ++ * this function ++ */ ++ ++static inline phys_addr_t virt_to_phys(volatile void *address) ++{ ++ return __pa(address); ++} ++ ++/** ++ * phys_to_virt - map physical address to virtual ++ * @address: address to remap ++ * ++ * The returned virtual address is a current CPU mapping for ++ * the memory address given. It is only valid to use this function on ++ * addresses that have a kernel mapping ++ * ++ * This function does not handle bus mappings for DMA transfers. In ++ * almost all conceivable cases a device driver should not be using ++ * this function ++ */ ++ ++static inline void *phys_to_virt(phys_addr_t address) ++{ ++ return __va(address); ++} ++ ++/* ++ * Change "struct page" to physical address. ++ */ ++#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) ++ ++/* ++ * ISA I/O bus memory addresses are 1:1 with the physical address. ++ * However, we truncate the address to unsigned int to avoid undesirable ++ * promitions in legacy drivers. ++ */ ++static inline unsigned int isa_virt_to_bus(volatile void *address) ++{ ++ return (unsigned int)virt_to_phys(address); ++} ++#define isa_page_to_bus(page) ((unsigned int)page_to_phys(page)) ++#define isa_bus_to_virt phys_to_virt ++ ++/* ++ * However PCI ones are not necessarily 1:1 and therefore these interfaces ++ * are forbidden in portable PCI drivers. ++ * ++ * Allow them on x86 for legacy drivers, though. ++ */ ++#define virt_to_bus virt_to_phys ++#define bus_to_virt phys_to_virt ++ ++/** ++ * ioremap - map bus memory into CPU space ++ * @offset: bus address of the memory ++ * @size: size of the resource to map ++ * ++ * ioremap performs a platform specific sequence of operations to ++ * make bus memory CPU accessible via the readb/readw/readl/writeb/ ++ * writew/writel functions and the other mmio helpers. The returned ++ * address is not guaranteed to be usable directly as a virtual ++ * address. ++ * ++ * If the area you are trying to map is a PCI BAR you should have a ++ * look at pci_iomap(). 
++ */ ++extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); ++extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); ++extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, ++ unsigned long prot_val); ++ ++/* ++ * The default ioremap() behavior is non-cached: ++ */ ++static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) ++{ ++ return ioremap_nocache(offset, size); ++} ++ ++extern void iounmap(volatile void __iomem *addr); ++ ++ + #ifdef CONFIG_X86_32 + # include "io_32.h" + #else +@@ -91,7 +184,7 @@ extern void unxlate_dev_mem_ptr(unsigned + + extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, + unsigned long prot_val); +-extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size); ++extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); + + /* + * early_ioremap() and early_iounmap() are for temporary early boot-time +@@ -100,10 +193,12 @@ extern void __iomem *ioremap_wc(unsigned + */ + extern void early_ioremap_init(void); + extern void early_ioremap_reset(void); +-extern void __iomem *early_ioremap(unsigned long offset, unsigned long size); +-extern void __iomem *early_memremap(unsigned long offset, unsigned long size); ++extern void __iomem *early_ioremap(resource_size_t phys_addr, ++ unsigned long size); ++extern void __iomem *early_memremap(resource_size_t phys_addr, ++ unsigned long size); + extern void early_iounmap(void __iomem *addr, unsigned long size); +-extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); + ++#define IO_SPACE_LIMIT 0xffff + + #endif /* _ASM_X86_IO_H */ +Index: linux-2.6-tip/arch/x86/include/asm/io_32.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/io_32.h ++++ linux-2.6-tip/arch/x86/include/asm/io_32.h +@@ -37,8 +37,6 @@ + * - Arnaldo Carvalho de Melo + */ + +-#define IO_SPACE_LIMIT 0xffff +- + #define XQUAD_PORTIO_BASE 0xfe400000 + #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ + +@@ -53,92 +51,6 @@ + */ + #define xlate_dev_kmem_ptr(p) p + +-/** +- * virt_to_phys - map virtual addresses to physical +- * @address: address to remap +- * +- * The returned physical address is the physical (CPU) mapping for +- * the memory address given. It is only valid to use this function on +- * addresses directly mapped or allocated via kmalloc. +- * +- * This function does not give bus mappings for DMA transfers. In +- * almost all conceivable cases a device driver should not be using +- * this function +- */ +- +-static inline unsigned long virt_to_phys(volatile void *address) +-{ +- return __pa(address); +-} +- +-/** +- * phys_to_virt - map physical address to virtual +- * @address: address to remap +- * +- * The returned virtual address is a current CPU mapping for +- * the memory address given. It is only valid to use this function on +- * addresses that have a kernel mapping +- * +- * This function does not handle bus mappings for DMA transfers. In +- * almost all conceivable cases a device driver should not be using +- * this function +- */ +- +-static inline void *phys_to_virt(unsigned long address) +-{ +- return __va(address); +-} +- +-/* +- * Change "struct page" to physical address. 
+- */ +-#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) +- +-/** +- * ioremap - map bus memory into CPU space +- * @offset: bus address of the memory +- * @size: size of the resource to map +- * +- * ioremap performs a platform specific sequence of operations to +- * make bus memory CPU accessible via the readb/readw/readl/writeb/ +- * writew/writel functions and the other mmio helpers. The returned +- * address is not guaranteed to be usable directly as a virtual +- * address. +- * +- * If the area you are trying to map is a PCI BAR you should have a +- * look at pci_iomap(). +- */ +-extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); +-extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); +-extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, +- unsigned long prot_val); +- +-/* +- * The default ioremap() behavior is non-cached: +- */ +-static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) +-{ +- return ioremap_nocache(offset, size); +-} +- +-extern void iounmap(volatile void __iomem *addr); +- +-/* +- * ISA I/O bus memory addresses are 1:1 with the physical address. +- */ +-#define isa_virt_to_bus virt_to_phys +-#define isa_page_to_bus page_to_phys +-#define isa_bus_to_virt phys_to_virt +- +-/* +- * However PCI ones are not necessarily 1:1 and therefore these interfaces +- * are forbidden in portable PCI drivers. +- * +- * Allow them on x86 for legacy drivers, though. +- */ +-#define virt_to_bus virt_to_phys +-#define bus_to_virt phys_to_virt +- + static inline void + memset_io(volatile void __iomem *addr, unsigned char val, int count) + { +Index: linux-2.6-tip/arch/x86/include/asm/io_64.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/io_64.h ++++ linux-2.6-tip/arch/x86/include/asm/io_64.h +@@ -136,73 +136,12 @@ __OUTS(b) + __OUTS(w) + __OUTS(l) + +-#define IO_SPACE_LIMIT 0xffff +- + #if defined(__KERNEL__) && defined(__x86_64__) + + #include + +-#ifndef __i386__ +-/* +- * Change virtual addresses to physical addresses and vv. +- * These are pretty trivial +- */ +-static inline unsigned long virt_to_phys(volatile void *address) +-{ +- return __pa(address); +-} +- +-static inline void *phys_to_virt(unsigned long address) +-{ +- return __va(address); +-} +-#endif +- +-/* +- * Change "struct page" to physical address. +- */ +-#define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) +- + #include + +-/* +- * This one maps high address device memory and turns off caching for that area. +- * it's useful if some control registers are in such an area and write combining +- * or read caching is not desirable: +- */ +-extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); +-extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); +-extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, +- unsigned long prot_val); +- +-/* +- * The default ioremap() behavior is non-cached: +- */ +-static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) +-{ +- return ioremap_nocache(offset, size); +-} +- +-extern void iounmap(volatile void __iomem *addr); +- +-extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys); +- +-/* +- * ISA I/O bus memory addresses are 1:1 with the physical address. 
+- */ +-#define isa_virt_to_bus virt_to_phys +-#define isa_page_to_bus page_to_phys +-#define isa_bus_to_virt phys_to_virt +- +-/* +- * However PCI ones are not necessarily 1:1 and therefore these interfaces +- * are forbidden in portable PCI drivers. +- * +- * Allow them on x86 for legacy drivers, though. +- */ +-#define virt_to_bus virt_to_phys +-#define bus_to_virt phys_to_virt +- + void __memcpy_fromio(void *, unsigned long, unsigned); + void __memcpy_toio(unsigned long, const void *, unsigned); + +Index: linux-2.6-tip/arch/x86/include/asm/io_apic.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/io_apic.h ++++ linux-2.6-tip/arch/x86/include/asm/io_apic.h +@@ -114,38 +114,16 @@ struct IR_IO_APIC_route_entry { + extern int nr_ioapics; + extern int nr_ioapic_registers[MAX_IO_APICS]; + +-/* +- * MP-BIOS irq configuration table structures: +- */ +- + #define MP_MAX_IOAPIC_PIN 127 + +-struct mp_config_ioapic { +- unsigned long mp_apicaddr; +- unsigned int mp_apicid; +- unsigned char mp_type; +- unsigned char mp_apicver; +- unsigned char mp_flags; +-}; +- +-struct mp_config_intsrc { +- unsigned int mp_dstapic; +- unsigned char mp_type; +- unsigned char mp_irqtype; +- unsigned short mp_irqflag; +- unsigned char mp_srcbus; +- unsigned char mp_srcbusirq; +- unsigned char mp_dstirq; +-}; +- + /* I/O APIC entries */ +-extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; ++extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; + + /* # of MP IRQ source entries */ + extern int mp_irq_entries; + + /* MP IRQ source entries */ +-extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; ++extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; + + /* non-0 if default (table-less) MP configuration */ + extern int mpc_default_type; +@@ -165,15 +143,6 @@ extern int noioapicreroute; + /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ + extern int timer_through_8259; + +-static inline void disable_ioapic_setup(void) +-{ +-#ifdef CONFIG_PCI +- noioapicquirk = 1; +- noioapicreroute = -1; +-#endif +- skip_ioapic_setup = 1; +-} +- + /* + * If we use the IO-APIC for IRQ routing, disable automatic + * assignment of PCI IRQ's. 
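/*
 * Illustrative sketch (not part of the patch): a minimal driver-style use
 * of the ioremap()/iounmap() and virt_to_phys() interfaces that the io.h
 * hunks above consolidate out of io_32.h/io_64.h.  EXAMPLE_MMIO_BASE,
 * EXAMPLE_MMIO_SIZE and example_map_device() are hypothetical names made
 * up for this example only.
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/io.h>

#define EXAMPLE_MMIO_BASE	0xfed00000UL	/* hypothetical BAR address */
#define EXAMPLE_MMIO_SIZE	0x1000UL

static int example_map_device(void)
{
	void __iomem *regs;
	void *buf;

	/* The default ioremap() is uncached, i.e. ioremap_nocache(). */
	regs = ioremap(EXAMPLE_MMIO_BASE, EXAMPLE_MMIO_SIZE);
	if (!regs)
		return -ENOMEM;

	writel(0x1, regs);	/* MMIO access through the mmio helpers */

	/* virt_to_phys() is only valid for directly-mapped/kmalloc memory. */
	buf = kmalloc(256, GFP_KERNEL);
	if (buf)
		pr_info("buffer at phys 0x%llx\n",
			(unsigned long long)virt_to_phys(buf));

	kfree(buf);
	iounmap(regs);
	return 0;
}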
+@@ -193,13 +162,20 @@ extern int (*ioapic_renumber_irq)(int io + extern void ioapic_init_mappings(void); + + #ifdef CONFIG_X86_64 +-extern int save_mask_IO_APIC_setup(void); ++extern int save_IO_APIC_setup(void); ++extern void mask_IO_APIC_setup(void); + extern void restore_IO_APIC_setup(void); + extern void reinit_intr_remapped_IO_APIC(int); + #endif + + extern void probe_nr_irqs_gsi(void); + ++extern int setup_ioapic_entry(int apic, int irq, ++ struct IO_APIC_route_entry *entry, ++ unsigned int destination, int trigger, ++ int polarity, int vector, int pin); ++extern void ioapic_write_entry(int apic, int pin, ++ struct IO_APIC_route_entry e); + #else /* !CONFIG_X86_IO_APIC */ + #define io_apic_assign_pci_irqs 0 + static const int timer_through_8259 = 0; +Index: linux-2.6-tip/arch/x86/include/asm/iommu.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/iommu.h ++++ linux-2.6-tip/arch/x86/include/asm/iommu.h +@@ -3,7 +3,7 @@ + + extern void pci_iommu_shutdown(void); + extern void no_iommu_init(void); +-extern struct dma_mapping_ops nommu_dma_ops; ++extern struct dma_map_ops nommu_dma_ops; + extern int force_iommu, no_iommu; + extern int iommu_detected; + +Index: linux-2.6-tip/arch/x86/include/asm/ipi.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/ipi.h ++++ linux-2.6-tip/arch/x86/include/asm/ipi.h +@@ -1,6 +1,8 @@ + #ifndef _ASM_X86_IPI_H + #define _ASM_X86_IPI_H + ++#ifdef CONFIG_X86_LOCAL_APIC ++ + /* + * Copyright 2004 James Cleverdon, IBM. + * Subject to the GNU Public License, v.2 +@@ -55,8 +57,8 @@ static inline void __xapic_wait_icr_idle + cpu_relax(); + } + +-static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, +- unsigned int dest) ++static inline void ++__default_send_IPI_shortcut(unsigned int shortcut, int vector, unsigned int dest) + { + /* + * Subtle. In the case of the 'never do double writes' workaround +@@ -87,8 +89,8 @@ static inline void __send_IPI_shortcut(u + * This is used to send an IPI with no shorthand notation (the destination is + * specified in bits 56 to 63 of the ICR). + */ +-static inline void __send_IPI_dest_field(unsigned int mask, int vector, +- unsigned int dest) ++static inline void ++ __default_send_IPI_dest_field(unsigned int mask, int vector, unsigned int dest) + { + unsigned long cfg; + +@@ -117,41 +119,44 @@ static inline void __send_IPI_dest_field + native_apic_mem_write(APIC_ICR, cfg); + } + +-static inline void send_IPI_mask_sequence(const struct cpumask *mask, +- int vector) +-{ +- unsigned long flags; +- unsigned long query_cpu; ++extern void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, ++ int vector); ++extern void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, ++ int vector); ++extern void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, ++ int vector); ++extern void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, ++ int vector); ++ ++/* Avoid include hell */ ++#define NMI_VECTOR 0x02 + +- /* +- * Hack. The clustered APIC addressing mode doesn't allow us to send +- * to an arbitrary mask, so I do a unicast to each CPU instead. 
+- * - mbligh +- */ +- local_irq_save(flags); +- for_each_cpu(query_cpu, mask) { +- __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), +- vector, APIC_DEST_PHYSICAL); +- } +- local_irq_restore(flags); ++extern int no_broadcast; ++ ++static inline void __default_local_send_IPI_allbutself(int vector) ++{ ++ if (no_broadcast || vector == NMI_VECTOR) ++ apic->send_IPI_mask_allbutself(cpu_online_mask, vector); ++ else ++ __default_send_IPI_shortcut(APIC_DEST_ALLBUT, vector, apic->dest_logical); + } + +-static inline void send_IPI_mask_allbutself(const struct cpumask *mask, +- int vector) ++static inline void __default_local_send_IPI_all(int vector) + { +- unsigned long flags; +- unsigned int query_cpu; +- unsigned int this_cpu = smp_processor_id(); ++ if (no_broadcast || vector == NMI_VECTOR) ++ apic->send_IPI_mask(cpu_online_mask, vector); ++ else ++ __default_send_IPI_shortcut(APIC_DEST_ALLINC, vector, apic->dest_logical); ++} + +- /* See Hack comment above */ ++#ifdef CONFIG_X86_32 ++extern void default_send_IPI_mask_logical(const struct cpumask *mask, ++ int vector); ++extern void default_send_IPI_allbutself(int vector); ++extern void default_send_IPI_all(int vector); ++extern void default_send_IPI_self(int vector); ++#endif + +- local_irq_save(flags); +- for_each_cpu(query_cpu, mask) +- if (query_cpu != this_cpu) +- __send_IPI_dest_field( +- per_cpu(x86_cpu_to_apicid, query_cpu), +- vector, APIC_DEST_PHYSICAL); +- local_irq_restore(flags); +-} ++#endif + + #endif /* _ASM_X86_IPI_H */ +Index: linux-2.6-tip/arch/x86/include/asm/irq.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/irq.h ++++ linux-2.6-tip/arch/x86/include/asm/irq.h +@@ -36,9 +36,12 @@ static inline int irq_canonicalize(int i + extern void fixup_irqs(void); + #endif + +-extern unsigned int do_IRQ(struct pt_regs *regs); ++extern void (*generic_interrupt_extension)(void); + extern void init_IRQ(void); + extern void native_init_IRQ(void); ++extern bool handle_irq(unsigned irq, struct pt_regs *regs); ++ ++extern unsigned int do_IRQ(struct pt_regs *regs); + + /* Interrupt vector management */ + extern DECLARE_BITMAP(used_vectors, NR_VECTORS); +Index: linux-2.6-tip/arch/x86/include/asm/irq_regs.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/irq_regs.h ++++ linux-2.6-tip/arch/x86/include/asm/irq_regs.h +@@ -1,5 +1,31 @@ +-#ifdef CONFIG_X86_32 +-# include "irq_regs_32.h" +-#else +-# include "irq_regs_64.h" +-#endif ++/* ++ * Per-cpu current frame pointer - the location of the last exception frame on ++ * the stack, stored in the per-cpu area. 
++ * ++ * Jeremy Fitzhardinge ++ */ ++#ifndef _ASM_X86_IRQ_REGS_H ++#define _ASM_X86_IRQ_REGS_H ++ ++#include ++ ++#define ARCH_HAS_OWN_IRQ_REGS ++ ++DECLARE_PER_CPU(struct pt_regs *, irq_regs); ++ ++static inline struct pt_regs *get_irq_regs(void) ++{ ++ return percpu_read(irq_regs); ++} ++ ++static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) ++{ ++ struct pt_regs *old_regs; ++ ++ old_regs = get_irq_regs(); ++ percpu_write(irq_regs, new_regs); ++ ++ return old_regs; ++} ++ ++#endif /* _ASM_X86_IRQ_REGS_32_H */ +Index: linux-2.6-tip/arch/x86/include/asm/irq_regs_32.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/irq_regs_32.h ++++ /dev/null +@@ -1,31 +0,0 @@ +-/* +- * Per-cpu current frame pointer - the location of the last exception frame on +- * the stack, stored in the per-cpu area. +- * +- * Jeremy Fitzhardinge +- */ +-#ifndef _ASM_X86_IRQ_REGS_32_H +-#define _ASM_X86_IRQ_REGS_32_H +- +-#include +- +-#define ARCH_HAS_OWN_IRQ_REGS +- +-DECLARE_PER_CPU(struct pt_regs *, irq_regs); +- +-static inline struct pt_regs *get_irq_regs(void) +-{ +- return x86_read_percpu(irq_regs); +-} +- +-static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) +-{ +- struct pt_regs *old_regs; +- +- old_regs = get_irq_regs(); +- x86_write_percpu(irq_regs, new_regs); +- +- return old_regs; +-} +- +-#endif /* _ASM_X86_IRQ_REGS_32_H */ +Index: linux-2.6-tip/arch/x86/include/asm/irq_regs_64.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/irq_regs_64.h ++++ /dev/null +@@ -1 +0,0 @@ +-#include +Index: linux-2.6-tip/arch/x86/include/asm/irq_remapping.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/irq_remapping.h ++++ linux-2.6-tip/arch/x86/include/asm/irq_remapping.h +@@ -1,8 +1,6 @@ + #ifndef _ASM_X86_IRQ_REMAPPING_H + #define _ASM_X86_IRQ_REMAPPING_H + +-extern int x2apic; +- + #define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8) + + #endif /* _ASM_X86_IRQ_REMAPPING_H */ +Index: linux-2.6-tip/arch/x86/include/asm/irq_vectors.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/irq_vectors.h ++++ linux-2.6-tip/arch/x86/include/asm/irq_vectors.h +@@ -1,47 +1,69 @@ + #ifndef _ASM_X86_IRQ_VECTORS_H + #define _ASM_X86_IRQ_VECTORS_H + +-#include ++/* ++ * Linux IRQ vector layout. ++ * ++ * There are 256 IDT entries (per CPU - each entry is 8 bytes) which can ++ * be defined by Linux. They are used as a jump table by the CPU when a ++ * given vector is triggered - by a CPU-external, CPU-internal or ++ * software-triggered event. ++ * ++ * Linux sets the kernel code address each entry jumps to early during ++ * bootup, and never changes them. This is the general layout of the ++ * IDT entries: ++ * ++ * Vectors 0 ... 31 : system traps and exceptions - hardcoded events ++ * Vectors 32 ... 127 : device interrupts ++ * Vector 128 : legacy int80 syscall interface ++ * Vectors 129 ... 237 : device interrupts ++ * Vectors 238 ... 255 : special interrupts ++ * ++ * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. 
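/*
 * Illustrative sketch (not part of the patch): the usual save/restore
 * pattern for the get_irq_regs()/set_irq_regs() accessors defined in the
 * unified irq_regs.h above.  example_do_IRQ() is a hypothetical entry
 * point; the pattern mirrors what the x86 do_IRQ() path does.
 */
#include <asm/irq_regs.h>
#include <asm/ptrace.h>

unsigned int example_do_IRQ(struct pt_regs *regs)
{
	/* Publish this CPU's exception frame for the duration of the IRQ. */
	struct pt_regs *old_regs = set_irq_regs(regs);

	/*
	 * ... dispatch the interrupt here; any code running on this CPU
	 * (profiling, NMI backtraces, ...) can reach the frame through
	 * get_irq_regs() while we are inside the handler ...
	 */

	/* Restore the previous frame pointer so nested entries keep working. */
	set_irq_regs(old_regs);
	return 1;
}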
++ * ++ * This file enumerates the exact layout of them: ++ */ + +-#define NMI_VECTOR 0x02 ++#define NMI_VECTOR 0x02 + + /* + * IDT vectors usable for external interrupt sources start + * at 0x20: + */ +-#define FIRST_EXTERNAL_VECTOR 0x20 ++#define FIRST_EXTERNAL_VECTOR 0x20 + + #ifdef CONFIG_X86_32 +-# define SYSCALL_VECTOR 0x80 ++# define SYSCALL_VECTOR 0x80 + #else +-# define IA32_SYSCALL_VECTOR 0x80 ++# define IA32_SYSCALL_VECTOR 0x80 + #endif + + /* + * Reserve the lowest usable priority level 0x20 - 0x2f for triggering + * cleanup after irq migration. + */ +-#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR ++#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR + + /* + * Vectors 0x30-0x3f are used for ISA interrupts. + */ +-#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) +-#define IRQ1_VECTOR (IRQ0_VECTOR + 1) +-#define IRQ2_VECTOR (IRQ0_VECTOR + 2) +-#define IRQ3_VECTOR (IRQ0_VECTOR + 3) +-#define IRQ4_VECTOR (IRQ0_VECTOR + 4) +-#define IRQ5_VECTOR (IRQ0_VECTOR + 5) +-#define IRQ6_VECTOR (IRQ0_VECTOR + 6) +-#define IRQ7_VECTOR (IRQ0_VECTOR + 7) +-#define IRQ8_VECTOR (IRQ0_VECTOR + 8) +-#define IRQ9_VECTOR (IRQ0_VECTOR + 9) +-#define IRQ10_VECTOR (IRQ0_VECTOR + 10) +-#define IRQ11_VECTOR (IRQ0_VECTOR + 11) +-#define IRQ12_VECTOR (IRQ0_VECTOR + 12) +-#define IRQ13_VECTOR (IRQ0_VECTOR + 13) +-#define IRQ14_VECTOR (IRQ0_VECTOR + 14) +-#define IRQ15_VECTOR (IRQ0_VECTOR + 15) ++#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) ++ ++#define IRQ1_VECTOR (IRQ0_VECTOR + 1) ++#define IRQ2_VECTOR (IRQ0_VECTOR + 2) ++#define IRQ3_VECTOR (IRQ0_VECTOR + 3) ++#define IRQ4_VECTOR (IRQ0_VECTOR + 4) ++#define IRQ5_VECTOR (IRQ0_VECTOR + 5) ++#define IRQ6_VECTOR (IRQ0_VECTOR + 6) ++#define IRQ7_VECTOR (IRQ0_VECTOR + 7) ++#define IRQ8_VECTOR (IRQ0_VECTOR + 8) ++#define IRQ9_VECTOR (IRQ0_VECTOR + 9) ++#define IRQ10_VECTOR (IRQ0_VECTOR + 10) ++#define IRQ11_VECTOR (IRQ0_VECTOR + 11) ++#define IRQ12_VECTOR (IRQ0_VECTOR + 12) ++#define IRQ13_VECTOR (IRQ0_VECTOR + 13) ++#define IRQ14_VECTOR (IRQ0_VECTOR + 14) ++#define IRQ15_VECTOR (IRQ0_VECTOR + 15) + + /* + * Special IRQ vectors used by the SMP architecture, 0xf0-0xff +@@ -49,119 +71,103 @@ + * some of the following vectors are 'rare', they are merged + * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. + * TLB, reschedule and local APIC vectors are performance-critical. +- * +- * Vectors 0xf0-0xfa are free (reserved for future Linux use). 
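/*
 * Illustrative arithmetic (not part of the patch): with the definitions
 * above, the 16 ISA IRQs land on vectors 0x30-0x3f, i.e.
 * FIRST_EXTERNAL_VECTOR (0x20) + 0x10 + irq.  A hypothetical compile-time
 * sanity check spelling that out:
 */
#include <linux/kernel.h>
#include <asm/irq_vectors.h>

static inline void example_vector_layout_checks(void)
{
	BUILD_BUG_ON(IRQ0_VECTOR  != 0x30);	/* 0x20 + 0x10      */
	BUILD_BUG_ON(IRQ15_VECTOR != 0x3f);	/* IRQ0_VECTOR + 15 */
	BUILD_BUG_ON(IRQ0_VECTOR  <= IRQ_MOVE_CLEANUP_VECTOR);
}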
+ */ +-#ifdef CONFIG_X86_32 +- +-# define SPURIOUS_APIC_VECTOR 0xff +-# define ERROR_APIC_VECTOR 0xfe +-# define INVALIDATE_TLB_VECTOR 0xfd +-# define RESCHEDULE_VECTOR 0xfc +-# define CALL_FUNCTION_VECTOR 0xfb +-# define CALL_FUNCTION_SINGLE_VECTOR 0xfa +-# define THERMAL_APIC_VECTOR 0xf0 +- +-#else + + #define SPURIOUS_APIC_VECTOR 0xff ++/* ++ * Sanity check ++ */ ++#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) ++# error SPURIOUS_APIC_VECTOR definition error ++#endif ++ + #define ERROR_APIC_VECTOR 0xfe + #define RESCHEDULE_VECTOR 0xfd + #define CALL_FUNCTION_VECTOR 0xfc + #define CALL_FUNCTION_SINGLE_VECTOR 0xfb + #define THERMAL_APIC_VECTOR 0xfa +-#define THRESHOLD_APIC_VECTOR 0xf9 +-#define UV_BAU_MESSAGE 0xf8 +-#define INVALIDATE_TLB_VECTOR_END 0xf7 +-#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ +- +-#define NUM_INVALIDATE_TLB_VECTORS 8 + ++#ifdef CONFIG_X86_32 ++/* 0xf8 - 0xf9 : free */ ++#else ++# define THRESHOLD_APIC_VECTOR 0xf9 ++# define UV_BAU_MESSAGE 0xf8 + #endif + ++/* f0-f7 used for spreading out TLB flushes: */ ++#define INVALIDATE_TLB_VECTOR_END 0xf7 ++#define INVALIDATE_TLB_VECTOR_START 0xf0 ++#define NUM_INVALIDATE_TLB_VECTORS 8 ++ + /* + * Local APIC timer IRQ vector is on a different priority level, + * to work around the 'lost local interrupt if more than 2 IRQ + * sources per level' errata. + */ +-#define LOCAL_TIMER_VECTOR 0xef ++#define LOCAL_TIMER_VECTOR 0xef ++ ++/* ++ * Performance monitoring interrupt vector: ++ */ ++#define LOCAL_PERF_VECTOR 0xee ++ ++/* ++ * Generic system vector for platform specific use ++ */ ++#define GENERIC_INTERRUPT_VECTOR 0xed + + /* + * First APIC vector available to drivers: (vectors 0x30-0xee) we + * start at 0x31(0x41) to spread out vectors evenly between priority + * levels. (0x80 is the syscall vector) + */ +-#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) ++#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) + +-#define NR_VECTORS 256 ++#define NR_VECTORS 256 + +-#define FPU_IRQ 13 ++#define FPU_IRQ 13 + +-#define FIRST_VM86_IRQ 3 +-#define LAST_VM86_IRQ 15 +-#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) ++#define FIRST_VM86_IRQ 3 ++#define LAST_VM86_IRQ 15 + +-#define NR_IRQS_LEGACY 16 +- +-#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) +- +-#ifndef CONFIG_SPARSE_IRQ +-# if NR_CPUS < MAX_IO_APICS +-# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) +-# else +-# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) +-# endif +-#else +-# if (8 * NR_CPUS) > (32 * MAX_IO_APICS) +-# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) +-# else +-# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) +-# endif ++#ifndef __ASSEMBLY__ ++static inline int invalid_vm86_irq(int irq) ++{ ++ return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ; ++} + #endif + +-#elif defined(CONFIG_X86_VOYAGER) +- +-# define NR_IRQS 224 ++/* ++ * Size the maximum number of interrupts. ++ * ++ * If the irq_desc[] array has a sparse layout, we can size things ++ * generously - it scales up linearly with the maximum number of CPUs, ++ * and the maximum number of IO-APICs, whichever is higher. ++ * ++ * In other cases we size more conservatively, to not create too large ++ * static arrays. ++ */ + +-#else /* IO_APIC || VOYAGER */ ++#define NR_IRQS_LEGACY 16 + +-# define NR_IRQS 16 ++#define CPU_VECTOR_LIMIT ( 8 * NR_CPUS ) ++#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) + ++#ifdef CONFIG_X86_IO_APIC ++# ifdef CONFIG_SPARSE_IRQ ++# define NR_IRQS \ ++ (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? 
\ ++ (NR_VECTORS + CPU_VECTOR_LIMIT) : \ ++ (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) ++# else ++# if NR_CPUS < MAX_IO_APICS ++# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) ++# else ++# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) ++# endif ++# endif ++#else /* !CONFIG_X86_IO_APIC: */ ++# define NR_IRQS NR_IRQS_LEGACY + #endif + +-/* Voyager specific defines */ +-/* These define the CPIs we use in linux */ +-#define VIC_CPI_LEVEL0 0 +-#define VIC_CPI_LEVEL1 1 +-/* now the fake CPIs */ +-#define VIC_TIMER_CPI 2 +-#define VIC_INVALIDATE_CPI 3 +-#define VIC_RESCHEDULE_CPI 4 +-#define VIC_ENABLE_IRQ_CPI 5 +-#define VIC_CALL_FUNCTION_CPI 6 +-#define VIC_CALL_FUNCTION_SINGLE_CPI 7 +- +-/* Now the QIC CPIs: Since we don't need the two initial levels, +- * these are 2 less than the VIC CPIs */ +-#define QIC_CPI_OFFSET 1 +-#define QIC_TIMER_CPI (VIC_TIMER_CPI - QIC_CPI_OFFSET) +-#define QIC_INVALIDATE_CPI (VIC_INVALIDATE_CPI - QIC_CPI_OFFSET) +-#define QIC_RESCHEDULE_CPI (VIC_RESCHEDULE_CPI - QIC_CPI_OFFSET) +-#define QIC_ENABLE_IRQ_CPI (VIC_ENABLE_IRQ_CPI - QIC_CPI_OFFSET) +-#define QIC_CALL_FUNCTION_CPI (VIC_CALL_FUNCTION_CPI - QIC_CPI_OFFSET) +-#define QIC_CALL_FUNCTION_SINGLE_CPI (VIC_CALL_FUNCTION_SINGLE_CPI - QIC_CPI_OFFSET) +- +-#define VIC_START_FAKE_CPI VIC_TIMER_CPI +-#define VIC_END_FAKE_CPI VIC_CALL_FUNCTION_SINGLE_CPI +- +-/* this is the SYS_INT CPI. */ +-#define VIC_SYS_INT 8 +-#define VIC_CMN_INT 15 +- +-/* This is the boot CPI for alternate processors. It gets overwritten +- * by the above once the system has activated all available processors */ +-#define VIC_CPU_BOOT_CPI VIC_CPI_LEVEL0 +-#define VIC_CPU_BOOT_ERRATA_CPI (VIC_CPI_LEVEL0 + 8) +- +- + #endif /* _ASM_X86_IRQ_VECTORS_H */ +Index: linux-2.6-tip/arch/x86/include/asm/kexec.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/kexec.h ++++ linux-2.6-tip/arch/x86/include/asm/kexec.h +@@ -10,27 +10,12 @@ + #else + # define PA_CONTROL_PAGE 0 + # define VA_CONTROL_PAGE 1 +-# define PA_PGD 2 +-# define VA_PGD 3 +-# define PA_PUD_0 4 +-# define VA_PUD_0 5 +-# define PA_PMD_0 6 +-# define VA_PMD_0 7 +-# define PA_PTE_0 8 +-# define VA_PTE_0 9 +-# define PA_PUD_1 10 +-# define VA_PUD_1 11 +-# define PA_PMD_1 12 +-# define VA_PMD_1 13 +-# define PA_PTE_1 14 +-# define VA_PTE_1 15 +-# define PA_TABLE_PAGE 16 +-# define PAGES_NR 17 ++# define PA_TABLE_PAGE 2 ++# define PA_SWAP_PAGE 3 ++# define PAGES_NR 4 + #endif + +-#ifdef CONFIG_X86_32 + # define KEXEC_CONTROL_CODE_MAX_SIZE 2048 +-#endif + + #ifndef __ASSEMBLY__ + +@@ -151,15 +136,16 @@ relocate_kernel(unsigned long indirectio + unsigned int has_pae, + unsigned int preserve_context); + #else +-NORET_TYPE void ++unsigned long + relocate_kernel(unsigned long indirection_page, + unsigned long page_list, +- unsigned long start_address) ATTRIB_NORET; ++ unsigned long start_address, ++ unsigned int preserve_context); + #endif + +-#ifdef CONFIG_X86_32 + #define ARCH_HAS_KIMAGE_ARCH + ++#ifdef CONFIG_X86_32 + struct kimage_arch { + pgd_t *pgd; + #ifdef CONFIG_X86_PAE +@@ -169,6 +155,12 @@ struct kimage_arch { + pte_t *pte0; + pte_t *pte1; + }; ++#else ++struct kimage_arch { ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++}; + #endif + + #endif /* __ASSEMBLY__ */ +Index: linux-2.6-tip/arch/x86/include/asm/kmemcheck.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/kmemcheck.h +@@ -0,0 +1,42 @@ ++#ifndef ASM_X86_KMEMCHECK_H ++#define 
ASM_X86_KMEMCHECK_H ++ ++#include ++#include ++ ++#ifdef CONFIG_KMEMCHECK ++bool kmemcheck_active(struct pt_regs *regs); ++ ++void kmemcheck_show(struct pt_regs *regs); ++void kmemcheck_hide(struct pt_regs *regs); ++ ++bool kmemcheck_fault(struct pt_regs *regs, ++ unsigned long address, unsigned long error_code); ++bool kmemcheck_trap(struct pt_regs *regs); ++#else ++static inline bool kmemcheck_active(struct pt_regs *regs) ++{ ++ return false; ++} ++ ++static inline void kmemcheck_show(struct pt_regs *regs) ++{ ++} ++ ++static inline void kmemcheck_hide(struct pt_regs *regs) ++{ ++} ++ ++static inline bool kmemcheck_fault(struct pt_regs *regs, ++ unsigned long address, unsigned long error_code) ++{ ++ return false; ++} ++ ++static inline bool kmemcheck_trap(struct pt_regs *regs) ++{ ++ return false; ++} ++#endif /* CONFIG_KMEMCHECK */ ++ ++#endif +Index: linux-2.6-tip/arch/x86/include/asm/linkage.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/linkage.h ++++ linux-2.6-tip/arch/x86/include/asm/linkage.h +@@ -1,14 +1,11 @@ + #ifndef _ASM_X86_LINKAGE_H + #define _ASM_X86_LINKAGE_H + ++#include ++ + #undef notrace + #define notrace __attribute__((no_instrument_function)) + +-#ifdef CONFIG_X86_64 +-#define __ALIGN .p2align 4,,15 +-#define __ALIGN_STR ".p2align 4,,15" +-#endif +- + #ifdef CONFIG_X86_32 + #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) + /* +@@ -50,72 +47,20 @@ + __asmlinkage_protect_n(ret, "g" (arg1), "g" (arg2), "g" (arg3), \ + "g" (arg4), "g" (arg5), "g" (arg6)) + +-#endif ++#endif /* CONFIG_X86_32 */ + +-#ifdef CONFIG_X86_ALIGNMENT_16 +-#define __ALIGN .align 16,0x90 +-#define __ALIGN_STR ".align 16,0x90" +-#endif ++#ifdef __ASSEMBLY__ + +-/* +- * to check ENTRY_X86/END_X86 and +- * KPROBE_ENTRY_X86/KPROBE_END_X86 +- * unbalanced-missed-mixed appearance +- */ +-#define __set_entry_x86 .set ENTRY_X86_IN, 0 +-#define __unset_entry_x86 .set ENTRY_X86_IN, 1 +-#define __set_kprobe_x86 .set KPROBE_X86_IN, 0 +-#define __unset_kprobe_x86 .set KPROBE_X86_IN, 1 +- +-#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed" +- +-#define __check_entry_x86 \ +- .ifdef ENTRY_X86_IN; \ +- .ifeq ENTRY_X86_IN; \ +- __macro_err_x86; \ +- .abort; \ +- .endif; \ +- .endif +- +-#define __check_kprobe_x86 \ +- .ifdef KPROBE_X86_IN; \ +- .ifeq KPROBE_X86_IN; \ +- __macro_err_x86; \ +- .abort; \ +- .endif; \ +- .endif +- +-#define __check_entry_kprobe_x86 \ +- __check_entry_x86; \ +- __check_kprobe_x86 +- +-#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86 +- +-#define ENTRY_X86(name) \ +- __check_entry_kprobe_x86; \ +- __set_entry_x86; \ +- .globl name; \ +- __ALIGN; \ ++#define GLOBAL(name) \ ++ .globl name; \ + name: + +-#define END_X86(name) \ +- __unset_entry_x86; \ +- __check_entry_kprobe_x86; \ +- .size name, .-name +- +-#define KPROBE_ENTRY_X86(name) \ +- __check_entry_kprobe_x86; \ +- __set_kprobe_x86; \ +- .pushsection .kprobes.text, "ax"; \ +- .globl name; \ +- __ALIGN; \ +- name: ++#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16) ++#define __ALIGN .p2align 4, 0x90 ++#define __ALIGN_STR __stringify(__ALIGN) ++#endif + +-#define KPROBE_END_X86(name) \ +- __unset_kprobe_x86; \ +- __check_entry_kprobe_x86; \ +- .size name, .-name; \ +- .popsection ++#endif /* __ASSEMBLY__ */ + + #endif /* _ASM_X86_LINKAGE_H */ + +Index: linux-2.6-tip/arch/x86/include/asm/mach-default/apm.h +=================================================================== +--- 
linux-2.6-tip.orig/arch/x86/include/asm/mach-default/apm.h ++++ /dev/null +@@ -1,73 +0,0 @@ +-/* +- * Machine specific APM BIOS functions for generic. +- * Split out from apm.c by Osamu Tomita +- */ +- +-#ifndef _ASM_X86_MACH_DEFAULT_APM_H +-#define _ASM_X86_MACH_DEFAULT_APM_H +- +-#ifdef APM_ZERO_SEGS +-# define APM_DO_ZERO_SEGS \ +- "pushl %%ds\n\t" \ +- "pushl %%es\n\t" \ +- "xorl %%edx, %%edx\n\t" \ +- "mov %%dx, %%ds\n\t" \ +- "mov %%dx, %%es\n\t" \ +- "mov %%dx, %%fs\n\t" \ +- "mov %%dx, %%gs\n\t" +-# define APM_DO_POP_SEGS \ +- "popl %%es\n\t" \ +- "popl %%ds\n\t" +-#else +-# define APM_DO_ZERO_SEGS +-# define APM_DO_POP_SEGS +-#endif +- +-static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, +- u32 *eax, u32 *ebx, u32 *ecx, +- u32 *edx, u32 *esi) +-{ +- /* +- * N.B. We do NOT need a cld after the BIOS call +- * because we always save and restore the flags. +- */ +- __asm__ __volatile__(APM_DO_ZERO_SEGS +- "pushl %%edi\n\t" +- "pushl %%ebp\n\t" +- "lcall *%%cs:apm_bios_entry\n\t" +- "setc %%al\n\t" +- "popl %%ebp\n\t" +- "popl %%edi\n\t" +- APM_DO_POP_SEGS +- : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx), +- "=S" (*esi) +- : "a" (func), "b" (ebx_in), "c" (ecx_in) +- : "memory", "cc"); +-} +- +-static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, +- u32 ecx_in, u32 *eax) +-{ +- int cx, dx, si; +- u8 error; +- +- /* +- * N.B. We do NOT need a cld after the BIOS call +- * because we always save and restore the flags. +- */ +- __asm__ __volatile__(APM_DO_ZERO_SEGS +- "pushl %%edi\n\t" +- "pushl %%ebp\n\t" +- "lcall *%%cs:apm_bios_entry\n\t" +- "setc %%bl\n\t" +- "popl %%ebp\n\t" +- "popl %%edi\n\t" +- APM_DO_POP_SEGS +- : "=a" (*eax), "=b" (error), "=c" (cx), "=d" (dx), +- "=S" (si) +- : "a" (func), "b" (ebx_in), "c" (ecx_in) +- : "memory", "cc"); +- return error; +-} +- +-#endif /* _ASM_X86_MACH_DEFAULT_APM_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-default/do_timer.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-default/do_timer.h ++++ /dev/null +@@ -1,16 +0,0 @@ +-/* defines for inline arch setup functions */ +-#include +- +-#include +-#include +- +-/** +- * do_timer_interrupt_hook - hook into timer tick +- * +- * Call the pit clock event handler. see asm/i8253.h +- **/ +- +-static inline void do_timer_interrupt_hook(void) +-{ +- global_clock_event->event_handler(global_clock_event); +-} +Index: linux-2.6-tip/arch/x86/include/asm/mach-default/entry_arch.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-default/entry_arch.h ++++ /dev/null +@@ -1,36 +0,0 @@ +-/* +- * This file is designed to contain the BUILD_INTERRUPT specifications for +- * all of the extra named interrupt vectors used by the architecture. 
+- * Usually this is the Inter Process Interrupts (IPIs) +- */ +- +-/* +- * The following vectors are part of the Linux architecture, there +- * is no hardware IRQ pin equivalent for them, they are triggered +- * through the ICC by us (IPIs) +- */ +-#ifdef CONFIG_X86_SMP +-BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) +-BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR) +-BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) +-BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) +-BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) +-#endif +- +-/* +- * every pentium local APIC has two 'local interrupts', with a +- * soft-definable vector attached to both interrupts, one of +- * which is a timer interrupt, the other one is error counter +- * overflow. Linux uses the local APIC timer interrupt to get +- * a much simpler SMP time architecture: +- */ +-#ifdef CONFIG_X86_LOCAL_APIC +-BUILD_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) +-BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) +-BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) +- +-#ifdef CONFIG_X86_MCE_P4THERMAL +-BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) +-#endif +- +-#endif +Index: linux-2.6-tip/arch/x86/include/asm/mach-default/mach_apic.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-default/mach_apic.h ++++ /dev/null +@@ -1,168 +0,0 @@ +-#ifndef _ASM_X86_MACH_DEFAULT_MACH_APIC_H +-#define _ASM_X86_MACH_DEFAULT_MACH_APIC_H +- +-#ifdef CONFIG_X86_LOCAL_APIC +- +-#include +-#include +- +-#define APIC_DFR_VALUE (APIC_DFR_FLAT) +- +-static inline const struct cpumask *target_cpus(void) +-{ +-#ifdef CONFIG_SMP +- return cpu_online_mask; +-#else +- return cpumask_of(0); +-#endif +-} +- +-#define NO_BALANCE_IRQ (0) +-#define esr_disable (0) +- +-#ifdef CONFIG_X86_64 +-#include +-#define INT_DELIVERY_MODE (genapic->int_delivery_mode) +-#define INT_DEST_MODE (genapic->int_dest_mode) +-#define TARGET_CPUS (genapic->target_cpus()) +-#define apic_id_registered (genapic->apic_id_registered) +-#define init_apic_ldr (genapic->init_apic_ldr) +-#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) +-#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) +-#define phys_pkg_id (genapic->phys_pkg_id) +-#define vector_allocation_domain (genapic->vector_allocation_domain) +-#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) +-#define send_IPI_self (genapic->send_IPI_self) +-#define wakeup_secondary_cpu (genapic->wakeup_cpu) +-extern void setup_apic_routing(void); +-#else +-#define INT_DELIVERY_MODE dest_LowestPrio +-#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ +-#define TARGET_CPUS (target_cpus()) +-#define wakeup_secondary_cpu wakeup_secondary_cpu_via_init +-/* +- * Set up the logical destination ID. +- * +- * Intel recommends to set DFR, LDR and TPR before enabling +- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel +- * document number 292116). So here it goes... 
+- */ +-static inline void init_apic_ldr(void) +-{ +- unsigned long val; +- +- apic_write(APIC_DFR, APIC_DFR_VALUE); +- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; +- val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); +- apic_write(APIC_LDR, val); +-} +- +-static inline int apic_id_registered(void) +-{ +- return physid_isset(read_apic_id(), phys_cpu_present_map); +-} +- +-static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask) +-{ +- return cpumask_bits(cpumask)[0]; +-} +- +-static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, +- const struct cpumask *andmask) +-{ +- unsigned long mask1 = cpumask_bits(cpumask)[0]; +- unsigned long mask2 = cpumask_bits(andmask)[0]; +- unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; +- +- return (unsigned int)(mask1 & mask2 & mask3); +-} +- +-static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +-{ +- return cpuid_apic >> index_msb; +-} +- +-static inline void setup_apic_routing(void) +-{ +-#ifdef CONFIG_X86_IO_APIC +- printk("Enabling APIC mode: %s. Using %d I/O APICs\n", +- "Flat", nr_ioapics); +-#endif +-} +- +-static inline int apicid_to_node(int logical_apicid) +-{ +-#ifdef CONFIG_SMP +- return apicid_2_node[hard_smp_processor_id()]; +-#else +- return 0; +-#endif +-} +- +-static inline void vector_allocation_domain(int cpu, struct cpumask *retmask) +-{ +- /* Careful. Some cpus do not strictly honor the set of cpus +- * specified in the interrupt destination when using lowest +- * priority interrupt delivery mode. +- * +- * In particular there was a hyperthreading cpu observed to +- * deliver interrupts to the wrong hyperthread when only one +- * hyperthread was specified in the interrupt desitination. +- */ +- *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; +-} +-#endif +- +-static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +-{ +- return physid_isset(apicid, bitmap); +-} +- +-static inline unsigned long check_apicid_present(int bit) +-{ +- return physid_isset(bit, phys_cpu_present_map); +-} +- +-static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +-{ +- return phys_map; +-} +- +-static inline int multi_timer_check(int apic, int irq) +-{ +- return 0; +-} +- +-/* Mapping from cpu number to logical apicid */ +-static inline int cpu_to_logical_apicid(int cpu) +-{ +- return 1 << cpu; +-} +- +-static inline int cpu_present_to_apicid(int mps_cpu) +-{ +- if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) +- return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); +- else +- return BAD_APICID; +-} +- +-static inline physid_mask_t apicid_to_cpu_present(int phys_apicid) +-{ +- return physid_mask_of_physid(phys_apicid); +-} +- +-static inline void setup_portio_remap(void) +-{ +-} +- +-static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +-{ +- return physid_isset(boot_cpu_physical_apicid, phys_cpu_present_map); +-} +- +-static inline void enable_apic_mode(void) +-{ +-} +-#endif /* CONFIG_X86_LOCAL_APIC */ +-#endif /* _ASM_X86_MACH_DEFAULT_MACH_APIC_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-default/mach_apicdef.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-default/mach_apicdef.h ++++ /dev/null +@@ -1,24 +0,0 @@ +-#ifndef _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H +-#define _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H +- +-#include +- +-#ifdef CONFIG_X86_64 +-#define APIC_ID_MASK (genapic->apic_id_mask) +-#define GET_APIC_ID(x) (genapic->get_apic_id(x)) +-#define 
SET_APIC_ID(x) (genapic->set_apic_id(x)) +-#else +-#define APIC_ID_MASK (0xF<<24) +-static inline unsigned get_apic_id(unsigned long x) +-{ +- unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); +- if (APIC_XAPIC(ver)) +- return (((x)>>24)&0xFF); +- else +- return (((x)>>24)&0xF); +-} +- +-#define GET_APIC_ID(x) get_apic_id(x) +-#endif +- +-#endif /* _ASM_X86_MACH_DEFAULT_MACH_APICDEF_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-default/mach_ipi.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-default/mach_ipi.h ++++ /dev/null +@@ -1,64 +0,0 @@ +-#ifndef _ASM_X86_MACH_DEFAULT_MACH_IPI_H +-#define _ASM_X86_MACH_DEFAULT_MACH_IPI_H +- +-/* Avoid include hell */ +-#define NMI_VECTOR 0x02 +- +-void send_IPI_mask_bitmask(const struct cpumask *mask, int vector); +-void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +-void __send_IPI_shortcut(unsigned int shortcut, int vector); +- +-extern int no_broadcast; +- +-#ifdef CONFIG_X86_64 +-#include +-#define send_IPI_mask (genapic->send_IPI_mask) +-#define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself) +-#else +-static inline void send_IPI_mask(const struct cpumask *mask, int vector) +-{ +- send_IPI_mask_bitmask(mask, vector); +-} +-void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +-#endif +- +-static inline void __local_send_IPI_allbutself(int vector) +-{ +- if (no_broadcast || vector == NMI_VECTOR) +- send_IPI_mask_allbutself(cpu_online_mask, vector); +- else +- __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); +-} +- +-static inline void __local_send_IPI_all(int vector) +-{ +- if (no_broadcast || vector == NMI_VECTOR) +- send_IPI_mask(cpu_online_mask, vector); +- else +- __send_IPI_shortcut(APIC_DEST_ALLINC, vector); +-} +- +-#ifdef CONFIG_X86_64 +-#define send_IPI_allbutself (genapic->send_IPI_allbutself) +-#define send_IPI_all (genapic->send_IPI_all) +-#else +-static inline void send_IPI_allbutself(int vector) +-{ +- /* +- * if there are no other CPUs in the system then we get an APIC send +- * error if we try to broadcast, thus avoid sending IPIs in this case. 
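/*
 * Illustrative sketch (not part of the patch): with the mach-default
 * mach_ipi.h wrappers above removed, IPIs go through the apic driver
 * operations that the asm/ipi.h hunk earlier in this patch already
 * references (apic->send_IPI_mask() and friends).
 * example_send_reschedule() is a hypothetical caller.
 */
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/irq_vectors.h>

static void example_send_reschedule(int cpu)
{
	/* Unicast a reschedule IPI to one CPU via the installed apic driver. */
	apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
}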
+- */ +- if (!(num_online_cpus() > 1)) +- return; +- +- __local_send_IPI_allbutself(vector); +- return; +-} +- +-static inline void send_IPI_all(int vector) +-{ +- __local_send_IPI_all(vector); +-} +-#endif +- +-#endif /* _ASM_X86_MACH_DEFAULT_MACH_IPI_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-default/mach_mpparse.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-default/mach_mpparse.h ++++ /dev/null +@@ -1,17 +0,0 @@ +-#ifndef _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H +-#define _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H +- +-static inline int +-mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +-{ +- return 0; +-} +- +-/* Hook from generic ACPI tables.c */ +-static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) +-{ +- return 0; +-} +- +- +-#endif /* _ASM_X86_MACH_DEFAULT_MACH_MPPARSE_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-default/mach_mpspec.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-default/mach_mpspec.h ++++ /dev/null +@@ -1,12 +0,0 @@ +-#ifndef _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H +-#define _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H +- +-#define MAX_IRQ_SOURCES 256 +- +-#if CONFIG_BASE_SMALL == 0 +-#define MAX_MP_BUSSES 256 +-#else +-#define MAX_MP_BUSSES 32 +-#endif +- +-#endif /* _ASM_X86_MACH_DEFAULT_MACH_MPSPEC_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-default/mach_timer.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-default/mach_timer.h ++++ /dev/null +@@ -1,48 +0,0 @@ +-/* +- * Machine specific calibrate_tsc() for generic. +- * Split out from timer_tsc.c by Osamu Tomita +- */ +-/* ------ Calibrate the TSC ------- +- * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). +- * Too much 64-bit arithmetic here to do this cleanly in C, and for +- * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2) +- * output busy loop as low as possible. We avoid reading the CTC registers +- * directly because of the awkward 8-bit access mechanism of the 82C54 +- * device. +- */ +-#ifndef _ASM_X86_MACH_DEFAULT_MACH_TIMER_H +-#define _ASM_X86_MACH_DEFAULT_MACH_TIMER_H +- +-#define CALIBRATE_TIME_MSEC 30 /* 30 msecs */ +-#define CALIBRATE_LATCH \ +- ((CLOCK_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000) +- +-static inline void mach_prepare_counter(void) +-{ +- /* Set the Gate high, disable speaker */ +- outb((inb(0x61) & ~0x02) | 0x01, 0x61); +- +- /* +- * Now let's take care of CTC channel 2 +- * +- * Set the Gate high, program CTC channel 2 for mode 0, +- * (interrupt on terminal count mode), binary count, +- * load 5 * LATCH count, (LSB and MSB) to begin countdown. +- * +- * Some devices need a delay here. 
+- */ +- outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */ +- outb_p(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */ +- outb_p(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */ +-} +- +-static inline void mach_countup(unsigned long *count_p) +-{ +- unsigned long count = 0; +- do { +- count++; +- } while ((inb_p(0x61) & 0x20) == 0); +- *count_p = count; +-} +- +-#endif /* _ASM_X86_MACH_DEFAULT_MACH_TIMER_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-default/mach_traps.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-default/mach_traps.h ++++ /dev/null +@@ -1,33 +0,0 @@ +-/* +- * Machine specific NMI handling for generic. +- * Split out from traps.c by Osamu Tomita +- */ +-#ifndef _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H +-#define _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H +- +-#include +- +-static inline unsigned char get_nmi_reason(void) +-{ +- return inb(0x61); +-} +- +-static inline void reassert_nmi(void) +-{ +- int old_reg = -1; +- +- if (do_i_have_lock_cmos()) +- old_reg = current_lock_cmos_reg(); +- else +- lock_cmos(0); /* register doesn't matter here */ +- outb(0x8f, 0x70); +- inb(0x71); /* dummy */ +- outb(0x0f, 0x70); +- inb(0x71); /* dummy */ +- if (old_reg >= 0) +- outb(old_reg, 0x70); +- else +- unlock_cmos(); +-} +- +-#endif /* _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-default/mach_wakecpu.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-default/mach_wakecpu.h ++++ /dev/null +@@ -1,41 +0,0 @@ +-#ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H +-#define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H +- +-#define TRAMPOLINE_PHYS_LOW (0x467) +-#define TRAMPOLINE_PHYS_HIGH (0x469) +- +-static inline void wait_for_init_deassert(atomic_t *deassert) +-{ +- while (!atomic_read(deassert)) +- cpu_relax(); +- return; +-} +- +-/* Nothing to do for most platforms, since cleared by the INIT cycle */ +-static inline void smp_callin_clear_local_apic(void) +-{ +-} +- +-static inline void store_NMI_vector(unsigned short *high, unsigned short *low) +-{ +-} +- +-static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) +-{ +-} +- +-#ifdef CONFIG_SMP +-extern void __inquire_remote_apic(int apicid); +-#else /* CONFIG_SMP */ +-static inline void __inquire_remote_apic(int apicid) +-{ +-} +-#endif /* CONFIG_SMP */ +- +-static inline void inquire_remote_apic(int apicid) +-{ +- if (apic_verbosity >= APIC_DEBUG) +- __inquire_remote_apic(apicid); +-} +- +-#endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-default/pci-functions.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-default/pci-functions.h ++++ /dev/null +@@ -1,19 +0,0 @@ +-/* +- * PCI BIOS function numbering for conventional PCI BIOS +- * systems +- */ +- +-#define PCIBIOS_PCI_FUNCTION_ID 0xb1XX +-#define PCIBIOS_PCI_BIOS_PRESENT 0xb101 +-#define PCIBIOS_FIND_PCI_DEVICE 0xb102 +-#define PCIBIOS_FIND_PCI_CLASS_CODE 0xb103 +-#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106 +-#define PCIBIOS_READ_CONFIG_BYTE 0xb108 +-#define PCIBIOS_READ_CONFIG_WORD 0xb109 +-#define PCIBIOS_READ_CONFIG_DWORD 0xb10a +-#define PCIBIOS_WRITE_CONFIG_BYTE 0xb10b +-#define PCIBIOS_WRITE_CONFIG_WORD 0xb10c +-#define PCIBIOS_WRITE_CONFIG_DWORD 0xb10d +-#define PCIBIOS_GET_ROUTING_OPTIONS 0xb10e +-#define PCIBIOS_SET_PCI_HW_INT 0xb10f +- +Index: 
linux-2.6-tip/arch/x86/include/asm/mach-default/setup_arch.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-default/setup_arch.h ++++ /dev/null +@@ -1,3 +0,0 @@ +-/* Hook to call BIOS initialisation function */ +- +-/* no action for generic */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-default/smpboot_hooks.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-default/smpboot_hooks.h ++++ /dev/null +@@ -1,61 +0,0 @@ +-/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws +- * which needs to alter them. */ +- +-static inline void smpboot_clear_io_apic_irqs(void) +-{ +-#ifdef CONFIG_X86_IO_APIC +- io_apic_irqs = 0; +-#endif +-} +- +-static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) +-{ +- CMOS_WRITE(0xa, 0xf); +- local_flush_tlb(); +- pr_debug("1.\n"); +- *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = +- start_eip >> 4; +- pr_debug("2.\n"); +- *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = +- start_eip & 0xf; +- pr_debug("3.\n"); +-} +- +-static inline void smpboot_restore_warm_reset_vector(void) +-{ +- /* +- * Install writable page 0 entry to set BIOS data area. +- */ +- local_flush_tlb(); +- +- /* +- * Paranoid: Set warm reset code and vector here back +- * to default values. +- */ +- CMOS_WRITE(0, 0xf); +- +- *((volatile long *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; +-} +- +-static inline void __init smpboot_setup_io_apic(void) +-{ +-#ifdef CONFIG_X86_IO_APIC +- /* +- * Here we can be sure that there is an IO-APIC in the system. Let's +- * go and set it up: +- */ +- if (!skip_ioapic_setup && nr_ioapics) +- setup_IO_APIC(); +- else { +- nr_ioapics = 0; +- localise_nmi_watchdog(); +- } +-#endif +-} +- +-static inline void smpboot_clear_io_apic(void) +-{ +-#ifdef CONFIG_X86_IO_APIC +- nr_ioapics = 0; +-#endif +-} +Index: linux-2.6-tip/arch/x86/include/asm/mach-generic/gpio.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-generic/gpio.h ++++ /dev/null +@@ -1,15 +0,0 @@ +-#ifndef _ASM_X86_MACH_GENERIC_GPIO_H +-#define _ASM_X86_MACH_GENERIC_GPIO_H +- +-int gpio_request(unsigned gpio, const char *label); +-void gpio_free(unsigned gpio); +-int gpio_direction_input(unsigned gpio); +-int gpio_direction_output(unsigned gpio, int value); +-int gpio_get_value(unsigned gpio); +-void gpio_set_value(unsigned gpio, int value); +-int gpio_to_irq(unsigned gpio); +-int irq_to_gpio(unsigned irq); +- +-#include /* cansleep wrappers */ +- +-#endif /* _ASM_X86_MACH_GENERIC_GPIO_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-generic/mach_apic.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-generic/mach_apic.h ++++ /dev/null +@@ -1,35 +0,0 @@ +-#ifndef _ASM_X86_MACH_GENERIC_MACH_APIC_H +-#define _ASM_X86_MACH_GENERIC_MACH_APIC_H +- +-#include +- +-#define esr_disable (genapic->ESR_DISABLE) +-#define NO_BALANCE_IRQ (genapic->no_balance_irq) +-#define INT_DELIVERY_MODE (genapic->int_delivery_mode) +-#define INT_DEST_MODE (genapic->int_dest_mode) +-#undef APIC_DEST_LOGICAL +-#define APIC_DEST_LOGICAL (genapic->apic_destination_logical) +-#define TARGET_CPUS (genapic->target_cpus()) +-#define apic_id_registered (genapic->apic_id_registered) +-#define init_apic_ldr (genapic->init_apic_ldr) +-#define ioapic_phys_id_map 
(genapic->ioapic_phys_id_map) +-#define setup_apic_routing (genapic->setup_apic_routing) +-#define multi_timer_check (genapic->multi_timer_check) +-#define apicid_to_node (genapic->apicid_to_node) +-#define cpu_to_logical_apicid (genapic->cpu_to_logical_apicid) +-#define cpu_present_to_apicid (genapic->cpu_present_to_apicid) +-#define apicid_to_cpu_present (genapic->apicid_to_cpu_present) +-#define setup_portio_remap (genapic->setup_portio_remap) +-#define check_apicid_present (genapic->check_apicid_present) +-#define check_phys_apicid_present (genapic->check_phys_apicid_present) +-#define check_apicid_used (genapic->check_apicid_used) +-#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) +-#define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) +-#define vector_allocation_domain (genapic->vector_allocation_domain) +-#define enable_apic_mode (genapic->enable_apic_mode) +-#define phys_pkg_id (genapic->phys_pkg_id) +-#define wakeup_secondary_cpu (genapic->wakeup_cpu) +- +-extern void generic_bigsmp_probe(void); +- +-#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-generic/mach_apicdef.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-generic/mach_apicdef.h ++++ /dev/null +@@ -1,11 +0,0 @@ +-#ifndef _ASM_X86_MACH_GENERIC_MACH_APICDEF_H +-#define _ASM_X86_MACH_GENERIC_MACH_APICDEF_H +- +-#ifndef APIC_DEFINITION +-#include +- +-#define GET_APIC_ID (genapic->get_apic_id) +-#define APIC_ID_MASK (genapic->apic_id_mask) +-#endif +- +-#endif /* _ASM_X86_MACH_GENERIC_MACH_APICDEF_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-generic/mach_ipi.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-generic/mach_ipi.h ++++ /dev/null +@@ -1,10 +0,0 @@ +-#ifndef _ASM_X86_MACH_GENERIC_MACH_IPI_H +-#define _ASM_X86_MACH_GENERIC_MACH_IPI_H +- +-#include +- +-#define send_IPI_mask (genapic->send_IPI_mask) +-#define send_IPI_allbutself (genapic->send_IPI_allbutself) +-#define send_IPI_all (genapic->send_IPI_all) +- +-#endif /* _ASM_X86_MACH_GENERIC_MACH_IPI_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-generic/mach_mpparse.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-generic/mach_mpparse.h ++++ /dev/null +@@ -1,9 +0,0 @@ +-#ifndef _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H +-#define _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H +- +- +-extern int mps_oem_check(struct mpc_table *, char *, char *); +- +-extern int acpi_madt_oem_check(char *, char *); +- +-#endif /* _ASM_X86_MACH_GENERIC_MACH_MPPARSE_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-generic/mach_mpspec.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-generic/mach_mpspec.h ++++ /dev/null +@@ -1,12 +0,0 @@ +-#ifndef _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H +-#define _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H +- +-#define MAX_IRQ_SOURCES 256 +- +-/* Summit or generic (i.e. installer) kernels need lots of bus entries. */ +-/* Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. 
*/ +-#define MAX_MP_BUSSES 260 +- +-extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); +- +-#endif /* _ASM_X86_MACH_GENERIC_MACH_MPSPEC_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-generic/mach_wakecpu.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-generic/mach_wakecpu.h ++++ /dev/null +@@ -1,12 +0,0 @@ +-#ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H +-#define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H +- +-#define TRAMPOLINE_PHYS_LOW (genapic->trampoline_phys_low) +-#define TRAMPOLINE_PHYS_HIGH (genapic->trampoline_phys_high) +-#define wait_for_init_deassert (genapic->wait_for_init_deassert) +-#define smp_callin_clear_local_apic (genapic->smp_callin_clear_local_apic) +-#define store_NMI_vector (genapic->store_NMI_vector) +-#define restore_NMI_vector (genapic->restore_NMI_vector) +-#define inquire_remote_apic (genapic->inquire_remote_apic) +- +-#endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-rdc321x/gpio.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-rdc321x/gpio.h ++++ /dev/null +@@ -1,60 +0,0 @@ +-#ifndef _ASM_X86_MACH_RDC321X_GPIO_H +-#define _ASM_X86_MACH_RDC321X_GPIO_H +- +-#include +- +-extern int rdc_gpio_get_value(unsigned gpio); +-extern void rdc_gpio_set_value(unsigned gpio, int value); +-extern int rdc_gpio_direction_input(unsigned gpio); +-extern int rdc_gpio_direction_output(unsigned gpio, int value); +-extern int rdc_gpio_request(unsigned gpio, const char *label); +-extern void rdc_gpio_free(unsigned gpio); +-extern void __init rdc321x_gpio_setup(void); +- +-/* Wrappers for the arch-neutral GPIO API */ +- +-static inline int gpio_request(unsigned gpio, const char *label) +-{ +- return rdc_gpio_request(gpio, label); +-} +- +-static inline void gpio_free(unsigned gpio) +-{ +- might_sleep(); +- rdc_gpio_free(gpio); +-} +- +-static inline int gpio_direction_input(unsigned gpio) +-{ +- return rdc_gpio_direction_input(gpio); +-} +- +-static inline int gpio_direction_output(unsigned gpio, int value) +-{ +- return rdc_gpio_direction_output(gpio, value); +-} +- +-static inline int gpio_get_value(unsigned gpio) +-{ +- return rdc_gpio_get_value(gpio); +-} +- +-static inline void gpio_set_value(unsigned gpio, int value) +-{ +- rdc_gpio_set_value(gpio, value); +-} +- +-static inline int gpio_to_irq(unsigned gpio) +-{ +- return gpio; +-} +- +-static inline int irq_to_gpio(unsigned irq) +-{ +- return irq; +-} +- +-/* For cansleep */ +-#include +- +-#endif /* _ASM_X86_MACH_RDC321X_GPIO_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h ++++ /dev/null +@@ -1,12 +0,0 @@ +-#define PFX "rdc321x: " +- +-/* General purpose configuration and data registers */ +-#define RDC3210_CFGREG_ADDR 0x0CF8 +-#define RDC3210_CFGREG_DATA 0x0CFC +- +-#define RDC321X_GPIO_CTRL_REG1 0x48 +-#define RDC321X_GPIO_CTRL_REG2 0x84 +-#define RDC321X_GPIO_DATA_REG1 0x4c +-#define RDC321X_GPIO_DATA_REG2 0x88 +- +-#define RDC321X_MAX_GPIO 58 +Index: linux-2.6-tip/arch/x86/include/asm/mach-voyager/do_timer.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-voyager/do_timer.h ++++ /dev/null +@@ -1,17 +0,0 @@ +-/* defines for inline arch setup functions */ +-#include +- +-#include 
+-#include +- +-/** +- * do_timer_interrupt_hook - hook into timer tick +- * +- * Call the pit clock event handler. see asm/i8253.h +- **/ +-static inline void do_timer_interrupt_hook(void) +-{ +- global_clock_event->event_handler(global_clock_event); +- voyager_timer_interrupt(); +-} +- +Index: linux-2.6-tip/arch/x86/include/asm/mach-voyager/entry_arch.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-voyager/entry_arch.h ++++ /dev/null +@@ -1,26 +0,0 @@ +-/* -*- mode: c; c-basic-offset: 8 -*- */ +- +-/* Copyright (C) 2002 +- * +- * Author: James.Bottomley@HansenPartnership.com +- * +- * linux/arch/i386/voyager/entry_arch.h +- * +- * This file builds the VIC and QIC CPI gates +- */ +- +-/* initialise the voyager interrupt gates +- * +- * This uses the macros in irq.h to set up assembly jump gates. The +- * calls are then redirected to the same routine with smp_ prefixed */ +-BUILD_INTERRUPT(vic_sys_interrupt, VIC_SYS_INT) +-BUILD_INTERRUPT(vic_cmn_interrupt, VIC_CMN_INT) +-BUILD_INTERRUPT(vic_cpi_interrupt, VIC_CPI_LEVEL0); +- +-/* do all the QIC interrupts */ +-BUILD_INTERRUPT(qic_timer_interrupt, QIC_TIMER_CPI); +-BUILD_INTERRUPT(qic_invalidate_interrupt, QIC_INVALIDATE_CPI); +-BUILD_INTERRUPT(qic_reschedule_interrupt, QIC_RESCHEDULE_CPI); +-BUILD_INTERRUPT(qic_enable_irq_interrupt, QIC_ENABLE_IRQ_CPI); +-BUILD_INTERRUPT(qic_call_function_interrupt, QIC_CALL_FUNCTION_CPI); +-BUILD_INTERRUPT(qic_call_function_single_interrupt, QIC_CALL_FUNCTION_SINGLE_CPI); +Index: linux-2.6-tip/arch/x86/include/asm/mach-voyager/setup_arch.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mach-voyager/setup_arch.h ++++ /dev/null +@@ -1,12 +0,0 @@ +-#include +-#include +-#define VOYAGER_BIOS_INFO ((struct voyager_bios_info *) \ +- (&boot_params.apm_bios_info)) +- +-/* Hook to call BIOS initialisation function */ +- +-/* for voyager, pass the voyager BIOS/SUS info area to the detection +- * routines */ +- +-#define ARCH_SETUP voyager_detect(VOYAGER_BIOS_INFO); +- +Index: linux-2.6-tip/arch/x86/include/asm/mach_timer.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/mach_timer.h +@@ -0,0 +1,48 @@ ++/* ++ * Machine specific calibrate_tsc() for generic. ++ * Split out from timer_tsc.c by Osamu Tomita ++ */ ++/* ------ Calibrate the TSC ------- ++ * Return 2^32 * (1 / (TSC clocks per usec)) for do_fast_gettimeoffset(). ++ * Too much 64-bit arithmetic here to do this cleanly in C, and for ++ * accuracy's sake we want to keep the overhead on the CTC speaker (channel 2) ++ * output busy loop as low as possible. We avoid reading the CTC registers ++ * directly because of the awkward 8-bit access mechanism of the 82C54 ++ * device. ++ */ ++#ifndef _ASM_X86_MACH_DEFAULT_MACH_TIMER_H ++#define _ASM_X86_MACH_DEFAULT_MACH_TIMER_H ++ ++#define CALIBRATE_TIME_MSEC 30 /* 30 msecs */ ++#define CALIBRATE_LATCH \ ++ ((CLOCK_TICK_RATE * CALIBRATE_TIME_MSEC + 1000/2)/1000) ++ ++static inline void mach_prepare_counter(void) ++{ ++ /* Set the Gate high, disable speaker */ ++ outb((inb(0x61) & ~0x02) | 0x01, 0x61); ++ ++ /* ++ * Now let's take care of CTC channel 2 ++ * ++ * Set the Gate high, program CTC channel 2 for mode 0, ++ * (interrupt on terminal count mode), binary count, ++ * load 5 * LATCH count, (LSB and MSB) to begin countdown. ++ * ++ * Some devices need a delay here. 
++ */ ++ outb(0xb0, 0x43); /* binary, mode 0, LSB/MSB, Ch 2 */ ++ outb_p(CALIBRATE_LATCH & 0xff, 0x42); /* LSB of count */ ++ outb_p(CALIBRATE_LATCH >> 8, 0x42); /* MSB of count */ ++} ++ ++static inline void mach_countup(unsigned long *count_p) ++{ ++ unsigned long count = 0; ++ do { ++ count++; ++ } while ((inb_p(0x61) & 0x20) == 0); ++ *count_p = count; ++} ++ ++#endif /* _ASM_X86_MACH_DEFAULT_MACH_TIMER_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mach_traps.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/mach_traps.h +@@ -0,0 +1,33 @@ ++/* ++ * Machine specific NMI handling for generic. ++ * Split out from traps.c by Osamu Tomita ++ */ ++#ifndef _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H ++#define _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H ++ ++#include ++ ++static inline unsigned char get_nmi_reason(void) ++{ ++ return inb(0x61); ++} ++ ++static inline void reassert_nmi(void) ++{ ++ int old_reg = -1; ++ ++ if (do_i_have_lock_cmos()) ++ old_reg = current_lock_cmos_reg(); ++ else ++ lock_cmos(0); /* register doesn't matter here */ ++ outb(0x8f, 0x70); ++ inb(0x71); /* dummy */ ++ outb(0x0f, 0x70); ++ inb(0x71); /* dummy */ ++ if (old_reg >= 0) ++ outb(old_reg, 0x70); ++ else ++ unlock_cmos(); ++} ++ ++#endif /* _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mce.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mce.h ++++ linux-2.6-tip/arch/x86/include/asm/mce.h +@@ -11,6 +11,8 @@ + */ + + #define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */ ++#define MCG_EXT_P (1ULL<<9) /* Extended registers available */ ++#define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ + + #define MCG_STATUS_RIPV (1UL<<0) /* restart ip valid */ + #define MCG_STATUS_EIPV (1UL<<1) /* ip points to correct instruction */ +@@ -90,14 +92,29 @@ extern int mce_disabled; + + #include + ++void mce_setup(struct mce *m); + void mce_log(struct mce *m); + DECLARE_PER_CPU(struct sys_device, device_mce); + extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); + ++/* ++ * To support more than 128 would need to escape the predefined ++ * Linux defined extended banks first. 
++ */ ++#define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1) ++ + #ifdef CONFIG_X86_MCE_INTEL + void mce_intel_feature_init(struct cpuinfo_x86 *c); ++void cmci_clear(void); ++void cmci_reenable(void); ++void cmci_rediscover(int dying); ++void cmci_recheck(void); + #else + static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { } ++static inline void cmci_clear(void) {} ++static inline void cmci_reenable(void) {} ++static inline void cmci_rediscover(int dying) {} ++static inline void cmci_recheck(void) {} + #endif + + #ifdef CONFIG_X86_MCE_AMD +@@ -106,11 +123,23 @@ void mce_amd_feature_init(struct cpuinfo + static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } + #endif + +-void mce_log_therm_throt_event(unsigned int cpu, __u64 status); ++extern int mce_available(struct cpuinfo_x86 *c); ++ ++void mce_log_therm_throt_event(__u64 status); + + extern atomic_t mce_entry; + + extern void do_machine_check(struct pt_regs *, long); ++ ++typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); ++DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); ++ ++enum mcp_flags { ++ MCP_TIMESTAMP = (1 << 0), /* log time stamp */ ++ MCP_UC = (1 << 1), /* log uncorrected errors */ ++}; ++extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); ++ + extern int mce_notify_user(void); + + #endif /* !CONFIG_X86_32 */ +@@ -120,8 +149,8 @@ extern void mcheck_init(struct cpuinfo_x + #else + #define mcheck_init(c) do { } while (0) + #endif +-extern void stop_mce(void); +-extern void restart_mce(void); ++ ++extern void (*mce_threshold_vector)(void); + + #endif /* __KERNEL__ */ + #endif /* _ASM_X86_MCE_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mmu_context.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mmu_context.h ++++ linux-2.6-tip/arch/x86/include/asm/mmu_context.h +@@ -21,11 +21,54 @@ static inline void paravirt_activate_mm( + int init_new_context(struct task_struct *tsk, struct mm_struct *mm); + void destroy_context(struct mm_struct *mm); + +-#ifdef CONFIG_X86_32 +-# include "mmu_context_32.h" +-#else +-# include "mmu_context_64.h" ++ ++static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) ++{ ++#ifdef CONFIG_SMP ++ if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) ++ percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); ++#endif ++} ++ ++static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, ++ struct task_struct *tsk) ++{ ++ unsigned cpu = smp_processor_id(); ++ ++ if (likely(prev != next)) { ++ /* stop flush ipis for the previous mm */ ++ cpu_clear(cpu, prev->cpu_vm_mask); ++#ifdef CONFIG_SMP ++ percpu_write(cpu_tlbstate.state, TLBSTATE_OK); ++ percpu_write(cpu_tlbstate.active_mm, next); + #endif ++ cpu_set(cpu, next->cpu_vm_mask); ++ ++ /* Re-load page tables */ ++ load_cr3(next->pgd); ++ ++ /* ++ * load the LDT, if the LDT is different: ++ */ ++ if (unlikely(prev->context.ldt != next->context.ldt)) ++ load_LDT_nolock(&next->context); ++ } ++#ifdef CONFIG_SMP ++ else { ++ percpu_write(cpu_tlbstate.state, TLBSTATE_OK); ++ BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); ++ ++ if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { ++ /* We were in lazy tlb mode and leave_mm disabled ++ * tlb flush IPI delivery. We must reload CR3 ++ * to make sure to use no freed page tables. 
++ */ ++ load_cr3(next->pgd); ++ load_LDT_nolock(&next->context); ++ } ++ } ++#endif ++} + + #define activate_mm(prev, next) \ + do { \ +@@ -33,5 +76,17 @@ do { \ + switch_mm((prev), (next), NULL); \ + } while (0); + ++#ifdef CONFIG_X86_32 ++#define deactivate_mm(tsk, mm) \ ++do { \ ++ lazy_load_gs(0); \ ++} while (0) ++#else ++#define deactivate_mm(tsk, mm) \ ++do { \ ++ load_gs_index(0); \ ++ loadsegment(fs, 0); \ ++} while (0) ++#endif + + #endif /* _ASM_X86_MMU_CONTEXT_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mmu_context_32.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mmu_context_32.h ++++ /dev/null +@@ -1,55 +0,0 @@ +-#ifndef _ASM_X86_MMU_CONTEXT_32_H +-#define _ASM_X86_MMU_CONTEXT_32_H +- +-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +-{ +-#ifdef CONFIG_SMP +- if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) +- x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY); +-#endif +-} +- +-static inline void switch_mm(struct mm_struct *prev, +- struct mm_struct *next, +- struct task_struct *tsk) +-{ +- int cpu = smp_processor_id(); +- +- if (likely(prev != next)) { +- /* stop flush ipis for the previous mm */ +- cpu_clear(cpu, prev->cpu_vm_mask); +-#ifdef CONFIG_SMP +- x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); +- x86_write_percpu(cpu_tlbstate.active_mm, next); +-#endif +- cpu_set(cpu, next->cpu_vm_mask); +- +- /* Re-load page tables */ +- load_cr3(next->pgd); +- +- /* +- * load the LDT, if the LDT is different: +- */ +- if (unlikely(prev->context.ldt != next->context.ldt)) +- load_LDT_nolock(&next->context); +- } +-#ifdef CONFIG_SMP +- else { +- x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); +- BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next); +- +- if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { +- /* We were in lazy tlb mode and leave_mm disabled +- * tlb flush IPI delivery. We must reload %cr3. +- */ +- load_cr3(next->pgd); +- load_LDT_nolock(&next->context); +- } +- } +-#endif +-} +- +-#define deactivate_mm(tsk, mm) \ +- asm("movl %0,%%gs": :"r" (0)); +- +-#endif /* _ASM_X86_MMU_CONTEXT_32_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mmu_context_64.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mmu_context_64.h ++++ /dev/null +@@ -1,54 +0,0 @@ +-#ifndef _ASM_X86_MMU_CONTEXT_64_H +-#define _ASM_X86_MMU_CONTEXT_64_H +- +-#include +- +-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) +-{ +-#ifdef CONFIG_SMP +- if (read_pda(mmu_state) == TLBSTATE_OK) +- write_pda(mmu_state, TLBSTATE_LAZY); +-#endif +-} +- +-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, +- struct task_struct *tsk) +-{ +- unsigned cpu = smp_processor_id(); +- if (likely(prev != next)) { +- /* stop flush ipis for the previous mm */ +- cpu_clear(cpu, prev->cpu_vm_mask); +-#ifdef CONFIG_SMP +- write_pda(mmu_state, TLBSTATE_OK); +- write_pda(active_mm, next); +-#endif +- cpu_set(cpu, next->cpu_vm_mask); +- load_cr3(next->pgd); +- +- if (unlikely(next->context.ldt != prev->context.ldt)) +- load_LDT_nolock(&next->context); +- } +-#ifdef CONFIG_SMP +- else { +- write_pda(mmu_state, TLBSTATE_OK); +- if (read_pda(active_mm) != next) +- BUG(); +- if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { +- /* We were in lazy tlb mode and leave_mm disabled +- * tlb flush IPI delivery. We must reload CR3 +- * to make sure to use no freed page tables. 
+- */ +- load_cr3(next->pgd); +- load_LDT_nolock(&next->context); +- } +- } +-#endif +-} +- +-#define deactivate_mm(tsk, mm) \ +-do { \ +- load_gs_index(0); \ +- asm volatile("movl %0,%%fs"::"r"(0)); \ +-} while (0) +- +-#endif /* _ASM_X86_MMU_CONTEXT_64_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mmzone_32.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mmzone_32.h ++++ linux-2.6-tip/arch/x86/include/asm/mmzone_32.h +@@ -91,46 +91,9 @@ static inline int pfn_valid(int pfn) + #endif /* CONFIG_DISCONTIGMEM */ + + #ifdef CONFIG_NEED_MULTIPLE_NODES +- +-/* +- * Following are macros that are specific to this numa platform. +- */ +-#define reserve_bootmem(addr, size, flags) \ +- reserve_bootmem_node(NODE_DATA(0), (addr), (size), (flags)) +-#define alloc_bootmem(x) \ +- __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +-#define alloc_bootmem_nopanic(x) \ +- __alloc_bootmem_node_nopanic(NODE_DATA(0), (x), SMP_CACHE_BYTES, \ +- __pa(MAX_DMA_ADDRESS)) +-#define alloc_bootmem_low(x) \ +- __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, 0) +-#define alloc_bootmem_pages(x) \ +- __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +-#define alloc_bootmem_pages_nopanic(x) \ +- __alloc_bootmem_node_nopanic(NODE_DATA(0), (x), PAGE_SIZE, \ +- __pa(MAX_DMA_ADDRESS)) +-#define alloc_bootmem_low_pages(x) \ +- __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0) +-#define alloc_bootmem_node(pgdat, x) \ +-({ \ +- struct pglist_data __maybe_unused \ +- *__alloc_bootmem_node__pgdat = (pgdat); \ +- __alloc_bootmem_node(NODE_DATA(0), (x), SMP_CACHE_BYTES, \ +- __pa(MAX_DMA_ADDRESS)); \ +-}) +-#define alloc_bootmem_pages_node(pgdat, x) \ +-({ \ +- struct pglist_data __maybe_unused \ +- *__alloc_bootmem_node__pgdat = (pgdat); \ +- __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, \ +- __pa(MAX_DMA_ADDRESS)); \ +-}) +-#define alloc_bootmem_low_pages_node(pgdat, x) \ +-({ \ +- struct pglist_data __maybe_unused \ +- *__alloc_bootmem_node__pgdat = (pgdat); \ +- __alloc_bootmem_node(NODE_DATA(0), (x), PAGE_SIZE, 0); \ +-}) ++/* always use node 0 for bootmem on this numa platform */ ++#define bootmem_arch_preferred_node(__bdata, size, align, goal, limit) \ ++ (NODE_DATA(0)->bdata) + #endif /* CONFIG_NEED_MULTIPLE_NODES */ + + #endif /* _ASM_X86_MMZONE_32_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mpspec.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mpspec.h ++++ linux-2.6-tip/arch/x86/include/asm/mpspec.h +@@ -9,7 +9,18 @@ extern int apic_version[MAX_APICS]; + extern int pic_mode; + + #ifdef CONFIG_X86_32 +-#include ++ ++/* ++ * Summit or generic (i.e. installer) kernels need lots of bus entries. ++ * Maximum 256 PCI busses, plus 1 ISA bus in each of 4 cabinets. ++ */ ++#if CONFIG_BASE_SMALL == 0 ++# define MAX_MP_BUSSES 260 ++#else ++# define MAX_MP_BUSSES 32 ++#endif ++ ++#define MAX_IRQ_SOURCES 256 + + extern unsigned int def_to_bigsmp; + extern u8 apicid_2_node[]; +@@ -20,15 +31,15 @@ extern int mp_bus_id_to_local[MAX_MP_BUS + extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; + #endif + +-#define MAX_APICID 256 ++#define MAX_APICID 256 + +-#else ++#else /* CONFIG_X86_64: */ + +-#define MAX_MP_BUSSES 256 ++#define MAX_MP_BUSSES 256 + /* Each PCI slot may be a combo card with its own bus. 4 IRQ pins per slot. 
*/ +-#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) ++#define MAX_IRQ_SOURCES (MAX_MP_BUSSES * 4) + +-#endif ++#endif /* CONFIG_X86_64 */ + + extern void early_find_smp_config(void); + extern void early_get_smp_config(void); +@@ -45,11 +56,13 @@ extern int smp_found_config; + extern int mpc_default_type; + extern unsigned long mp_lapic_addr; + +-extern void find_smp_config(void); + extern void get_smp_config(void); ++ + #ifdef CONFIG_X86_MPPARSE ++extern void find_smp_config(void); + extern void early_reserve_e820_mpc_new(void); + #else ++static inline void find_smp_config(void) { } + static inline void early_reserve_e820_mpc_new(void) { } + #endif + +@@ -64,6 +77,8 @@ extern int acpi_probe_gsi(void); + #ifdef CONFIG_X86_IO_APIC + extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, + u32 gsi, int triggering, int polarity); ++extern int mp_find_ioapic(int gsi); ++extern int mp_find_ioapic_pin(int ioapic, int gsi); + #else + static inline int + mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, +@@ -148,4 +163,8 @@ static inline void physid_set_mask_of_ph + + extern physid_mask_t phys_cpu_present_map; + ++extern int generic_mps_oem_check(struct mpc_table *, char *, char *); ++ ++extern int default_acpi_madt_oem_check(char *, char *); ++ + #endif /* _ASM_X86_MPSPEC_H */ +Index: linux-2.6-tip/arch/x86/include/asm/mpspec_def.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/mpspec_def.h ++++ linux-2.6-tip/arch/x86/include/asm/mpspec_def.h +@@ -24,17 +24,18 @@ + # endif + #endif + +-struct intel_mp_floating { +- char mpf_signature[4]; /* "_MP_" */ +- unsigned int mpf_physptr; /* Configuration table address */ +- unsigned char mpf_length; /* Our length (paragraphs) */ +- unsigned char mpf_specification;/* Specification version */ +- unsigned char mpf_checksum; /* Checksum (makes sum 0) */ +- unsigned char mpf_feature1; /* Standard or configuration ? */ +- unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ +- unsigned char mpf_feature3; /* Unused (0) */ +- unsigned char mpf_feature4; /* Unused (0) */ +- unsigned char mpf_feature5; /* Unused (0) */ ++/* Intel MP Floating Pointer Structure */ ++struct mpf_intel { ++ char signature[4]; /* "_MP_" */ ++ unsigned int physptr; /* Configuration table address */ ++ unsigned char length; /* Our length (paragraphs) */ ++ unsigned char specification; /* Specification version */ ++ unsigned char checksum; /* Checksum (makes sum 0) */ ++ unsigned char feature1; /* Standard or configuration ? 
*/ ++ unsigned char feature2; /* Bit7 set for IMCR|PIC */ ++ unsigned char feature3; /* Unused (0) */ ++ unsigned char feature4; /* Unused (0) */ ++ unsigned char feature5; /* Unused (0) */ + }; + + #define MPC_SIGNATURE "PCMP" +Index: linux-2.6-tip/arch/x86/include/asm/msidef.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/msidef.h ++++ linux-2.6-tip/arch/x86/include/asm/msidef.h +@@ -47,6 +47,7 @@ + #define MSI_ADDR_DEST_ID_MASK 0x00ffff0 + #define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ + MSI_ADDR_DEST_ID_MASK) ++#define MSI_ADDR_EXT_DEST_ID(dest) ((dest) & 0xffffff00) + + #define MSI_ADDR_IR_EXT_INT (1 << 4) + #define MSI_ADDR_IR_SHV (1 << 3) +Index: linux-2.6-tip/arch/x86/include/asm/msr-index.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/msr-index.h ++++ linux-2.6-tip/arch/x86/include/asm/msr-index.h +@@ -77,6 +77,11 @@ + #define MSR_IA32_MC0_ADDR 0x00000402 + #define MSR_IA32_MC0_MISC 0x00000403 + ++/* These are consecutive and not in the normal 4er MCE bank block */ ++#define MSR_IA32_MC0_CTL2 0x00000280 ++#define CMCI_EN (1ULL << 30) ++#define CMCI_THRESHOLD_MASK 0xffffULL ++ + #define MSR_P6_PERFCTR0 0x000000c1 + #define MSR_P6_PERFCTR1 0x000000c2 + #define MSR_P6_EVNTSEL0 0x00000186 +Index: linux-2.6-tip/arch/x86/include/asm/numa_32.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/numa_32.h ++++ linux-2.6-tip/arch/x86/include/asm/numa_32.h +@@ -4,8 +4,12 @@ + extern int pxm_to_nid(int pxm); + extern void numa_remove_cpu(int cpu); + +-#ifdef CONFIG_NUMA ++#ifdef CONFIG_HIGHMEM + extern void set_highmem_pages_init(void); ++#else ++static inline void set_highmem_pages_init(void) ++{ ++} + #endif + + #endif /* _ASM_X86_NUMA_32_H */ +Index: linux-2.6-tip/arch/x86/include/asm/numaq.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/numaq.h ++++ linux-2.6-tip/arch/x86/include/asm/numaq.h +@@ -31,6 +31,8 @@ + extern int found_numaq; + extern int get_memcfg_numaq(void); + ++extern void *xquad_portio; ++ + /* + * SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data are the + */ +Index: linux-2.6-tip/arch/x86/include/asm/numaq/apic.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/numaq/apic.h ++++ /dev/null +@@ -1,142 +0,0 @@ +-#ifndef __ASM_NUMAQ_APIC_H +-#define __ASM_NUMAQ_APIC_H +- +-#include +-#include +-#include +- +-#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) +- +-static inline const cpumask_t *target_cpus(void) +-{ +- return &CPU_MASK_ALL; +-} +- +-#define NO_BALANCE_IRQ (1) +-#define esr_disable (1) +- +-#define INT_DELIVERY_MODE dest_LowestPrio +-#define INT_DEST_MODE 0 /* physical delivery on LOCAL quad */ +- +-static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +-{ +- return physid_isset(apicid, bitmap); +-} +-static inline unsigned long check_apicid_present(int bit) +-{ +- return physid_isset(bit, phys_cpu_present_map); +-} +-#define apicid_cluster(apicid) (apicid & 0xF0) +- +-static inline int apic_id_registered(void) +-{ +- return 1; +-} +- +-static inline void init_apic_ldr(void) +-{ +- /* Already done in NUMA-Q firmware */ +-} +- +-static inline void setup_apic_routing(void) +-{ +- printk("Enabling APIC mode: %s. 
Using %d I/O APICs\n", +- "NUMA-Q", nr_ioapics); +-} +- +-/* +- * Skip adding the timer int on secondary nodes, which causes +- * a small but painful rift in the time-space continuum. +- */ +-static inline int multi_timer_check(int apic, int irq) +-{ +- return apic != 0 && irq == 0; +-} +- +-static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) +-{ +- /* We don't have a good way to do this yet - hack */ +- return physids_promote(0xFUL); +-} +- +-/* Mapping from cpu number to logical apicid */ +-extern u8 cpu_2_logical_apicid[]; +-static inline int cpu_to_logical_apicid(int cpu) +-{ +- if (cpu >= nr_cpu_ids) +- return BAD_APICID; +- return (int)cpu_2_logical_apicid[cpu]; +-} +- +-/* +- * Supporting over 60 cpus on NUMA-Q requires a locality-dependent +- * cpu to APIC ID relation to properly interact with the intelligent +- * mode of the cluster controller. +- */ +-static inline int cpu_present_to_apicid(int mps_cpu) +-{ +- if (mps_cpu < 60) +- return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); +- else +- return BAD_APICID; +-} +- +-static inline int apicid_to_node(int logical_apicid) +-{ +- return logical_apicid >> 4; +-} +- +-static inline physid_mask_t apicid_to_cpu_present(int logical_apicid) +-{ +- int node = apicid_to_node(logical_apicid); +- int cpu = __ffs(logical_apicid & 0xf); +- +- return physid_mask_of_physid(cpu + 4*node); +-} +- +-extern void *xquad_portio; +- +-static inline void setup_portio_remap(void) +-{ +- int num_quads = num_online_nodes(); +- +- if (num_quads <= 1) +- return; +- +- printk("Remapping cross-quad port I/O for %d quads\n", num_quads); +- xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); +- printk("xquad_portio vaddr 0x%08lx, len %08lx\n", +- (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); +-} +- +-static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +-{ +- return (1); +-} +- +-static inline void enable_apic_mode(void) +-{ +-} +- +-/* +- * We use physical apicids here, not logical, so just return the default +- * physical broadcast to stop people from breaking us +- */ +-static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +-{ +- return (int) 0xF; +-} +- +-static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, +- const struct cpumask *andmask) +-{ +- return (int) 0xF; +-} +- +-/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. 
*/ +-static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +-{ +- return cpuid_apic >> index_msb; +-} +- +-#endif /* __ASM_NUMAQ_APIC_H */ +Index: linux-2.6-tip/arch/x86/include/asm/numaq/apicdef.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/numaq/apicdef.h ++++ /dev/null +@@ -1,14 +0,0 @@ +-#ifndef __ASM_NUMAQ_APICDEF_H +-#define __ASM_NUMAQ_APICDEF_H +- +- +-#define APIC_ID_MASK (0xF<<24) +- +-static inline unsigned get_apic_id(unsigned long x) +-{ +- return (((x)>>24)&0x0F); +-} +- +-#define GET_APIC_ID(x) get_apic_id(x) +- +-#endif +Index: linux-2.6-tip/arch/x86/include/asm/numaq/ipi.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/numaq/ipi.h ++++ /dev/null +@@ -1,22 +0,0 @@ +-#ifndef __ASM_NUMAQ_IPI_H +-#define __ASM_NUMAQ_IPI_H +- +-void send_IPI_mask_sequence(const struct cpumask *mask, int vector); +-void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); +- +-static inline void send_IPI_mask(const struct cpumask *mask, int vector) +-{ +- send_IPI_mask_sequence(mask, vector); +-} +- +-static inline void send_IPI_allbutself(int vector) +-{ +- send_IPI_mask_allbutself(cpu_online_mask, vector); +-} +- +-static inline void send_IPI_all(int vector) +-{ +- send_IPI_mask(cpu_online_mask, vector); +-} +- +-#endif /* __ASM_NUMAQ_IPI_H */ +Index: linux-2.6-tip/arch/x86/include/asm/numaq/mpparse.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/numaq/mpparse.h ++++ /dev/null +@@ -1,6 +0,0 @@ +-#ifndef __ASM_NUMAQ_MPPARSE_H +-#define __ASM_NUMAQ_MPPARSE_H +- +-extern void numaq_mps_oem_check(struct mpc_table *, char *, char *); +- +-#endif /* __ASM_NUMAQ_MPPARSE_H */ +Index: linux-2.6-tip/arch/x86/include/asm/numaq/wakecpu.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/numaq/wakecpu.h ++++ /dev/null +@@ -1,45 +0,0 @@ +-#ifndef __ASM_NUMAQ_WAKECPU_H +-#define __ASM_NUMAQ_WAKECPU_H +- +-/* This file copes with machines that wakeup secondary CPUs by NMIs */ +- +-#define TRAMPOLINE_PHYS_LOW (0x8) +-#define TRAMPOLINE_PHYS_HIGH (0xa) +- +-/* We don't do anything here because we use NMI's to boot instead */ +-static inline void wait_for_init_deassert(atomic_t *deassert) +-{ +-} +- +-/* +- * Because we use NMIs rather than the INIT-STARTUP sequence to +- * bootstrap the CPUs, the APIC may be in a weird state. Kick it. 
+- */ +-static inline void smp_callin_clear_local_apic(void) +-{ +- clear_local_APIC(); +-} +- +-static inline void store_NMI_vector(unsigned short *high, unsigned short *low) +-{ +- printk("Storing NMI vector\n"); +- *high = +- *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)); +- *low = +- *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)); +-} +- +-static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) +-{ +- printk("Restoring NMI vector\n"); +- *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = +- *high; +- *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = +- *low; +-} +- +-static inline void inquire_remote_apic(int apicid) +-{ +-} +- +-#endif /* __ASM_NUMAQ_WAKECPU_H */ +Index: linux-2.6-tip/arch/x86/include/asm/page.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/page.h ++++ linux-2.6-tip/arch/x86/include/asm/page.h +@@ -1,42 +1,11 @@ + #ifndef _ASM_X86_PAGE_H + #define _ASM_X86_PAGE_H + +-#include +- +-/* PAGE_SHIFT determines the page size */ +-#define PAGE_SHIFT 12 +-#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) +-#define PAGE_MASK (~(PAGE_SIZE-1)) ++#include + + #ifdef __KERNEL__ + +-#define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1) +-#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) +- +-/* Cast PAGE_MASK to a signed type so that it is sign-extended if +- virtual addresses are 32-bits but physical addresses are larger +- (ie, 32-bit PAE). */ +-#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) +- +-/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */ +-#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) +- +-/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */ +-#define PTE_FLAGS_MASK (~PTE_PFN_MASK) +- +-#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) +-#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) +- +-#define HPAGE_SHIFT PMD_SHIFT +-#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) +-#define HPAGE_MASK (~(HPAGE_SIZE - 1)) +-#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) +- +-#define HUGE_MAX_HSTATE 2 +- +-#ifndef __ASSEMBLY__ +-#include +-#endif ++#include + + #ifdef CONFIG_X86_64 + #include +@@ -44,38 +13,18 @@ + #include + #endif /* CONFIG_X86_64 */ + +-#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) +- +-#define VM_DATA_DEFAULT_FLAGS \ +- (((current->personality & READ_IMPLIES_EXEC) ? 
VM_EXEC : 0 ) | \ +- VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +- +- + #ifndef __ASSEMBLY__ + +-typedef struct { pgdval_t pgd; } pgd_t; +-typedef struct { pgprotval_t pgprot; } pgprot_t; +- +-extern int page_is_ram(unsigned long pagenr); +-extern int devmem_is_allowed(unsigned long pagenr); +-extern void map_devmem(unsigned long pfn, unsigned long size, +- pgprot_t vma_prot); +-extern void unmap_devmem(unsigned long pfn, unsigned long size, +- pgprot_t vma_prot); +- +-extern unsigned long max_low_pfn_mapped; +-extern unsigned long max_pfn_mapped; +- + struct page; + + static inline void clear_user_page(void *page, unsigned long vaddr, +- struct page *pg) ++ struct page *pg) + { + clear_page(page); + } + + static inline void copy_user_page(void *to, void *from, unsigned long vaddr, +- struct page *topage) ++ struct page *topage) + { + copy_page(to, from); + } +@@ -84,99 +33,6 @@ static inline void copy_user_page(void * + alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr) + #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + +-static inline pgd_t native_make_pgd(pgdval_t val) +-{ +- return (pgd_t) { val }; +-} +- +-static inline pgdval_t native_pgd_val(pgd_t pgd) +-{ +- return pgd.pgd; +-} +- +-#if PAGETABLE_LEVELS >= 3 +-#if PAGETABLE_LEVELS == 4 +-typedef struct { pudval_t pud; } pud_t; +- +-static inline pud_t native_make_pud(pmdval_t val) +-{ +- return (pud_t) { val }; +-} +- +-static inline pudval_t native_pud_val(pud_t pud) +-{ +- return pud.pud; +-} +-#else /* PAGETABLE_LEVELS == 3 */ +-#include +- +-static inline pudval_t native_pud_val(pud_t pud) +-{ +- return native_pgd_val(pud.pgd); +-} +-#endif /* PAGETABLE_LEVELS == 4 */ +- +-typedef struct { pmdval_t pmd; } pmd_t; +- +-static inline pmd_t native_make_pmd(pmdval_t val) +-{ +- return (pmd_t) { val }; +-} +- +-static inline pmdval_t native_pmd_val(pmd_t pmd) +-{ +- return pmd.pmd; +-} +-#else /* PAGETABLE_LEVELS == 2 */ +-#include +- +-static inline pmdval_t native_pmd_val(pmd_t pmd) +-{ +- return native_pgd_val(pmd.pud.pgd); +-} +-#endif /* PAGETABLE_LEVELS >= 3 */ +- +-static inline pte_t native_make_pte(pteval_t val) +-{ +- return (pte_t) { .pte = val }; +-} +- +-static inline pteval_t native_pte_val(pte_t pte) +-{ +- return pte.pte; +-} +- +-static inline pteval_t native_pte_flags(pte_t pte) +-{ +- return native_pte_val(pte) & PTE_FLAGS_MASK; +-} +- +-#define pgprot_val(x) ((x).pgprot) +-#define __pgprot(x) ((pgprot_t) { (x) } ) +- +-#ifdef CONFIG_PARAVIRT +-#include +-#else /* !CONFIG_PARAVIRT */ +- +-#define pgd_val(x) native_pgd_val(x) +-#define __pgd(x) native_make_pgd(x) +- +-#ifndef __PAGETABLE_PUD_FOLDED +-#define pud_val(x) native_pud_val(x) +-#define __pud(x) native_make_pud(x) +-#endif +- +-#ifndef __PAGETABLE_PMD_FOLDED +-#define pmd_val(x) native_pmd_val(x) +-#define __pmd(x) native_make_pmd(x) +-#endif +- +-#define pte_val(x) native_pte_val(x) +-#define pte_flags(x) native_pte_flags(x) +-#define __pte(x) native_make_pte(x) +- +-#endif /* CONFIG_PARAVIRT */ +- + #define __pa(x) __phys_addr((unsigned long)(x)) + #define __pa_nodebug(x) __phys_addr_nodebug((unsigned long)(x)) + /* __pa_symbol should be used for C visible symbols. +Index: linux-2.6-tip/arch/x86/include/asm/page_32.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/page_32.h ++++ linux-2.6-tip/arch/x86/include/asm/page_32.h +@@ -1,82 +1,14 @@ + #ifndef _ASM_X86_PAGE_32_H + #define _ASM_X86_PAGE_32_H + +-/* +- * This handles the memory map. 
+- * +- * A __PAGE_OFFSET of 0xC0000000 means that the kernel has +- * a virtual address space of one gigabyte, which limits the +- * amount of physical memory you can use to about 950MB. +- * +- * If you want more physical memory than this then see the CONFIG_HIGHMEM4G +- * and CONFIG_HIGHMEM64G options in the kernel configuration. +- */ +-#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) +- +-#ifdef CONFIG_4KSTACKS +-#define THREAD_ORDER 0 +-#else +-#define THREAD_ORDER 1 +-#endif +-#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) +- +-#define STACKFAULT_STACK 0 +-#define DOUBLEFAULT_STACK 1 +-#define NMI_STACK 0 +-#define DEBUG_STACK 0 +-#define MCE_STACK 0 +-#define N_EXCEPTION_STACKS 1 +- +-#ifdef CONFIG_X86_PAE +-/* 44=32+12, the limit we can fit into an unsigned long pfn */ +-#define __PHYSICAL_MASK_SHIFT 44 +-#define __VIRTUAL_MASK_SHIFT 32 +-#define PAGETABLE_LEVELS 3 +- +-#ifndef __ASSEMBLY__ +-typedef u64 pteval_t; +-typedef u64 pmdval_t; +-typedef u64 pudval_t; +-typedef u64 pgdval_t; +-typedef u64 pgprotval_t; +- +-typedef union { +- struct { +- unsigned long pte_low, pte_high; +- }; +- pteval_t pte; +-} pte_t; +-#endif /* __ASSEMBLY__ +- */ +-#else /* !CONFIG_X86_PAE */ +-#define __PHYSICAL_MASK_SHIFT 32 +-#define __VIRTUAL_MASK_SHIFT 32 +-#define PAGETABLE_LEVELS 2 +- +-#ifndef __ASSEMBLY__ +-typedef unsigned long pteval_t; +-typedef unsigned long pmdval_t; +-typedef unsigned long pudval_t; +-typedef unsigned long pgdval_t; +-typedef unsigned long pgprotval_t; +- +-typedef union { +- pteval_t pte; +- pteval_t pte_low; +-} pte_t; +- +-#endif /* __ASSEMBLY__ */ +-#endif /* CONFIG_X86_PAE */ ++#include + + #ifndef __ASSEMBLY__ +-typedef struct page *pgtable_t; +-#endif + + #ifdef CONFIG_HUGETLB_PAGE + #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA + #endif + +-#ifndef __ASSEMBLY__ + #define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET) + #ifdef CONFIG_DEBUG_VIRTUAL + extern unsigned long __phys_addr(unsigned long); +@@ -89,23 +21,6 @@ extern unsigned long __phys_addr(unsigne + #define pfn_valid(pfn) ((pfn) < max_mapnr) + #endif /* CONFIG_FLATMEM */ + +-extern int nx_enabled; +- +-/* +- * This much address space is reserved for vmalloc() and iomap() +- * as well as fixmap mappings. +- */ +-extern unsigned int __VMALLOC_RESERVE; +-extern int sysctl_legacy_va_layout; +- +-extern void find_low_pfn_range(void); +-extern unsigned long init_memory_mapping(unsigned long start, +- unsigned long end); +-extern void initmem_init(unsigned long, unsigned long); +-extern void free_initmem(void); +-extern void setup_bootmem_allocator(void); +- +- + #ifdef CONFIG_X86_USE_3DNOW + #include + +Index: linux-2.6-tip/arch/x86/include/asm/page_32_types.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/page_32_types.h +@@ -0,0 +1,65 @@ ++#ifndef _ASM_X86_PAGE_32_DEFS_H ++#define _ASM_X86_PAGE_32_DEFS_H ++ ++#include ++ ++/* ++ * This handles the memory map. ++ * ++ * A __PAGE_OFFSET of 0xC0000000 means that the kernel has ++ * a virtual address space of one gigabyte, which limits the ++ * amount of physical memory you can use to about 950MB. ++ * ++ * If you want more physical memory than this then see the CONFIG_HIGHMEM4G ++ * and CONFIG_HIGHMEM64G options in the kernel configuration. 
++ */ ++#define __PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL) ++ ++#ifdef CONFIG_4KSTACKS ++#define THREAD_ORDER 0 ++#else ++#define THREAD_ORDER 1 ++#endif ++#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) ++ ++#define STACKFAULT_STACK 0 ++#define DOUBLEFAULT_STACK 1 ++#define NMI_STACK 0 ++#define DEBUG_STACK 0 ++#define MCE_STACK 0 ++#define N_EXCEPTION_STACKS 1 ++ ++#ifdef CONFIG_X86_PAE ++/* 44=32+12, the limit we can fit into an unsigned long pfn */ ++#define __PHYSICAL_MASK_SHIFT 44 ++#define __VIRTUAL_MASK_SHIFT 32 ++ ++#else /* !CONFIG_X86_PAE */ ++#define __PHYSICAL_MASK_SHIFT 32 ++#define __VIRTUAL_MASK_SHIFT 32 ++#endif /* CONFIG_X86_PAE */ ++ ++/* ++ * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S) ++ */ ++#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) ++ ++#ifndef __ASSEMBLY__ ++ ++/* ++ * This much address space is reserved for vmalloc() and iomap() ++ * as well as fixmap mappings. ++ */ ++extern unsigned int __VMALLOC_RESERVE; ++extern int sysctl_legacy_va_layout; ++ ++extern void find_low_pfn_range(void); ++extern unsigned long init_memory_mapping(unsigned long start, ++ unsigned long end); ++extern void initmem_init(unsigned long, unsigned long); ++extern void free_initmem(void); ++extern void setup_bootmem_allocator(void); ++ ++#endif /* !__ASSEMBLY__ */ ++ ++#endif /* _ASM_X86_PAGE_32_DEFS_H */ +Index: linux-2.6-tip/arch/x86/include/asm/page_64.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/page_64.h ++++ linux-2.6-tip/arch/x86/include/asm/page_64.h +@@ -1,105 +1,6 @@ + #ifndef _ASM_X86_PAGE_64_H + #define _ASM_X86_PAGE_64_H + +-#define PAGETABLE_LEVELS 4 +- +-#define THREAD_ORDER 1 +-#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) +-#define CURRENT_MASK (~(THREAD_SIZE - 1)) +- +-#define EXCEPTION_STACK_ORDER 0 +-#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) +- +-#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) +-#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) +- +-#define IRQSTACK_ORDER 2 +-#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) +- +-#define STACKFAULT_STACK 1 +-#define DOUBLEFAULT_STACK 2 +-#define NMI_STACK 3 +-#define DEBUG_STACK 4 +-#define MCE_STACK 5 +-#define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ +- +-#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) +-#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) +- +-/* +- * Set __PAGE_OFFSET to the most negative possible address + +- * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a +- * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's +- * what Xen requires. +- */ +-#define __PAGE_OFFSET _AC(0xffff880000000000, UL) +- +-#define __PHYSICAL_START CONFIG_PHYSICAL_START +-#define __KERNEL_ALIGN 0x200000 +- +-/* +- * Make sure kernel is aligned to 2MB address. Catching it at compile +- * time is better. Change your config file and compile the kernel +- * for a 2MB aligned address (CONFIG_PHYSICAL_START) +- */ +-#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0 +-#error "CONFIG_PHYSICAL_START must be a multiple of 2MB" +-#endif +- +-#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) +-#define __START_KERNEL_map _AC(0xffffffff80000000, UL) +- +-/* See Documentation/x86_64/mm.txt for a description of the memory map. 
*/ +-#define __PHYSICAL_MASK_SHIFT 46 +-#define __VIRTUAL_MASK_SHIFT 48 +- +-/* +- * Kernel image size is limited to 512 MB (see level2_kernel_pgt in +- * arch/x86/kernel/head_64.S), and it is mapped here: +- */ +-#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) +-#define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL) +- +-#ifndef __ASSEMBLY__ +-void clear_page(void *page); +-void copy_page(void *to, void *from); +- +-/* duplicated to the one in bootmem.h */ +-extern unsigned long max_pfn; +-extern unsigned long phys_base; +- +-extern unsigned long __phys_addr(unsigned long); +-#define __phys_reloc_hide(x) (x) +- +-/* +- * These are used to make use of C type-checking.. +- */ +-typedef unsigned long pteval_t; +-typedef unsigned long pmdval_t; +-typedef unsigned long pudval_t; +-typedef unsigned long pgdval_t; +-typedef unsigned long pgprotval_t; +- +-typedef struct page *pgtable_t; +- +-typedef struct { pteval_t pte; } pte_t; +- +-#define vmemmap ((struct page *)VMEMMAP_START) +- +-extern unsigned long init_memory_mapping(unsigned long start, +- unsigned long end); +- +-extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); +-extern void free_initmem(void); +- +-extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); +-extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); +- +-#endif /* !__ASSEMBLY__ */ +- +-#ifdef CONFIG_FLATMEM +-#define pfn_valid(pfn) ((pfn) < max_pfn) +-#endif +- ++#include + + #endif /* _ASM_X86_PAGE_64_H */ +Index: linux-2.6-tip/arch/x86/include/asm/page_64_types.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/page_64_types.h +@@ -0,0 +1,98 @@ ++#ifndef _ASM_X86_PAGE_64_DEFS_H ++#define _ASM_X86_PAGE_64_DEFS_H ++ ++#define THREAD_ORDER 1 ++#define THREAD_SIZE (PAGE_SIZE << THREAD_ORDER) ++#define CURRENT_MASK (~(THREAD_SIZE - 1)) ++ ++#define EXCEPTION_STACK_ORDER 0 ++#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER) ++ ++#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) ++#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) ++ ++#define IRQ_STACK_ORDER 2 ++#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER) ++ ++#ifdef CONFIG_PREEMPT_RT ++# define STACKFAULT_STACK 0 ++# define DOUBLEFAULT_STACK 1 ++# define NMI_STACK 2 ++# define DEBUG_STACK 0 ++# define MCE_STACK 3 ++# define N_EXCEPTION_STACKS 3 /* hw limit: 7 */ ++#else ++# define STACKFAULT_STACK 1 ++# define DOUBLEFAULT_STACK 2 ++# define NMI_STACK 3 ++# define DEBUG_STACK 4 ++# define MCE_STACK 5 ++# define N_EXCEPTION_STACKS 5 /* hw limit: 7 */ ++#endif ++ ++#define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) ++#define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) ++ ++/* ++ * Set __PAGE_OFFSET to the most negative possible address + ++ * PGDIR_SIZE*16 (pgd slot 272). The gap is to allow a space for a ++ * hypervisor to fit. Choosing 16 slots here is arbitrary, but it's ++ * what Xen requires. ++ */ ++#define __PAGE_OFFSET _AC(0xffff880000000000, UL) ++ ++#define __PHYSICAL_START CONFIG_PHYSICAL_START ++#define __KERNEL_ALIGN 0x200000 ++ ++/* ++ * Make sure kernel is aligned to 2MB address. Catching it at compile ++ * time is better. 
Change your config file and compile the kernel ++ * for a 2MB aligned address (CONFIG_PHYSICAL_START) ++ */ ++#if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0 ++#error "CONFIG_PHYSICAL_START must be a multiple of 2MB" ++#endif ++ ++#define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) ++#define __START_KERNEL_map _AC(0xffffffff80000000, UL) ++ ++/* See Documentation/x86_64/mm.txt for a description of the memory map. */ ++#define __PHYSICAL_MASK_SHIFT 46 ++#define __VIRTUAL_MASK_SHIFT 48 ++ ++/* ++ * Kernel image size is limited to 512 MB (see level2_kernel_pgt in ++ * arch/x86/kernel/head_64.S), and it is mapped here: ++ */ ++#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) ++#define KERNEL_IMAGE_START _AC(0xffffffff80000000, UL) ++ ++#ifndef __ASSEMBLY__ ++void clear_page(void *page); ++void copy_page(void *to, void *from); ++ ++/* duplicated to the one in bootmem.h */ ++extern unsigned long max_pfn; ++extern unsigned long phys_base; ++ ++extern unsigned long __phys_addr(unsigned long); ++#define __phys_reloc_hide(x) (x) ++ ++#define vmemmap ((struct page *)VMEMMAP_START) ++ ++extern unsigned long init_memory_mapping(unsigned long start, ++ unsigned long end); ++ ++extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); ++extern void free_initmem(void); ++ ++extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); ++extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); ++ ++#endif /* !__ASSEMBLY__ */ ++ ++#ifdef CONFIG_FLATMEM ++#define pfn_valid(pfn) ((pfn) < max_pfn) ++#endif ++ ++#endif /* _ASM_X86_PAGE_64_DEFS_H */ +Index: linux-2.6-tip/arch/x86/include/asm/page_types.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/page_types.h +@@ -0,0 +1,59 @@ ++#ifndef _ASM_X86_PAGE_DEFS_H ++#define _ASM_X86_PAGE_DEFS_H ++ ++#include ++ ++/* PAGE_SHIFT determines the page size */ ++#define PAGE_SHIFT 12 ++#define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT) ++#define PAGE_MASK (~(PAGE_SIZE-1)) ++ ++#define __PHYSICAL_MASK ((phys_addr_t)(1ULL << __PHYSICAL_MASK_SHIFT) - 1) ++#define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) ++ ++/* Cast PAGE_MASK to a signed type so that it is sign-extended if ++ virtual addresses are 32-bits but physical addresses are larger ++ (ie, 32-bit PAE). */ ++#define PHYSICAL_PAGE_MASK (((signed long)PAGE_MASK) & __PHYSICAL_MASK) ++ ++#define PMD_PAGE_SIZE (_AC(1, UL) << PMD_SHIFT) ++#define PMD_PAGE_MASK (~(PMD_PAGE_SIZE-1)) ++ ++#define HPAGE_SHIFT PMD_SHIFT ++#define HPAGE_SIZE (_AC(1,UL) << HPAGE_SHIFT) ++#define HPAGE_MASK (~(HPAGE_SIZE - 1)) ++#define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) ++ ++#define HUGE_MAX_HSTATE 2 ++ ++#define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) ++ ++#define VM_DATA_DEFAULT_FLAGS \ ++ (((current->personality & READ_IMPLIES_EXEC) ? 
VM_EXEC : 0 ) | \ ++ VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) ++ ++#ifdef CONFIG_X86_64 ++#include ++#else ++#include ++#endif /* CONFIG_X86_64 */ ++ ++#ifndef __ASSEMBLY__ ++ ++enum bootmem_state { ++ BEFORE_BOOTMEM, ++ DURING_BOOTMEM, ++ AFTER_BOOTMEM ++}; ++ ++extern enum bootmem_state bootmem_state; ++ ++extern int page_is_ram(unsigned long pagenr); ++extern int devmem_is_allowed(unsigned long pagenr); ++ ++extern unsigned long max_low_pfn_mapped; ++extern unsigned long max_pfn_mapped; ++ ++#endif /* !__ASSEMBLY__ */ ++ ++#endif /* _ASM_X86_PAGE_DEFS_H */ +Index: linux-2.6-tip/arch/x86/include/asm/paravirt.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/paravirt.h ++++ linux-2.6-tip/arch/x86/include/asm/paravirt.h +@@ -4,7 +4,7 @@ + * para-virtualization: those hooks are defined here. */ + + #ifdef CONFIG_PARAVIRT +-#include ++#include + #include + + /* Bitmask of what can be clobbered: usually at least eax. */ +@@ -12,21 +12,38 @@ + #define CLBR_EAX (1 << 0) + #define CLBR_ECX (1 << 1) + #define CLBR_EDX (1 << 2) ++#define CLBR_EDI (1 << 3) + +-#ifdef CONFIG_X86_64 +-#define CLBR_RSI (1 << 3) +-#define CLBR_RDI (1 << 4) ++#ifdef CONFIG_X86_32 ++/* CLBR_ANY should match all regs platform has. For i386, that's just it */ ++#define CLBR_ANY ((1 << 4) - 1) ++ ++#define CLBR_ARG_REGS (CLBR_EAX | CLBR_EDX | CLBR_ECX) ++#define CLBR_RET_REG (CLBR_EAX | CLBR_EDX) ++#define CLBR_SCRATCH (0) ++#else ++#define CLBR_RAX CLBR_EAX ++#define CLBR_RCX CLBR_ECX ++#define CLBR_RDX CLBR_EDX ++#define CLBR_RDI CLBR_EDI ++#define CLBR_RSI (1 << 4) + #define CLBR_R8 (1 << 5) + #define CLBR_R9 (1 << 6) + #define CLBR_R10 (1 << 7) + #define CLBR_R11 (1 << 8) ++ + #define CLBR_ANY ((1 << 9) - 1) ++ ++#define CLBR_ARG_REGS (CLBR_RDI | CLBR_RSI | CLBR_RDX | \ ++ CLBR_RCX | CLBR_R8 | CLBR_R9) ++#define CLBR_RET_REG (CLBR_RAX) ++#define CLBR_SCRATCH (CLBR_R10 | CLBR_R11) ++ + #include +-#else +-/* CLBR_ANY should match all regs platform has. For i386, that's just it */ +-#define CLBR_ANY ((1 << 3) - 1) + #endif /* X86_64 */ + ++#define CLBR_CALLEE_SAVE ((CLBR_ARG_REGS | CLBR_SCRATCH) & ~CLBR_RET_REG) ++ + #ifndef __ASSEMBLY__ + #include + #include +@@ -40,6 +57,14 @@ struct tss_struct; + struct mm_struct; + struct desc_struct; + ++/* ++ * Wrapper type for pointers to code which uses the non-standard ++ * calling convention. See PV_CALL_SAVE_REGS_THUNK below. ++ */ ++struct paravirt_callee_save { ++ void *func; ++}; ++ + /* general info */ + struct pv_info { + unsigned int kernel_rpl; +@@ -189,11 +214,15 @@ struct pv_irq_ops { + * expected to use X86_EFLAGS_IF; all other bits + * returned from save_fl are undefined, and may be ignored by + * restore_fl. ++ * ++ * NOTE: These functions callers expect the callee to preserve ++ * more registers than the standard C calling convention. 
+ */ +- unsigned long (*save_fl)(void); +- void (*restore_fl)(unsigned long); +- void (*irq_disable)(void); +- void (*irq_enable)(void); ++ struct paravirt_callee_save save_fl; ++ struct paravirt_callee_save restore_fl; ++ struct paravirt_callee_save irq_disable; ++ struct paravirt_callee_save irq_enable; ++ + void (*safe_halt)(void); + void (*halt)(void); + +@@ -244,7 +273,8 @@ struct pv_mmu_ops { + void (*flush_tlb_user)(void); + void (*flush_tlb_kernel)(void); + void (*flush_tlb_single)(unsigned long addr); +- void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, ++ void (*flush_tlb_others)(const struct cpumask *cpus, ++ struct mm_struct *mm, + unsigned long va); + + /* Hooks for allocating and freeing a pagetable top-level */ +@@ -278,18 +308,15 @@ struct pv_mmu_ops { + void (*ptep_modify_prot_commit)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); + +- pteval_t (*pte_val)(pte_t); +- pteval_t (*pte_flags)(pte_t); +- pte_t (*make_pte)(pteval_t pte); ++ struct paravirt_callee_save pte_val; ++ struct paravirt_callee_save make_pte; + +- pgdval_t (*pgd_val)(pgd_t); +- pgd_t (*make_pgd)(pgdval_t pgd); ++ struct paravirt_callee_save pgd_val; ++ struct paravirt_callee_save make_pgd; + + #if PAGETABLE_LEVELS >= 3 + #ifdef CONFIG_X86_PAE + void (*set_pte_atomic)(pte_t *ptep, pte_t pteval); +- void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, +- pte_t *ptep, pte_t pte); + void (*pte_clear)(struct mm_struct *mm, unsigned long addr, + pte_t *ptep); + void (*pmd_clear)(pmd_t *pmdp); +@@ -298,12 +325,12 @@ struct pv_mmu_ops { + + void (*set_pud)(pud_t *pudp, pud_t pudval); + +- pmdval_t (*pmd_val)(pmd_t); +- pmd_t (*make_pmd)(pmdval_t pmd); ++ struct paravirt_callee_save pmd_val; ++ struct paravirt_callee_save make_pmd; + + #if PAGETABLE_LEVELS == 4 +- pudval_t (*pud_val)(pud_t); +- pud_t (*make_pud)(pudval_t pud); ++ struct paravirt_callee_save pud_val; ++ struct paravirt_callee_save make_pud; + + void (*set_pgd)(pgd_t *pudp, pgd_t pgdval); + #endif /* PAGETABLE_LEVELS == 4 */ +@@ -311,6 +338,7 @@ struct pv_mmu_ops { + + #ifdef CONFIG_HIGHPTE + void *(*kmap_atomic_pte)(struct page *page, enum km_type type); ++ void *(*kmap_atomic_pte_direct)(struct page *page, enum km_type type); + #endif + + struct pv_lazy_ops lazy_mode; +@@ -320,7 +348,7 @@ struct pv_mmu_ops { + /* Sometimes the physical address is a pfn, and sometimes its + an mfn. We can tell which is which from the index. */ + void (*set_fixmap)(unsigned /* enum fixed_addresses */ idx, +- unsigned long phys, pgprot_t flags); ++ phys_addr_t phys, pgprot_t flags); + }; + + struct raw_spinlock; +@@ -360,7 +388,7 @@ extern struct pv_lock_ops pv_lock_ops; + + #define paravirt_type(op) \ + [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ +- [paravirt_opptr] "m" (op) ++ [paravirt_opptr] "i" (&(op)) + #define paravirt_clobber(clobber) \ + [paravirt_clobber] "i" (clobber) + +@@ -388,6 +416,8 @@ extern struct pv_lock_ops pv_lock_ops; + asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") + + unsigned paravirt_patch_nop(void); ++unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); ++unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len); + unsigned paravirt_patch_ignore(unsigned len); + unsigned paravirt_patch_call(void *insnbuf, + const void *target, u16 tgt_clobbers, +@@ -412,7 +442,7 @@ int paravirt_disable_iospace(void); + * offset into the paravirt_patch_template structure, and can therefore be + * freely converted back into a structure offset. 
+ */ +-#define PARAVIRT_CALL "call *%[paravirt_opptr];" ++#define PARAVIRT_CALL "call *%c[paravirt_opptr];" + + /* + * These macros are intended to wrap calls through one of the paravirt +@@ -479,25 +509,45 @@ int paravirt_disable_iospace(void); + * makes sure the incoming and outgoing types are always correct. + */ + #ifdef CONFIG_X86_32 +-#define PVOP_VCALL_ARGS unsigned long __eax, __edx, __ecx ++#define PVOP_VCALL_ARGS \ ++ unsigned long __eax = __eax, __edx = __edx, __ecx = __ecx + #define PVOP_CALL_ARGS PVOP_VCALL_ARGS ++ ++#define PVOP_CALL_ARG1(x) "a" ((unsigned long)(x)) ++#define PVOP_CALL_ARG2(x) "d" ((unsigned long)(x)) ++#define PVOP_CALL_ARG3(x) "c" ((unsigned long)(x)) ++ + #define PVOP_VCALL_CLOBBERS "=a" (__eax), "=d" (__edx), \ + "=c" (__ecx) + #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS ++ ++#define PVOP_VCALLEE_CLOBBERS "=a" (__eax), "=d" (__edx) ++#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS ++ + #define EXTRA_CLOBBERS + #define VEXTRA_CLOBBERS +-#else +-#define PVOP_VCALL_ARGS unsigned long __edi, __esi, __edx, __ecx ++#else /* CONFIG_X86_64 */ ++#define PVOP_VCALL_ARGS \ ++ unsigned long __edi = __edi, __esi = __esi, \ ++ __edx = __edx, __ecx = __ecx + #define PVOP_CALL_ARGS PVOP_VCALL_ARGS, __eax ++ ++#define PVOP_CALL_ARG1(x) "D" ((unsigned long)(x)) ++#define PVOP_CALL_ARG2(x) "S" ((unsigned long)(x)) ++#define PVOP_CALL_ARG3(x) "d" ((unsigned long)(x)) ++#define PVOP_CALL_ARG4(x) "c" ((unsigned long)(x)) ++ + #define PVOP_VCALL_CLOBBERS "=D" (__edi), \ + "=S" (__esi), "=d" (__edx), \ + "=c" (__ecx) +- + #define PVOP_CALL_CLOBBERS PVOP_VCALL_CLOBBERS, "=a" (__eax) + ++#define PVOP_VCALLEE_CLOBBERS "=a" (__eax) ++#define PVOP_CALLEE_CLOBBERS PVOP_VCALLEE_CLOBBERS ++ + #define EXTRA_CLOBBERS , "r8", "r9", "r10", "r11" + #define VEXTRA_CLOBBERS , "rax", "r8", "r9", "r10", "r11" +-#endif ++#endif /* CONFIG_X86_32 */ + + #ifdef CONFIG_PARAVIRT_DEBUG + #define PVOP_TEST_NULL(op) BUG_ON(op == NULL) +@@ -505,10 +555,11 @@ int paravirt_disable_iospace(void); + #define PVOP_TEST_NULL(op) ((void)op) + #endif + +-#define __PVOP_CALL(rettype, op, pre, post, ...) \ ++#define ____PVOP_CALL(rettype, op, clbr, call_clbr, extra_clbr, \ ++ pre, post, ...) \ + ({ \ + rettype __ret; \ +- PVOP_CALL_ARGS; \ ++ PVOP_CALL_ARGS; \ + PVOP_TEST_NULL(op); \ + /* This is 32-bit specific, but is okay in 64-bit */ \ + /* since this condition will never hold */ \ +@@ -516,70 +567,113 @@ int paravirt_disable_iospace(void); + asm volatile(pre \ + paravirt_alt(PARAVIRT_CALL) \ + post \ +- : PVOP_CALL_CLOBBERS \ ++ : call_clbr \ + : paravirt_type(op), \ +- paravirt_clobber(CLBR_ANY), \ ++ paravirt_clobber(clbr), \ + ##__VA_ARGS__ \ +- : "memory", "cc" EXTRA_CLOBBERS); \ ++ : "memory", "cc" extra_clbr); \ + __ret = (rettype)((((u64)__edx) << 32) | __eax); \ + } else { \ + asm volatile(pre \ + paravirt_alt(PARAVIRT_CALL) \ + post \ +- : PVOP_CALL_CLOBBERS \ ++ : call_clbr \ + : paravirt_type(op), \ +- paravirt_clobber(CLBR_ANY), \ ++ paravirt_clobber(clbr), \ + ##__VA_ARGS__ \ +- : "memory", "cc" EXTRA_CLOBBERS); \ ++ : "memory", "cc" extra_clbr); \ + __ret = (rettype)__eax; \ + } \ + __ret; \ + }) +-#define __PVOP_VCALL(op, pre, post, ...) \ ++ ++#define __PVOP_CALL(rettype, op, pre, post, ...) \ ++ ____PVOP_CALL(rettype, op, CLBR_ANY, PVOP_CALL_CLOBBERS, \ ++ EXTRA_CLOBBERS, pre, post, ##__VA_ARGS__) ++ ++#define __PVOP_CALLEESAVE(rettype, op, pre, post, ...) 
\ ++ ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ ++ PVOP_CALLEE_CLOBBERS, , \ ++ pre, post, ##__VA_ARGS__) ++ ++ ++#define ____PVOP_VCALL(op, clbr, call_clbr, extra_clbr, pre, post, ...) \ + ({ \ + PVOP_VCALL_ARGS; \ + PVOP_TEST_NULL(op); \ + asm volatile(pre \ + paravirt_alt(PARAVIRT_CALL) \ + post \ +- : PVOP_VCALL_CLOBBERS \ ++ : call_clbr \ + : paravirt_type(op), \ +- paravirt_clobber(CLBR_ANY), \ ++ paravirt_clobber(clbr), \ + ##__VA_ARGS__ \ +- : "memory", "cc" VEXTRA_CLOBBERS); \ ++ : "memory", "cc" extra_clbr); \ + }) + ++#define __PVOP_VCALL(op, pre, post, ...) \ ++ ____PVOP_VCALL(op, CLBR_ANY, PVOP_VCALL_CLOBBERS, \ ++ VEXTRA_CLOBBERS, \ ++ pre, post, ##__VA_ARGS__) ++ ++#define __PVOP_VCALLEESAVE(rettype, op, pre, post, ...) \ ++ ____PVOP_CALL(rettype, op.func, CLBR_RET_REG, \ ++ PVOP_VCALLEE_CLOBBERS, , \ ++ pre, post, ##__VA_ARGS__) ++ ++ ++ + #define PVOP_CALL0(rettype, op) \ + __PVOP_CALL(rettype, op, "", "") + #define PVOP_VCALL0(op) \ + __PVOP_VCALL(op, "", "") + ++#define PVOP_CALLEE0(rettype, op) \ ++ __PVOP_CALLEESAVE(rettype, op, "", "") ++#define PVOP_VCALLEE0(op) \ ++ __PVOP_VCALLEESAVE(op, "", "") ++ ++ + #define PVOP_CALL1(rettype, op, arg1) \ +- __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1))) ++ __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) + #define PVOP_VCALL1(op, arg1) \ +- __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1))) ++ __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1)) ++ ++#define PVOP_CALLEE1(rettype, op, arg1) \ ++ __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1)) ++#define PVOP_VCALLEE1(op, arg1) \ ++ __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1)) ++ + + #define PVOP_CALL2(rettype, op, arg1, arg2) \ +- __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ +- "1" ((unsigned long)(arg2))) ++ __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ ++ PVOP_CALL_ARG2(arg2)) + #define PVOP_VCALL2(op, arg1, arg2) \ +- __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ +- "1" ((unsigned long)(arg2))) ++ __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ ++ PVOP_CALL_ARG2(arg2)) ++ ++#define PVOP_CALLEE2(rettype, op, arg1, arg2) \ ++ __PVOP_CALLEESAVE(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ ++ PVOP_CALL_ARG2(arg2)) ++#define PVOP_VCALLEE2(op, arg1, arg2) \ ++ __PVOP_VCALLEESAVE(op, "", "", PVOP_CALL_ARG1(arg1), \ ++ PVOP_CALL_ARG2(arg2)) ++ + + #define PVOP_CALL3(rettype, op, arg1, arg2, arg3) \ +- __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ +- "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) ++ __PVOP_CALL(rettype, op, "", "", PVOP_CALL_ARG1(arg1), \ ++ PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) + #define PVOP_VCALL3(op, arg1, arg2, arg3) \ +- __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ +- "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3))) ++ __PVOP_VCALL(op, "", "", PVOP_CALL_ARG1(arg1), \ ++ PVOP_CALL_ARG2(arg2), PVOP_CALL_ARG3(arg3)) + + /* This is the only difference in x86_64. 
We can make it much simpler */ + #ifdef CONFIG_X86_32 + #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ + __PVOP_CALL(rettype, op, \ + "push %[_arg4];", "lea 4(%%esp),%%esp;", \ +- "0" ((u32)(arg1)), "1" ((u32)(arg2)), \ +- "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) ++ PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ ++ PVOP_CALL_ARG3(arg3), [_arg4] "mr" ((u32)(arg4))) + #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ + __PVOP_VCALL(op, \ + "push %[_arg4];", "lea 4(%%esp),%%esp;", \ +@@ -587,13 +681,13 @@ int paravirt_disable_iospace(void); + "2" ((u32)(arg3)), [_arg4] "mr" ((u32)(arg4))) + #else + #define PVOP_CALL4(rettype, op, arg1, arg2, arg3, arg4) \ +- __PVOP_CALL(rettype, op, "", "", "0" ((unsigned long)(arg1)), \ +- "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ +- "3"((unsigned long)(arg4))) ++ __PVOP_CALL(rettype, op, "", "", \ ++ PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ ++ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) + #define PVOP_VCALL4(op, arg1, arg2, arg3, arg4) \ +- __PVOP_VCALL(op, "", "", "0" ((unsigned long)(arg1)), \ +- "1"((unsigned long)(arg2)), "2"((unsigned long)(arg3)), \ +- "3"((unsigned long)(arg4))) ++ __PVOP_VCALL(op, "", "", \ ++ PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ ++ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) + #endif + + static inline int paravirt_enabled(void) +@@ -984,10 +1078,11 @@ static inline void __flush_tlb_single(un + PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); + } + +-static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, ++static inline void flush_tlb_others(const struct cpumask *cpumask, ++ struct mm_struct *mm, + unsigned long va) + { +- PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); ++ PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va); + } + + static inline int paravirt_pgd_alloc(struct mm_struct *mm) +@@ -1040,6 +1135,14 @@ static inline void *kmap_atomic_pte(stru + ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte, page, type); + return (void *)ret; + } ++ ++static inline void *kmap_atomic_pte_direct(struct page *page, enum km_type type) ++{ ++ unsigned long ret; ++ ret = PVOP_CALL2(unsigned long, pv_mmu_ops.kmap_atomic_pte_direct, ++ page, type); ++ return (void *)ret; ++} + #endif + + static inline void pte_update(struct mm_struct *mm, unsigned long addr, +@@ -1059,13 +1162,13 @@ static inline pte_t __pte(pteval_t val) + pteval_t ret; + + if (sizeof(pteval_t) > sizeof(long)) +- ret = PVOP_CALL2(pteval_t, +- pv_mmu_ops.make_pte, +- val, (u64)val >> 32); ++ ret = PVOP_CALLEE2(pteval_t, ++ pv_mmu_ops.make_pte, ++ val, (u64)val >> 32); + else +- ret = PVOP_CALL1(pteval_t, +- pv_mmu_ops.make_pte, +- val); ++ ret = PVOP_CALLEE1(pteval_t, ++ pv_mmu_ops.make_pte, ++ val); + + return (pte_t) { .pte = ret }; + } +@@ -1075,42 +1178,25 @@ static inline pteval_t pte_val(pte_t pte + pteval_t ret; + + if (sizeof(pteval_t) > sizeof(long)) +- ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_val, +- pte.pte, (u64)pte.pte >> 32); ++ ret = PVOP_CALLEE2(pteval_t, pv_mmu_ops.pte_val, ++ pte.pte, (u64)pte.pte >> 32); + else +- ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_val, +- pte.pte); ++ ret = PVOP_CALLEE1(pteval_t, pv_mmu_ops.pte_val, ++ pte.pte); + + return ret; + } + +-static inline pteval_t pte_flags(pte_t pte) +-{ +- pteval_t ret; +- +- if (sizeof(pteval_t) > sizeof(long)) +- ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags, +- pte.pte, (u64)pte.pte >> 32); +- else +- ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags, +- pte.pte); +- +-#ifdef CONFIG_PARAVIRT_DEBUG +- 
BUG_ON(ret & PTE_PFN_MASK); +-#endif +- return ret; +-} +- + static inline pgd_t __pgd(pgdval_t val) + { + pgdval_t ret; + + if (sizeof(pgdval_t) > sizeof(long)) +- ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.make_pgd, +- val, (u64)val >> 32); ++ ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.make_pgd, ++ val, (u64)val >> 32); + else +- ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.make_pgd, +- val); ++ ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.make_pgd, ++ val); + + return (pgd_t) { ret }; + } +@@ -1120,11 +1206,11 @@ static inline pgdval_t pgd_val(pgd_t pgd + pgdval_t ret; + + if (sizeof(pgdval_t) > sizeof(long)) +- ret = PVOP_CALL2(pgdval_t, pv_mmu_ops.pgd_val, +- pgd.pgd, (u64)pgd.pgd >> 32); ++ ret = PVOP_CALLEE2(pgdval_t, pv_mmu_ops.pgd_val, ++ pgd.pgd, (u64)pgd.pgd >> 32); + else +- ret = PVOP_CALL1(pgdval_t, pv_mmu_ops.pgd_val, +- pgd.pgd); ++ ret = PVOP_CALLEE1(pgdval_t, pv_mmu_ops.pgd_val, ++ pgd.pgd); + + return ret; + } +@@ -1188,11 +1274,11 @@ static inline pmd_t __pmd(pmdval_t val) + pmdval_t ret; + + if (sizeof(pmdval_t) > sizeof(long)) +- ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.make_pmd, +- val, (u64)val >> 32); ++ ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.make_pmd, ++ val, (u64)val >> 32); + else +- ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.make_pmd, +- val); ++ ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.make_pmd, ++ val); + + return (pmd_t) { ret }; + } +@@ -1202,11 +1288,11 @@ static inline pmdval_t pmd_val(pmd_t pmd + pmdval_t ret; + + if (sizeof(pmdval_t) > sizeof(long)) +- ret = PVOP_CALL2(pmdval_t, pv_mmu_ops.pmd_val, +- pmd.pmd, (u64)pmd.pmd >> 32); ++ ret = PVOP_CALLEE2(pmdval_t, pv_mmu_ops.pmd_val, ++ pmd.pmd, (u64)pmd.pmd >> 32); + else +- ret = PVOP_CALL1(pmdval_t, pv_mmu_ops.pmd_val, +- pmd.pmd); ++ ret = PVOP_CALLEE1(pmdval_t, pv_mmu_ops.pmd_val, ++ pmd.pmd); + + return ret; + } +@@ -1228,11 +1314,11 @@ static inline pud_t __pud(pudval_t val) + pudval_t ret; + + if (sizeof(pudval_t) > sizeof(long)) +- ret = PVOP_CALL2(pudval_t, pv_mmu_ops.make_pud, +- val, (u64)val >> 32); ++ ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.make_pud, ++ val, (u64)val >> 32); + else +- ret = PVOP_CALL1(pudval_t, pv_mmu_ops.make_pud, +- val); ++ ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.make_pud, ++ val); + + return (pud_t) { ret }; + } +@@ -1242,11 +1328,11 @@ static inline pudval_t pud_val(pud_t pud + pudval_t ret; + + if (sizeof(pudval_t) > sizeof(long)) +- ret = PVOP_CALL2(pudval_t, pv_mmu_ops.pud_val, +- pud.pud, (u64)pud.pud >> 32); ++ ret = PVOP_CALLEE2(pudval_t, pv_mmu_ops.pud_val, ++ pud.pud, (u64)pud.pud >> 32); + else +- ret = PVOP_CALL1(pudval_t, pv_mmu_ops.pud_val, +- pud.pud); ++ ret = PVOP_CALLEE1(pudval_t, pv_mmu_ops.pud_val, ++ pud.pud); + + return ret; + } +@@ -1286,13 +1372,6 @@ static inline void set_pte_atomic(pte_t + pte.pte, pte.pte >> 32); + } + +-static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, +- pte_t *ptep, pte_t pte) +-{ +- /* 5 arg words */ +- pv_mmu_ops.set_pte_present(mm, addr, ptep, pte); +-} +- + static inline void pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) + { +@@ -1309,12 +1388,6 @@ static inline void set_pte_atomic(pte_t + set_pte(ptep, pte); + } + +-static inline void set_pte_present(struct mm_struct *mm, unsigned long addr, +- pte_t *ptep, pte_t pte) +-{ +- set_pte(ptep, pte); +-} +- + static inline void pte_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) + { +@@ -1368,15 +1441,16 @@ static inline void arch_leave_lazy_mmu_m + void arch_flush_lazy_mmu_mode(void); + + static inline void __set_fixmap(unsigned /* enum 
fixed_addresses */ idx, +- unsigned long phys, pgprot_t flags) ++ phys_addr_t phys, pgprot_t flags) + { + pv_mmu_ops.set_fixmap(idx, phys, flags); + } + + void _paravirt_nop(void); +-#define paravirt_nop ((void *)_paravirt_nop) ++u32 _paravirt_ident_32(u32); ++u64 _paravirt_ident_64(u64); + +-void paravirt_use_bytelocks(void); ++#define paravirt_nop ((void *)_paravirt_nop) + + #ifdef CONFIG_SMP + +@@ -1426,12 +1500,37 @@ extern struct paravirt_patch_site __para + __parainstructions_end[]; + + #ifdef CONFIG_X86_32 +-#define PV_SAVE_REGS "pushl %%ecx; pushl %%edx;" +-#define PV_RESTORE_REGS "popl %%edx; popl %%ecx" ++#define PV_SAVE_REGS "pushl %ecx; pushl %edx;" ++#define PV_RESTORE_REGS "popl %edx; popl %ecx;" ++ ++/* save and restore all caller-save registers, except return value */ ++#define PV_SAVE_ALL_CALLER_REGS "pushl %ecx;" ++#define PV_RESTORE_ALL_CALLER_REGS "popl %ecx;" ++ + #define PV_FLAGS_ARG "0" + #define PV_EXTRA_CLOBBERS + #define PV_VEXTRA_CLOBBERS + #else ++/* save and restore all caller-save registers, except return value */ ++#define PV_SAVE_ALL_CALLER_REGS \ ++ "push %rcx;" \ ++ "push %rdx;" \ ++ "push %rsi;" \ ++ "push %rdi;" \ ++ "push %r8;" \ ++ "push %r9;" \ ++ "push %r10;" \ ++ "push %r11;" ++#define PV_RESTORE_ALL_CALLER_REGS \ ++ "pop %r11;" \ ++ "pop %r10;" \ ++ "pop %r9;" \ ++ "pop %r8;" \ ++ "pop %rdi;" \ ++ "pop %rsi;" \ ++ "pop %rdx;" \ ++ "pop %rcx;" ++ + /* We save some registers, but all of them, that's too much. We clobber all + * caller saved registers but the argument parameter */ + #define PV_SAVE_REGS "pushq %%rdi;" +@@ -1441,52 +1540,76 @@ extern struct paravirt_patch_site __para + #define PV_FLAGS_ARG "D" + #endif + ++/* ++ * Generate a thunk around a function which saves all caller-save ++ * registers except for the return value. This allows C functions to ++ * be called from assembler code where fewer than normal registers are ++ * available. It may also help code generation around calls from C ++ * code if the common case doesn't use many registers. ++ * ++ * When a callee is wrapped in a thunk, the caller can assume that all ++ * arg regs and all scratch registers are preserved across the ++ * call. The return value in rax/eax will not be saved, even for void ++ * functions. 
++ */ ++#define PV_CALLEE_SAVE_REGS_THUNK(func) \ ++ extern typeof(func) __raw_callee_save_##func; \ ++ static void *__##func##__ __used = func; \ ++ \ ++ asm(".pushsection .text;" \ ++ "__raw_callee_save_" #func ": " \ ++ PV_SAVE_ALL_CALLER_REGS \ ++ "call " #func ";" \ ++ PV_RESTORE_ALL_CALLER_REGS \ ++ "ret;" \ ++ ".popsection") ++ ++/* Get a reference to a callee-save function */ ++#define PV_CALLEE_SAVE(func) \ ++ ((struct paravirt_callee_save) { __raw_callee_save_##func }) ++ ++/* Promise that "func" already uses the right calling convention */ ++#define __PV_IS_CALLEE_SAVE(func) \ ++ ((struct paravirt_callee_save) { func }) ++ + static inline unsigned long __raw_local_save_flags(void) + { + unsigned long f; + +- asm volatile(paravirt_alt(PV_SAVE_REGS +- PARAVIRT_CALL +- PV_RESTORE_REGS) ++ asm volatile(paravirt_alt(PARAVIRT_CALL) + : "=a"(f) + : paravirt_type(pv_irq_ops.save_fl), + paravirt_clobber(CLBR_EAX) +- : "memory", "cc" PV_VEXTRA_CLOBBERS); ++ : "memory", "cc"); + return f; + } + + static inline void raw_local_irq_restore(unsigned long f) + { +- asm volatile(paravirt_alt(PV_SAVE_REGS +- PARAVIRT_CALL +- PV_RESTORE_REGS) ++ asm volatile(paravirt_alt(PARAVIRT_CALL) + : "=a"(f) + : PV_FLAGS_ARG(f), + paravirt_type(pv_irq_ops.restore_fl), + paravirt_clobber(CLBR_EAX) +- : "memory", "cc" PV_EXTRA_CLOBBERS); ++ : "memory", "cc"); + } + + static inline void raw_local_irq_disable(void) + { +- asm volatile(paravirt_alt(PV_SAVE_REGS +- PARAVIRT_CALL +- PV_RESTORE_REGS) ++ asm volatile(paravirt_alt(PARAVIRT_CALL) + : + : paravirt_type(pv_irq_ops.irq_disable), + paravirt_clobber(CLBR_EAX) +- : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); ++ : "memory", "eax", "cc"); + } + + static inline void raw_local_irq_enable(void) + { +- asm volatile(paravirt_alt(PV_SAVE_REGS +- PARAVIRT_CALL +- PV_RESTORE_REGS) ++ asm volatile(paravirt_alt(PARAVIRT_CALL) + : + : paravirt_type(pv_irq_ops.irq_enable), + paravirt_clobber(CLBR_EAX) +- : "memory", "eax", "cc" PV_EXTRA_CLOBBERS); ++ : "memory", "eax", "cc"); + } + + static inline unsigned long __raw_local_irq_save(void) +@@ -1529,33 +1652,49 @@ static inline unsigned long __raw_local_ + .popsection + + ++#define COND_PUSH(set, mask, reg) \ ++ .if ((~(set)) & mask); push %reg; .endif ++#define COND_POP(set, mask, reg) \ ++ .if ((~(set)) & mask); pop %reg; .endif ++ + #ifdef CONFIG_X86_64 +-#define PV_SAVE_REGS \ +- push %rax; \ +- push %rcx; \ +- push %rdx; \ +- push %rsi; \ +- push %rdi; \ +- push %r8; \ +- push %r9; \ +- push %r10; \ +- push %r11 +-#define PV_RESTORE_REGS \ +- pop %r11; \ +- pop %r10; \ +- pop %r9; \ +- pop %r8; \ +- pop %rdi; \ +- pop %rsi; \ +- pop %rdx; \ +- pop %rcx; \ +- pop %rax ++ ++#define PV_SAVE_REGS(set) \ ++ COND_PUSH(set, CLBR_RAX, rax); \ ++ COND_PUSH(set, CLBR_RCX, rcx); \ ++ COND_PUSH(set, CLBR_RDX, rdx); \ ++ COND_PUSH(set, CLBR_RSI, rsi); \ ++ COND_PUSH(set, CLBR_RDI, rdi); \ ++ COND_PUSH(set, CLBR_R8, r8); \ ++ COND_PUSH(set, CLBR_R9, r9); \ ++ COND_PUSH(set, CLBR_R10, r10); \ ++ COND_PUSH(set, CLBR_R11, r11) ++#define PV_RESTORE_REGS(set) \ ++ COND_POP(set, CLBR_R11, r11); \ ++ COND_POP(set, CLBR_R10, r10); \ ++ COND_POP(set, CLBR_R9, r9); \ ++ COND_POP(set, CLBR_R8, r8); \ ++ COND_POP(set, CLBR_RDI, rdi); \ ++ COND_POP(set, CLBR_RSI, rsi); \ ++ COND_POP(set, CLBR_RDX, rdx); \ ++ COND_POP(set, CLBR_RCX, rcx); \ ++ COND_POP(set, CLBR_RAX, rax) ++ + #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 8) + #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8) + #define 
PARA_INDIRECT(addr) *addr(%rip) + #else +-#define PV_SAVE_REGS pushl %eax; pushl %edi; pushl %ecx; pushl %edx +-#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax ++#define PV_SAVE_REGS(set) \ ++ COND_PUSH(set, CLBR_EAX, eax); \ ++ COND_PUSH(set, CLBR_EDI, edi); \ ++ COND_PUSH(set, CLBR_ECX, ecx); \ ++ COND_PUSH(set, CLBR_EDX, edx) ++#define PV_RESTORE_REGS(set) \ ++ COND_POP(set, CLBR_EDX, edx); \ ++ COND_POP(set, CLBR_ECX, ecx); \ ++ COND_POP(set, CLBR_EDI, edi); \ ++ COND_POP(set, CLBR_EAX, eax) ++ + #define PARA_PATCH(struct, off) ((PARAVIRT_PATCH_##struct + (off)) / 4) + #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4) + #define PARA_INDIRECT(addr) *%cs:addr +@@ -1567,15 +1706,15 @@ static inline unsigned long __raw_local_ + + #define DISABLE_INTERRUPTS(clobbers) \ + PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \ +- PV_SAVE_REGS; \ ++ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable); \ +- PV_RESTORE_REGS;) \ ++ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) + + #define ENABLE_INTERRUPTS(clobbers) \ + PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers, \ +- PV_SAVE_REGS; \ ++ PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \ + call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable); \ +- PV_RESTORE_REGS;) ++ PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);) + + #define USERGS_SYSRET32 \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32), \ +@@ -1605,11 +1744,15 @@ static inline unsigned long __raw_local_ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ + swapgs) + ++/* ++ * Note: swapgs is very special, and in practise is either going to be ++ * implemented with a single "swapgs" instruction or something very ++ * special. Either way, we don't need to save any registers for ++ * it. 
++ */ + #define SWAPGS \ + PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE, \ +- PV_SAVE_REGS; \ +- call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs); \ +- PV_RESTORE_REGS \ ++ call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs) \ + ) + + #define GET_CR2_INTO_RCX \ +Index: linux-2.6-tip/arch/x86/include/asm/pat.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/pat.h ++++ linux-2.6-tip/arch/x86/include/asm/pat.h +@@ -2,13 +2,12 @@ + #define _ASM_X86_PAT_H + + #include ++#include + + #ifdef CONFIG_X86_PAT + extern int pat_enabled; +-extern void validate_pat_support(struct cpuinfo_x86 *c); + #else + static const int pat_enabled; +-static inline void validate_pat_support(struct cpuinfo_x86 *c) { } + #endif + + extern void pat_init(void); +@@ -17,6 +16,11 @@ extern int reserve_memtype(u64 start, u6 + unsigned long req_type, unsigned long *ret_type); + extern int free_memtype(u64 start, u64 end); + +-extern void pat_disable(char *reason); ++extern int kernel_map_sync_memtype(u64 base, unsigned long size, ++ unsigned long flag); ++extern void map_devmem(unsigned long pfn, unsigned long size, ++ struct pgprot vma_prot); ++extern void unmap_devmem(unsigned long pfn, unsigned long size, ++ struct pgprot vma_prot); + + #endif /* _ASM_X86_PAT_H */ +Index: linux-2.6-tip/arch/x86/include/asm/pci-functions.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/pci-functions.h +@@ -0,0 +1,19 @@ ++/* ++ * PCI BIOS function numbering for conventional PCI BIOS ++ * systems ++ */ ++ ++#define PCIBIOS_PCI_FUNCTION_ID 0xb1XX ++#define PCIBIOS_PCI_BIOS_PRESENT 0xb101 ++#define PCIBIOS_FIND_PCI_DEVICE 0xb102 ++#define PCIBIOS_FIND_PCI_CLASS_CODE 0xb103 ++#define PCIBIOS_GENERATE_SPECIAL_CYCLE 0xb106 ++#define PCIBIOS_READ_CONFIG_BYTE 0xb108 ++#define PCIBIOS_READ_CONFIG_WORD 0xb109 ++#define PCIBIOS_READ_CONFIG_DWORD 0xb10a ++#define PCIBIOS_WRITE_CONFIG_BYTE 0xb10b ++#define PCIBIOS_WRITE_CONFIG_WORD 0xb10c ++#define PCIBIOS_WRITE_CONFIG_DWORD 0xb10d ++#define PCIBIOS_GET_ROUTING_OPTIONS 0xb10e ++#define PCIBIOS_SET_PCI_HW_INT 0xb10f ++ +Index: linux-2.6-tip/arch/x86/include/asm/pci.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/pci.h ++++ linux-2.6-tip/arch/x86/include/asm/pci.h +@@ -109,11 +109,6 @@ static inline int __pcibus_to_node(const + return sd->node; + } + +-static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus) +-{ +- return node_to_cpumask(__pcibus_to_node(bus)); +-} +- + static inline const struct cpumask * + cpumask_of_pcibus(const struct pci_bus *bus) + { +Index: linux-2.6-tip/arch/x86/include/asm/pda.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/pda.h ++++ /dev/null +@@ -1,137 +0,0 @@ +-#ifndef _ASM_X86_PDA_H +-#define _ASM_X86_PDA_H +- +-#ifndef __ASSEMBLY__ +-#include +-#include +-#include +-#include +- +-/* Per processor datastructure. %gs points to it while the kernel runs */ +-struct x8664_pda { +- struct task_struct *pcurrent; /* 0 Current process */ +- unsigned long data_offset; /* 8 Per cpu data offset from linker +- address */ +- unsigned long kernelstack; /* 16 top of kernel stack for current */ +- unsigned long oldrsp; /* 24 user rsp for system call */ +- int irqcount; /* 32 Irq nesting counter. 
Starts -1 */ +- unsigned int cpunumber; /* 36 Logical CPU number */ +-#ifdef CONFIG_CC_STACKPROTECTOR +- unsigned long stack_canary; /* 40 stack canary value */ +- /* gcc-ABI: this canary MUST be at +- offset 40!!! */ +-#endif +- char *irqstackptr; +- short nodenumber; /* number of current node (32k max) */ +- short in_bootmem; /* pda lives in bootmem */ +- unsigned int __softirq_pending; +- unsigned int __nmi_count; /* number of NMI on this CPUs */ +- short mmu_state; +- short isidle; +- struct mm_struct *active_mm; +- unsigned apic_timer_irqs; +- unsigned irq0_irqs; +- unsigned irq_resched_count; +- unsigned irq_call_count; +- unsigned irq_tlb_count; +- unsigned irq_thermal_count; +- unsigned irq_threshold_count; +- unsigned irq_spurious_count; +-} ____cacheline_aligned_in_smp; +- +-extern struct x8664_pda **_cpu_pda; +-extern void pda_init(int); +- +-#define cpu_pda(i) (_cpu_pda[i]) +- +-/* +- * There is no fast way to get the base address of the PDA, all the accesses +- * have to mention %fs/%gs. So it needs to be done this Torvaldian way. +- */ +-extern void __bad_pda_field(void) __attribute__((noreturn)); +- +-/* +- * proxy_pda doesn't actually exist, but tell gcc it is accessed for +- * all PDA accesses so it gets read/write dependencies right. +- */ +-extern struct x8664_pda _proxy_pda; +- +-#define pda_offset(field) offsetof(struct x8664_pda, field) +- +-#define pda_to_op(op, field, val) \ +-do { \ +- typedef typeof(_proxy_pda.field) T__; \ +- if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \ +- switch (sizeof(_proxy_pda.field)) { \ +- case 2: \ +- asm(op "w %1,%%gs:%c2" : \ +- "+m" (_proxy_pda.field) : \ +- "ri" ((T__)val), \ +- "i"(pda_offset(field))); \ +- break; \ +- case 4: \ +- asm(op "l %1,%%gs:%c2" : \ +- "+m" (_proxy_pda.field) : \ +- "ri" ((T__)val), \ +- "i" (pda_offset(field))); \ +- break; \ +- case 8: \ +- asm(op "q %1,%%gs:%c2": \ +- "+m" (_proxy_pda.field) : \ +- "ri" ((T__)val), \ +- "i"(pda_offset(field))); \ +- break; \ +- default: \ +- __bad_pda_field(); \ +- } \ +-} while (0) +- +-#define pda_from_op(op, field) \ +-({ \ +- typeof(_proxy_pda.field) ret__; \ +- switch (sizeof(_proxy_pda.field)) { \ +- case 2: \ +- asm(op "w %%gs:%c1,%0" : \ +- "=r" (ret__) : \ +- "i" (pda_offset(field)), \ +- "m" (_proxy_pda.field)); \ +- break; \ +- case 4: \ +- asm(op "l %%gs:%c1,%0": \ +- "=r" (ret__): \ +- "i" (pda_offset(field)), \ +- "m" (_proxy_pda.field)); \ +- break; \ +- case 8: \ +- asm(op "q %%gs:%c1,%0": \ +- "=r" (ret__) : \ +- "i" (pda_offset(field)), \ +- "m" (_proxy_pda.field)); \ +- break; \ +- default: \ +- __bad_pda_field(); \ +- } \ +- ret__; \ +-}) +- +-#define read_pda(field) pda_from_op("mov", field) +-#define write_pda(field, val) pda_to_op("mov", field, val) +-#define add_pda(field, val) pda_to_op("add", field, val) +-#define sub_pda(field, val) pda_to_op("sub", field, val) +-#define or_pda(field, val) pda_to_op("or", field, val) +- +-/* This is not atomic against other CPUs -- CPU preemption needs to be off */ +-#define test_and_clear_bit_pda(bit, field) \ +-({ \ +- int old__; \ +- asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0" \ +- : "=r" (old__), "+m" (_proxy_pda.field) \ +- : "dIr" (bit), "i" (pda_offset(field)) : "memory");\ +- old__; \ +-}) +- +-#endif +- +-#define PDA_STACKOFFSET (5*8) +- +-#endif /* _ASM_X86_PDA_H */ +Index: linux-2.6-tip/arch/x86/include/asm/percpu.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/percpu.h ++++ linux-2.6-tip/arch/x86/include/asm/percpu.h 
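The pda.h removal above and the percpu.h rework that begins below replace the old read_pda()/write_pda() accessors, which hard-coded %gs-relative moves into the x8664_pda structure, with generic percpu_read()/percpu_write() operations that dispatch on the operand size and, on SMP builds, prefix the memory operand with %gs: (x86-64) or %fs: (i386). The stand-alone sketch below is not part of the patch; it only illustrates that sizeof-dispatch inline-asm pattern on an ordinary variable, with the segment prefix deliberately left out so the example builds and runs as a plain user-space program on x86-64 with gcc.

/* Illustrative sketch only -- not part of the patch.
 * Mimics the percpu_to_op()/percpu_from_op() shape: pick the mov width
 * from sizeof(var) and issue it through inline asm.  The kernel macros
 * additionally prefix the memory operand with %gs: (x86-64) or %fs:
 * (i386) so the access lands at the per-CPU offset; that prefix is
 * omitted here so this compiles and runs in user space.
 */
#include <stdio.h>

#define demo_write(var, val)                                            \
do {                                                                    \
        switch (sizeof(var)) {                                          \
        case 4:                                                         \
                asm("movl %1,%0" : "+m" (var)                           \
                    : "ri" ((unsigned int)(val)));                      \
                break;                                                  \
        case 8:                                                         \
                asm("movq %1,%0" : "+m" (var)                           \
                    : "re" ((unsigned long)(val)));                     \
                break;                                                  \
        }                                                               \
} while (0)

#define demo_read(var)                                                  \
({                                                                      \
        typeof(var) ret__ = 0;                                          \
        switch (sizeof(var)) {                                          \
        case 4:                                                         \
                asm("movl %1,%0" : "=r" (ret__) : "m" (var));           \
                break;                                                  \
        case 8:                                                         \
                asm("movq %1,%0" : "=r" (ret__) : "m" (var));           \
                break;                                                  \
        }                                                               \
        ret__;                                                          \
})

int main(void)
{
        unsigned long counter = 0;

        demo_write(counter, 42UL);              /* movq $42, counter */
        printf("counter = %lu\n", demo_read(counter));
        return 0;
}

The kernel variant reaches the per-CPU copy of a variable purely through the segment override, which is why the same percpu_read()/percpu_write() source works for both the boot-time init_per_cpu area and the regular per-CPU sections once %gs (or %fs) has been pointed at the right base.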
+@@ -2,53 +2,12 @@ + #define _ASM_X86_PERCPU_H + + #ifdef CONFIG_X86_64 +-#include +- +-/* Same as asm-generic/percpu.h, except that we store the per cpu offset +- in the PDA. Longer term the PDA and every per cpu variable +- should be just put into a single section and referenced directly +- from %gs */ +- +-#ifdef CONFIG_SMP +-#include +- +-#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset) +-#define __my_cpu_offset read_pda(data_offset) +- +-#define per_cpu_offset(x) (__per_cpu_offset(x)) +- ++#define __percpu_seg gs ++#define __percpu_mov_op movq ++#else ++#define __percpu_seg fs ++#define __percpu_mov_op movl + #endif +-#include +- +-DECLARE_PER_CPU(struct x8664_pda, pda); +- +-/* +- * These are supposed to be implemented as a single instruction which +- * operates on the per-cpu data base segment. x86-64 doesn't have +- * that yet, so this is a fairly inefficient workaround for the +- * meantime. The single instruction is atomic with respect to +- * preemption and interrupts, so we need to explicitly disable +- * interrupts here to achieve the same effect. However, because it +- * can be used from within interrupt-disable/enable, we can't actually +- * disable interrupts; disabling preemption is enough. +- */ +-#define x86_read_percpu(var) \ +- ({ \ +- typeof(per_cpu_var(var)) __tmp; \ +- preempt_disable(); \ +- __tmp = __get_cpu_var(var); \ +- preempt_enable(); \ +- __tmp; \ +- }) +- +-#define x86_write_percpu(var, val) \ +- do { \ +- preempt_disable(); \ +- __get_cpu_var(var) = (val); \ +- preempt_enable(); \ +- } while(0) +- +-#else /* CONFIG_X86_64 */ + + #ifdef __ASSEMBLY__ + +@@ -65,47 +24,48 @@ DECLARE_PER_CPU(struct x8664_pda, pda); + * PER_CPU(cpu_gdt_descr, %ebx) + */ + #ifdef CONFIG_SMP +-#define PER_CPU(var, reg) \ +- movl %fs:per_cpu__##this_cpu_off, reg; \ ++#define PER_CPU(var, reg) \ ++ __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \ + lea per_cpu__##var(reg), reg +-#define PER_CPU_VAR(var) %fs:per_cpu__##var ++#define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var + #else /* ! SMP */ +-#define PER_CPU(var, reg) \ +- movl $per_cpu__##var, reg ++#define PER_CPU(var, reg) \ ++ __percpu_mov_op $per_cpu__##var, reg + #define PER_CPU_VAR(var) per_cpu__##var + #endif /* SMP */ + ++#ifdef CONFIG_X86_64_SMP ++#define INIT_PER_CPU_VAR(var) init_per_cpu__##var ++#else ++#define INIT_PER_CPU_VAR(var) per_cpu__##var ++#endif ++ + #else /* ...!ASSEMBLY */ + ++#include ++ ++#ifdef CONFIG_SMP ++#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x ++#define __my_cpu_offset percpu_read(this_cpu_off) ++#else ++#define __percpu_arg(x) "%" #x ++#endif ++ + /* +- * PER_CPU finds an address of a per-cpu variable. ++ * Initialized pointers to per-cpu variables needed for the boot ++ * processor need to use these macros to get the proper address ++ * offset from __per_cpu_load on SMP. + * +- * Args: +- * var - variable name +- * cpu - 32bit register containing the current CPU number +- * +- * The resulting address is stored in the "cpu" argument. 
+- * +- * Example: +- * PER_CPU(cpu_gdt_descr, %ebx) ++ * There also must be an entry in vmlinux_64.lds.S + */ +-#ifdef CONFIG_SMP +- +-#define __my_cpu_offset x86_read_percpu(this_cpu_off) +- +-/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ +-#define __percpu_seg "%%fs:" +- +-#else /* !SMP */ +- +-#define __percpu_seg "" +- +-#endif /* SMP */ +- +-#include ++#define DECLARE_INIT_PER_CPU(var) \ ++ extern typeof(per_cpu_var(var)) init_per_cpu_var(var) + +-/* We can use this directly for local CPU (faster). */ +-DECLARE_PER_CPU(unsigned long, this_cpu_off); ++#ifdef CONFIG_X86_64_SMP ++#define init_per_cpu_var(var) init_per_cpu__##var ++#else ++#define init_per_cpu_var(var) per_cpu_var(var) ++#endif + + /* For arch-specific code, we can use direct single-insn ops (they + * don't give an lvalue though). */ +@@ -120,20 +80,25 @@ do { \ + } \ + switch (sizeof(var)) { \ + case 1: \ +- asm(op "b %1,"__percpu_seg"%0" \ ++ asm(op "b %1,"__percpu_arg(0) \ + : "+m" (var) \ + : "ri" ((T__)val)); \ + break; \ + case 2: \ +- asm(op "w %1,"__percpu_seg"%0" \ ++ asm(op "w %1,"__percpu_arg(0) \ + : "+m" (var) \ + : "ri" ((T__)val)); \ + break; \ + case 4: \ +- asm(op "l %1,"__percpu_seg"%0" \ ++ asm(op "l %1,"__percpu_arg(0) \ + : "+m" (var) \ + : "ri" ((T__)val)); \ + break; \ ++ case 8: \ ++ asm(op "q %1,"__percpu_arg(0) \ ++ : "+m" (var) \ ++ : "re" ((T__)val)); \ ++ break; \ + default: __bad_percpu_size(); \ + } \ + } while (0) +@@ -143,17 +108,22 @@ do { \ + typeof(var) ret__; \ + switch (sizeof(var)) { \ + case 1: \ +- asm(op "b "__percpu_seg"%1,%0" \ ++ asm(op "b "__percpu_arg(1)",%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + case 2: \ +- asm(op "w "__percpu_seg"%1,%0" \ ++ asm(op "w "__percpu_arg(1)",%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ + case 4: \ +- asm(op "l "__percpu_seg"%1,%0" \ ++ asm(op "l "__percpu_arg(1)",%0" \ ++ : "=r" (ret__) \ ++ : "m" (var)); \ ++ break; \ ++ case 8: \ ++ asm(op "q "__percpu_arg(1)",%0" \ + : "=r" (ret__) \ + : "m" (var)); \ + break; \ +@@ -162,13 +132,30 @@ do { \ + ret__; \ + }) + +-#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) +-#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val) +-#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) +-#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) +-#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) ++#define percpu_read(var) percpu_from_op("mov", per_cpu__##var) ++#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val) ++#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val) ++#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val) ++#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val) ++#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val) ++#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val) ++ ++/* This is not atomic against other CPUs -- CPU preemption needs to be off */ ++#define x86_test_and_clear_bit_percpu(bit, var) \ ++({ \ ++ int old__; \ ++ asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ ++ : "=r" (old__), "+m" (per_cpu__##var) \ ++ : "dIr" (bit)); \ ++ old__; \ ++}) ++ ++#include ++ ++/* We can use this directly for local CPU (faster). 
*/ ++DECLARE_PER_CPU(unsigned long, this_cpu_off); ++ + #endif /* !__ASSEMBLY__ */ +-#endif /* !CONFIG_X86_64 */ + + #ifdef CONFIG_SMP + +@@ -195,9 +182,9 @@ do { \ + #define early_per_cpu_ptr(_name) (_name##_early_ptr) + #define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) + #define early_per_cpu(_name, _cpu) \ +- (early_per_cpu_ptr(_name) ? \ +- early_per_cpu_ptr(_name)[_cpu] : \ +- per_cpu(_name, _cpu)) ++ *(early_per_cpu_ptr(_name) ? \ ++ &early_per_cpu_ptr(_name)[_cpu] : \ ++ &per_cpu(_name, _cpu)) + + #else /* !CONFIG_SMP */ + #define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ +Index: linux-2.6-tip/arch/x86/include/asm/perf_counter.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/perf_counter.h +@@ -0,0 +1,98 @@ ++#ifndef _ASM_X86_PERF_COUNTER_H ++#define _ASM_X86_PERF_COUNTER_H ++ ++/* ++ * Performance counter hw details: ++ */ ++ ++#define X86_PMC_MAX_GENERIC 8 ++#define X86_PMC_MAX_FIXED 3 ++ ++#define X86_PMC_IDX_GENERIC 0 ++#define X86_PMC_IDX_FIXED 32 ++#define X86_PMC_IDX_MAX 64 ++ ++#define MSR_ARCH_PERFMON_PERFCTR0 0xc1 ++#define MSR_ARCH_PERFMON_PERFCTR1 0xc2 ++ ++#define MSR_ARCH_PERFMON_EVENTSEL0 0x186 ++#define MSR_ARCH_PERFMON_EVENTSEL1 0x187 ++ ++#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) ++#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) ++#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) ++#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) ++ ++/* ++ * Includes eventsel and unit mask as well: ++ */ ++#define ARCH_PERFMON_EVENT_MASK 0xffff ++ ++#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c ++#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) ++#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 ++#define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ ++ (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) ++ ++#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 ++ ++/* ++ * Intel "Architectural Performance Monitoring" CPUID ++ * detection/enumeration details: ++ */ ++union cpuid10_eax { ++ struct { ++ unsigned int version_id:8; ++ unsigned int num_counters:8; ++ unsigned int bit_width:8; ++ unsigned int mask_length:8; ++ } split; ++ unsigned int full; ++}; ++ ++union cpuid10_edx { ++ struct { ++ unsigned int num_counters_fixed:4; ++ unsigned int reserved:28; ++ } split; ++ unsigned int full; ++}; ++ ++ ++/* ++ * Fixed-purpose performance counters: ++ */ ++ ++/* ++ * All 3 fixed-mode PMCs are configured via this single MSR: ++ */ ++#define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d ++ ++/* ++ * The counts are available in three separate MSRs: ++ */ ++ ++/* Instr_Retired.Any: */ ++#define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 ++#define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) ++ ++/* CPU_CLK_Unhalted.Core: */ ++#define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a ++#define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) ++ ++/* CPU_CLK_Unhalted.Ref: */ ++#define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b ++#define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) ++ ++#define set_perf_counter_pending() \ ++ set_tsk_thread_flag(current, TIF_PERF_COUNTERS); ++ ++#ifdef CONFIG_PERF_COUNTERS ++extern void init_hw_perf_counters(void); ++extern void perf_counters_lapic_init(int nmi); ++#else ++static inline void init_hw_perf_counters(void) { } ++static inline void perf_counters_lapic_init(int nmi) { } ++#endif ++ ++#endif /* _ASM_X86_PERF_COUNTER_H */ +Index: linux-2.6-tip/arch/x86/include/asm/pgtable-2level-defs.h +=================================================================== +--- 
linux-2.6-tip.orig/arch/x86/include/asm/pgtable-2level-defs.h ++++ /dev/null +@@ -1,20 +0,0 @@ +-#ifndef _ASM_X86_PGTABLE_2LEVEL_DEFS_H +-#define _ASM_X86_PGTABLE_2LEVEL_DEFS_H +- +-#define SHARED_KERNEL_PMD 0 +- +-/* +- * traditional i386 two-level paging structure: +- */ +- +-#define PGDIR_SHIFT 22 +-#define PTRS_PER_PGD 1024 +- +-/* +- * the i386 is two-level, so we don't really have any +- * PMD directory physically. +- */ +- +-#define PTRS_PER_PTE 1024 +- +-#endif /* _ASM_X86_PGTABLE_2LEVEL_DEFS_H */ +Index: linux-2.6-tip/arch/x86/include/asm/pgtable-2level.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/pgtable-2level.h ++++ linux-2.6-tip/arch/x86/include/asm/pgtable-2level.h +@@ -26,13 +26,6 @@ static inline void native_set_pte_atomic + native_set_pte(ptep, pte); + } + +-static inline void native_set_pte_present(struct mm_struct *mm, +- unsigned long addr, +- pte_t *ptep, pte_t pte) +-{ +- native_set_pte(ptep, pte); +-} +- + static inline void native_pmd_clear(pmd_t *pmdp) + { + native_set_pmd(pmdp, __pmd(0)); +@@ -53,8 +46,6 @@ static inline pte_t native_ptep_get_and_ + #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) + #endif + +-#define pte_none(x) (!(x).pte_low) +- + /* + * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, + * split up the 29 bits of offset into this range: +Index: linux-2.6-tip/arch/x86/include/asm/pgtable-2level_types.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/pgtable-2level_types.h +@@ -0,0 +1,37 @@ ++#ifndef _ASM_X86_PGTABLE_2LEVEL_DEFS_H ++#define _ASM_X86_PGTABLE_2LEVEL_DEFS_H ++ ++#ifndef __ASSEMBLY__ ++#include ++ ++typedef unsigned long pteval_t; ++typedef unsigned long pmdval_t; ++typedef unsigned long pudval_t; ++typedef unsigned long pgdval_t; ++typedef unsigned long pgprotval_t; ++ ++typedef union { ++ pteval_t pte; ++ pteval_t pte_low; ++} pte_t; ++#endif /* !__ASSEMBLY__ */ ++ ++#define SHARED_KERNEL_PMD 0 ++#define PAGETABLE_LEVELS 2 ++ ++/* ++ * traditional i386 two-level paging structure: ++ */ ++ ++#define PGDIR_SHIFT 22 ++#define PTRS_PER_PGD 1024 ++ ++ ++/* ++ * the i386 is two-level, so we don't really have any ++ * PMD directory physically. 
++ */ ++ ++#define PTRS_PER_PTE 1024 ++ ++#endif /* _ASM_X86_PGTABLE_2LEVEL_DEFS_H */ +Index: linux-2.6-tip/arch/x86/include/asm/pgtable-3level-defs.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/pgtable-3level-defs.h ++++ /dev/null +@@ -1,28 +0,0 @@ +-#ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H +-#define _ASM_X86_PGTABLE_3LEVEL_DEFS_H +- +-#ifdef CONFIG_PARAVIRT +-#define SHARED_KERNEL_PMD (pv_info.shared_kernel_pmd) +-#else +-#define SHARED_KERNEL_PMD 1 +-#endif +- +-/* +- * PGDIR_SHIFT determines what a top-level page table entry can map +- */ +-#define PGDIR_SHIFT 30 +-#define PTRS_PER_PGD 4 +- +-/* +- * PMD_SHIFT determines the size of the area a middle-level +- * page table can map +- */ +-#define PMD_SHIFT 21 +-#define PTRS_PER_PMD 512 +- +-/* +- * entries per page directory level +- */ +-#define PTRS_PER_PTE 512 +- +-#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ +Index: linux-2.6-tip/arch/x86/include/asm/pgtable-3level.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/pgtable-3level.h ++++ linux-2.6-tip/arch/x86/include/asm/pgtable-3level.h +@@ -18,21 +18,6 @@ + printk("%s:%d: bad pgd %p(%016Lx).\n", \ + __FILE__, __LINE__, &(e), pgd_val(e)) + +-static inline int pud_none(pud_t pud) +-{ +- return pud_val(pud) == 0; +-} +- +-static inline int pud_bad(pud_t pud) +-{ +- return (pud_val(pud) & ~(PTE_PFN_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0; +-} +- +-static inline int pud_present(pud_t pud) +-{ +- return pud_val(pud) & _PAGE_PRESENT; +-} +- + /* Rules for using set_pte: the pte being assigned *must* be + * either not present or in a state where the hardware will + * not attempt to update the pte. In places where this is +@@ -46,23 +31,6 @@ static inline void native_set_pte(pte_t + ptep->pte_low = pte.pte_low; + } + +-/* +- * Since this is only called on user PTEs, and the page fault handler +- * must handle the already racy situation of simultaneous page faults, +- * we are justified in merely clearing the PTE present bit, followed +- * by a set. The ordering here is important. +- */ +-static inline void native_set_pte_present(struct mm_struct *mm, +- unsigned long addr, +- pte_t *ptep, pte_t pte) +-{ +- ptep->pte_low = 0; +- smp_wmb(); +- ptep->pte_high = pte.pte_high; +- smp_wmb(); +- ptep->pte_low = pte.pte_low; +-} +- + static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) + { + set_64bit((unsigned long long *)(ptep), native_pte_val(pte)); +@@ -103,6 +71,7 @@ static inline void pud_clear(pud_t *pudp + { + unsigned long pgd; + ++ preempt_disable(); + set_pud(pudp, __pud(0)); + + /* +@@ -118,17 +87,9 @@ static inline void pud_clear(pud_t *pudp + if (__pa(pudp) >= pgd && __pa(pudp) < + (pgd + sizeof(pgd_t)*PTRS_PER_PGD)) + write_cr3(pgd); ++ preempt_enable(); + } + +-#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT) +- +-#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK)) +- +- +-/* Find an entry in the second-level page table.. 
*/ +-#define pmd_offset(pud, address) ((pmd_t *)pud_page_vaddr(*(pud)) + \ +- pmd_index(address)) +- + #ifdef CONFIG_SMP + static inline pte_t native_ptep_get_and_clear(pte_t *ptep) + { +@@ -145,17 +106,6 @@ static inline pte_t native_ptep_get_and_ + #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp) + #endif + +-#define __HAVE_ARCH_PTE_SAME +-static inline int pte_same(pte_t a, pte_t b) +-{ +- return a.pte_low == b.pte_low && a.pte_high == b.pte_high; +-} +- +-static inline int pte_none(pte_t pte) +-{ +- return !pte.pte_low && !pte.pte_high; +-} +- + /* + * Bits 0, 6 and 7 are taken in the low part of the pte, + * put the 32 bits of offset into the high part. +Index: linux-2.6-tip/arch/x86/include/asm/pgtable-3level_types.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/pgtable-3level_types.h +@@ -0,0 +1,48 @@ ++#ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H ++#define _ASM_X86_PGTABLE_3LEVEL_DEFS_H ++ ++#ifndef __ASSEMBLY__ ++#include ++ ++typedef u64 pteval_t; ++typedef u64 pmdval_t; ++typedef u64 pudval_t; ++typedef u64 pgdval_t; ++typedef u64 pgprotval_t; ++ ++typedef union { ++ struct { ++ unsigned long pte_low, pte_high; ++ }; ++ pteval_t pte; ++} pte_t; ++#endif /* !__ASSEMBLY__ */ ++ ++#ifdef CONFIG_PARAVIRT ++#define SHARED_KERNEL_PMD (pv_info.shared_kernel_pmd) ++#else ++#define SHARED_KERNEL_PMD 1 ++#endif ++ ++#define PAGETABLE_LEVELS 3 ++ ++/* ++ * PGDIR_SHIFT determines what a top-level page table entry can map ++ */ ++#define PGDIR_SHIFT 30 ++#define PTRS_PER_PGD 4 ++ ++/* ++ * PMD_SHIFT determines the size of the area a middle-level ++ * page table can map ++ */ ++#define PMD_SHIFT 21 ++#define PTRS_PER_PMD 512 ++ ++/* ++ * entries per page directory level ++ */ ++#define PTRS_PER_PTE 512 ++ ++ ++#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */ +Index: linux-2.6-tip/arch/x86/include/asm/pgtable.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/pgtable.h ++++ linux-2.6-tip/arch/x86/include/asm/pgtable.h +@@ -1,164 +1,9 @@ + #ifndef _ASM_X86_PGTABLE_H + #define _ASM_X86_PGTABLE_H + +-#define FIRST_USER_ADDRESS 0 ++#include + +-#define _PAGE_BIT_PRESENT 0 /* is present */ +-#define _PAGE_BIT_RW 1 /* writeable */ +-#define _PAGE_BIT_USER 2 /* userspace addressable */ +-#define _PAGE_BIT_PWT 3 /* page write through */ +-#define _PAGE_BIT_PCD 4 /* page cache disabled */ +-#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ +-#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ +-#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ +-#define _PAGE_BIT_PAT 7 /* on 4KB pages */ +-#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ +-#define _PAGE_BIT_UNUSED1 9 /* available for programmer */ +-#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ +-#define _PAGE_BIT_UNUSED3 11 +-#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ +-#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 +-#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 +-#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ +- +-/* If _PAGE_BIT_PRESENT is clear, we use these: */ +-/* - if the user mapped it with PROT_NONE; pte_present gives true */ +-#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL +-/* - set: nonlinear file mapping, saved PTE; unset:swap */ +-#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY +- +-#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) +-#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) +-#define 
_PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) +-#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT) +-#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD) +-#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) +-#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) +-#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) +-#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) +-#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) +-#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) +-#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) +-#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) +-#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) +-#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) +-#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) +-#define __HAVE_ARCH_PTE_SPECIAL +- +-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) +-#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) +-#else +-#define _PAGE_NX (_AT(pteval_t, 0)) +-#endif +- +-#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) +-#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) +- +-#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ +- _PAGE_ACCESSED | _PAGE_DIRTY) +-#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ +- _PAGE_DIRTY) +- +-/* Set of bits not changed in pte_modify */ +-#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ +- _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) +- +-#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) +-#define _PAGE_CACHE_WB (0) +-#define _PAGE_CACHE_WC (_PAGE_PWT) +-#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD) +-#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT) +- +-#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) +-#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ +- _PAGE_ACCESSED | _PAGE_NX) +- +-#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \ +- _PAGE_USER | _PAGE_ACCESSED) +-#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ +- _PAGE_ACCESSED | _PAGE_NX) +-#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ +- _PAGE_ACCESSED) +-#define PAGE_COPY PAGE_COPY_NOEXEC +-#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \ +- _PAGE_ACCESSED | _PAGE_NX) +-#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ +- _PAGE_ACCESSED) +- +-#define __PAGE_KERNEL_EXEC \ +- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) +-#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) +- +-#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) +-#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) +-#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT) +-#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC) +-#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) +-#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) +-#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) +-#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) +-#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) +-#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) +-#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) +- +-#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP) +-#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP) +-#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP) +-#define 
__PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP) +- +-#define PAGE_KERNEL __pgprot(__PAGE_KERNEL) +-#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) +-#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) +-#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) +-#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC) +-#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) +-#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS) +-#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE) +-#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) +-#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE) +-#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) +-#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) +-#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE) +- +-#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) +-#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) +-#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS) +-#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC) +- +-/* xwr */ +-#define __P000 PAGE_NONE +-#define __P001 PAGE_READONLY +-#define __P010 PAGE_COPY +-#define __P011 PAGE_COPY +-#define __P100 PAGE_READONLY_EXEC +-#define __P101 PAGE_READONLY_EXEC +-#define __P110 PAGE_COPY_EXEC +-#define __P111 PAGE_COPY_EXEC +- +-#define __S000 PAGE_NONE +-#define __S001 PAGE_READONLY +-#define __S010 PAGE_SHARED +-#define __S011 PAGE_SHARED +-#define __S100 PAGE_READONLY_EXEC +-#define __S101 PAGE_READONLY_EXEC +-#define __S110 PAGE_SHARED_EXEC +-#define __S111 PAGE_SHARED_EXEC +- +-/* +- * early identity mapping pte attrib macros. +- */ +-#ifdef CONFIG_X86_64 +-#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC +-#else +-/* +- * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection +- * bits are combined, this will alow user to access the high address mapped +- * VDSO in the presence of CONFIG_COMPAT_VDSO +- */ +-#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ +-#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ +-#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ +-#endif ++#include + + /* + * Macro to mark a page protection value as UC- +@@ -170,9 +15,6 @@ + + #ifndef __ASSEMBLY__ + +-#define pgprot_writecombine pgprot_writecombine +-extern pgprot_t pgprot_writecombine(pgprot_t prot); +- + /* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. 
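The _PAGE_BIT_*/_PAGE_* definitions removed above are single-bit masks that later hunks re-home in pgtable_types.h; composite protections such as _KERNPG_TABLE and _PAGE_TABLE are simply ORs of those bits. As an illustrative aside (not part of the patch), here is a minimal user-space sketch of that composition; pteval_t, the bit numbers and the two composite masks mirror the kernel definitions, while main() and the printf scaffolding are stand-ins added only for the example.

/* Sketch: how the single-bit _PAGE_* masks compose into page-table
 * protections.  Values mirror the kernel's bit layout; the program
 * itself is illustrative scaffolding, not kernel code. */
#include <stdio.h>
#include <inttypes.h>

typedef uint64_t pteval_t;

#define PAGE_BIT_PRESENT   0
#define PAGE_BIT_RW        1
#define PAGE_BIT_USER      2
#define PAGE_BIT_ACCESSED  5
#define PAGE_BIT_DIRTY     6

#define PAGE_PRESENT   ((pteval_t)1 << PAGE_BIT_PRESENT)
#define PAGE_RW        ((pteval_t)1 << PAGE_BIT_RW)
#define PAGE_USER      ((pteval_t)1 << PAGE_BIT_USER)
#define PAGE_ACCESSED  ((pteval_t)1 << PAGE_BIT_ACCESSED)
#define PAGE_DIRTY     ((pteval_t)1 << PAGE_BIT_DIRTY)

/* Kernel page-table pages: present, writable, referenced, dirty - no USER bit. */
#define KERNPG_TABLE   (PAGE_PRESENT | PAGE_RW | PAGE_ACCESSED | PAGE_DIRTY)
/* User page-table pages additionally carry the USER bit. */
#define PAGE_TABLE     (KERNPG_TABLE | PAGE_USER)

int main(void)
{
	printf("_KERNPG_TABLE = %#" PRIx64 "\n", (uint64_t)KERNPG_TABLE); /* 0x63 */
	printf("_PAGE_TABLE   = %#" PRIx64 "\n", (uint64_t)PAGE_TABLE);   /* 0x67 */
	printf("USER bit set in _PAGE_TABLE? %d\n", !!(PAGE_TABLE & PAGE_USER));
	return 0;
}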
+@@ -183,6 +25,64 @@ extern unsigned long empty_zero_page[PAG + extern spinlock_t pgd_lock; + extern struct list_head pgd_list; + ++#ifdef CONFIG_PARAVIRT ++#include ++#else /* !CONFIG_PARAVIRT */ ++#define set_pte(ptep, pte) native_set_pte(ptep, pte) ++#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) ++ ++#define set_pte_atomic(ptep, pte) \ ++ native_set_pte_atomic(ptep, pte) ++ ++#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) ++ ++#ifndef __PAGETABLE_PUD_FOLDED ++#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) ++#define pgd_clear(pgd) native_pgd_clear(pgd) ++#endif ++ ++#ifndef set_pud ++# define set_pud(pudp, pud) native_set_pud(pudp, pud) ++#endif ++ ++#ifndef __PAGETABLE_PMD_FOLDED ++#define pud_clear(pud) native_pud_clear(pud) ++#endif ++ ++#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) ++#define pmd_clear(pmd) native_pmd_clear(pmd) ++ ++#define pte_update(mm, addr, ptep) do { } while (0) ++#define pte_update_defer(mm, addr, ptep) do { } while (0) ++ ++static inline void __init paravirt_pagetable_setup_start(pgd_t *base) ++{ ++ native_pagetable_setup_start(base); ++} ++ ++static inline void __init paravirt_pagetable_setup_done(pgd_t *base) ++{ ++ native_pagetable_setup_done(base); ++} ++ ++#define pgd_val(x) native_pgd_val(x) ++#define __pgd(x) native_make_pgd(x) ++ ++#ifndef __PAGETABLE_PUD_FOLDED ++#define pud_val(x) native_pud_val(x) ++#define __pud(x) native_make_pud(x) ++#endif ++ ++#ifndef __PAGETABLE_PMD_FOLDED ++#define pmd_val(x) native_pmd_val(x) ++#define __pmd(x) native_make_pmd(x) ++#endif ++ ++#define pte_val(x) native_pte_val(x) ++#define __pte(x) native_make_pte(x) ++ ++#endif /* CONFIG_PARAVIRT */ ++ + /* + * The following only work if pte_present() is true. + * Undefined behaviour if not.. 
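The !CONFIG_PARAVIRT block added above routes the generic accessors (set_pte(), pte_val(), __pte(), ...) straight to their native_* counterparts, and the next hunk layers pte_set_flags()/pte_clear_flags() on top of the same native helpers so the pte_mk*() family no longer open-codes __pte(pte_val(pte) | flag). A small stand-alone sketch of that pattern follows (again as an aside, not part of the patch); the helper names mirror the kernel ones, but the types and the two example flag bits are simplified stand-ins.

/* Sketch: the pte_set_flags()/pte_clear_flags() helper pattern over a
 * typed pte wrapper.  Simplified user-space stand-ins for the kernel types. */
#include <assert.h>
#include <stdint.h>

typedef uint64_t pteval_t;
typedef struct { pteval_t pte; } pte_t;

#define PAGE_RW     ((pteval_t)1 << 1)
#define PAGE_DIRTY  ((pteval_t)1 << 6)

static inline pte_t native_make_pte(pteval_t val) { return (pte_t){ .pte = val }; }
static inline pteval_t native_pte_val(pte_t pte)  { return pte.pte; }

static inline pte_t pte_set_flags(pte_t pte, pteval_t set)
{
	/* OR the requested flag bits into the native value. */
	return native_make_pte(native_pte_val(pte) | set);
}

static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
{
	/* Mask the requested flag bits out of the native value. */
	return native_make_pte(native_pte_val(pte) & ~clear);
}

/* The pte_mk*() family then reduces to one-liners over the two helpers. */
static inline pte_t pte_mkdirty(pte_t pte)   { return pte_set_flags(pte, PAGE_DIRTY); }
static inline pte_t pte_wrprotect(pte_t pte) { return pte_clear_flags(pte, PAGE_RW); }

int main(void)
{
	pte_t pte = native_make_pte(PAGE_RW);

	pte = pte_mkdirty(pte);
	assert(native_pte_val(pte) == (PAGE_RW | PAGE_DIRTY));

	pte = pte_wrprotect(pte);
	assert(native_pte_val(pte) == PAGE_DIRTY);
	return 0;
}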
+@@ -236,72 +136,84 @@ static inline unsigned long pte_pfn(pte_ + + static inline int pmd_large(pmd_t pte) + { +- return (pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == ++ return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == + (_PAGE_PSE | _PAGE_PRESENT); + } + ++static inline pte_t pte_set_flags(pte_t pte, pteval_t set) ++{ ++ pteval_t v = native_pte_val(pte); ++ ++ return native_make_pte(v | set); ++} ++ ++static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear) ++{ ++ pteval_t v = native_pte_val(pte); ++ ++ return native_make_pte(v & ~clear); ++} ++ + static inline pte_t pte_mkclean(pte_t pte) + { +- return __pte(pte_val(pte) & ~_PAGE_DIRTY); ++ return pte_clear_flags(pte, _PAGE_DIRTY); + } + + static inline pte_t pte_mkold(pte_t pte) + { +- return __pte(pte_val(pte) & ~_PAGE_ACCESSED); ++ return pte_clear_flags(pte, _PAGE_ACCESSED); + } + + static inline pte_t pte_wrprotect(pte_t pte) + { +- return __pte(pte_val(pte) & ~_PAGE_RW); ++ return pte_clear_flags(pte, _PAGE_RW); + } + + static inline pte_t pte_mkexec(pte_t pte) + { +- return __pte(pte_val(pte) & ~_PAGE_NX); ++ return pte_clear_flags(pte, _PAGE_NX); + } + + static inline pte_t pte_mkdirty(pte_t pte) + { +- return __pte(pte_val(pte) | _PAGE_DIRTY); ++ return pte_set_flags(pte, _PAGE_DIRTY); + } + + static inline pte_t pte_mkyoung(pte_t pte) + { +- return __pte(pte_val(pte) | _PAGE_ACCESSED); ++ return pte_set_flags(pte, _PAGE_ACCESSED); + } + + static inline pte_t pte_mkwrite(pte_t pte) + { +- return __pte(pte_val(pte) | _PAGE_RW); ++ return pte_set_flags(pte, _PAGE_RW); + } + + static inline pte_t pte_mkhuge(pte_t pte) + { +- return __pte(pte_val(pte) | _PAGE_PSE); ++ return pte_set_flags(pte, _PAGE_PSE); + } + + static inline pte_t pte_clrhuge(pte_t pte) + { +- return __pte(pte_val(pte) & ~_PAGE_PSE); ++ return pte_clear_flags(pte, _PAGE_PSE); + } + + static inline pte_t pte_mkglobal(pte_t pte) + { +- return __pte(pte_val(pte) | _PAGE_GLOBAL); ++ return pte_set_flags(pte, _PAGE_GLOBAL); + } + + static inline pte_t pte_clrglobal(pte_t pte) + { +- return __pte(pte_val(pte) & ~_PAGE_GLOBAL); ++ return pte_clear_flags(pte, _PAGE_GLOBAL); + } + + static inline pte_t pte_mkspecial(pte_t pte) + { +- return __pte(pte_val(pte) | _PAGE_SPECIAL); ++ return pte_set_flags(pte, _PAGE_SPECIAL); + } + +-extern pteval_t __supported_pte_mask; +- + /* + * Mask out unsupported bits in a present pgprot. Non-present pgprots + * can use those bits for other purposes, so leave them be. +@@ -374,82 +286,202 @@ static inline int is_new_memtype_allowed + return 1; + } + +-#ifndef __ASSEMBLY__ +-/* Indicate that x86 has its own track and untrack pfn vma functions */ +-#define __HAVE_PFNMAP_TRACKING +- +-#define __HAVE_PHYS_MEM_ACCESS_PROT +-struct file; +-pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, +- unsigned long size, pgprot_t vma_prot); +-int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, +- unsigned long size, pgprot_t *vma_prot); +-#endif +- +-/* Install a pte for a particular vaddr in kernel space. 
*/ +-void set_pte_vaddr(unsigned long vaddr, pte_t pte); ++pmd_t *populate_extra_pmd(unsigned long vaddr); ++pte_t *populate_extra_pte(unsigned long vaddr); ++#endif /* __ASSEMBLY__ */ + + #ifdef CONFIG_X86_32 +-extern void native_pagetable_setup_start(pgd_t *base); +-extern void native_pagetable_setup_done(pgd_t *base); ++# include "pgtable_32.h" + #else +-static inline void native_pagetable_setup_start(pgd_t *base) {} +-static inline void native_pagetable_setup_done(pgd_t *base) {} ++# include "pgtable_64.h" + #endif + +-struct seq_file; +-extern void arch_report_meminfo(struct seq_file *m); ++#ifndef __ASSEMBLY__ ++#include + +-#ifdef CONFIG_PARAVIRT +-#include +-#else /* !CONFIG_PARAVIRT */ +-#define set_pte(ptep, pte) native_set_pte(ptep, pte) +-#define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) ++static inline int pte_none(pte_t pte) ++{ ++ return !pte.pte; ++} + +-#define set_pte_present(mm, addr, ptep, pte) \ +- native_set_pte_present(mm, addr, ptep, pte) +-#define set_pte_atomic(ptep, pte) \ +- native_set_pte_atomic(ptep, pte) ++#define __HAVE_ARCH_PTE_SAME ++static inline int pte_same(pte_t a, pte_t b) ++{ ++ return a.pte == b.pte; ++} + +-#define set_pmd(pmdp, pmd) native_set_pmd(pmdp, pmd) ++static inline int pte_present(pte_t a) ++{ ++ return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); ++} + +-#ifndef __PAGETABLE_PUD_FOLDED +-#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) +-#define pgd_clear(pgd) native_pgd_clear(pgd) +-#endif ++static inline int pmd_present(pmd_t pmd) ++{ ++ return pmd_flags(pmd) & _PAGE_PRESENT; ++} + +-#ifndef set_pud +-# define set_pud(pudp, pud) native_set_pud(pudp, pud) +-#endif ++static inline int pmd_none(pmd_t pmd) ++{ ++ /* Only check low word on 32-bit platforms, since it might be ++ out of sync with upper half. */ ++ return (unsigned long)native_pmd_val(pmd) == 0; ++} + +-#ifndef __PAGETABLE_PMD_FOLDED +-#define pud_clear(pud) native_pud_clear(pud) +-#endif ++static inline unsigned long pmd_page_vaddr(pmd_t pmd) ++{ ++ return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK); ++} + +-#define pte_clear(mm, addr, ptep) native_pte_clear(mm, addr, ptep) +-#define pmd_clear(pmd) native_pmd_clear(pmd) ++/* ++ * Currently stuck as a macro due to indirect forward reference to ++ * linux/mmzone.h's __section_mem_map_addr() definition: ++ */ ++#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) + +-#define pte_update(mm, addr, ptep) do { } while (0) +-#define pte_update_defer(mm, addr, ptep) do { } while (0) ++/* ++ * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] ++ * ++ * this macro returns the index of the entry in the pmd page which would ++ * control the given virtual address ++ */ ++static inline unsigned pmd_index(unsigned long address) ++{ ++ return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); ++} + +-static inline void __init paravirt_pagetable_setup_start(pgd_t *base) ++/* ++ * Conversion functions: convert a page and protection to a page entry, ++ * and a page entry and page directory to the page they refer to. 
++ * ++ * (Currently stuck as a macro because of indirect forward reference ++ * to linux/mm.h:page_to_nid()) ++ */ ++#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) ++ ++/* ++ * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] ++ * ++ * this function returns the index of the entry in the pte page which would ++ * control the given virtual address ++ */ ++static inline unsigned pte_index(unsigned long address) + { +- native_pagetable_setup_start(base); ++ return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); + } + +-static inline void __init paravirt_pagetable_setup_done(pgd_t *base) ++static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) + { +- native_pagetable_setup_done(base); ++ return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); + } +-#endif /* CONFIG_PARAVIRT */ + +-#endif /* __ASSEMBLY__ */ ++static inline int pmd_bad(pmd_t pmd) ++{ ++ return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE; ++} + +-#ifdef CONFIG_X86_32 +-# include "pgtable_32.h" ++static inline unsigned long pages_to_mb(unsigned long npg) ++{ ++ return npg >> (20 - PAGE_SHIFT); ++} ++ ++#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ ++ remap_pfn_range(vma, vaddr, pfn, size, prot) ++ ++#if PAGETABLE_LEVELS > 2 ++static inline int pud_none(pud_t pud) ++{ ++ return native_pud_val(pud) == 0; ++} ++ ++static inline int pud_present(pud_t pud) ++{ ++ return pud_flags(pud) & _PAGE_PRESENT; ++} ++ ++static inline unsigned long pud_page_vaddr(pud_t pud) ++{ ++ return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK); ++} ++ ++/* ++ * Currently stuck as a macro due to indirect forward reference to ++ * linux/mmzone.h's __section_mem_map_addr() definition: ++ */ ++#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT) ++ ++/* Find an entry in the second-level page table.. */ ++static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) ++{ ++ return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); ++} ++ ++static inline unsigned long pmd_pfn(pmd_t pmd) ++{ ++ return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT; ++} ++ ++static inline int pud_large(pud_t pud) ++{ ++ return (pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) == ++ (_PAGE_PSE | _PAGE_PRESENT); ++} ++ ++static inline int pud_bad(pud_t pud) ++{ ++ return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0; ++} + #else +-# include "pgtable_64.h" +-#endif ++static inline int pud_large(pud_t pud) ++{ ++ return 0; ++} ++#endif /* PAGETABLE_LEVELS > 2 */ ++ ++#if PAGETABLE_LEVELS > 3 ++static inline int pgd_present(pgd_t pgd) ++{ ++ return pgd_flags(pgd) & _PAGE_PRESENT; ++} ++ ++static inline unsigned long pgd_page_vaddr(pgd_t pgd) ++{ ++ return (unsigned long)__va((unsigned long)pgd_val(pgd) & PTE_PFN_MASK); ++} ++ ++/* ++ * Currently stuck as a macro due to indirect forward reference to ++ * linux/mmzone.h's __section_mem_map_addr() definition: ++ */ ++#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT) ++ ++/* to find an entry in a page-table-directory. 
*/ ++static inline unsigned pud_index(unsigned long address) ++{ ++ return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1); ++} ++ ++static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address) ++{ ++ return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address); ++} ++ ++static inline int pgd_bad(pgd_t pgd) ++{ ++ return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE; ++} ++ ++static inline int pgd_none(pgd_t pgd) ++{ ++ return !native_pgd_val(pgd); ++} ++#endif /* PAGETABLE_LEVELS > 3 */ ++ ++static inline int pte_hidden(pte_t pte) ++{ ++ return pte_flags(pte) & _PAGE_HIDDEN; ++} ++ ++#endif /* __ASSEMBLY__ */ + + /* + * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] +@@ -476,28 +508,6 @@ static inline void __init paravirt_paget + + #ifndef __ASSEMBLY__ + +-enum { +- PG_LEVEL_NONE, +- PG_LEVEL_4K, +- PG_LEVEL_2M, +- PG_LEVEL_1G, +- PG_LEVEL_NUM +-}; +- +-#ifdef CONFIG_PROC_FS +-extern void update_page_count(int level, unsigned long pages); +-#else +-static inline void update_page_count(int level, unsigned long pages) { } +-#endif +- +-/* +- * Helper function that returns the kernel pagetable entry controlling +- * the virtual address 'address'. NULL means no pagetable entry present. +- * NOTE: the return type is pte_t but if the pmd is PSE then we return it +- * as a pte too. +- */ +-extern pte_t *lookup_address(unsigned long address, unsigned int *level); +- + /* local pte updates need not use xchg for locking */ + static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) + { +Index: linux-2.6-tip/arch/x86/include/asm/pgtable_32.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/pgtable_32.h ++++ linux-2.6-tip/arch/x86/include/asm/pgtable_32.h +@@ -1,6 +1,7 @@ + #ifndef _ASM_X86_PGTABLE_32_H + #define _ASM_X86_PGTABLE_32_H + ++#include + + /* + * The Linux memory management assumes a three-level page table setup. On +@@ -33,47 +34,6 @@ void paging_init(void); + + extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); + +-/* +- * The Linux x86 paging architecture is 'compile-time dual-mode', it +- * implements both the traditional 2-level x86 page tables and the +- * newer 3-level PAE-mode page tables. +- */ +-#ifdef CONFIG_X86_PAE +-# include +-# define PMD_SIZE (1UL << PMD_SHIFT) +-# define PMD_MASK (~(PMD_SIZE - 1)) +-#else +-# include +-#endif +- +-#define PGDIR_SIZE (1UL << PGDIR_SHIFT) +-#define PGDIR_MASK (~(PGDIR_SIZE - 1)) +- +-/* Just any arbitrary offset to the start of the vmalloc VM area: the +- * current 8MB value just means that there will be a 8MB "hole" after the +- * physical memory until the kernel virtual memory starts. That means that +- * any out-of-bounds memory accesses will hopefully be caught. +- * The vmalloc() routines leaves a hole of 4kB between each vmalloced +- * area for the same reason. 
;) +- */ +-#define VMALLOC_OFFSET (8 * 1024 * 1024) +-#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) +-#ifdef CONFIG_X86_PAE +-#define LAST_PKMAP 512 +-#else +-#define LAST_PKMAP 1024 +-#endif +- +-#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1)) \ +- & PMD_MASK) +- +-#ifdef CONFIG_HIGHMEM +-# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) +-#else +-# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) +-#endif +- +-#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) + + /* + * Define this if things work differently on an i386 and an i486: +@@ -82,58 +42,12 @@ extern void set_pmd_pfn(unsigned long, u + */ + #undef TEST_ACCESS_OK + +-/* The boot page tables (all created as a single array) */ +-extern unsigned long pg0[]; +- +-#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE)) +- +-/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */ +-#define pmd_none(x) (!(unsigned long)pmd_val((x))) +-#define pmd_present(x) (pmd_val((x)) & _PAGE_PRESENT) +-#define pmd_bad(x) ((pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) +- +-#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) +- + #ifdef CONFIG_X86_PAE + # include + #else + # include + #endif + +-/* +- * Conversion functions: convert a page and protection to a page entry, +- * and a page entry and page directory to the page they refer to. +- */ +-#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) +- +- +-static inline int pud_large(pud_t pud) { return 0; } +- +-/* +- * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] +- * +- * this macro returns the index of the entry in the pmd page which would +- * control the given virtual address +- */ +-#define pmd_index(address) \ +- (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +- +-/* +- * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] +- * +- * this macro returns the index of the entry in the pte page which would +- * control the given virtual address +- */ +-#define pte_index(address) \ +- (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +-#define pte_offset_kernel(dir, address) \ +- ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index((address))) +- +-#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) +- +-#define pmd_page_vaddr(pmd) \ +- ((unsigned long)__va(pmd_val((pmd)) & PTE_PFN_MASK)) +- + #if defined(CONFIG_HIGHPTE) + #define pte_offset_map(dir, address) \ + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ +@@ -141,14 +55,20 @@ static inline int pud_large(pud_t pud) { + #define pte_offset_map_nested(dir, address) \ + ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ + pte_index((address))) ++#define pte_offset_map_direct(dir, address) \ ++ ((pte_t *)kmap_atomic_pte_direct(pmd_page(*(dir)), KM_PTE0) + \ ++ pte_index((address))) + #define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0) + #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) ++#define pte_unmap_direct(pte) kunmap_atomic_direct((pte), KM_PTE0) + #else + #define pte_offset_map(dir, address) \ + ((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address))) + #define pte_offset_map_nested(dir, address) pte_offset_map((dir), (address)) ++#define pte_offset_map_direct(dir, address) pte_offset_map((dir), (address)) + #define pte_unmap(pte) do { } while (0) + #define pte_unmap_nested(pte) do { } while (0) ++#define pte_unmap_direct(pte) do { } while (0) + #endif + + /* Clear a kernel PTE and flush it from the TLB */ +@@ -176,7 +96,4 @@ do { \ + 
#define kern_addr_valid(kaddr) (0) + #endif + +-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ +- remap_pfn_range(vma, vaddr, pfn, size, prot) +- + #endif /* _ASM_X86_PGTABLE_32_H */ +Index: linux-2.6-tip/arch/x86/include/asm/pgtable_32_types.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/pgtable_32_types.h +@@ -0,0 +1,51 @@ ++#ifndef _ASM_X86_PGTABLE_32_DEFS_H ++#define _ASM_X86_PGTABLE_32_DEFS_H ++ ++/* ++ * The Linux x86 paging architecture is 'compile-time dual-mode', it ++ * implements both the traditional 2-level x86 page tables and the ++ * newer 3-level PAE-mode page tables. ++ */ ++#ifdef CONFIG_X86_PAE ++# include ++# define PMD_SIZE (1UL << PMD_SHIFT) ++# define PMD_MASK (~(PMD_SIZE - 1)) ++#else ++# include ++#endif ++ ++#define PGDIR_SIZE (1UL << PGDIR_SHIFT) ++#define PGDIR_MASK (~(PGDIR_SIZE - 1)) ++ ++/* Just any arbitrary offset to the start of the vmalloc VM area: the ++ * current 8MB value just means that there will be a 8MB "hole" after the ++ * physical memory until the kernel virtual memory starts. That means that ++ * any out-of-bounds memory accesses will hopefully be caught. ++ * The vmalloc() routines leaves a hole of 4kB between each vmalloced ++ * area for the same reason. ;) ++ */ ++#define VMALLOC_OFFSET (8 * 1024 * 1024) ++ ++#ifndef __ASSEMBLER__ ++extern bool __vmalloc_start_set; /* set once high_memory is set */ ++#endif ++ ++#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET) ++#ifdef CONFIG_X86_PAE ++#define LAST_PKMAP 512 ++#else ++#define LAST_PKMAP 1024 ++#endif ++ ++#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1)) \ ++ & PMD_MASK) ++ ++#ifdef CONFIG_HIGHMEM ++# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE) ++#else ++# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) ++#endif ++ ++#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) ++ ++#endif /* _ASM_X86_PGTABLE_32_DEFS_H */ +Index: linux-2.6-tip/arch/x86/include/asm/pgtable_64.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/pgtable_64.h ++++ linux-2.6-tip/arch/x86/include/asm/pgtable_64.h +@@ -2,6 +2,8 @@ + #define _ASM_X86_PGTABLE_64_H + + #include ++#include ++ + #ifndef __ASSEMBLY__ + + /* +@@ -11,7 +13,6 @@ + #include + #include + #include +-#include + + extern pud_t level3_kernel_pgt[512]; + extern pud_t level3_ident_pgt[512]; +@@ -26,32 +27,6 @@ extern void paging_init(void); + + #endif /* !__ASSEMBLY__ */ + +-#define SHARED_KERNEL_PMD 0 +- +-/* +- * PGDIR_SHIFT determines what a top-level page table entry can map +- */ +-#define PGDIR_SHIFT 39 +-#define PTRS_PER_PGD 512 +- +-/* +- * 3rd level page +- */ +-#define PUD_SHIFT 30 +-#define PTRS_PER_PUD 512 +- +-/* +- * PMD_SHIFT determines the size of the area a middle-level +- * page table can map +- */ +-#define PMD_SHIFT 21 +-#define PTRS_PER_PMD 512 +- +-/* +- * entries per page directory level +- */ +-#define PTRS_PER_PTE 512 +- + #ifndef __ASSEMBLY__ + + #define pte_ERROR(e) \ +@@ -67,9 +42,6 @@ extern void paging_init(void); + printk("%s:%d: bad pgd %p(%016lx).\n", \ + __FILE__, __LINE__, &(e), pgd_val(e)) + +-#define pgd_none(x) (!pgd_val(x)) +-#define pud_none(x) (!pud_val(x)) +- + struct mm_struct; + + void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte); +@@ -134,48 +106,6 @@ static inline void native_pgd_clear(pgd_ + native_set_pgd(pgd, native_make_pgd(0)); + } + +-#define pte_same(a, b) ((a).pte == 
(b).pte) +- +-#endif /* !__ASSEMBLY__ */ +- +-#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) +-#define PMD_MASK (~(PMD_SIZE - 1)) +-#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) +-#define PUD_MASK (~(PUD_SIZE - 1)) +-#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) +-#define PGDIR_MASK (~(PGDIR_SIZE - 1)) +- +- +-#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) +-#define VMALLOC_START _AC(0xffffc20000000000, UL) +-#define VMALLOC_END _AC(0xffffe1ffffffffff, UL) +-#define VMEMMAP_START _AC(0xffffe20000000000, UL) +-#define MODULES_VADDR _AC(0xffffffffa0000000, UL) +-#define MODULES_END _AC(0xffffffffff000000, UL) +-#define MODULES_LEN (MODULES_END - MODULES_VADDR) +- +-#ifndef __ASSEMBLY__ +- +-static inline int pgd_bad(pgd_t pgd) +-{ +- return (pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; +-} +- +-static inline int pud_bad(pud_t pud) +-{ +- return (pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; +-} +- +-static inline int pmd_bad(pmd_t pmd) +-{ +- return (pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE; +-} +- +-#define pte_none(x) (!pte_val((x))) +-#define pte_present(x) (pte_val((x)) & (_PAGE_PRESENT | _PAGE_PROTNONE)) +- +-#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ +- + /* + * Conversion functions: convert a page and protection to a page entry, + * and a page entry and page directory to the page they refer to. +@@ -184,41 +114,12 @@ static inline int pmd_bad(pmd_t pmd) + /* + * Level 4 access. + */ +-#define pgd_page_vaddr(pgd) \ +- ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_PFN_MASK)) +-#define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT)) +-#define pgd_present(pgd) (pgd_val(pgd) & _PAGE_PRESENT) + static inline int pgd_large(pgd_t pgd) { return 0; } + #define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE) + + /* PUD - Level3 access */ +-/* to find an entry in a page-table-directory. */ +-#define pud_page_vaddr(pud) \ +- ((unsigned long)__va(pud_val((pud)) & PHYSICAL_PAGE_MASK)) +-#define pud_page(pud) (pfn_to_page(pud_val((pud)) >> PAGE_SHIFT)) +-#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +-#define pud_offset(pgd, address) \ +- ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address))) +-#define pud_present(pud) (pud_val((pud)) & _PAGE_PRESENT) +- +-static inline int pud_large(pud_t pte) +-{ +- return (pud_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) == +- (_PAGE_PSE | _PAGE_PRESENT); +-} + + /* PMD - Level 2 access */ +-#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_PFN_MASK)) +-#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT)) +- +-#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) +-#define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \ +- pmd_index(address)) +-#define pmd_none(x) (!pmd_val((x))) +-#define pmd_present(x) (pmd_val((x)) & _PAGE_PRESENT) +-#define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot)))) +-#define pmd_pfn(x) ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT) +- + #define pte_to_pgoff(pte) ((pte_val((pte)) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) + #define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \ + _PAGE_FILE }) +@@ -226,18 +127,13 @@ static inline int pud_large(pud_t pte) + + /* PTE - Level 1 access. 
*/ + +-/* page, protection -> pte */ +-#define mk_pte(page, pgprot) pfn_pte(page_to_pfn((page)), (pgprot)) +- +-#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +-#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \ +- pte_index((address))) +- + /* x86-64 always has all page tables mapped. */ + #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address)) + #define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address)) +-#define pte_unmap(pte) /* NOP */ +-#define pte_unmap_nested(pte) /* NOP */ ++#define pte_offset_map_direct(dir, address) pte_offset_kernel((dir), (address)) ++#define pte_unmap(pte) do { } while (0) ++#define pte_unmap_nested(pte) do { } while (0) ++#define pte_unmap_direct(pte) do { } while (0) + + #define update_mmu_cache(vma, address, pte) do { } while (0) + +@@ -266,9 +162,6 @@ extern int direct_gbpages; + extern int kern_addr_valid(unsigned long addr); + extern void cleanup_highmap(void); + +-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \ +- remap_pfn_range(vma, vaddr, pfn, size, prot) +- + #define HAVE_ARCH_UNMAPPED_AREA + #define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN + +Index: linux-2.6-tip/arch/x86/include/asm/pgtable_64_types.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/pgtable_64_types.h +@@ -0,0 +1,63 @@ ++#ifndef _ASM_X86_PGTABLE_64_DEFS_H ++#define _ASM_X86_PGTABLE_64_DEFS_H ++ ++#ifndef __ASSEMBLY__ ++#include ++ ++/* ++ * These are used to make use of C type-checking.. ++ */ ++typedef unsigned long pteval_t; ++typedef unsigned long pmdval_t; ++typedef unsigned long pudval_t; ++typedef unsigned long pgdval_t; ++typedef unsigned long pgprotval_t; ++ ++typedef struct { pteval_t pte; } pte_t; ++ ++#endif /* !__ASSEMBLY__ */ ++ ++#define SHARED_KERNEL_PMD 0 ++#define PAGETABLE_LEVELS 4 ++ ++/* ++ * PGDIR_SHIFT determines what a top-level page table entry can map ++ */ ++#define PGDIR_SHIFT 39 ++#define PTRS_PER_PGD 512 ++ ++/* ++ * 3rd level page ++ */ ++#define PUD_SHIFT 30 ++#define PTRS_PER_PUD 512 ++ ++/* ++ * PMD_SHIFT determines the size of the area a middle-level ++ * page table can map ++ */ ++#define PMD_SHIFT 21 ++#define PTRS_PER_PMD 512 ++ ++/* ++ * entries per page directory level ++ */ ++#define PTRS_PER_PTE 512 ++ ++#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) ++#define PMD_MASK (~(PMD_SIZE - 1)) ++#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) ++#define PUD_MASK (~(PUD_SIZE - 1)) ++#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) ++#define PGDIR_MASK (~(PGDIR_SIZE - 1)) ++ ++ ++#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) ++#define VMALLOC_START _AC(0xffffc20000000000, UL) ++#define VMALLOC_END _AC(0xffffe1ffffffffff, UL) ++#define VMEMMAP_START _AC(0xffffe20000000000, UL) ++#define MODULES_VADDR _AC(0xffffffffa0000000, UL) ++#define MODULES_END _AC(0xffffffffff000000, UL) ++#define MODULES_LEN (MODULES_END - MODULES_VADDR) ++ ++#endif /* _ASM_X86_PGTABLE_64_DEFS_H */ +Index: linux-2.6-tip/arch/x86/include/asm/pgtable_types.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/pgtable_types.h +@@ -0,0 +1,334 @@ ++#ifndef _ASM_X86_PGTABLE_DEFS_H ++#define _ASM_X86_PGTABLE_DEFS_H ++ ++#include ++#include ++ ++#define FIRST_USER_ADDRESS 0 ++ ++#define _PAGE_BIT_PRESENT 0 /* is present */ ++#define _PAGE_BIT_RW 1 /* writeable */ ++#define _PAGE_BIT_USER 2 /* userspace addressable */ ++#define _PAGE_BIT_PWT 3 /* 
page write through */ ++#define _PAGE_BIT_PCD 4 /* page cache disabled */ ++#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ ++#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ ++#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ ++#define _PAGE_BIT_PAT 7 /* on 4KB pages */ ++#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ ++#define _PAGE_BIT_UNUSED1 9 /* available for programmer */ ++#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ ++#define _PAGE_BIT_HIDDEN 11 /* hidden by kmemcheck */ ++#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ ++#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 ++#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 ++#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ ++ ++/* If _PAGE_BIT_PRESENT is clear, we use these: */ ++/* - if the user mapped it with PROT_NONE; pte_present gives true */ ++#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL ++/* - set: nonlinear file mapping, saved PTE; unset:swap */ ++#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY ++ ++#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) ++#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) ++#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) ++#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT) ++#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD) ++#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED) ++#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY) ++#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE) ++#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) ++#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) ++#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) ++#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) ++#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) ++#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) ++#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) ++#define __HAVE_ARCH_PTE_SPECIAL ++ ++#ifdef CONFIG_KMEMCHECK ++#define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) ++#else ++#define _PAGE_HIDDEN (_AT(pteval_t, 0)) ++#endif ++ ++#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) ++#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) ++#else ++#define _PAGE_NX (_AT(pteval_t, 0)) ++#endif ++ ++#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) ++#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) ++ ++#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ ++ _PAGE_ACCESSED | _PAGE_DIRTY) ++#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \ ++ _PAGE_DIRTY) ++ ++/* Set of bits not changed in pte_modify */ ++#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ ++ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) ++ ++#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) ++#define _PAGE_CACHE_WB (0) ++#define _PAGE_CACHE_WC (_PAGE_PWT) ++#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD) ++#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT) ++ ++#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED) ++#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ ++ _PAGE_ACCESSED | _PAGE_NX) ++ ++#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \ ++ _PAGE_USER | _PAGE_ACCESSED) ++#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ ++ _PAGE_ACCESSED | _PAGE_NX) ++#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ ++ _PAGE_ACCESSED) ++#define PAGE_COPY PAGE_COPY_NOEXEC ++#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \ ++ 
_PAGE_ACCESSED | _PAGE_NX) ++#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \ ++ _PAGE_ACCESSED) ++ ++#define __PAGE_KERNEL_EXEC \ ++ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_GLOBAL) ++#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX) ++ ++#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW) ++#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW) ++#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT) ++#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC) ++#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT) ++#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD) ++#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER) ++#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT) ++#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE) ++#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE) ++#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE) ++ ++#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP) ++#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP) ++#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP) ++#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP) ++ ++#define PAGE_KERNEL __pgprot(__PAGE_KERNEL) ++#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO) ++#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC) ++#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX) ++#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC) ++#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE) ++#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS) ++#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE) ++#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE) ++#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE) ++#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC) ++#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL) ++#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE) ++ ++#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO) ++#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE) ++#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS) ++#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC) ++ ++/* xwr */ ++#define __P000 PAGE_NONE ++#define __P001 PAGE_READONLY ++#define __P010 PAGE_COPY ++#define __P011 PAGE_COPY ++#define __P100 PAGE_READONLY_EXEC ++#define __P101 PAGE_READONLY_EXEC ++#define __P110 PAGE_COPY_EXEC ++#define __P111 PAGE_COPY_EXEC ++ ++#define __S000 PAGE_NONE ++#define __S001 PAGE_READONLY ++#define __S010 PAGE_SHARED ++#define __S011 PAGE_SHARED ++#define __S100 PAGE_READONLY_EXEC ++#define __S101 PAGE_READONLY_EXEC ++#define __S110 PAGE_SHARED_EXEC ++#define __S111 PAGE_SHARED_EXEC ++ ++/* ++ * early identity mapping pte attrib macros. ++ */ ++#ifdef CONFIG_X86_64 ++#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC ++#else ++/* ++ * For PDE_IDENT_ATTR include USER bit. 
As the PDE and PTE protection ++ * bits are combined, this will alow user to access the high address mapped ++ * VDSO in the presence of CONFIG_COMPAT_VDSO ++ */ ++#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ ++#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ ++#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ ++#endif ++ ++#ifdef CONFIG_X86_32 ++# include "pgtable_32_types.h" ++#else ++# include "pgtable_64_types.h" ++#endif ++ ++#ifndef __ASSEMBLY__ ++ ++#include ++ ++/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */ ++#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK) ++ ++/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */ ++#define PTE_FLAGS_MASK (~PTE_PFN_MASK) ++ ++typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; ++ ++typedef struct { pgdval_t pgd; } pgd_t; ++ ++static inline pgd_t native_make_pgd(pgdval_t val) ++{ ++ return (pgd_t) { val }; ++} ++ ++static inline pgdval_t native_pgd_val(pgd_t pgd) ++{ ++ return pgd.pgd; ++} ++ ++static inline pgdval_t pgd_flags(pgd_t pgd) ++{ ++ return native_pgd_val(pgd) & PTE_FLAGS_MASK; ++} ++ ++#if PAGETABLE_LEVELS > 3 ++typedef struct { pudval_t pud; } pud_t; ++ ++static inline pud_t native_make_pud(pmdval_t val) ++{ ++ return (pud_t) { val }; ++} ++ ++static inline pudval_t native_pud_val(pud_t pud) ++{ ++ return pud.pud; ++} ++#else ++#include ++ ++static inline pudval_t native_pud_val(pud_t pud) ++{ ++ return native_pgd_val(pud.pgd); ++} ++#endif ++ ++#if PAGETABLE_LEVELS > 2 ++typedef struct { pmdval_t pmd; } pmd_t; ++ ++static inline pmd_t native_make_pmd(pmdval_t val) ++{ ++ return (pmd_t) { val }; ++} ++ ++static inline pmdval_t native_pmd_val(pmd_t pmd) ++{ ++ return pmd.pmd; ++} ++#else ++#include ++ ++static inline pmdval_t native_pmd_val(pmd_t pmd) ++{ ++ return native_pgd_val(pmd.pud.pgd); ++} ++#endif ++ ++static inline pudval_t pud_flags(pud_t pud) ++{ ++ return native_pud_val(pud) & PTE_FLAGS_MASK; ++} ++ ++static inline pmdval_t pmd_flags(pmd_t pmd) ++{ ++ return native_pmd_val(pmd) & PTE_FLAGS_MASK; ++} ++ ++static inline pte_t native_make_pte(pteval_t val) ++{ ++ return (pte_t) { .pte = val }; ++} ++ ++static inline pteval_t native_pte_val(pte_t pte) ++{ ++ return pte.pte; ++} ++ ++static inline pteval_t pte_flags(pte_t pte) ++{ ++ return native_pte_val(pte) & PTE_FLAGS_MASK; ++} ++ ++#define pgprot_val(x) ((x).pgprot) ++#define __pgprot(x) ((pgprot_t) { (x) } ) ++ ++ ++typedef struct page *pgtable_t; ++ ++extern pteval_t __supported_pte_mask; ++extern int nx_enabled; ++extern void set_nx(void); ++ ++#define pgprot_writecombine pgprot_writecombine ++extern pgprot_t pgprot_writecombine(pgprot_t prot); ++ ++/* Indicate that x86 has its own track and untrack pfn vma functions */ ++#define __HAVE_PFNMAP_TRACKING ++ ++#define __HAVE_PHYS_MEM_ACCESS_PROT ++struct file; ++pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, ++ unsigned long size, pgprot_t vma_prot); ++int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, ++ unsigned long size, pgprot_t *vma_prot); ++ ++/* Install a pte for a particular vaddr in kernel space. 
*/ ++void set_pte_vaddr(unsigned long vaddr, pte_t pte); ++ ++#ifdef CONFIG_X86_32 ++extern void native_pagetable_setup_start(pgd_t *base); ++extern void native_pagetable_setup_done(pgd_t *base); ++#else ++static inline void native_pagetable_setup_start(pgd_t *base) {} ++static inline void native_pagetable_setup_done(pgd_t *base) {} ++#endif ++ ++struct seq_file; ++extern void arch_report_meminfo(struct seq_file *m); ++ ++enum { ++ PG_LEVEL_NONE, ++ PG_LEVEL_4K, ++ PG_LEVEL_2M, ++ PG_LEVEL_1G, ++ PG_LEVEL_NUM ++}; ++ ++#ifdef CONFIG_PROC_FS ++extern void update_page_count(int level, unsigned long pages); ++#else ++static inline void update_page_count(int level, unsigned long pages) { } ++#endif ++ ++/* ++ * Helper function that returns the kernel pagetable entry controlling ++ * the virtual address 'address'. NULL means no pagetable entry present. ++ * NOTE: the return type is pte_t but if the pmd is PSE then we return it ++ * as a pte too. ++ */ ++extern pte_t *lookup_address(unsigned long address, unsigned int *level); ++ ++#endif /* !__ASSEMBLY__ */ ++ ++#endif /* _ASM_X86_PGTABLE_DEFS_H */ +Index: linux-2.6-tip/arch/x86/include/asm/prctl.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/prctl.h ++++ linux-2.6-tip/arch/x86/include/asm/prctl.h +@@ -6,8 +6,4 @@ + #define ARCH_GET_FS 0x1003 + #define ARCH_GET_GS 0x1004 + +-#ifdef CONFIG_X86_64 +-extern long sys_arch_prctl(int, unsigned long); +-#endif /* CONFIG_X86_64 */ +- + #endif /* _ASM_X86_PRCTL_H */ +Index: linux-2.6-tip/arch/x86/include/asm/processor.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/processor.h ++++ linux-2.6-tip/arch/x86/include/asm/processor.h +@@ -16,6 +16,7 @@ struct mm_struct; + #include + #include + #include ++#include + #include + #include + #include +@@ -73,10 +74,10 @@ struct cpuinfo_x86 { + char pad0; + #else + /* Number of 4K pages in DTLB/ITLB combined(in pages): */ +- int x86_tlbsize; ++ int x86_tlbsize; ++#endif + __u8 x86_virt_bits; + __u8 x86_phys_bits; +-#endif + /* CPUID returned core id bits: */ + __u8 x86_coreid_bits; + /* Max extended CPUID function supported: */ +@@ -93,7 +94,7 @@ struct cpuinfo_x86 { + unsigned long loops_per_jiffy; + #ifdef CONFIG_SMP + /* cpus sharing the last level cache: */ +- cpumask_t llc_shared_map; ++ cpumask_var_t llc_shared_map; + #endif + /* cpuid returned max cores value: */ + u16 x86_max_cores; +@@ -247,7 +248,6 @@ struct x86_hw_tss { + #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) + #define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap) + #define INVALID_IO_BITMAP_OFFSET 0x8000 +-#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000 + + struct tss_struct { + /* +@@ -262,11 +262,6 @@ struct tss_struct { + * be within the limit. + */ + unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; +- /* +- * Cache the current maximum and the last task that used the bitmap: +- */ +- unsigned long io_bitmap_max; +- struct thread_struct *io_bitmap_owner; + + /* + * .. and then another 0x100 bytes for the emergency kernel stack: +@@ -378,9 +373,33 @@ union thread_xstate { + + #ifdef CONFIG_X86_64 + DECLARE_PER_CPU(struct orig_ist, orig_ist); ++ ++union irq_stack_union { ++ char irq_stack[IRQ_STACK_SIZE]; ++ /* ++ * GCC hardcodes the stack canary as %gs:40. Since the ++ * irq_stack is the object at %gs:0, we reserve the bottom ++ * 48 bytes of the irq stack for the canary. 
++ */ ++ struct { ++ char gs_base[40]; ++ unsigned long stack_canary; ++ }; ++}; ++ ++DECLARE_PER_CPU(union irq_stack_union, irq_stack_union); ++DECLARE_INIT_PER_CPU(irq_stack_union); ++ ++DECLARE_PER_CPU(char *, irq_stack_ptr); ++DECLARE_PER_CPU(unsigned int, irq_count); ++extern unsigned long kernel_eflags; ++extern asmlinkage void ignore_sysret(void); ++#else /* X86_64 */ ++#ifdef CONFIG_CC_STACKPROTECTOR ++DECLARE_PER_CPU(unsigned long, stack_canary); + #endif ++#endif /* X86_64 */ + +-extern void print_cpu_info(struct cpuinfo_x86 *); + extern unsigned int xstate_size; + extern void free_thread_xstate(struct task_struct *); + extern struct kmem_cache *task_xstate_cachep; +@@ -717,6 +736,7 @@ static inline void __sti_mwait(unsigned + extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); + + extern void select_idle_routine(const struct cpuinfo_x86 *c); ++extern void init_c1e_mask(void); + + extern unsigned long boot_option_idle_override; + extern unsigned long idle_halt; +@@ -752,9 +772,9 @@ extern int sysenter_setup(void); + extern struct desc_ptr early_gdt_descr; + + extern void cpu_set_gdt(int); +-extern void switch_to_new_gdt(void); ++extern void switch_to_new_gdt(int); ++extern void load_percpu_segment(int); + extern void cpu_init(void); +-extern void init_gdt(int cpu); + + static inline unsigned long get_debugctlmsr(void) + { +@@ -839,6 +859,7 @@ static inline void spin_lock_prefetch(co + * User space process size: 3GB (default). + */ + #define TASK_SIZE PAGE_OFFSET ++#define TASK_SIZE_MAX TASK_SIZE + #define STACK_TOP TASK_SIZE + #define STACK_TOP_MAX STACK_TOP + +@@ -898,7 +919,7 @@ extern unsigned long thread_saved_pc(str + /* + * User space process size. 47bits minus one guard page. + */ +-#define TASK_SIZE64 ((1UL << 47) - PAGE_SIZE) ++#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE) + + /* This decides where the kernel will search for a free chunk of vm + * space during mmap's. +@@ -907,12 +928,12 @@ extern unsigned long thread_saved_pc(str + 0xc0000000 : 0xFFFFe000) + + #define TASK_SIZE (test_thread_flag(TIF_IA32) ? \ +- IA32_PAGE_OFFSET : TASK_SIZE64) ++ IA32_PAGE_OFFSET : TASK_SIZE_MAX) + #define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \ +- IA32_PAGE_OFFSET : TASK_SIZE64) ++ IA32_PAGE_OFFSET : TASK_SIZE_MAX) + + #define STACK_TOP TASK_SIZE +-#define STACK_TOP_MAX TASK_SIZE64 ++#define STACK_TOP_MAX TASK_SIZE_MAX + + #define INIT_THREAD { \ + .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \ +Index: linux-2.6-tip/arch/x86/include/asm/proto.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/proto.h ++++ linux-2.6-tip/arch/x86/include/asm/proto.h +@@ -18,11 +18,7 @@ extern void syscall32_cpu_init(void); + + extern void check_efer(void); + +-#ifdef CONFIG_X86_BIOS_REBOOT + extern int reboot_force; +-#else +-static const int reboot_force = 0; +-#endif + + long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); + +Index: linux-2.6-tip/arch/x86/include/asm/ptrace-abi.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/ptrace-abi.h ++++ linux-2.6-tip/arch/x86/include/asm/ptrace-abi.h +@@ -80,8 +80,6 @@ + + #define PTRACE_SINGLEBLOCK 33 /* resume execution until next branch */ + +-#ifdef CONFIG_X86_PTRACE_BTS +- + #ifndef __ASSEMBLY__ + #include + +@@ -140,6 +138,5 @@ struct ptrace_bts_config { + BTS records are read from oldest to newest. + Returns number of BTS records drained. 
+ */ +-#endif /* CONFIG_X86_PTRACE_BTS */ + + #endif /* _ASM_X86_PTRACE_ABI_H */ +Index: linux-2.6-tip/arch/x86/include/asm/ptrace.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/ptrace.h ++++ linux-2.6-tip/arch/x86/include/asm/ptrace.h +@@ -28,7 +28,7 @@ struct pt_regs { + int xds; + int xes; + int xfs; +- /* int gs; */ ++ int xgs; + long orig_eax; + long eip; + int xcs; +@@ -50,7 +50,7 @@ struct pt_regs { + unsigned long ds; + unsigned long es; + unsigned long fs; +- /* int gs; */ ++ unsigned long gs; + unsigned long orig_ax; + unsigned long ip; + unsigned long cs; +Index: linux-2.6-tip/arch/x86/include/asm/rdc321x_defs.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/rdc321x_defs.h +@@ -0,0 +1,12 @@ ++#define PFX "rdc321x: " ++ ++/* General purpose configuration and data registers */ ++#define RDC3210_CFGREG_ADDR 0x0CF8 ++#define RDC3210_CFGREG_DATA 0x0CFC ++ ++#define RDC321X_GPIO_CTRL_REG1 0x48 ++#define RDC321X_GPIO_CTRL_REG2 0x84 ++#define RDC321X_GPIO_DATA_REG1 0x4c ++#define RDC321X_GPIO_DATA_REG2 0x88 ++ ++#define RDC321X_MAX_GPIO 58 +Index: linux-2.6-tip/arch/x86/include/asm/sections.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/sections.h ++++ linux-2.6-tip/arch/x86/include/asm/sections.h +@@ -1 +1,8 @@ ++#ifndef _ASM_X86_SECTIONS_H ++#define _ASM_X86_SECTIONS_H ++ + #include ++ ++extern char __brk_base[], __brk_limit[]; ++ ++#endif /* _ASM_X86_SECTIONS_H */ +Index: linux-2.6-tip/arch/x86/include/asm/segment.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/segment.h ++++ linux-2.6-tip/arch/x86/include/asm/segment.h +@@ -61,7 +61,7 @@ + * + * 26 - ESPFIX small SS + * 27 - per-cpu [ offset to per-cpu data area ] +- * 28 - unused ++ * 28 - stack_canary-20 [ for stack protector ] + * 29 - unused + * 30 - unused + * 31 - TSS for double fault handler +@@ -95,6 +95,13 @@ + #define __KERNEL_PERCPU 0 + #endif + ++#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE + 16) ++#ifdef CONFIG_CC_STACKPROTECTOR ++#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY * 8) ++#else ++#define __KERNEL_STACK_CANARY 0 ++#endif ++ + #define GDT_ENTRY_DOUBLEFAULT_TSS 31 + + /* +Index: linux-2.6-tip/arch/x86/include/asm/setup.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/setup.h ++++ linux-2.6-tip/arch/x86/include/asm/setup.h +@@ -1,33 +1,19 @@ + #ifndef _ASM_X86_SETUP_H + #define _ASM_X86_SETUP_H + ++#ifdef __KERNEL__ ++ + #define COMMAND_LINE_SIZE 2048 + + #ifndef __ASSEMBLY__ + +-/* Interrupt control for vSMPowered x86_64 systems */ +-void vsmp_init(void); +- +- +-void setup_bios_corruption_check(void); +- +- +-#ifdef CONFIG_X86_VISWS +-extern void visws_early_detect(void); +-extern int is_visws_box(void); +-#else +-static inline void visws_early_detect(void) { } +-static inline int is_visws_box(void) { return 0; } +-#endif +- +-extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); +-extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip); + /* + * Any setup quirks to be performed? 
+ */ + struct mpc_cpu; + struct mpc_bus; + struct mpc_oemtable; ++ + struct x86_quirks { + int (*arch_pre_time_init)(void); + int (*arch_time_init)(void); +@@ -43,20 +29,19 @@ struct x86_quirks { + void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); + void (*mpc_oem_pci_bus)(struct mpc_bus *m); + void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable, +- unsigned short oemsize); ++ unsigned short oemsize); + int (*setup_ioapic_ids)(void); +- int (*update_genapic)(void); + }; + +-extern struct x86_quirks *x86_quirks; +-extern unsigned long saved_video_mode; ++extern void x86_quirk_pre_intr_init(void); ++extern void x86_quirk_intr_init(void); + +-#ifndef CONFIG_PARAVIRT +-#define paravirt_post_allocator_init() do {} while (0) +-#endif +-#endif /* __ASSEMBLY__ */ ++extern void x86_quirk_trap_init(void); + +-#ifdef __KERNEL__ ++extern void x86_quirk_pre_time_init(void); ++extern void x86_quirk_time_init(void); ++ ++#endif /* __ASSEMBLY__ */ + + #ifdef __i386__ + +@@ -78,6 +63,30 @@ extern unsigned long saved_video_mode; + #ifndef __ASSEMBLY__ + #include + ++/* Interrupt control for vSMPowered x86_64 systems */ ++#ifdef CONFIG_X86_VSMP ++void vsmp_init(void); ++#else ++static inline void vsmp_init(void) { } ++#endif ++ ++void setup_bios_corruption_check(void); ++ ++#ifdef CONFIG_X86_VISWS ++extern void visws_early_detect(void); ++extern int is_visws_box(void); ++#else ++static inline void visws_early_detect(void) { } ++static inline int is_visws_box(void) { return 0; } ++#endif ++ ++extern struct x86_quirks *x86_quirks; ++extern unsigned long saved_video_mode; ++ ++#ifndef CONFIG_PARAVIRT ++#define paravirt_post_allocator_init() do {} while (0) ++#endif ++ + #ifndef _SETUP + + /* +@@ -91,21 +100,51 @@ extern struct boot_params boot_params; + */ + #define LOWMEMSIZE() (0x9f000) + ++/* exceedingly early brk-like allocator */ ++extern unsigned long _brk_end; ++void *extend_brk(size_t size, size_t align); ++ ++/* ++ * Reserve space in the brk section. The name must be unique within ++ * the file, and somewhat descriptive. The size is in bytes. Must be ++ * used at file scope. ++ * ++ * (This uses a temp function to wrap the asm so we can pass it the ++ * size parameter; otherwise we wouldn't be able to. We can't use a ++ * "section" attribute on a normal variable because it always ends up ++ * being @progbits, which ends up allocating space in the vmlinux ++ * executable.) ++ */ ++#define RESERVE_BRK(name,sz) \ ++ static void __section(.discard) __used \ ++ __brk_reservation_fn_##name##__(void) { \ ++ asm volatile ( \ ++ ".pushsection .brk_reservation,\"aw\",@nobits;" \ ++ ".brk." #name ":" \ ++ " 1:.skip %c0;" \ ++ " .size .brk." #name ", . 
- 1b;" \ ++ " .popsection" \ ++ : : "i" (sz)); \ ++ } ++ + #ifdef __i386__ + + void __init i386_start_kernel(void); + extern void probe_roms(void); + +-extern unsigned long init_pg_tables_start; +-extern unsigned long init_pg_tables_end; +- + #else +-void __init x86_64_init_pda(void); + void __init x86_64_start_kernel(char *real_mode); + void __init x86_64_start_reservations(char *real_mode_data); + + #endif /* __i386__ */ + #endif /* _SETUP */ ++#else ++#define RESERVE_BRK(name,sz) \ ++ .pushsection .brk_reservation,"aw",@nobits; \ ++.brk.name: \ ++1: .skip sz; \ ++ .size .brk.name,.-1b; \ ++ .popsection + #endif /* __ASSEMBLY__ */ + #endif /* __KERNEL__ */ + +Index: linux-2.6-tip/arch/x86/include/asm/setup_arch.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/setup_arch.h +@@ -0,0 +1,3 @@ ++/* Hook to call BIOS initialisation function */ ++ ++/* no action for generic */ +Index: linux-2.6-tip/arch/x86/include/asm/smp.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/smp.h ++++ linux-2.6-tip/arch/x86/include/asm/smp.h +@@ -15,53 +15,25 @@ + # include + # endif + #endif +-#include + #include +- +-#ifdef CONFIG_X86_64 +- +-extern cpumask_var_t cpu_callin_mask; +-extern cpumask_var_t cpu_callout_mask; +-extern cpumask_var_t cpu_initialized_mask; +-extern cpumask_var_t cpu_sibling_setup_mask; +- +-#else /* CONFIG_X86_32 */ +- +-extern cpumask_t cpu_callin_map; +-extern cpumask_t cpu_callout_map; +-extern cpumask_t cpu_initialized; +-extern cpumask_t cpu_sibling_setup_map; +- +-#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map) +-#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map) +-#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) +-#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) +- +-#endif /* CONFIG_X86_32 */ +- +-extern void (*mtrr_hook)(void); +-extern void zap_low_mappings(void); +- +-extern int __cpuinit get_local_pda(int cpu); ++#include + + extern int smp_num_siblings; + extern unsigned int num_processors; + +-DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); +-DECLARE_PER_CPU(cpumask_t, cpu_core_map); ++DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map); ++DECLARE_PER_CPU(cpumask_var_t, cpu_core_map); + DECLARE_PER_CPU(u16, cpu_llc_id); +-#ifdef CONFIG_X86_32 + DECLARE_PER_CPU(int, cpu_number); +-#endif + + static inline struct cpumask *cpu_sibling_mask(int cpu) + { +- return &per_cpu(cpu_sibling_map, cpu); ++ return per_cpu(cpu_sibling_map, cpu); + } + + static inline struct cpumask *cpu_core_mask(int cpu) + { +- return &per_cpu(cpu_core_map, cpu); ++ return per_cpu(cpu_core_map, cpu); + } + + DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid); +@@ -149,9 +121,10 @@ static inline void arch_send_call_functi + smp_ops.send_call_func_single_ipi(cpu); + } + +-static inline void arch_send_call_function_ipi(cpumask_t mask) ++#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask ++static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) + { +- smp_ops.send_call_func_ipi(&mask); ++ smp_ops.send_call_func_ipi(mask); + } + + void cpu_disable_common(void); +@@ -167,8 +140,6 @@ void play_dead_common(void); + void native_send_call_func_ipi(const struct cpumask *mask); + void native_send_call_func_single_ipi(int cpu); + +-extern void prefill_possible_map(void); +- + void smp_store_cpu_info(int id); + #define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) + +@@ 
-177,10 +148,6 @@ static inline int num_booting_cpus(void) + { + return cpumask_weight(cpu_callout_mask); + } +-#else +-static inline void prefill_possible_map(void) +-{ +-} + #endif /* CONFIG_SMP */ + + extern unsigned disabled_cpus __cpuinitdata; +@@ -191,11 +158,11 @@ extern unsigned disabled_cpus __cpuinitd + * from the initial startup. We map APIC_BASE very early in page_setup(), + * so this is correct in the x86 case. + */ +-#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) ++#define raw_smp_processor_id() (percpu_read(cpu_number)) + extern int safe_smp_processor_id(void); + + #elif defined(CONFIG_X86_64_SMP) +-#define raw_smp_processor_id() read_pda(cpunumber) ++#define raw_smp_processor_id() (percpu_read(cpu_number)) + + #define stack_smp_processor_id() \ + ({ \ +@@ -205,10 +172,6 @@ extern int safe_smp_processor_id(void); + }) + #define safe_smp_processor_id() smp_processor_id() + +-#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */ +-#define cpu_physical_id(cpu) boot_cpu_physical_apicid +-#define safe_smp_processor_id() 0 +-#define stack_smp_processor_id() 0 + #endif + + #ifdef CONFIG_X86_LOCAL_APIC +@@ -220,28 +183,9 @@ static inline int logical_smp_processor_ + return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); + } + +-#include +-static inline unsigned int read_apic_id(void) +-{ +- unsigned int reg; +- +- reg = *(u32 *)(APIC_BASE + APIC_ID); +- +- return GET_APIC_ID(reg); +-} + #endif + +- +-# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64) + extern int hard_smp_processor_id(void); +-# else +-#include +-static inline int hard_smp_processor_id(void) +-{ +- /* we don't want to mark this access volatile - bad code generation */ +- return read_apic_id(); +-} +-# endif /* APIC_DEFINITION */ + + #else /* CONFIG_X86_LOCAL_APIC */ + +@@ -251,11 +195,5 @@ static inline int hard_smp_processor_id( + + #endif /* CONFIG_X86_LOCAL_APIC */ + +-#ifdef CONFIG_X86_HAS_BOOT_CPU_ID +-extern unsigned char boot_cpu_id; +-#else +-#define boot_cpu_id 0 +-#endif +- + #endif /* __ASSEMBLY__ */ + #endif /* _ASM_X86_SMP_H */ +Index: linux-2.6-tip/arch/x86/include/asm/smpboot_hooks.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/smpboot_hooks.h +@@ -0,0 +1,61 @@ ++/* two abstractions specific to kernel/smpboot.c, mainly to cater to visws ++ * which needs to alter them. */ ++ ++static inline void smpboot_clear_io_apic_irqs(void) ++{ ++#ifdef CONFIG_X86_IO_APIC ++ io_apic_irqs = 0; ++#endif ++} ++ ++static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) ++{ ++ CMOS_WRITE(0xa, 0xf); ++ local_flush_tlb(); ++ pr_debug("1.\n"); ++ *((volatile unsigned short *)phys_to_virt(apic->trampoline_phys_high)) = ++ start_eip >> 4; ++ pr_debug("2.\n"); ++ *((volatile unsigned short *)phys_to_virt(apic->trampoline_phys_low)) = ++ start_eip & 0xf; ++ pr_debug("3.\n"); ++} ++ ++static inline void smpboot_restore_warm_reset_vector(void) ++{ ++ /* ++ * Install writable page 0 entry to set BIOS data area. ++ */ ++ local_flush_tlb(); ++ ++ /* ++ * Paranoid: Set warm reset code and vector here back ++ * to default values. ++ */ ++ CMOS_WRITE(0, 0xf); ++ ++ *((volatile long *)phys_to_virt(apic->trampoline_phys_low)) = 0; ++} ++ ++static inline void __init smpboot_setup_io_apic(void) ++{ ++#ifdef CONFIG_X86_IO_APIC ++ /* ++ * Here we can be sure that there is an IO-APIC in the system. 
Let's ++ * go and set it up: ++ */ ++ if (!skip_ioapic_setup && nr_ioapics) ++ setup_IO_APIC(); ++ else { ++ nr_ioapics = 0; ++ localise_nmi_watchdog(); ++ } ++#endif ++} ++ ++static inline void smpboot_clear_io_apic(void) ++{ ++#ifdef CONFIG_X86_IO_APIC ++ nr_ioapics = 0; ++#endif ++} +Index: linux-2.6-tip/arch/x86/include/asm/spinlock.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/spinlock.h ++++ linux-2.6-tip/arch/x86/include/asm/spinlock.h +@@ -58,7 +58,7 @@ + #if (NR_CPUS < 256) + #define TICKET_SHIFT 8 + +-static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) ++static __always_inline void __ticket_spin_lock(__raw_spinlock_t *lock) + { + short inc = 0x0100; + +@@ -77,7 +77,7 @@ static __always_inline void __ticket_spi + : "memory", "cc"); + } + +-static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) ++static __always_inline int __ticket_spin_trylock(__raw_spinlock_t *lock) + { + int tmp, new; + +@@ -96,7 +96,7 @@ static __always_inline int __ticket_spin + return tmp; + } + +-static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) ++static __always_inline void __ticket_spin_unlock(__raw_spinlock_t *lock) + { + asm volatile(UNLOCK_LOCK_PREFIX "incb %0" + : "+m" (lock->slock) +@@ -106,7 +106,7 @@ static __always_inline void __ticket_spi + #else + #define TICKET_SHIFT 16 + +-static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) ++static __always_inline void __ticket_spin_lock(__raw_spinlock_t *lock) + { + int inc = 0x00010000; + int tmp; +@@ -127,7 +127,7 @@ static __always_inline void __ticket_spi + : "memory", "cc"); + } + +-static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) ++static __always_inline int __ticket_spin_trylock(__raw_spinlock_t *lock) + { + int tmp; + int new; +@@ -149,7 +149,7 @@ static __always_inline int __ticket_spin + return tmp; + } + +-static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) ++static __always_inline void __ticket_spin_unlock(__raw_spinlock_t *lock) + { + asm volatile(UNLOCK_LOCK_PREFIX "incw %0" + : "+m" (lock->slock) +@@ -158,119 +158,57 @@ static __always_inline void __ticket_spi + } + #endif + +-static inline int __ticket_spin_is_locked(raw_spinlock_t *lock) ++static inline int __ticket_spin_is_locked(__raw_spinlock_t *lock) + { + int tmp = ACCESS_ONCE(lock->slock); + + return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1)); + } + +-static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) ++static inline int __ticket_spin_is_contended(__raw_spinlock_t *lock) + { + int tmp = ACCESS_ONCE(lock->slock); + + return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1; + } + +-#ifdef CONFIG_PARAVIRT +-/* +- * Define virtualization-friendly old-style lock byte lock, for use in +- * pv_lock_ops if desired. +- * +- * This differs from the pre-2.6.24 spinlock by always using xchgb +- * rather than decb to take the lock; this allows it to use a +- * zero-initialized lock structure. It also maintains a 1-byte +- * contention counter, so that we can implement +- * __byte_spin_is_contended. 
+- */ +-struct __byte_spinlock { +- s8 lock; +- s8 spinners; +-}; +- +-static inline int __byte_spin_is_locked(raw_spinlock_t *lock) +-{ +- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; +- return bl->lock != 0; +-} ++#ifndef CONFIG_PARAVIRT + +-static inline int __byte_spin_is_contended(raw_spinlock_t *lock) +-{ +- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; +- return bl->spinners != 0; +-} +- +-static inline void __byte_spin_lock(raw_spinlock_t *lock) +-{ +- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; +- s8 val = 1; +- +- asm("1: xchgb %1, %0\n" +- " test %1,%1\n" +- " jz 3f\n" +- " " LOCK_PREFIX "incb %2\n" +- "2: rep;nop\n" +- " cmpb $1, %0\n" +- " je 2b\n" +- " " LOCK_PREFIX "decb %2\n" +- " jmp 1b\n" +- "3:" +- : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory"); +-} +- +-static inline int __byte_spin_trylock(raw_spinlock_t *lock) +-{ +- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; +- u8 old = 1; +- +- asm("xchgb %1,%0" +- : "+m" (bl->lock), "+q" (old) : : "memory"); +- +- return old == 0; +-} +- +-static inline void __byte_spin_unlock(raw_spinlock_t *lock) +-{ +- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; +- smp_wmb(); +- bl->lock = 0; +-} +-#else /* !CONFIG_PARAVIRT */ +-static inline int __raw_spin_is_locked(raw_spinlock_t *lock) ++static inline int __raw_spin_is_locked(__raw_spinlock_t *lock) + { + return __ticket_spin_is_locked(lock); + } + +-static inline int __raw_spin_is_contended(raw_spinlock_t *lock) ++static inline int __raw_spin_is_contended(__raw_spinlock_t *lock) + { + return __ticket_spin_is_contended(lock); + } + #define __raw_spin_is_contended __raw_spin_is_contended + +-static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) ++static __always_inline void __raw_spin_lock(__raw_spinlock_t *lock) + { + __ticket_spin_lock(lock); + } + +-static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) ++static __always_inline int __raw_spin_trylock(__raw_spinlock_t *lock) + { + return __ticket_spin_trylock(lock); + } + +-static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) ++static __always_inline void __raw_spin_unlock(__raw_spinlock_t *lock) + { + __ticket_spin_unlock(lock); + } + +-static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock, ++static __always_inline void __raw_spin_lock_flags(__raw_spinlock_t *lock, + unsigned long flags) + { + __raw_spin_lock(lock); + } + +-#endif /* CONFIG_PARAVIRT */ ++#endif + +-static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) ++static inline void __raw_spin_unlock_wait(__raw_spinlock_t *lock) + { + while (__raw_spin_is_locked(lock)) + cpu_relax(); +@@ -294,7 +232,7 @@ static inline void __raw_spin_unlock_wai + * read_can_lock - would read_trylock() succeed? + * @lock: the rwlock in question. + */ +-static inline int __raw_read_can_lock(raw_rwlock_t *lock) ++static inline int __raw_read_can_lock(__raw_rwlock_t *lock) + { + return (int)(lock)->lock > 0; + } +@@ -303,12 +241,12 @@ static inline int __raw_read_can_lock(ra + * write_can_lock - would write_trylock() succeed? + * @lock: the rwlock in question. 
+ */ +-static inline int __raw_write_can_lock(raw_rwlock_t *lock) ++static inline int __raw_write_can_lock(__raw_rwlock_t *lock) + { + return (lock)->lock == RW_LOCK_BIAS; + } + +-static inline void __raw_read_lock(raw_rwlock_t *rw) ++static inline void __raw_read_lock(__raw_rwlock_t *rw) + { + asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" + "jns 1f\n" +@@ -317,7 +255,7 @@ static inline void __raw_read_lock(raw_r + ::LOCK_PTR_REG (rw) : "memory"); + } + +-static inline void __raw_write_lock(raw_rwlock_t *rw) ++static inline void __raw_write_lock(__raw_rwlock_t *rw) + { + asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t" + "jz 1f\n" +@@ -326,18 +264,17 @@ static inline void __raw_write_lock(raw_ + ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory"); + } + +-static inline int __raw_read_trylock(raw_rwlock_t *lock) ++static inline int __raw_read_trylock(__raw_rwlock_t *lock) + { + atomic_t *count = (atomic_t *)lock; + +- atomic_dec(count); +- if (atomic_read(count) >= 0) ++ if (atomic_dec_return(count) >= 0) + return 1; + atomic_inc(count); + return 0; + } + +-static inline int __raw_write_trylock(raw_rwlock_t *lock) ++static inline int __raw_write_trylock(__raw_rwlock_t *lock) + { + atomic_t *count = (atomic_t *)lock; + +@@ -347,19 +284,19 @@ static inline int __raw_write_trylock(ra + return 0; + } + +-static inline void __raw_read_unlock(raw_rwlock_t *rw) ++static inline void __raw_read_unlock(__raw_rwlock_t *rw) + { + asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory"); + } + +-static inline void __raw_write_unlock(raw_rwlock_t *rw) ++static inline void __raw_write_unlock(__raw_rwlock_t *rw) + { + asm volatile(LOCK_PREFIX "addl %1, %0" + : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); + } + +-#define _raw_spin_relax(lock) cpu_relax() +-#define _raw_read_relax(lock) cpu_relax() +-#define _raw_write_relax(lock) cpu_relax() ++#define __raw_spin_relax(lock) cpu_relax() ++#define __raw_read_relax(lock) cpu_relax() ++#define __raw_write_relax(lock) cpu_relax() + + #endif /* _ASM_X86_SPINLOCK_H */ +Index: linux-2.6-tip/arch/x86/include/asm/stackprotector.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/include/asm/stackprotector.h +@@ -0,0 +1,124 @@ ++/* ++ * GCC stack protector support. ++ * ++ * Stack protector works by putting predefined pattern at the start of ++ * the stack frame and verifying that it hasn't been overwritten when ++ * returning from the function. The pattern is called stack canary ++ * and unfortunately gcc requires it to be at a fixed offset from %gs. ++ * On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64 ++ * and x86_32 use segment registers differently and thus handles this ++ * requirement differently. ++ * ++ * On x86_64, %gs is shared by percpu area and stack canary. All ++ * percpu symbols are zero based and %gs points to the base of percpu ++ * area. The first occupant of the percpu area is always ++ * irq_stack_union which contains stack_canary at offset 40. Userland ++ * %gs is always saved and restored on kernel entry and exit using ++ * swapgs, so stack protector doesn't add any complexity there. ++ * ++ * On x86_32, it's slightly more complicated. As in x86_64, %gs is ++ * used for userland TLS. 
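The spinlock.h hunks above rename the arch-level lock type to __raw_spinlock_t (presumably so the shorter raw_* names can be reused by the -rt lock layering elsewhere in this patch) while leaving the ticket-lock algorithm itself untouched: the lock word holds a "next ticket" and an "owner" field, a locker atomically grabs a ticket with a locked xadd and spins until the owner field reaches its ticket. The following is only a minimal userspace sketch of that idea; ticket_lock_t, ticket_lock() and ticket_unlock() are illustrative names and the GCC __sync builtins stand in for the kernel's asm.

/*
 * Userspace sketch of the ticket-lock idea behind __ticket_spin_lock()
 * and __ticket_spin_unlock(); names and builtins are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

typedef struct {
	volatile uint16_t next;		/* next ticket to hand out */
	volatile uint16_t owner;	/* ticket currently being served */
} ticket_lock_t;

static void ticket_lock(ticket_lock_t *l)
{
	/* atomically take a ticket (the kernel uses lock; xadd for this) */
	uint16_t me = __sync_fetch_and_add(&l->next, 1);

	/* spin until it is our turn -- the pause is the cpu_relax() analogue */
	while (l->owner != me)
		__asm__ __volatile__("pause" ::: "memory");
}

static void ticket_unlock(ticket_lock_t *l)
{
	/* only the current owner writes this field, so a plain increment works */
	__sync_synchronize();
	l->owner++;
}

int main(void)
{
	ticket_lock_t l = { 0, 0 };

	ticket_lock(&l);
	printf("lock taken, serving ticket %u\n", (unsigned)l.owner);
	ticket_unlock(&l);
	printf("lock released, next free ticket %u\n", (unsigned)l.next);
	return 0;
}

The FIFO hand-off is what makes the is_contended test above meaningful: (next - owner) > 1 means at least one CPU is already queued behind the holder.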
Unfortunately, some processors are much ++ * slower at loading segment registers with different value when ++ * entering and leaving the kernel, so the kernel uses %fs for percpu ++ * area and manages %gs lazily so that %gs is switched only when ++ * necessary, usually during task switch. ++ * ++ * As gcc requires the stack canary at %gs:20, %gs can't be managed ++ * lazily if stack protector is enabled, so the kernel saves and ++ * restores userland %gs on kernel entry and exit. This behavior is ++ * controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in ++ * system.h to hide the details. ++ */ ++ ++#ifndef _ASM_STACKPROTECTOR_H ++#define _ASM_STACKPROTECTOR_H 1 ++ ++#ifdef CONFIG_CC_STACKPROTECTOR ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * 24 byte read-only segment initializer for stack canary. Linker ++ * can't handle the address bit shifting. Address will be set in ++ * head_32 for boot CPU and setup_per_cpu_areas() for others. ++ */ ++#define GDT_STACK_CANARY_INIT \ ++ [GDT_ENTRY_STACK_CANARY] = { { { 0x00000018, 0x00409000 } } }, ++ ++/* ++ * Initialize the stackprotector canary value. ++ * ++ * NOTE: this must only be called from functions that never return, ++ * and it must always be inlined. ++ */ ++static __always_inline void boot_init_stack_canary(void) ++{ ++ u64 canary; ++ u64 tsc; ++ ++#ifdef CONFIG_X86_64 ++ BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40); ++#endif ++ /* ++ * We both use the random pool and the current TSC as a source ++ * of randomness. The TSC only matters for very early init, ++ * there it already has some randomness on most systems. Later ++ * on during the bootup the random pool has true entropy too. ++ */ ++ get_random_bytes(&canary, sizeof(canary)); ++ tsc = __native_read_tsc(); ++ canary += tsc + (tsc << 32UL); ++ ++ current->stack_canary = canary; ++#ifdef CONFIG_X86_64 ++ percpu_write(irq_stack_union.stack_canary, canary); ++#else ++ percpu_write(stack_canary, canary); ++#endif ++} ++ ++static inline void setup_stack_canary_segment(int cpu) ++{ ++#ifdef CONFIG_X86_32 ++ unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20; ++ struct desc_struct *gdt_table = get_cpu_gdt_table(cpu); ++ struct desc_struct desc; ++ ++ desc = gdt_table[GDT_ENTRY_STACK_CANARY]; ++ desc.base0 = canary & 0xffff; ++ desc.base1 = (canary >> 16) & 0xff; ++ desc.base2 = (canary >> 24) & 0xff; ++ write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S); ++#endif ++} ++ ++static inline void load_stack_canary_segment(void) ++{ ++#ifdef CONFIG_X86_32 ++ asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory"); ++#endif ++} ++ ++#else /* CC_STACKPROTECTOR */ ++ ++#define GDT_STACK_CANARY_INIT ++ ++/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */ ++ ++static inline void setup_stack_canary_segment(int cpu) ++{ } ++ ++static inline void load_stack_canary_segment(void) ++{ ++#ifdef CONFIG_X86_32 ++ asm volatile ("mov %0, %%gs" : : "r" (0)); ++#endif ++} ++ ++#endif /* CC_STACKPROTECTOR */ ++#endif /* _ASM_STACKPROTECTOR_H */ +Index: linux-2.6-tip/arch/x86/include/asm/string_32.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/string_32.h ++++ linux-2.6-tip/arch/x86/include/asm/string_32.h +@@ -177,10 +177,18 @@ static inline void *__memcpy3d(void *to, + * No 3D Now! + */ + ++#ifndef CONFIG_KMEMCHECK + #define memcpy(t, f, n) \ + (__builtin_constant_p((n)) \ + ? 
__constant_memcpy((t), (f), (n)) \ + : __memcpy((t), (f), (n))) ++#else ++/* ++ * kmemcheck becomes very happy if we use the REP instructions unconditionally, ++ * because it means that we know both memory operands in advance. ++ */ ++#define memcpy(t, f, n) __memcpy((t), (f), (n)) ++#endif + + #endif + +Index: linux-2.6-tip/arch/x86/include/asm/string_64.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/string_64.h ++++ linux-2.6-tip/arch/x86/include/asm/string_64.h +@@ -27,6 +27,7 @@ static __always_inline void *__inline_me + function. */ + + #define __HAVE_ARCH_MEMCPY 1 ++#ifndef CONFIG_KMEMCHECK + #if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 + extern void *memcpy(void *to, const void *from, size_t len); + #else +@@ -42,6 +43,13 @@ extern void *__memcpy(void *to, const vo + __ret; \ + }) + #endif ++#else ++/* ++ * kmemcheck becomes very happy if we use the REP instructions unconditionally, ++ * because it means that we know both memory operands in advance. ++ */ ++#define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len)) ++#endif + + #define __HAVE_ARCH_MEMSET + void *memset(void *s, int c, size_t n); +Index: linux-2.6-tip/arch/x86/include/asm/summit/apic.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/summit/apic.h ++++ /dev/null +@@ -1,202 +0,0 @@ +-#ifndef __ASM_SUMMIT_APIC_H +-#define __ASM_SUMMIT_APIC_H +- +-#include +-#include +- +-#define esr_disable (1) +-#define NO_BALANCE_IRQ (0) +- +-/* In clustered mode, the high nibble of APIC ID is a cluster number. +- * The low nibble is a 4-bit bitmap. */ +-#define XAPIC_DEST_CPUS_SHIFT 4 +-#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) +-#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) +- +-#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) +- +-static inline const cpumask_t *target_cpus(void) +-{ +- /* CPU_MASK_ALL (0xff) has undefined behaviour with +- * dest_LowestPrio mode logical clustered apic interrupt routing +- * Just start on cpu 0. IRQ balancing will spread load +- */ +- return &cpumask_of_cpu(0); +-} +- +-#define INT_DELIVERY_MODE (dest_LowestPrio) +-#define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ +- +-static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) +-{ +- return 0; +-} +- +-/* we don't use the phys_cpu_present_map to indicate apicid presence */ +-static inline unsigned long check_apicid_present(int bit) +-{ +- return 1; +-} +- +-#define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK) +- +-extern u8 cpu_2_logical_apicid[]; +- +-static inline void init_apic_ldr(void) +-{ +- unsigned long val, id; +- int count = 0; +- u8 my_id = (u8)hard_smp_processor_id(); +- u8 my_cluster = (u8)apicid_cluster(my_id); +-#ifdef CONFIG_SMP +- u8 lid; +- int i; +- +- /* Create logical APIC IDs by counting CPUs already in cluster. */ +- for (count = 0, i = nr_cpu_ids; --i >= 0; ) { +- lid = cpu_2_logical_apicid[i]; +- if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) +- ++count; +- } +-#endif +- /* We only have a 4 wide bitmap in cluster mode. If a deranged +- * BIOS puts 5 CPUs in one APIC cluster, we're hosed. 
*/ +- BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); +- id = my_cluster | (1UL << count); +- apic_write(APIC_DFR, APIC_DFR_VALUE); +- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; +- val |= SET_APIC_LOGICAL_ID(id); +- apic_write(APIC_LDR, val); +-} +- +-static inline int multi_timer_check(int apic, int irq) +-{ +- return 0; +-} +- +-static inline int apic_id_registered(void) +-{ +- return 1; +-} +- +-static inline void setup_apic_routing(void) +-{ +- printk("Enabling APIC mode: Summit. Using %d I/O APICs\n", +- nr_ioapics); +-} +- +-static inline int apicid_to_node(int logical_apicid) +-{ +-#ifdef CONFIG_SMP +- return apicid_2_node[hard_smp_processor_id()]; +-#else +- return 0; +-#endif +-} +- +-/* Mapping from cpu number to logical apicid */ +-static inline int cpu_to_logical_apicid(int cpu) +-{ +-#ifdef CONFIG_SMP +- if (cpu >= nr_cpu_ids) +- return BAD_APICID; +- return (int)cpu_2_logical_apicid[cpu]; +-#else +- return logical_smp_processor_id(); +-#endif +-} +- +-static inline int cpu_present_to_apicid(int mps_cpu) +-{ +- if (mps_cpu < nr_cpu_ids) +- return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); +- else +- return BAD_APICID; +-} +- +-static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map) +-{ +- /* For clustered we don't have a good way to do this yet - hack */ +- return physids_promote(0x0F); +-} +- +-static inline physid_mask_t apicid_to_cpu_present(int apicid) +-{ +- return physid_mask_of_physid(0); +-} +- +-static inline void setup_portio_remap(void) +-{ +-} +- +-static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) +-{ +- return 1; +-} +- +-static inline void enable_apic_mode(void) +-{ +-} +- +-static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) +-{ +- int num_bits_set; +- int cpus_found = 0; +- int cpu; +- int apicid; +- +- num_bits_set = cpus_weight(*cpumask); +- /* Return id to all */ +- if (num_bits_set >= nr_cpu_ids) +- return (int) 0xFF; +- /* +- * The cpus in the mask must all be on the apic cluster. If are not +- * on the same apicid cluster return default value of TARGET_CPUS. +- */ +- cpu = first_cpu(*cpumask); +- apicid = cpu_to_logical_apicid(cpu); +- while (cpus_found < num_bits_set) { +- if (cpu_isset(cpu, *cpumask)) { +- int new_apicid = cpu_to_logical_apicid(cpu); +- if (apicid_cluster(apicid) != +- apicid_cluster(new_apicid)){ +- printk ("%s: Not a valid mask!\n", __func__); +- return 0xFF; +- } +- apicid = apicid | new_apicid; +- cpus_found++; +- } +- cpu++; +- } +- return apicid; +-} +- +-static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, +- const struct cpumask *andmask) +-{ +- int apicid = cpu_to_logical_apicid(0); +- cpumask_var_t cpumask; +- +- if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) +- return apicid; +- +- cpumask_and(cpumask, inmask, andmask); +- cpumask_and(cpumask, cpumask, cpu_online_mask); +- apicid = cpu_mask_to_apicid(cpumask); +- +- free_cpumask_var(cpumask); +- return apicid; +-} +- +-/* cpuid returns the value latched in the HW at reset, not the APIC ID +- * register's value. For any box whose BIOS changes APIC IDs, like +- * clustered APIC systems, we must use hard_smp_processor_id. +- * +- * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. 
+- */ +-static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) +-{ +- return hard_smp_processor_id() >> index_msb; +-} +- +-#endif /* __ASM_SUMMIT_APIC_H */ +Index: linux-2.6-tip/arch/x86/include/asm/summit/apicdef.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/summit/apicdef.h ++++ /dev/null +@@ -1,13 +0,0 @@ +-#ifndef __ASM_SUMMIT_APICDEF_H +-#define __ASM_SUMMIT_APICDEF_H +- +-#define APIC_ID_MASK (0xFF<<24) +- +-static inline unsigned get_apic_id(unsigned long x) +-{ +- return (x>>24)&0xFF; +-} +- +-#define GET_APIC_ID(x) get_apic_id(x) +- +-#endif +Index: linux-2.6-tip/arch/x86/include/asm/summit/ipi.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/summit/ipi.h ++++ /dev/null +@@ -1,26 +0,0 @@ +-#ifndef __ASM_SUMMIT_IPI_H +-#define __ASM_SUMMIT_IPI_H +- +-void send_IPI_mask_sequence(const cpumask_t *mask, int vector); +-void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); +- +-static inline void send_IPI_mask(const cpumask_t *mask, int vector) +-{ +- send_IPI_mask_sequence(mask, vector); +-} +- +-static inline void send_IPI_allbutself(int vector) +-{ +- cpumask_t mask = cpu_online_map; +- cpu_clear(smp_processor_id(), mask); +- +- if (!cpus_empty(mask)) +- send_IPI_mask(&mask, vector); +-} +- +-static inline void send_IPI_all(int vector) +-{ +- send_IPI_mask(&cpu_online_map, vector); +-} +- +-#endif /* __ASM_SUMMIT_IPI_H */ +Index: linux-2.6-tip/arch/x86/include/asm/summit/mpparse.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/summit/mpparse.h ++++ /dev/null +@@ -1,109 +0,0 @@ +-#ifndef __ASM_SUMMIT_MPPARSE_H +-#define __ASM_SUMMIT_MPPARSE_H +- +-#include +- +-extern int use_cyclone; +- +-#ifdef CONFIG_X86_SUMMIT_NUMA +-extern void setup_summit(void); +-#else +-#define setup_summit() {} +-#endif +- +-static inline int mps_oem_check(struct mpc_table *mpc, char *oem, +- char *productid) +-{ +- if (!strncmp(oem, "IBM ENSW", 8) && +- (!strncmp(productid, "VIGIL SMP", 9) +- || !strncmp(productid, "EXA", 3) +- || !strncmp(productid, "RUTHLESS SMP", 12))){ +- mark_tsc_unstable("Summit based system"); +- use_cyclone = 1; /*enable cyclone-timer*/ +- setup_summit(); +- return 1; +- } +- return 0; +-} +- +-/* Hook from generic ACPI tables.c */ +-static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) +-{ +- if (!strncmp(oem_id, "IBM", 3) && +- (!strncmp(oem_table_id, "SERVIGIL", 8) +- || !strncmp(oem_table_id, "EXA", 3))){ +- mark_tsc_unstable("Summit based system"); +- use_cyclone = 1; /*enable cyclone-timer*/ +- setup_summit(); +- return 1; +- } +- return 0; +-} +- +-struct rio_table_hdr { +- unsigned char version; /* Version number of this data structure */ +- /* Version 3 adds chassis_num & WP_index */ +- unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */ +- unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */ +-} __attribute__((packed)); +- +-struct scal_detail { +- unsigned char node_id; /* Scalability Node ID */ +- unsigned long CBAR; /* Address of 1MB register space */ +- unsigned char port0node; /* Node ID port connected to: 0xFF=None */ +- unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ +- unsigned char port1node; /* Node ID port connected to: 0xFF = None */ +- unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ +- unsigned char port2node; 
/* Node ID port connected to: 0xFF = None */ +- unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */ +- unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */ +-} __attribute__((packed)); +- +-struct rio_detail { +- unsigned char node_id; /* RIO Node ID */ +- unsigned long BBAR; /* Address of 1MB register space */ +- unsigned char type; /* Type of device */ +- unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/ +- /* For CYC: Node ID of Twister that owns this CYC */ +- unsigned char port0node; /* Node ID port connected to: 0xFF=None */ +- unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ +- unsigned char port1node; /* Node ID port connected to: 0xFF=None */ +- unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ +- unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */ +- /* For CYC: 0 */ +- unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */ +- /* = 0 : the XAPIC is not used, ie:*/ +- /* ints fwded to another XAPIC */ +- /* Bits1:7 Reserved */ +- /* For CYC: Bits0:7 Reserved */ +- unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */ +- /* lower slot numbers/PCI bus numbers */ +- /* For CYC: No meaning */ +- unsigned char chassis_num; /* 1 based Chassis number */ +- /* For LookOut WPEGs this field indicates the */ +- /* Expansion Chassis #, enumerated from Boot */ +- /* Node WPEG external port, then Boot Node CYC */ +- /* external port, then Next Vigil chassis WPEG */ +- /* external port, etc. */ +- /* Shared Lookouts have only 1 chassis number (the */ +- /* first one assigned) */ +-} __attribute__((packed)); +- +- +-typedef enum { +- CompatTwister = 0, /* Compatibility Twister */ +- AltTwister = 1, /* Alternate Twister of internal 8-way */ +- CompatCyclone = 2, /* Compatibility Cyclone */ +- AltCyclone = 3, /* Alternate Cyclone of internal 8-way */ +- CompatWPEG = 4, /* Compatibility WPEG */ +- AltWPEG = 5, /* Second Planar WPEG */ +- LookOutAWPEG = 6, /* LookOut WPEG */ +- LookOutBWPEG = 7, /* LookOut WPEG */ +-} node_type; +- +-static inline int is_WPEG(struct rio_detail *rio){ +- return (rio->type == CompatWPEG || rio->type == AltWPEG || +- rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); +-} +- +-#endif /* __ASM_SUMMIT_MPPARSE_H */ +Index: linux-2.6-tip/arch/x86/include/asm/syscalls.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/syscalls.h ++++ linux-2.6-tip/arch/x86/include/asm/syscalls.h +@@ -29,21 +29,21 @@ asmlinkage int sys_get_thread_area(struc + /* X86_32 only */ + #ifdef CONFIG_X86_32 + /* kernel/process_32.c */ +-asmlinkage int sys_fork(struct pt_regs); +-asmlinkage int sys_clone(struct pt_regs); +-asmlinkage int sys_vfork(struct pt_regs); +-asmlinkage int sys_execve(struct pt_regs); ++int sys_fork(struct pt_regs *); ++int sys_clone(struct pt_regs *); ++int sys_vfork(struct pt_regs *); ++int sys_execve(struct pt_regs *); + + /* kernel/signal_32.c */ + asmlinkage int sys_sigsuspend(int, int, old_sigset_t); + asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, + struct old_sigaction __user *); +-asmlinkage int sys_sigaltstack(unsigned long); +-asmlinkage unsigned long sys_sigreturn(unsigned long); +-asmlinkage int sys_rt_sigreturn(unsigned long); ++int sys_sigaltstack(struct pt_regs *); ++unsigned long sys_sigreturn(struct pt_regs *); ++long sys_rt_sigreturn(struct pt_regs *); + + /* kernel/ioport.c 
*/ +-asmlinkage long sys_iopl(unsigned long); ++long sys_iopl(struct pt_regs *); + + /* kernel/sys_i386_32.c */ + asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, +@@ -59,8 +59,8 @@ struct oldold_utsname; + asmlinkage int sys_olduname(struct oldold_utsname __user *); + + /* kernel/vm86_32.c */ +-asmlinkage int sys_vm86old(struct pt_regs); +-asmlinkage int sys_vm86(struct pt_regs); ++int sys_vm86old(struct pt_regs *); ++int sys_vm86(struct pt_regs *); + + #else /* CONFIG_X86_32 */ + +@@ -74,6 +74,7 @@ asmlinkage long sys_vfork(struct pt_regs + asmlinkage long sys_execve(char __user *, char __user * __user *, + char __user * __user *, + struct pt_regs *); ++long sys_arch_prctl(int, unsigned long); + + /* kernel/ioport.c */ + asmlinkage long sys_iopl(unsigned int, struct pt_regs *); +@@ -81,7 +82,7 @@ asmlinkage long sys_iopl(unsigned int, s + /* kernel/signal_64.c */ + asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, + struct pt_regs *); +-asmlinkage long sys_rt_sigreturn(struct pt_regs *); ++long sys_rt_sigreturn(struct pt_regs *); + + /* kernel/sys_x86_64.c */ + asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, +Index: linux-2.6-tip/arch/x86/include/asm/system.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/system.h ++++ linux-2.6-tip/arch/x86/include/asm/system.h +@@ -20,9 +20,26 @@ + struct task_struct; /* one of the stranger aspects of C forward declarations */ + struct task_struct *__switch_to(struct task_struct *prev, + struct task_struct *next); ++struct tss_struct; ++void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, ++ struct tss_struct *tss); + + #ifdef CONFIG_X86_32 + ++#ifdef CONFIG_CC_STACKPROTECTOR ++#define __switch_canary \ ++ "movl %P[task_canary](%[next]), %%ebx\n\t" \ ++ "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" ++#define __switch_canary_oparam \ ++ , [stack_canary] "=m" (per_cpu_var(stack_canary)) ++#define __switch_canary_iparam \ ++ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) ++#else /* CC_STACKPROTECTOR */ ++#define __switch_canary ++#define __switch_canary_oparam ++#define __switch_canary_iparam ++#endif /* CC_STACKPROTECTOR */ ++ + /* + * Saving eflags is important. It switches not only IOPL between tasks, + * it also protects other tasks from NT leaking through sysenter etc. 
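The __switch_canary glue just added exists because gcc reads the stack-protector canary from a fixed offset off %gs, so the per-task value cannot stay in the task struct alone: on every context switch the asm copies next->stack_canary into the per-cpu slot (stack_canary on 32-bit, irq_stack_union.stack_canary on 64-bit). The value itself is produced once per task by boot_init_stack_canary() in the stackprotector.h hunk earlier, which mixes random bytes with the TSC. A userspace sketch of that mixing step follows; random_u64() and rdtsc() here are stand-ins for get_random_bytes() and __native_read_tsc(), not kernel interfaces.

/*
 * Userspace sketch of the canary mixing done by boot_init_stack_canary();
 * the entropy sources below are illustrative stand-ins only.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static uint64_t rdtsc(void)
{
	uint32_t lo, hi;

	__asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32) | lo;
}

static uint64_t random_u64(void)
{
	/* toy entropy source; the kernel draws from its random pool */
	uint64_t r = 0;
	FILE *f = fopen("/dev/urandom", "rb");

	if (f) {
		if (fread(&r, sizeof(r), 1, f) != 1)
			r = ((uint64_t)rand() << 32) ^ (uint64_t)rand();
		fclose(f);
	}
	return r;
}

int main(void)
{
	uint64_t canary = random_u64();
	uint64_t tsc = rdtsc();

	/*
	 * Same mixing step as the patch: very early in boot the random
	 * pool has little entropy, so the TSC adds some extra variation.
	 */
	canary += tsc + (tsc << 32);

	printf("per-task canary would be %#llx\n", (unsigned long long)canary);
	return 0;
}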
+@@ -44,6 +61,7 @@ do { \ + "movl %[next_sp],%%esp\n\t" /* restore ESP */ \ + "movl $1f,%[prev_ip]\n\t" /* save EIP */ \ + "pushl %[next_ip]\n\t" /* restore EIP */ \ ++ __switch_canary \ + "jmp __switch_to\n" /* regparm call */ \ + "1:\t" \ + "popl %%ebp\n\t" /* restore EBP */ \ +@@ -58,6 +76,8 @@ do { \ + "=b" (ebx), "=c" (ecx), "=d" (edx), \ + "=S" (esi), "=D" (edi) \ + \ ++ __switch_canary_oparam \ ++ \ + /* input parameters: */ \ + : [next_sp] "m" (next->thread.sp), \ + [next_ip] "m" (next->thread.ip), \ +@@ -66,6 +86,8 @@ do { \ + [prev] "a" (prev), \ + [next] "d" (next) \ + \ ++ __switch_canary_iparam \ ++ \ + : /* reloaded segment registers */ \ + "memory"); \ + } while (0) +@@ -86,27 +108,44 @@ do { \ + , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ + "r12", "r13", "r14", "r15" + ++#ifdef CONFIG_CC_STACKPROTECTOR ++#define __switch_canary \ ++ "movq %P[task_canary](%%rsi),%%r8\n\t" \ ++ "movq %%r8,"__percpu_arg([gs_canary])"\n\t" ++#define __switch_canary_oparam \ ++ , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary)) ++#define __switch_canary_iparam \ ++ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) ++#else /* CC_STACKPROTECTOR */ ++#define __switch_canary ++#define __switch_canary_oparam ++#define __switch_canary_iparam ++#endif /* CC_STACKPROTECTOR */ ++ + /* Save restore flags to clear handle leaking NT */ + #define switch_to(prev, next, last) \ +- asm volatile(SAVE_CONTEXT \ ++ asm volatile(SAVE_CONTEXT \ + "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ + "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ + "call __switch_to\n\t" \ + ".globl thread_return\n" \ + "thread_return:\n\t" \ +- "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ ++ "movq "__percpu_arg([current_task])",%%rsi\n\t" \ ++ __switch_canary \ + "movq %P[thread_info](%%rsi),%%r8\n\t" \ +- LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ + "movq %%rax,%%rdi\n\t" \ +- "jc ret_from_fork\n\t" \ ++ "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ ++ "jnz ret_from_fork\n\t" \ + RESTORE_CONTEXT \ + : "=a" (last) \ ++ __switch_canary_oparam \ + : [next] "S" (next), [prev] "D" (prev), \ + [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ + [ti_flags] "i" (offsetof(struct thread_info, flags)), \ +- [tif_fork] "i" (TIF_FORK), \ ++ [_tif_fork] "i" (_TIF_FORK), \ + [thread_info] "i" (offsetof(struct task_struct, stack)), \ +- [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ ++ [current_task] "m" (per_cpu_var(current_task)) \ ++ __switch_canary_iparam \ + : "memory", "cc" __EXTRA_CLOBBER) + #endif + +@@ -165,6 +204,25 @@ extern void native_load_gs_index(unsigne + #define savesegment(seg, value) \ + asm("mov %%" #seg ",%0":"=r" (value) : : "memory") + ++/* ++ * x86_32 user gs accessors. 
++ */ ++#ifdef CONFIG_X86_32 ++#ifdef CONFIG_X86_32_LAZY_GS ++#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;}) ++#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v)) ++#define task_user_gs(tsk) ((tsk)->thread.gs) ++#define lazy_save_gs(v) savesegment(gs, (v)) ++#define lazy_load_gs(v) loadsegment(gs, (v)) ++#else /* X86_32_LAZY_GS */ ++#define get_user_gs(regs) (u16)((regs)->gs) ++#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0) ++#define task_user_gs(tsk) (task_pt_regs(tsk)->gs) ++#define lazy_save_gs(v) do { } while (0) ++#define lazy_load_gs(v) do { } while (0) ++#endif /* X86_32_LAZY_GS */ ++#endif /* X86_32 */ ++ + static inline unsigned long get_limit(unsigned long segment) + { + unsigned long __limit; +Index: linux-2.6-tip/arch/x86/include/asm/thread_info.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/thread_info.h ++++ linux-2.6-tip/arch/x86/include/asm/thread_info.h +@@ -40,6 +40,7 @@ struct thread_info { + */ + __u8 supervisor_stack[0]; + #endif ++ int uaccess_err; + }; + + #define INIT_THREAD_INFO(tsk) \ +@@ -82,6 +83,7 @@ struct thread_info { + #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ + #define TIF_SECCOMP 8 /* secure computing */ + #define TIF_MCE_NOTIFY 10 /* notify userspace of an MCE */ ++#define TIF_PERF_COUNTERS 11 /* notify perf counter work */ + #define TIF_NOTSC 16 /* TSC is not accessible in userland */ + #define TIF_IA32 17 /* 32bit process */ + #define TIF_FORK 18 /* ret_from_fork */ +@@ -93,6 +95,7 @@ struct thread_info { + #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ + #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ + #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ ++#define TIF_SYSCALL_FTRACE 27 /* for ftrace syscall instrumentation */ + + #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) + #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +@@ -104,6 +107,7 @@ struct thread_info { + #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) + #define _TIF_SECCOMP (1 << TIF_SECCOMP) + #define _TIF_MCE_NOTIFY (1 << TIF_MCE_NOTIFY) ++#define _TIF_PERF_COUNTERS (1 << TIF_PERF_COUNTERS) + #define _TIF_NOTSC (1 << TIF_NOTSC) + #define _TIF_IA32 (1 << TIF_IA32) + #define _TIF_FORK (1 << TIF_FORK) +@@ -114,15 +118,17 @@ struct thread_info { + #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) + #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) + #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) ++#define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE) + + /* work to do in syscall_trace_enter() */ + #define _TIF_WORK_SYSCALL_ENTRY \ +- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | \ ++ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_FTRACE | \ + _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | _TIF_SINGLESTEP) + + /* work to do in syscall_trace_leave() */ + #define _TIF_WORK_SYSCALL_EXIT \ +- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP) ++ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \ ++ _TIF_SYSCALL_FTRACE) + + /* work to do on interrupt/exception return */ + #define _TIF_WORK_MASK \ +@@ -131,11 +137,11 @@ struct thread_info { + _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU)) + + /* work to do on any return to user space */ +-#define _TIF_ALLWORK_MASK (0x0000FFFF & ~_TIF_SECCOMP) ++#define _TIF_ALLWORK_MASK ((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_FTRACE) + + /* Only used for 64 bit */ + #define _TIF_DO_NOTIFY_MASK \ +- (_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_NOTIFY_RESUME) ++ 
(_TIF_SIGPENDING|_TIF_MCE_NOTIFY|_TIF_PERF_COUNTERS|_TIF_NOTIFY_RESUME) + + /* flags to check in __switch_to() */ + #define _TIF_WORK_CTXSW \ +@@ -148,9 +154,9 @@ struct thread_info { + + /* thread information allocation */ + #ifdef CONFIG_DEBUG_STACK_USAGE +-#define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) ++#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) + #else +-#define THREAD_FLAGS GFP_KERNEL ++#define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK) + #endif + + #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR +@@ -194,25 +200,21 @@ static inline struct thread_info *curren + + #else /* X86_32 */ + +-#include ++#include ++#define KERNEL_STACK_OFFSET (5*8) + + /* + * macros/functions for gaining access to the thread information structure + * preempt_count needs to be 1 initially, until the scheduler is functional. + */ + #ifndef __ASSEMBLY__ +-static inline struct thread_info *current_thread_info(void) +-{ +- struct thread_info *ti; +- ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE); +- return ti; +-} ++DECLARE_PER_CPU(unsigned long, kernel_stack); + +-/* do not use in interrupt context */ +-static inline struct thread_info *stack_thread_info(void) ++static inline struct thread_info *current_thread_info(void) + { + struct thread_info *ti; +- asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); ++ ti = (void *)(percpu_read(kernel_stack) + ++ KERNEL_STACK_OFFSET - THREAD_SIZE); + return ti; + } + +@@ -220,8 +222,8 @@ static inline struct thread_info *stack_ + + /* how to get the thread information struct from ASM */ + #define GET_THREAD_INFO(reg) \ +- movq %gs:pda_kernelstack,reg ; \ +- subq $(THREAD_SIZE-PDA_STACKOFFSET),reg ++ movq PER_CPU_VAR(kernel_stack),reg ; \ ++ subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg + + #endif + +Index: linux-2.6-tip/arch/x86/include/asm/timer.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/timer.h ++++ linux-2.6-tip/arch/x86/include/asm/timer.h +@@ -3,6 +3,7 @@ + #include + #include + #include ++#include + + #define TICK_SIZE (tick_nsec / 1000) + +@@ -12,6 +13,7 @@ unsigned long native_calibrate_tsc(void) + #ifdef CONFIG_X86_32 + extern int timer_ack; + extern int recalibrate_cpu_khz(void); ++extern irqreturn_t timer_interrupt(int irq, void *dev_id); + #endif /* CONFIG_X86_32 */ + + extern int no_timer_check; +@@ -56,9 +58,9 @@ static inline unsigned long long cycles_ + unsigned long long ns; + unsigned long flags; + +- local_irq_save(flags); ++ raw_local_irq_save(flags); + ns = __cycles_2_ns(cyc); +- local_irq_restore(flags); ++ raw_local_irq_restore(flags); + + return ns; + } +Index: linux-2.6-tip/arch/x86/include/asm/tlbflush.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/tlbflush.h ++++ linux-2.6-tip/arch/x86/include/asm/tlbflush.h +@@ -7,6 +7,21 @@ + #include + #include + ++/* ++ * TLB-flush needs to be nonpreemptible on PREEMPT_RT due to the ++ * following complex race scenario: ++ * ++ * if the current task is lazy-TLB and does a TLB flush and ++ * gets preempted after the movl %%r3, %0 but before the ++ * movl %0, %%cr3 then its ->active_mm might change and it will ++ * install the wrong cr3 when it switches back. This is not a ++ * problem for the lazy-TLB task itself, but if the next task it ++ * switches to has an ->mm that is also the lazy-TLB task's ++ * new ->active_mm, then the scheduler will assume that cr3 is ++ * the new one, while we overwrote it with the old one. 
The result ++ * is the wrong cr3 in the new (non-lazy-TLB) task, which typically ++ * causes an infinite pagefault upon the next userspace access. ++ */ + #ifdef CONFIG_PARAVIRT + #include + #else +@@ -17,7 +32,9 @@ + + static inline void __native_flush_tlb(void) + { ++ preempt_disable(); + write_cr3(read_cr3()); ++ preempt_enable(); + } + + static inline void __native_flush_tlb_global(void) +@@ -95,6 +112,13 @@ static inline void __flush_tlb_one(unsig + + static inline void flush_tlb_mm(struct mm_struct *mm) + { ++ /* ++ * This is safe on PREEMPT_RT because if we preempt ++ * right after the check but before the __flush_tlb(), ++ * and if ->active_mm changes, then we might miss a ++ * TLB flush, but that TLB flush happened already when ++ * ->active_mm was changed: ++ */ + if (mm == current->active_mm) + __flush_tlb(); + } +@@ -113,7 +137,7 @@ static inline void flush_tlb_range(struc + __flush_tlb(); + } + +-static inline void native_flush_tlb_others(const cpumask_t *cpumask, ++static inline void native_flush_tlb_others(const struct cpumask *cpumask, + struct mm_struct *mm, + unsigned long va) + { +@@ -142,31 +166,28 @@ static inline void flush_tlb_range(struc + flush_tlb_mm(vma->vm_mm); + } + +-void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, +- unsigned long va); ++void native_flush_tlb_others(const struct cpumask *cpumask, ++ struct mm_struct *mm, unsigned long va); + + #define TLBSTATE_OK 1 + #define TLBSTATE_LAZY 2 + +-#ifdef CONFIG_X86_32 + struct tlb_state { + struct mm_struct *active_mm; + int state; +- char __cacheline_padding[L1_CACHE_BYTES-8]; + }; + DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); + +-void reset_lazy_tlbstate(void); +-#else + static inline void reset_lazy_tlbstate(void) + { ++ percpu_write(cpu_tlbstate.state, 0); ++ percpu_write(cpu_tlbstate.active_mm, &init_mm); + } +-#endif + + #endif /* SMP */ + + #ifndef CONFIG_PARAVIRT +-#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(&mask, mm, va) ++#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va) + #endif + + static inline void flush_tlb_kernel_range(unsigned long start, +@@ -175,4 +196,6 @@ static inline void flush_tlb_kernel_rang + flush_tlb_all(); + } + ++extern void zap_low_mappings(void); ++ + #endif /* _ASM_X86_TLBFLUSH_H */ +Index: linux-2.6-tip/arch/x86/include/asm/topology.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/topology.h ++++ linux-2.6-tip/arch/x86/include/asm/topology.h +@@ -44,9 +44,6 @@ + + #ifdef CONFIG_X86_32 + +-/* Mappings between node number and cpus on that node. */ +-extern cpumask_t node_to_cpumask_map[]; +- + /* Mappings between logical cpu number and node number */ + extern int cpu_to_node_map[]; + +@@ -57,39 +54,18 @@ static inline int cpu_to_node(int cpu) + } + #define early_cpu_to_node(cpu) cpu_to_node(cpu) + +-/* Returns a bitmask of CPUs on Node 'node'. +- * +- * Side note: this function creates the returned cpumask on the stack +- * so with a high NR_CPUS count, excessive stack space is used. The +- * cpumask_of_node function should be used whenever possible. +- */ +-static inline cpumask_t node_to_cpumask(int node) +-{ +- return node_to_cpumask_map[node]; +-} +- +-/* Returns a bitmask of CPUs on Node 'node'. */ +-static inline const struct cpumask *cpumask_of_node(int node) +-{ +- return &node_to_cpumask_map[node]; +-} +- + #else /* CONFIG_X86_64 */ + +-/* Mappings between node number and cpus on that node. 
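The tlbflush.h hunk above spells out why, on PREEMPT_RT, __native_flush_tlb() now brackets the cr3 reload with preempt_disable()/preempt_enable(): if a lazy-TLB task is preempted between reading and rewriting cr3, its ->active_mm can change underneath it and the write would reinstall a stale page-table root. The skeleton of that pattern, reduced to a runnable userspace stand-in (fake_cr3 models %cr3, the preempt macros are no-ops here, and flush_tlb_local() is a hypothetical name, not the patched function), looks like this:

/*
 * Skeleton of the preemption-safe TLB flush added above; everything in
 * this sketch is a userspace stand-in for illustration only.
 */
#include <stdio.h>

static unsigned long fake_cr3 = 0x1000;	/* pretend page-table root */

#define preempt_disable()	do { } while (0)	/* kernel: really disables preemption */
#define preempt_enable()	do { } while (0)

static unsigned long read_cr3(void)	{ return fake_cr3; }	/* stand-in */
static void write_cr3(unsigned long v)	{ fake_cr3 = v; }	/* stand-in */

static void flush_tlb_local(void)
{
	preempt_disable();
	/*
	 * Between the read and the write the task must not be preempted or
	 * migrated: a lazy-TLB task preempted here could see ->active_mm
	 * change, and the write below would then reinstall a stale root.
	 */
	write_cr3(read_cr3());
	preempt_enable();
}

int main(void)
{
	flush_tlb_local();
	printf("cr3 reloaded: %#lx\n", fake_cr3);
	return 0;
}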
*/ +-extern cpumask_t *node_to_cpumask_map; +- + /* Mappings between logical cpu number and node number */ + DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); + + /* Returns the number of the current Node. */ +-#define numa_node_id() read_pda(nodenumber) ++DECLARE_PER_CPU(int, node_number); ++#define numa_node_id() percpu_read(node_number) + + #ifdef CONFIG_DEBUG_PER_CPU_MAPS + extern int cpu_to_node(int cpu); + extern int early_cpu_to_node(int cpu); +-extern const cpumask_t *cpumask_of_node(int node); +-extern cpumask_t node_to_cpumask(int node); + + #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ + +@@ -102,37 +78,27 @@ static inline int cpu_to_node(int cpu) + /* Same function but used if called before per_cpu areas are setup */ + static inline int early_cpu_to_node(int cpu) + { +- if (early_per_cpu_ptr(x86_cpu_to_node_map)) +- return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; +- +- return per_cpu(x86_cpu_to_node_map, cpu); ++ return early_per_cpu(x86_cpu_to_node_map, cpu); + } + +-/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ +-static inline const cpumask_t *cpumask_of_node(int node) +-{ +- return &node_to_cpumask_map[node]; +-} ++#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ + +-/* Returns a bitmask of CPUs on Node 'node'. */ +-static inline cpumask_t node_to_cpumask(int node) ++#endif /* CONFIG_X86_64 */ ++ ++/* Mappings between node number and cpus on that node. */ ++extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; ++ ++#ifdef CONFIG_DEBUG_PER_CPU_MAPS ++extern const struct cpumask *cpumask_of_node(int node); ++#else ++/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ ++static inline const struct cpumask *cpumask_of_node(int node) + { + return node_to_cpumask_map[node]; + } ++#endif + +-#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ +- +-/* +- * Replace default node_to_cpumask_ptr with optimized version +- * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" +- */ +-#define node_to_cpumask_ptr(v, node) \ +- const cpumask_t *v = cpumask_of_node(node) +- +-#define node_to_cpumask_ptr_next(v, node) \ +- v = cpumask_of_node(node) +- +-#endif /* CONFIG_X86_64 */ ++extern void setup_node_to_cpumask_map(void); + + /* + * Returns the number of the node containing Node 'node'. 
This +@@ -141,7 +107,6 @@ static inline cpumask_t node_to_cpumask( + #define parent_node(node) (node) + + #define pcibus_to_node(bus) __pcibus_to_node(bus) +-#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus) + + #ifdef CONFIG_X86_32 + extern unsigned long node_start_pfn[]; +@@ -192,32 +157,32 @@ extern int __node_distance(int, int); + + #else /* !CONFIG_NUMA */ + +-#define numa_node_id() 0 +-#define cpu_to_node(cpu) 0 +-#define early_cpu_to_node(cpu) 0 ++static inline int numa_node_id(void) ++{ ++ return 0; ++} ++ ++static inline int cpu_to_node(int cpu) ++{ ++ return 0; ++} + +-static inline const cpumask_t *cpumask_of_node(int node) ++static inline int early_cpu_to_node(int cpu) + { +- return &cpu_online_map; ++ return 0; + } +-static inline cpumask_t node_to_cpumask(int node) ++ ++static inline const struct cpumask *cpumask_of_node(int node) + { +- return cpu_online_map; ++ return cpu_online_mask; + } + static inline int node_to_first_cpu(int node) + { +- return first_cpu(cpu_online_map); ++ return cpumask_first(cpu_online_mask); + } + +-/* +- * Replace default node_to_cpumask_ptr with optimized version +- * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" +- */ +-#define node_to_cpumask_ptr(v, node) \ +- const cpumask_t *v = cpumask_of_node(node) ++static inline void setup_node_to_cpumask_map(void) { } + +-#define node_to_cpumask_ptr_next(v, node) \ +- v = cpumask_of_node(node) + #endif + + #include +@@ -230,16 +195,13 @@ static inline int node_to_first_cpu(int + } + #endif + +-extern cpumask_t cpu_coregroup_map(int cpu); + extern const struct cpumask *cpu_coregroup_mask(int cpu); + + #ifdef ENABLE_TOPO_DEFINES + #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) + #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) +-#define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) +-#define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) +-#define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu)) +-#define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) ++#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) ++#define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) + + /* indicates that pointers to the topology cpumask_t maps are valid */ + #define arch_provides_topology_pointers yes +@@ -253,7 +215,7 @@ struct pci_bus; + void set_pci_bus_resources_arch_default(struct pci_bus *b); + + #ifdef CONFIG_SMP +-#define mc_capable() (cpus_weight(per_cpu(cpu_core_map, 0)) != nr_cpu_ids) ++#define mc_capable() (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids) + #define smt_capable() (smp_num_siblings > 1) + #endif + +Index: linux-2.6-tip/arch/x86/include/asm/trampoline.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/trampoline.h ++++ linux-2.6-tip/arch/x86/include/asm/trampoline.h +@@ -13,6 +13,7 @@ extern unsigned char *trampoline_base; + + extern unsigned long init_rsp; + extern unsigned long initial_code; ++extern unsigned long initial_gs; + + #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) + #define TRAMPOLINE_BASE 0x6000 +Index: linux-2.6-tip/arch/x86/include/asm/traps.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/traps.h ++++ linux-2.6-tip/arch/x86/include/asm/traps.h +@@ -41,7 +41,7 @@ dotraplinkage void do_int3(struct pt_reg + dotraplinkage void do_overflow(struct pt_regs *, long); + dotraplinkage void 
do_bounds(struct pt_regs *, long); + dotraplinkage void do_invalid_op(struct pt_regs *, long); +-dotraplinkage void do_device_not_available(struct pt_regs); ++dotraplinkage void do_device_not_available(struct pt_regs *, long); + dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); + dotraplinkage void do_invalid_TSS(struct pt_regs *, long); + dotraplinkage void do_segment_not_present(struct pt_regs *, long); +Index: linux-2.6-tip/arch/x86/include/asm/uaccess.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/uaccess.h ++++ linux-2.6-tip/arch/x86/include/asm/uaccess.h +@@ -121,7 +121,7 @@ extern int __get_user_bad(void); + + #define __get_user_x(size, ret, x, ptr) \ + asm volatile("call __get_user_" #size \ +- : "=a" (ret),"=d" (x) \ ++ : "=a" (ret), "=d" (x) \ + : "0" (ptr)) \ + + /* Careful: we have to cast the result to the type of the pointer +@@ -181,12 +181,12 @@ extern int __get_user_bad(void); + + #define __put_user_x(size, x, ptr, __ret_pu) \ + asm volatile("call __put_user_" #size : "=a" (__ret_pu) \ +- :"0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") ++ : "0" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") + + + + #ifdef CONFIG_X86_32 +-#define __put_user_u64(x, addr, err) \ ++#define __put_user_asm_u64(x, addr, err, errret) \ + asm volatile("1: movl %%eax,0(%2)\n" \ + "2: movl %%edx,4(%2)\n" \ + "3:\n" \ +@@ -197,14 +197,24 @@ extern int __get_user_bad(void); + _ASM_EXTABLE(1b, 4b) \ + _ASM_EXTABLE(2b, 4b) \ + : "=r" (err) \ +- : "A" (x), "r" (addr), "i" (-EFAULT), "0" (err)) ++ : "A" (x), "r" (addr), "i" (errret), "0" (err)) ++ ++#define __put_user_asm_ex_u64(x, addr) \ ++ asm volatile("1: movl %%eax,0(%1)\n" \ ++ "2: movl %%edx,4(%1)\n" \ ++ "3:\n" \ ++ _ASM_EXTABLE(1b, 2b - 1b) \ ++ _ASM_EXTABLE(2b, 3b - 2b) \ ++ : : "A" (x), "r" (addr)) + + #define __put_user_x8(x, ptr, __ret_pu) \ + asm volatile("call __put_user_8" : "=a" (__ret_pu) \ + : "A" ((typeof(*(ptr)))(x)), "c" (ptr) : "ebx") + #else +-#define __put_user_u64(x, ptr, retval) \ +- __put_user_asm(x, ptr, retval, "q", "", "Zr", -EFAULT) ++#define __put_user_asm_u64(x, ptr, retval, errret) \ ++ __put_user_asm(x, ptr, retval, "q", "", "Zr", errret) ++#define __put_user_asm_ex_u64(x, addr) \ ++ __put_user_asm_ex(x, addr, "q", "", "Zr") + #define __put_user_x8(x, ptr, __ret_pu) __put_user_x(8, x, ptr, __ret_pu) + #endif + +@@ -276,10 +286,32 @@ do { \ + __put_user_asm(x, ptr, retval, "w", "w", "ir", errret); \ + break; \ + case 4: \ +- __put_user_asm(x, ptr, retval, "l", "k", "ir", errret);\ ++ __put_user_asm(x, ptr, retval, "l", "k", "ir", errret); \ + break; \ + case 8: \ +- __put_user_u64((__typeof__(*ptr))(x), ptr, retval); \ ++ __put_user_asm_u64((__typeof__(*ptr))(x), ptr, retval, \ ++ errret); \ ++ break; \ ++ default: \ ++ __put_user_bad(); \ ++ } \ ++} while (0) ++ ++#define __put_user_size_ex(x, ptr, size) \ ++do { \ ++ __chk_user_ptr(ptr); \ ++ switch (size) { \ ++ case 1: \ ++ __put_user_asm_ex(x, ptr, "b", "b", "iq"); \ ++ break; \ ++ case 2: \ ++ __put_user_asm_ex(x, ptr, "w", "w", "ir"); \ ++ break; \ ++ case 4: \ ++ __put_user_asm_ex(x, ptr, "l", "k", "ir"); \ ++ break; \ ++ case 8: \ ++ __put_user_asm_ex_u64((__typeof__(*ptr))(x), ptr); \ + break; \ + default: \ + __put_user_bad(); \ +@@ -311,9 +343,12 @@ do { \ + + #ifdef CONFIG_X86_32 + #define __get_user_asm_u64(x, ptr, retval, errret) (x) = __get_user_bad() ++#define __get_user_asm_ex_u64(x, ptr) (x) = __get_user_bad() + #else + #define __get_user_asm_u64(x, ptr, retval, 
errret) \ + __get_user_asm(x, ptr, retval, "q", "", "=r", errret) ++#define __get_user_asm_ex_u64(x, ptr) \ ++ __get_user_asm_ex(x, ptr, "q", "", "=r") + #endif + + #define __get_user_size(x, ptr, size, retval, errret) \ +@@ -350,6 +385,33 @@ do { \ + : "=r" (err), ltype(x) \ + : "m" (__m(addr)), "i" (errret), "0" (err)) + ++#define __get_user_size_ex(x, ptr, size) \ ++do { \ ++ __chk_user_ptr(ptr); \ ++ switch (size) { \ ++ case 1: \ ++ __get_user_asm_ex(x, ptr, "b", "b", "=q"); \ ++ break; \ ++ case 2: \ ++ __get_user_asm_ex(x, ptr, "w", "w", "=r"); \ ++ break; \ ++ case 4: \ ++ __get_user_asm_ex(x, ptr, "l", "k", "=r"); \ ++ break; \ ++ case 8: \ ++ __get_user_asm_ex_u64(x, ptr); \ ++ break; \ ++ default: \ ++ (x) = __get_user_bad(); \ ++ } \ ++} while (0) ++ ++#define __get_user_asm_ex(x, addr, itype, rtype, ltype) \ ++ asm volatile("1: mov"itype" %1,%"rtype"0\n" \ ++ "2:\n" \ ++ _ASM_EXTABLE(1b, 2b - 1b) \ ++ : ltype(x) : "m" (__m(addr))) ++ + #define __put_user_nocheck(x, ptr, size) \ + ({ \ + int __pu_err; \ +@@ -385,6 +447,26 @@ struct __large_struct { unsigned long bu + _ASM_EXTABLE(1b, 3b) \ + : "=r"(err) \ + : ltype(x), "m" (__m(addr)), "i" (errret), "0" (err)) ++ ++#define __put_user_asm_ex(x, addr, itype, rtype, ltype) \ ++ asm volatile("1: mov"itype" %"rtype"0,%1\n" \ ++ "2:\n" \ ++ _ASM_EXTABLE(1b, 2b - 1b) \ ++ : : ltype(x), "m" (__m(addr))) ++ ++/* ++ * uaccess_try and catch ++ */ ++#define uaccess_try do { \ ++ int prev_err = current_thread_info()->uaccess_err; \ ++ current_thread_info()->uaccess_err = 0; \ ++ barrier(); ++ ++#define uaccess_catch(err) \ ++ (err) |= current_thread_info()->uaccess_err; \ ++ current_thread_info()->uaccess_err = prev_err; \ ++} while (0) ++ + /** + * __get_user: - Get a simple variable from user space, with less checking. + * @x: Variable to store result. +@@ -408,6 +490,7 @@ struct __large_struct { unsigned long bu + + #define __get_user(x, ptr) \ + __get_user_nocheck((x), (ptr), sizeof(*(ptr))) ++ + /** + * __put_user: - Write a simple value into user space, with less checking. + * @x: Value to copy to user space. 
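The uaccess_try/uaccess_catch macros above, together with the new __get_user_asm_ex()/__put_user_asm_ex() variants, let a run of user accesses skip per-access error propagation: a faulting access simply resumes at the next instruction, the fault is noted in the uaccess_err field added to thread_info earlier in this patch, and the accumulated result is inspected once at the catch. The next hunk builds get_user_try/get_user_ex on top of this. A plain-C sketch of the same shape follows; copy_field(), copy_three() and thread_err are illustrative stand-ins, not kernel interfaces.

/*
 * Plain-C sketch of the error-accumulation idea behind uaccess_try /
 * uaccess_catch: individual accesses set a per-thread flag on failure
 * instead of returning an error, and the flag is checked once at the end.
 */
#include <stdio.h>

static int thread_err;			/* models current_thread_info()->uaccess_err */

static void copy_field(int *dst, const int *src)
{
	if (!src) {			/* models a faulting user access */
		thread_err = 1;
		return;
	}
	*dst = *src;
}

static int copy_three(int *a, int *b, int *c,
		      const int *ua, const int *ub, const int *uc)
{
	int err, prev = thread_err;

	thread_err = 0;			/* uaccess_try */
	copy_field(a, ua);		/* get_user_ex()-style accesses ... */
	copy_field(b, ub);
	copy_field(c, uc);
	err = thread_err;		/* uaccess_catch(err) */
	thread_err = prev;
	return err ? -14 /* -EFAULT */ : 0;
}

int main(void)
{
	int x = 1, y = 2, z = 3, a, b, c;

	printf("all copies ok:   %d\n", copy_three(&a, &b, &c, &x, &y, &z));
	printf("one copy faults: %d\n", copy_three(&a, &b, &c, &x, NULL, &z));
	return 0;
}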
+@@ -435,6 +518,45 @@ struct __large_struct { unsigned long bu + #define __put_user_unaligned __put_user + + /* ++ * {get|put}_user_try and catch ++ * ++ * get_user_try { ++ * get_user_ex(...); ++ * } get_user_catch(err) ++ */ ++#define get_user_try uaccess_try ++#define get_user_catch(err) uaccess_catch(err) ++ ++#define get_user_ex(x, ptr) do { \ ++ unsigned long __gue_val; \ ++ __get_user_size_ex((__gue_val), (ptr), (sizeof(*(ptr)))); \ ++ (x) = (__force __typeof__(*(ptr)))__gue_val; \ ++} while (0) ++ ++#ifdef CONFIG_X86_WP_WORKS_OK ++ ++#define put_user_try uaccess_try ++#define put_user_catch(err) uaccess_catch(err) ++ ++#define put_user_ex(x, ptr) \ ++ __put_user_size_ex((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr))) ++ ++#else /* !CONFIG_X86_WP_WORKS_OK */ ++ ++#define put_user_try do { \ ++ int __uaccess_err = 0; ++ ++#define put_user_catch(err) \ ++ (err) |= __uaccess_err; \ ++} while (0) ++ ++#define put_user_ex(x, ptr) do { \ ++ __uaccess_err |= __put_user(x, ptr); \ ++} while (0) ++ ++#endif /* CONFIG_X86_WP_WORKS_OK */ ++ ++/* + * movsl can be slow when source and dest are not both 8-byte aligned + */ + #ifdef CONFIG_X86_INTEL_USERCOPY +Index: linux-2.6-tip/arch/x86/include/asm/uaccess_64.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/uaccess_64.h ++++ linux-2.6-tip/arch/x86/include/asm/uaccess_64.h +@@ -188,16 +188,16 @@ __copy_to_user_inatomic(void __user *dst + extern long __copy_user_nocache(void *dst, const void __user *src, + unsigned size, int zerorest); + +-static inline int __copy_from_user_nocache(void *dst, const void __user *src, +- unsigned size) ++static inline int ++__copy_from_user_nocache(void *dst, const void __user *src, unsigned size) + { + might_sleep(); + return __copy_user_nocache(dst, src, size, 1); + } + +-static inline int __copy_from_user_inatomic_nocache(void *dst, +- const void __user *src, +- unsigned size) ++static inline int ++__copy_from_user_inatomic_nocache(void *dst, const void __user *src, ++ unsigned size) + { + return __copy_user_nocache(dst, src, size, 0); + } +Index: linux-2.6-tip/arch/x86/include/asm/unistd_32.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/unistd_32.h ++++ linux-2.6-tip/arch/x86/include/asm/unistd_32.h +@@ -338,6 +338,10 @@ + #define __NR_dup3 330 + #define __NR_pipe2 331 + #define __NR_inotify_init1 332 ++#define __NR_preadv 333 ++#define __NR_pwritev 334 ++#define __NR_rt_tgsigqueueinfo 335 ++#define __NR_perf_counter_open 336 + + #ifdef __KERNEL__ + +Index: linux-2.6-tip/arch/x86/include/asm/unistd_64.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/unistd_64.h ++++ linux-2.6-tip/arch/x86/include/asm/unistd_64.h +@@ -653,7 +653,14 @@ __SYSCALL(__NR_dup3, sys_dup3) + __SYSCALL(__NR_pipe2, sys_pipe2) + #define __NR_inotify_init1 294 + __SYSCALL(__NR_inotify_init1, sys_inotify_init1) +- ++#define __NR_preadv 295 ++__SYSCALL(__NR_preadv, sys_ni_syscall) ++#define __NR_pwritev 296 ++__SYSCALL(__NR_pwritev, sys_ni_syscall) ++#define __NR_rt_tgsigqueueinfo 297 ++__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) ++#define __NR_perf_counter_open 298 ++__SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) + + #ifndef __NO_STUBS + #define __ARCH_WANT_OLD_READDIR +Index: linux-2.6-tip/arch/x86/include/asm/uv/uv.h +=================================================================== +--- /dev/null ++++ 
linux-2.6-tip/arch/x86/include/asm/uv/uv.h +@@ -0,0 +1,33 @@ ++#ifndef _ASM_X86_UV_UV_H ++#define _ASM_X86_UV_UV_H ++ ++enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; ++ ++struct cpumask; ++struct mm_struct; ++ ++#ifdef CONFIG_X86_UV ++ ++extern enum uv_system_type get_uv_system_type(void); ++extern int is_uv_system(void); ++extern void uv_cpu_init(void); ++extern void uv_system_init(void); ++extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, ++ struct mm_struct *mm, ++ unsigned long va, ++ unsigned int cpu); ++ ++#else /* X86_UV */ ++ ++static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } ++static inline int is_uv_system(void) { return 0; } ++static inline void uv_cpu_init(void) { } ++static inline void uv_system_init(void) { } ++static inline const struct cpumask * ++uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, ++ unsigned long va, unsigned int cpu) ++{ return cpumask; } ++ ++#endif /* X86_UV */ ++ ++#endif /* _ASM_X86_UV_UV_H */ +Index: linux-2.6-tip/arch/x86/include/asm/uv/uv_bau.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/uv/uv_bau.h ++++ linux-2.6-tip/arch/x86/include/asm/uv/uv_bau.h +@@ -325,7 +325,6 @@ static inline void bau_cpubits_clear(str + #define cpubit_isset(cpu, bau_local_cpumask) \ + test_bit((cpu), (bau_local_cpumask).bits) + +-extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long); + extern void uv_bau_message_intr1(void); + extern void uv_bau_timeout_intr1(void); + +Index: linux-2.6-tip/arch/x86/include/asm/uv/uv_hub.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/uv/uv_hub.h ++++ linux-2.6-tip/arch/x86/include/asm/uv/uv_hub.h +@@ -199,6 +199,10 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __ + #define SCIR_CPU_ACTIVITY 0x02 /* not idle */ + #define SCIR_CPU_HB_INTERVAL (HZ) /* once per second */ + ++/* Loop through all installed blades */ ++#define for_each_possible_blade(bid) \ ++ for ((bid) = 0; (bid) < uv_num_possible_blades(); (bid)++) ++ + /* + * Macros for converting between kernel virtual addresses, socket local physical + * addresses, and UV global physical addresses. +Index: linux-2.6-tip/arch/x86/include/asm/vic.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/vic.h ++++ /dev/null +@@ -1,61 +0,0 @@ +-/* Copyright (C) 1999,2001 +- * +- * Author: J.E.J.Bottomley@HansenPartnership.com +- * +- * Standard include definitions for the NCR Voyager Interrupt Controller */ +- +-/* The eight CPI vectors. To activate a CPI, you write a bit mask +- * corresponding to the processor set to be interrupted into the +- * relevant register. 
That set of CPUs will then be interrupted with +- * the CPI */ +-static const int VIC_CPI_Registers[] = +- {0xFC00, 0xFC01, 0xFC08, 0xFC09, +- 0xFC10, 0xFC11, 0xFC18, 0xFC19 }; +- +-#define VIC_PROC_WHO_AM_I 0xfc29 +-# define QUAD_IDENTIFIER 0xC0 +-# define EIGHT_SLOT_IDENTIFIER 0xE0 +-#define QIC_EXTENDED_PROCESSOR_SELECT 0xFC72 +-#define VIC_CPI_BASE_REGISTER 0xFC41 +-#define VIC_PROCESSOR_ID 0xFC21 +-# define VIC_CPU_MASQUERADE_ENABLE 0x8 +- +-#define VIC_CLAIM_REGISTER_0 0xFC38 +-#define VIC_CLAIM_REGISTER_1 0xFC39 +-#define VIC_REDIRECT_REGISTER_0 0xFC60 +-#define VIC_REDIRECT_REGISTER_1 0xFC61 +-#define VIC_PRIORITY_REGISTER 0xFC20 +- +-#define VIC_PRIMARY_MC_BASE 0xFC48 +-#define VIC_SECONDARY_MC_BASE 0xFC49 +- +-#define QIC_PROCESSOR_ID 0xFC71 +-# define QIC_CPUID_ENABLE 0x08 +- +-#define QIC_VIC_CPI_BASE_REGISTER 0xFC79 +-#define QIC_CPI_BASE_REGISTER 0xFC7A +- +-#define QIC_MASK_REGISTER0 0xFC80 +-/* NOTE: these are masked high, enabled low */ +-# define QIC_PERF_TIMER 0x01 +-# define QIC_LPE 0x02 +-# define QIC_SYS_INT 0x04 +-# define QIC_CMN_INT 0x08 +-/* at the moment, just enable CMN_INT, disable SYS_INT */ +-# define QIC_DEFAULT_MASK0 (~(QIC_CMN_INT /* | VIC_SYS_INT */)) +-#define QIC_MASK_REGISTER1 0xFC81 +-# define QIC_BOOT_CPI_MASK 0xFE +-/* Enable CPI's 1-6 inclusive */ +-# define QIC_CPI_ENABLE 0x81 +- +-#define QIC_INTERRUPT_CLEAR0 0xFC8A +-#define QIC_INTERRUPT_CLEAR1 0xFC8B +- +-/* this is where we place the CPI vectors */ +-#define VIC_DEFAULT_CPI_BASE 0xC0 +-/* this is where we place the QIC CPI vectors */ +-#define QIC_DEFAULT_CPI_BASE 0xD0 +- +-#define VIC_BOOT_INTERRUPT_MASK 0xfe +- +-extern void smp_vic_timer_interrupt(void); +Index: linux-2.6-tip/arch/x86/include/asm/voyager.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/voyager.h ++++ /dev/null +@@ -1,529 +0,0 @@ +-/* Copyright (C) 1999,2001 +- * +- * Author: J.E.J.Bottomley@HansenPartnership.com +- * +- * Standard include definitions for the NCR Voyager system */ +- +-#undef VOYAGER_DEBUG +-#undef VOYAGER_CAT_DEBUG +- +-#ifdef VOYAGER_DEBUG +-#define VDEBUG(x) printk x +-#else +-#define VDEBUG(x) +-#endif +- +-/* There are three levels of voyager machine: 3,4 and 5. The rule is +- * if it's less than 3435 it's a Level 3 except for a 3360 which is +- * a level 4. 
A 3435 or above is a Level 5 */ +-#define VOYAGER_LEVEL5_AND_ABOVE 0x3435 +-#define VOYAGER_LEVEL4 0x3360 +- +-/* The L4 DINO ASIC */ +-#define VOYAGER_DINO 0x43 +- +-/* voyager ports in standard I/O space */ +-#define VOYAGER_MC_SETUP 0x96 +- +- +-#define VOYAGER_CAT_CONFIG_PORT 0x97 +-# define VOYAGER_CAT_DESELECT 0xff +-#define VOYAGER_SSPB_RELOCATION_PORT 0x98 +- +-/* Valid CAT controller commands */ +-/* start instruction register cycle */ +-#define VOYAGER_CAT_IRCYC 0x01 +-/* start data register cycle */ +-#define VOYAGER_CAT_DRCYC 0x02 +-/* move to execute state */ +-#define VOYAGER_CAT_RUN 0x0F +-/* end operation */ +-#define VOYAGER_CAT_END 0x80 +-/* hold in idle state */ +-#define VOYAGER_CAT_HOLD 0x90 +-/* single step an "intest" vector */ +-#define VOYAGER_CAT_STEP 0xE0 +-/* return cat controller to CLEMSON mode */ +-#define VOYAGER_CAT_CLEMSON 0xFF +- +-/* the default cat command header */ +-#define VOYAGER_CAT_HEADER 0x7F +- +-/* the range of possible CAT module ids in the system */ +-#define VOYAGER_MIN_MODULE 0x10 +-#define VOYAGER_MAX_MODULE 0x1f +- +-/* The voyager registers per asic */ +-#define VOYAGER_ASIC_ID_REG 0x00 +-#define VOYAGER_ASIC_TYPE_REG 0x01 +-/* the sub address registers can be made auto incrementing on reads */ +-#define VOYAGER_AUTO_INC_REG 0x02 +-# define VOYAGER_AUTO_INC 0x04 +-# define VOYAGER_NO_AUTO_INC 0xfb +-#define VOYAGER_SUBADDRDATA 0x03 +-#define VOYAGER_SCANPATH 0x05 +-# define VOYAGER_CONNECT_ASIC 0x01 +-# define VOYAGER_DISCONNECT_ASIC 0xfe +-#define VOYAGER_SUBADDRLO 0x06 +-#define VOYAGER_SUBADDRHI 0x07 +-#define VOYAGER_SUBMODSELECT 0x08 +-#define VOYAGER_SUBMODPRESENT 0x09 +- +-#define VOYAGER_SUBADDR_LO 0xff +-#define VOYAGER_SUBADDR_HI 0xffff +- +-/* the maximum size of a scan path -- used to form instructions */ +-#define VOYAGER_MAX_SCAN_PATH 0x100 +-/* the biggest possible register size (in bytes) */ +-#define VOYAGER_MAX_REG_SIZE 4 +- +-/* Total number of possible modules (including submodules) */ +-#define VOYAGER_MAX_MODULES 16 +-/* Largest number of asics per module */ +-#define VOYAGER_MAX_ASICS_PER_MODULE 7 +- +-/* the CAT asic of each module is always the first one */ +-#define VOYAGER_CAT_ID 0 +-#define VOYAGER_PSI 0x1a +- +-/* voyager instruction operations and registers */ +-#define VOYAGER_READ_CONFIG 0x1 +-#define VOYAGER_WRITE_CONFIG 0x2 +-#define VOYAGER_BYPASS 0xff +- +-typedef struct voyager_asic { +- __u8 asic_addr; /* ASIC address; Level 4 */ +- __u8 asic_type; /* ASIC type */ +- __u8 asic_id; /* ASIC id */ +- __u8 jtag_id[4]; /* JTAG id */ +- __u8 asic_location; /* Location within scan path; start w/ 0 */ +- __u8 bit_location; /* Location within bit stream; start w/ 0 */ +- __u8 ireg_length; /* Instruction register length */ +- __u16 subaddr; /* Amount of sub address space */ +- struct voyager_asic *next; /* Next asic in linked list */ +-} voyager_asic_t; +- +-typedef struct voyager_module { +- __u8 module_addr; /* Module address */ +- __u8 scan_path_connected; /* Scan path connected */ +- __u16 ee_size; /* Size of the EEPROM */ +- __u16 num_asics; /* Number of Asics */ +- __u16 inst_bits; /* Instruction bits in the scan path */ +- __u16 largest_reg; /* Largest register in the scan path */ +- __u16 smallest_reg; /* Smallest register in the scan path */ +- voyager_asic_t *asic; /* First ASIC in scan path (CAT_I) */ +- struct voyager_module *submodule; /* Submodule pointer */ +- struct voyager_module *next; /* Next module in linked list */ +-} voyager_module_t; +- +-typedef struct voyager_eeprom_hdr { +- __u8 
module_id[4]; +- __u8 version_id; +- __u8 config_id; +- __u16 boundry_id; /* boundary scan id */ +- __u16 ee_size; /* size of EEPROM */ +- __u8 assembly[11]; /* assembly # */ +- __u8 assembly_rev; /* assembly rev */ +- __u8 tracer[4]; /* tracer number */ +- __u16 assembly_cksum; /* asm checksum */ +- __u16 power_consump; /* pwr requirements */ +- __u16 num_asics; /* number of asics */ +- __u16 bist_time; /* min. bist time */ +- __u16 err_log_offset; /* error log offset */ +- __u16 scan_path_offset;/* scan path offset */ +- __u16 cct_offset; +- __u16 log_length; /* length of err log */ +- __u16 xsum_end; /* offset to end of +- checksum */ +- __u8 reserved[4]; +- __u8 sflag; /* starting sentinal */ +- __u8 part_number[13]; /* prom part number */ +- __u8 version[10]; /* version number */ +- __u8 signature[8]; +- __u16 eeprom_chksum; +- __u32 data_stamp_offset; +- __u8 eflag ; /* ending sentinal */ +-} __attribute__((packed)) voyager_eprom_hdr_t; +- +- +- +-#define VOYAGER_EPROM_SIZE_OFFSET \ +- ((__u16)(&(((voyager_eprom_hdr_t *)0)->ee_size))) +-#define VOYAGER_XSUM_END_OFFSET 0x2a +- +-/* the following three definitions are for internal table layouts +- * in the module EPROMs. We really only care about the IDs and +- * offsets */ +-typedef struct voyager_sp_table { +- __u8 asic_id; +- __u8 bypass_flag; +- __u16 asic_data_offset; +- __u16 config_data_offset; +-} __attribute__((packed)) voyager_sp_table_t; +- +-typedef struct voyager_jtag_table { +- __u8 icode[4]; +- __u8 runbist[4]; +- __u8 intest[4]; +- __u8 samp_preld[4]; +- __u8 ireg_len; +-} __attribute__((packed)) voyager_jtt_t; +- +-typedef struct voyager_asic_data_table { +- __u8 jtag_id[4]; +- __u16 length_bsr; +- __u16 length_bist_reg; +- __u32 bist_clk; +- __u16 subaddr_bits; +- __u16 seed_bits; +- __u16 sig_bits; +- __u16 jtag_offset; +-} __attribute__((packed)) voyager_at_t; +- +-/* Voyager Interrupt Controller (VIC) registers */ +- +-/* Base to add to Cross Processor Interrupts (CPIs) when triggering +- * the CPU IRQ line */ +-/* register defines for the WCBICs (one per processor) */ +-#define VOYAGER_WCBIC0 0x41 /* bus A node P1 processor 0 */ +-#define VOYAGER_WCBIC1 0x49 /* bus A node P1 processor 1 */ +-#define VOYAGER_WCBIC2 0x51 /* bus A node P2 processor 0 */ +-#define VOYAGER_WCBIC3 0x59 /* bus A node P2 processor 1 */ +-#define VOYAGER_WCBIC4 0x61 /* bus B node P1 processor 0 */ +-#define VOYAGER_WCBIC5 0x69 /* bus B node P1 processor 1 */ +-#define VOYAGER_WCBIC6 0x71 /* bus B node P2 processor 0 */ +-#define VOYAGER_WCBIC7 0x79 /* bus B node P2 processor 1 */ +- +- +-/* top of memory registers */ +-#define VOYAGER_WCBIC_TOM_L 0x4 +-#define VOYAGER_WCBIC_TOM_H 0x5 +- +-/* register defines for Voyager Memory Contol (VMC) +- * these are present on L4 machines only */ +-#define VOYAGER_VMC1 0x81 +-#define VOYAGER_VMC2 0x91 +-#define VOYAGER_VMC3 0xa1 +-#define VOYAGER_VMC4 0xb1 +- +-/* VMC Ports */ +-#define VOYAGER_VMC_MEMORY_SETUP 0x9 +-# define VMC_Interleaving 0x01 +-# define VMC_4Way 0x02 +-# define VMC_EvenCacheLines 0x04 +-# define VMC_HighLine 0x08 +-# define VMC_Start0_Enable 0x20 +-# define VMC_Start1_Enable 0x40 +-# define VMC_Vremap 0x80 +-#define VOYAGER_VMC_BANK_DENSITY 0xa +-# define VMC_BANK_EMPTY 0 +-# define VMC_BANK_4MB 1 +-# define VMC_BANK_16MB 2 +-# define VMC_BANK_64MB 3 +-# define VMC_BANK0_MASK 0x03 +-# define VMC_BANK1_MASK 0x0C +-# define VMC_BANK2_MASK 0x30 +-# define VMC_BANK3_MASK 0xC0 +- +-/* Magellan Memory Controller (MMC) defines - present on L5 */ +-#define VOYAGER_MMC_ASIC_ID 1 +-/* the 
two memory modules corresponding to memory cards in the system */ +-#define VOYAGER_MMC_MEMORY0_MODULE 0x14 +-#define VOYAGER_MMC_MEMORY1_MODULE 0x15 +-/* the Magellan Memory Address (MMA) defines */ +-#define VOYAGER_MMA_ASIC_ID 2 +- +-/* Submodule number for the Quad Baseboard */ +-#define VOYAGER_QUAD_BASEBOARD 1 +- +-/* ASIC defines for the Quad Baseboard */ +-#define VOYAGER_QUAD_QDATA0 1 +-#define VOYAGER_QUAD_QDATA1 2 +-#define VOYAGER_QUAD_QABC 3 +- +-/* Useful areas in extended CMOS */ +-#define VOYAGER_PROCESSOR_PRESENT_MASK 0x88a +-#define VOYAGER_MEMORY_CLICKMAP 0xa23 +-#define VOYAGER_DUMP_LOCATION 0xb1a +- +-/* SUS In Control bit - used to tell SUS that we don't need to be +- * babysat anymore */ +-#define VOYAGER_SUS_IN_CONTROL_PORT 0x3ff +-# define VOYAGER_IN_CONTROL_FLAG 0x80 +- +-/* Voyager PSI defines */ +-#define VOYAGER_PSI_STATUS_REG 0x08 +-# define PSI_DC_FAIL 0x01 +-# define PSI_MON 0x02 +-# define PSI_FAULT 0x04 +-# define PSI_ALARM 0x08 +-# define PSI_CURRENT 0x10 +-# define PSI_DVM 0x20 +-# define PSI_PSCFAULT 0x40 +-# define PSI_STAT_CHG 0x80 +- +-#define VOYAGER_PSI_SUPPLY_REG 0x8000 +- /* read */ +-# define PSI_FAIL_DC 0x01 +-# define PSI_FAIL_AC 0x02 +-# define PSI_MON_INT 0x04 +-# define PSI_SWITCH_OFF 0x08 +-# define PSI_HX_OFF 0x10 +-# define PSI_SECURITY 0x20 +-# define PSI_CMOS_BATT_LOW 0x40 +-# define PSI_CMOS_BATT_FAIL 0x80 +- /* write */ +-# define PSI_CLR_SWITCH_OFF 0x13 +-# define PSI_CLR_HX_OFF 0x14 +-# define PSI_CLR_CMOS_BATT_FAIL 0x17 +- +-#define VOYAGER_PSI_MASK 0x8001 +-# define PSI_MASK_MASK 0x10 +- +-#define VOYAGER_PSI_AC_FAIL_REG 0x8004 +-#define AC_FAIL_STAT_CHANGE 0x80 +- +-#define VOYAGER_PSI_GENERAL_REG 0x8007 +- /* read */ +-# define PSI_SWITCH_ON 0x01 +-# define PSI_SWITCH_ENABLED 0x02 +-# define PSI_ALARM_ENABLED 0x08 +-# define PSI_SECURE_ENABLED 0x10 +-# define PSI_COLD_RESET 0x20 +-# define PSI_COLD_START 0x80 +- /* write */ +-# define PSI_POWER_DOWN 0x10 +-# define PSI_SWITCH_DISABLE 0x01 +-# define PSI_SWITCH_ENABLE 0x11 +-# define PSI_CLEAR 0x12 +-# define PSI_ALARM_DISABLE 0x03 +-# define PSI_ALARM_ENABLE 0x13 +-# define PSI_CLEAR_COLD_RESET 0x05 +-# define PSI_SET_COLD_RESET 0x15 +-# define PSI_CLEAR_COLD_START 0x07 +-# define PSI_SET_COLD_START 0x17 +- +- +- +-struct voyager_bios_info { +- __u8 len; +- __u8 major; +- __u8 minor; +- __u8 debug; +- __u8 num_classes; +- __u8 class_1; +- __u8 class_2; +-}; +- +-/* The following structures and definitions are for the Kernel/SUS +- * interface these are needed to find out how SUS initialised any Quad +- * boards in the system */ +- +-#define NUMBER_OF_MC_BUSSES 2 +-#define SLOTS_PER_MC_BUS 8 +-#define MAX_CPUS 16 /* 16 way CPU system */ +-#define MAX_PROCESSOR_BOARDS 4 /* 4 processor slot system */ +-#define MAX_CACHE_LEVELS 4 /* # of cache levels supported */ +-#define MAX_SHARED_CPUS 4 /* # of CPUs that can share a LARC */ +-#define NUMBER_OF_POS_REGS 8 +- +-typedef struct { +- __u8 MC_Slot; +- __u8 POS_Values[NUMBER_OF_POS_REGS]; +-} __attribute__((packed)) MC_SlotInformation_t; +- +-struct QuadDescription { +- __u8 Type; /* for type 0 (DYADIC or MONADIC) all fields +- * will be zero except for slot */ +- __u8 StructureVersion; +- __u32 CPI_BaseAddress; +- __u32 LARC_BankSize; +- __u32 LocalMemoryStateBits; +- __u8 Slot; /* Processor slots 1 - 4 */ +-} __attribute__((packed)); +- +-struct ProcBoardInfo { +- __u8 Type; +- __u8 StructureVersion; +- __u8 NumberOfBoards; +- struct QuadDescription QuadData[MAX_PROCESSOR_BOARDS]; +-} __attribute__((packed)); +- +-struct 
CacheDescription { +- __u8 Level; +- __u32 TotalSize; +- __u16 LineSize; +- __u8 Associativity; +- __u8 CacheType; +- __u8 WriteType; +- __u8 Number_CPUs_SharedBy; +- __u8 Shared_CPUs_Hardware_IDs[MAX_SHARED_CPUS]; +- +-} __attribute__((packed)); +- +-struct CPU_Description { +- __u8 CPU_HardwareId; +- char *FRU_String; +- __u8 NumberOfCacheLevels; +- struct CacheDescription CacheLevelData[MAX_CACHE_LEVELS]; +-} __attribute__((packed)); +- +-struct CPU_Info { +- __u8 Type; +- __u8 StructureVersion; +- __u8 NumberOf_CPUs; +- struct CPU_Description CPU_Data[MAX_CPUS]; +-} __attribute__((packed)); +- +- +-/* +- * This structure will be used by SUS and the OS. +- * The assumption about this structure is that no blank space is +- * packed in it by our friend the compiler. +- */ +-typedef struct { +- __u8 Mailbox_SUS; /* Written to by SUS to give +- commands/response to the OS */ +- __u8 Mailbox_OS; /* Written to by the OS to give +- commands/response to SUS */ +- __u8 SUS_MailboxVersion; /* Tells the OS which iteration of the +- interface SUS supports */ +- __u8 OS_MailboxVersion; /* Tells SUS which iteration of the +- interface the OS supports */ +- __u32 OS_Flags; /* Flags set by the OS as info for +- SUS */ +- __u32 SUS_Flags; /* Flags set by SUS as info +- for the OS */ +- __u32 WatchDogPeriod; /* Watchdog period (in seconds) which +- the DP uses to see if the OS +- is dead */ +- __u32 WatchDogCount; /* Updated by the OS on every tic. */ +- __u32 MemoryFor_SUS_ErrorLog; /* Flat 32 bit address which tells SUS +- where to stuff the SUS error log +- on a dump */ +- MC_SlotInformation_t MC_SlotInfo[NUMBER_OF_MC_BUSSES*SLOTS_PER_MC_BUS]; +- /* Storage for MCA POS data */ +- /* All new SECOND_PASS_INTERFACE fields added from this point */ +- struct ProcBoardInfo *BoardData; +- struct CPU_Info *CPU_Data; +- /* All new fields must be added from this point */ +-} Voyager_KernelSUS_Mbox_t; +- +-/* structure for finding the right memory address to send a QIC CPI to */ +-struct voyager_qic_cpi { +- /* Each cache line (32 bytes) can trigger a cpi. 
The cpi +- * read/write may occur anywhere in the cache line---pick the +- * middle to be safe */ +- struct { +- __u32 pad1[3]; +- __u32 cpi; +- __u32 pad2[4]; +- } qic_cpi[8]; +-}; +- +-struct voyager_status { +- __u32 power_fail:1; +- __u32 switch_off:1; +- __u32 request_from_kernel:1; +-}; +- +-struct voyager_psi_regs { +- __u8 cat_id; +- __u8 cat_dev; +- __u8 cat_control; +- __u8 subaddr; +- __u8 dummy4; +- __u8 checkbit; +- __u8 subaddr_low; +- __u8 subaddr_high; +- __u8 intstatus; +- __u8 stat1; +- __u8 stat3; +- __u8 fault; +- __u8 tms; +- __u8 gen; +- __u8 sysconf; +- __u8 dummy15; +-}; +- +-struct voyager_psi_subregs { +- __u8 supply; +- __u8 mask; +- __u8 present; +- __u8 DCfail; +- __u8 ACfail; +- __u8 fail; +- __u8 UPSfail; +- __u8 genstatus; +-}; +- +-struct voyager_psi { +- struct voyager_psi_regs regs; +- struct voyager_psi_subregs subregs; +-}; +- +-struct voyager_SUS { +-#define VOYAGER_DUMP_BUTTON_NMI 0x1 +-#define VOYAGER_SUS_VALID 0x2 +-#define VOYAGER_SYSINT_COMPLETE 0x3 +- __u8 SUS_mbox; +-#define VOYAGER_NO_COMMAND 0x0 +-#define VOYAGER_IGNORE_DUMP 0x1 +-#define VOYAGER_DO_DUMP 0x2 +-#define VOYAGER_SYSINT_HANDSHAKE 0x3 +-#define VOYAGER_DO_MEM_DUMP 0x4 +-#define VOYAGER_SYSINT_WAS_RECOVERED 0x5 +- __u8 kernel_mbox; +-#define VOYAGER_MAILBOX_VERSION 0x10 +- __u8 SUS_version; +- __u8 kernel_version; +-#define VOYAGER_OS_HAS_SYSINT 0x1 +-#define VOYAGER_OS_IN_PROGRESS 0x2 +-#define VOYAGER_UPDATING_WDPERIOD 0x4 +- __u32 kernel_flags; +-#define VOYAGER_SUS_BOOTING 0x1 +-#define VOYAGER_SUS_IN_PROGRESS 0x2 +- __u32 SUS_flags; +- __u32 watchdog_period; +- __u32 watchdog_count; +- __u32 SUS_errorlog; +- /* lots of system configuration stuff under here */ +-}; +- +-/* Variables exported by voyager_smp */ +-extern __u32 voyager_extended_vic_processors; +-extern __u32 voyager_allowed_boot_processors; +-extern __u32 voyager_quad_processors; +-extern struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS]; +-extern struct voyager_SUS *voyager_SUS; +- +-/* variables exported always */ +-extern struct task_struct *voyager_thread; +-extern int voyager_level; +-extern struct voyager_status voyager_status; +- +-/* functions exported by the voyager and voyager_smp modules */ +-extern int voyager_cat_readb(__u8 module, __u8 asic, int reg); +-extern void voyager_cat_init(void); +-extern void voyager_detect(struct voyager_bios_info *); +-extern void voyager_trap_init(void); +-extern void voyager_setup_irqs(void); +-extern int voyager_memory_detect(int region, __u32 *addr, __u32 *length); +-extern void voyager_smp_intr_init(void); +-extern __u8 voyager_extended_cmos_read(__u16 cmos_address); +-extern void voyager_smp_dump(void); +-extern void voyager_timer_interrupt(void); +-extern void smp_local_timer_interrupt(void); +-extern void voyager_power_off(void); +-extern void smp_voyager_power_off(void *dummy); +-extern void voyager_restart(void); +-extern void voyager_cat_power_off(void); +-extern void voyager_cat_do_common_interrupt(void); +-extern void voyager_handle_nmi(void); +-extern void voyager_smp_intr_init(void); +-/* Commands for the following are */ +-#define VOYAGER_PSI_READ 0 +-#define VOYAGER_PSI_WRITE 1 +-#define VOYAGER_PSI_SUBREAD 2 +-#define VOYAGER_PSI_SUBWRITE 3 +-extern void voyager_cat_psi(__u8, __u16, __u8 *); +Index: linux-2.6-tip/arch/x86/include/asm/xen/events.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/xen/events.h ++++ linux-2.6-tip/arch/x86/include/asm/xen/events.h +@@ -15,10 +15,4 @@ static 
inline int xen_irqs_disabled(stru + return raw_irqs_disabled_flags(regs->flags); + } + +-static inline void xen_do_IRQ(int irq, struct pt_regs *regs) +-{ +- regs->orig_ax = ~irq; +- do_IRQ(regs); +-} +- + #endif /* _ASM_X86_XEN_EVENTS_H */ +Index: linux-2.6-tip/arch/x86/include/asm/xen/hypercall.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/xen/hypercall.h ++++ linux-2.6-tip/arch/x86/include/asm/xen/hypercall.h +@@ -296,6 +296,8 @@ HYPERVISOR_get_debugreg(int reg) + static inline int + HYPERVISOR_update_descriptor(u64 ma, u64 desc) + { ++ if (sizeof(u64) == sizeof(long)) ++ return _hypercall2(int, update_descriptor, ma, desc); + return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); + } + +Index: linux-2.6-tip/arch/x86/include/asm/xen/hypervisor.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/xen/hypervisor.h ++++ linux-2.6-tip/arch/x86/include/asm/xen/hypervisor.h +@@ -38,22 +38,30 @@ extern struct shared_info *HYPERVISOR_sh + extern struct start_info *xen_start_info; + + enum xen_domain_type { +- XEN_NATIVE, +- XEN_PV_DOMAIN, +- XEN_HVM_DOMAIN, ++ XEN_NATIVE, /* running on bare hardware */ ++ XEN_PV_DOMAIN, /* running in a PV domain */ ++ XEN_HVM_DOMAIN, /* running in a Xen hvm domain */ + }; + +-extern enum xen_domain_type xen_domain_type; +- + #ifdef CONFIG_XEN +-#define xen_domain() (xen_domain_type != XEN_NATIVE) ++extern enum xen_domain_type xen_domain_type; + #else +-#define xen_domain() (0) ++#define xen_domain_type XEN_NATIVE + #endif + +-#define xen_pv_domain() (xen_domain() && xen_domain_type == XEN_PV_DOMAIN) +-#define xen_hvm_domain() (xen_domain() && xen_domain_type == XEN_HVM_DOMAIN) +- +-#define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN) ++#define xen_domain() (xen_domain_type != XEN_NATIVE) ++#define xen_pv_domain() (xen_domain() && \ ++ xen_domain_type == XEN_PV_DOMAIN) ++#define xen_hvm_domain() (xen_domain() && \ ++ xen_domain_type == XEN_HVM_DOMAIN) ++ ++#ifdef CONFIG_XEN_DOM0 ++#include ++ ++#define xen_initial_domain() (xen_pv_domain() && \ ++ xen_start_info->flags & SIF_INITDOMAIN) ++#else /* !CONFIG_XEN_DOM0 */ ++#define xen_initial_domain() (0) ++#endif /* CONFIG_XEN_DOM0 */ + + #endif /* _ASM_X86_XEN_HYPERVISOR_H */ +Index: linux-2.6-tip/arch/x86/include/asm/xen/page.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/xen/page.h ++++ linux-2.6-tip/arch/x86/include/asm/xen/page.h +@@ -164,6 +164,7 @@ static inline pte_t __pte_ma(pteval_t x) + + + xmaddr_t arbitrary_virt_to_machine(void *address); ++unsigned long arbitrary_virt_to_mfn(void *vaddr); + void make_lowmem_page_readonly(void *vaddr); + void make_lowmem_page_readwrite(void *vaddr); + +Index: linux-2.6-tip/arch/x86/include/asm/xor.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/xor.h ++++ linux-2.6-tip/arch/x86/include/asm/xor.h +@@ -1,5 +1,10 @@ ++#ifdef CONFIG_KMEMCHECK ++/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */ ++# include ++#else + #ifdef CONFIG_X86_32 + # include "xor_32.h" + #else + # include "xor_64.h" + #endif ++#endif +Index: linux-2.6-tip/arch/x86/kernel/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/Makefile ++++ linux-2.6-tip/arch/x86/kernel/Makefile +@@ -23,11 +23,12 @@ nostackp := $(call 
cc-option, -fno-stack + CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) + CFLAGS_hpet.o := $(nostackp) + CFLAGS_tsc.o := $(nostackp) ++CFLAGS_paravirt.o := $(nostackp) + + obj-y := process_$(BITS).o signal.o entry_$(BITS).o + obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o + obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o +-obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o ++obj-y += setup.o i8259.o irqinit_$(BITS).o + obj-$(CONFIG_X86_VISWS) += visws_quirks.o + obj-$(CONFIG_X86_32) += probe_roms_32.o + obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o +@@ -49,31 +50,28 @@ obj-y += step.o + obj-$(CONFIG_STACKTRACE) += stacktrace.o + obj-y += cpu/ + obj-y += acpi/ +-obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o ++obj-y += reboot.o + obj-$(CONFIG_MCA) += mca_32.o + obj-$(CONFIG_X86_MSR) += msr.o + obj-$(CONFIG_X86_CPUID) += cpuid.o + obj-$(CONFIG_PCI) += early-quirks.o + apm-y := apm_32.o + obj-$(CONFIG_APM) += apm.o +-obj-$(CONFIG_X86_SMP) += smp.o +-obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o +-obj-$(CONFIG_X86_32_SMP) += smpcommon.o +-obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o ++obj-$(CONFIG_SMP) += smp.o ++obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o ++obj-$(CONFIG_SMP) += setup_percpu.o ++obj-$(CONFIG_X86_64_SMP) += tsc_sync.o + obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o + obj-$(CONFIG_X86_MPPARSE) += mpparse.o +-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o +-obj-$(CONFIG_X86_IO_APIC) += io_apic.o ++obj-y += apic/ + obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o + obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +-obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o ++obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o ++obj-$(CONFIG_FTRACE_SYSCALLS) += ftrace.o + obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o + obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o + obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o +-obj-$(CONFIG_X86_NUMAQ) += numaq_32.o +-obj-$(CONFIG_X86_ES7000) += es7000_32.o +-obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o +-obj-y += vsmp_64.o ++obj-$(CONFIG_X86_VSMP) += vsmp_64.o + obj-$(CONFIG_KPROBES) += kprobes.o + obj-$(CONFIG_MODULES) += module_$(BITS).o + obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o +@@ -109,21 +107,18 @@ obj-$(CONFIG_MICROCODE) += microcode.o + + obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o + +-obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64 ++obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o + + ### + # 64 bit specific files + ifeq ($(CONFIG_X86_64),y) +- obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o +- obj-y += bios_uv.o uv_irq.o uv_sysfs.o +- obj-y += genx2apic_cluster.o +- obj-y += genx2apic_phys.o +- obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o +- obj-$(CONFIG_AUDIT) += audit_64.o +- +- obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o +- obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o +- obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o ++ obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o ++ obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o ++ obj-$(CONFIG_AUDIT) += audit_64.o ++ ++ obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o ++ obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o ++ obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o + +- obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o ++ obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o + endif +Index: linux-2.6-tip/arch/x86/kernel/acpi/boot.c +=================================================================== +--- 
linux-2.6-tip.orig/arch/x86/kernel/acpi/boot.c ++++ linux-2.6-tip/arch/x86/kernel/acpi/boot.c +@@ -37,15 +37,10 @@ + #include + #include + #include +-#include + #include + #include + #include + +-#ifdef CONFIG_X86_LOCAL_APIC +-# include +-#endif +- + static int __initdata acpi_force = 0; + u32 acpi_rsdt_forced; + #ifdef CONFIG_ACPI +@@ -56,16 +51,7 @@ int acpi_disabled = 1; + EXPORT_SYMBOL(acpi_disabled); + + #ifdef CONFIG_X86_64 +- +-#include +- +-#else /* X86 */ +- +-#ifdef CONFIG_X86_LOCAL_APIC +-#include +-#include +-#endif /* CONFIG_X86_LOCAL_APIC */ +- ++# include + #endif /* X86 */ + + #define BAD_MADT_ENTRY(entry, end) ( \ +@@ -121,35 +107,18 @@ enum acpi_irq_model_id acpi_irq_model = + */ + char *__init __acpi_map_table(unsigned long phys, unsigned long size) + { +- unsigned long base, offset, mapped_size; +- int idx; + + if (!phys || !size) + return NULL; + +- if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT)) +- return __va(phys); +- +- offset = phys & (PAGE_SIZE - 1); +- mapped_size = PAGE_SIZE - offset; +- clear_fixmap(FIX_ACPI_END); +- set_fixmap(FIX_ACPI_END, phys); +- base = fix_to_virt(FIX_ACPI_END); +- +- /* +- * Most cases can be covered by the below. +- */ +- idx = FIX_ACPI_END; +- while (mapped_size < size) { +- if (--idx < FIX_ACPI_BEGIN) +- return NULL; /* cannot handle this */ +- phys += PAGE_SIZE; +- clear_fixmap(idx); +- set_fixmap(idx, phys); +- mapped_size += PAGE_SIZE; +- } ++ return early_ioremap(phys, size); ++} ++void __init __acpi_unmap_table(char *map, unsigned long size) ++{ ++ if (!map || !size) ++ return; + +- return ((unsigned char *)base + offset); ++ early_iounmap(map, size); + } + + #ifdef CONFIG_PCI_MMCONFIG +@@ -239,7 +208,8 @@ static int __init acpi_parse_madt(struct + madt->address); + } + +- acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); ++ default_acpi_madt_oem_check(madt->header.oem_id, ++ madt->header.oem_table_id); + + return 0; + } +@@ -884,7 +854,7 @@ static struct { + DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); + } mp_ioapic_routing[MAX_IO_APICS]; + +-static int mp_find_ioapic(int gsi) ++int mp_find_ioapic(int gsi) + { + int i = 0; + +@@ -899,6 +869,16 @@ static int mp_find_ioapic(int gsi) + return -1; + } + ++int mp_find_ioapic_pin(int ioapic, int gsi) ++{ ++ if (WARN_ON(ioapic == -1)) ++ return -1; ++ if (WARN_ON(gsi > mp_ioapic_routing[ioapic].gsi_end)) ++ return -1; ++ ++ return gsi - mp_ioapic_routing[ioapic].gsi_base; ++} ++ + static u8 __init uniq_ioapic_id(u8 id) + { + #ifdef CONFIG_X86_32 +@@ -912,8 +892,8 @@ static u8 __init uniq_ioapic_id(u8 id) + DECLARE_BITMAP(used, 256); + bitmap_zero(used, 256); + for (i = 0; i < nr_ioapics; i++) { +- struct mp_config_ioapic *ia = &mp_ioapics[i]; +- __set_bit(ia->mp_apicid, used); ++ struct mpc_ioapic *ia = &mp_ioapics[i]; ++ __set_bit(ia->apicid, used); + } + if (!test_bit(id, used)) + return id; +@@ -945,29 +925,29 @@ void __init mp_register_ioapic(int id, u + + idx = nr_ioapics; + +- mp_ioapics[idx].mp_type = MP_IOAPIC; +- mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; +- mp_ioapics[idx].mp_apicaddr = address; ++ mp_ioapics[idx].type = MP_IOAPIC; ++ mp_ioapics[idx].flags = MPC_APIC_USABLE; ++ mp_ioapics[idx].apicaddr = address; + + set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); +- mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); ++ mp_ioapics[idx].apicid = uniq_ioapic_id(id); + #ifdef CONFIG_X86_32 +- mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); ++ mp_ioapics[idx].apicver = io_apic_get_version(idx); + #else +- mp_ioapics[idx].mp_apicver = 
0; ++ mp_ioapics[idx].apicver = 0; + #endif + /* + * Build basic GSI lookup table to facilitate gsi->io_apic lookups + * and to prevent reprogramming of IOAPIC pins (PCI GSIs). + */ +- mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; ++ mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid; + mp_ioapic_routing[idx].gsi_base = gsi_base; + mp_ioapic_routing[idx].gsi_end = gsi_base + + io_apic_get_redir_entries(idx); + +- printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " +- "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, +- mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, ++ printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " ++ "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, ++ mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr, + mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); + + nr_ioapics++; +@@ -996,19 +976,19 @@ int __init acpi_probe_gsi(void) + return max_gsi + 1; + } + +-static void assign_to_mp_irq(struct mp_config_intsrc *m, +- struct mp_config_intsrc *mp_irq) ++static void assign_to_mp_irq(struct mpc_intsrc *m, ++ struct mpc_intsrc *mp_irq) + { +- memcpy(mp_irq, m, sizeof(struct mp_config_intsrc)); ++ memcpy(mp_irq, m, sizeof(struct mpc_intsrc)); + } + +-static int mp_irq_cmp(struct mp_config_intsrc *mp_irq, +- struct mp_config_intsrc *m) ++static int mp_irq_cmp(struct mpc_intsrc *mp_irq, ++ struct mpc_intsrc *m) + { +- return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc)); ++ return memcmp(mp_irq, m, sizeof(struct mpc_intsrc)); + } + +-static void save_mp_irq(struct mp_config_intsrc *m) ++static void save_mp_irq(struct mpc_intsrc *m) + { + int i; + +@@ -1026,7 +1006,7 @@ void __init mp_override_legacy_irq(u8 bu + { + int ioapic; + int pin; +- struct mp_config_intsrc mp_irq; ++ struct mpc_intsrc mp_irq; + + /* + * Convert 'gsi' to 'ioapic.pin'. +@@ -1034,7 +1014,7 @@ void __init mp_override_legacy_irq(u8 bu + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) + return; +- pin = gsi - mp_ioapic_routing[ioapic].gsi_base; ++ pin = mp_find_ioapic_pin(ioapic, gsi); + + /* + * TBD: This check is for faulty timer entries, where the override +@@ -1044,13 +1024,13 @@ void __init mp_override_legacy_irq(u8 bu + if ((bus_irq == 0) && (trigger == 3)) + trigger = 1; + +- mp_irq.mp_type = MP_INTSRC; +- mp_irq.mp_irqtype = mp_INT; +- mp_irq.mp_irqflag = (trigger << 2) | polarity; +- mp_irq.mp_srcbus = MP_ISA_BUS; +- mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ +- mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ +- mp_irq.mp_dstirq = pin; /* INTIN# */ ++ mp_irq.type = MP_INTSRC; ++ mp_irq.irqtype = mp_INT; ++ mp_irq.irqflag = (trigger << 2) | polarity; ++ mp_irq.srcbus = MP_ISA_BUS; ++ mp_irq.srcbusirq = bus_irq; /* IRQ */ ++ mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */ ++ mp_irq.dstirq = pin; /* INTIN# */ + + save_mp_irq(&mp_irq); + } +@@ -1060,7 +1040,7 @@ void __init mp_config_acpi_legacy_irqs(v + int i; + int ioapic; + unsigned int dstapic; +- struct mp_config_intsrc mp_irq; ++ struct mpc_intsrc mp_irq; + + #if defined (CONFIG_MCA) || defined (CONFIG_EISA) + /* +@@ -1085,7 +1065,7 @@ void __init mp_config_acpi_legacy_irqs(v + ioapic = mp_find_ioapic(0); + if (ioapic < 0) + return; +- dstapic = mp_ioapics[ioapic].mp_apicid; ++ dstapic = mp_ioapics[ioapic].apicid; + + /* + * Use the default configuration for the IRQs 0-15. 
Unless +@@ -1095,16 +1075,14 @@ void __init mp_config_acpi_legacy_irqs(v + int idx; + + for (idx = 0; idx < mp_irq_entries; idx++) { +- struct mp_config_intsrc *irq = mp_irqs + idx; ++ struct mpc_intsrc *irq = mp_irqs + idx; + + /* Do we already have a mapping for this ISA IRQ? */ +- if (irq->mp_srcbus == MP_ISA_BUS +- && irq->mp_srcbusirq == i) ++ if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i) + break; + + /* Do we already have a mapping for this IOAPIC pin */ +- if (irq->mp_dstapic == dstapic && +- irq->mp_dstirq == i) ++ if (irq->dstapic == dstapic && irq->dstirq == i) + break; + } + +@@ -1113,13 +1091,13 @@ void __init mp_config_acpi_legacy_irqs(v + continue; /* IRQ already used */ + } + +- mp_irq.mp_type = MP_INTSRC; +- mp_irq.mp_irqflag = 0; /* Conforming */ +- mp_irq.mp_srcbus = MP_ISA_BUS; +- mp_irq.mp_dstapic = dstapic; +- mp_irq.mp_irqtype = mp_INT; +- mp_irq.mp_srcbusirq = i; /* Identity mapped */ +- mp_irq.mp_dstirq = i; ++ mp_irq.type = MP_INTSRC; ++ mp_irq.irqflag = 0; /* Conforming */ ++ mp_irq.srcbus = MP_ISA_BUS; ++ mp_irq.dstapic = dstapic; ++ mp_irq.irqtype = mp_INT; ++ mp_irq.srcbusirq = i; /* Identity mapped */ ++ mp_irq.dstirq = i; + + save_mp_irq(&mp_irq); + } +@@ -1156,7 +1134,7 @@ int mp_register_gsi(u32 gsi, int trigger + return gsi; + } + +- ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; ++ ioapic_pin = mp_find_ioapic_pin(ioapic, gsi); + + #ifdef CONFIG_X86_32 + if (ioapic_renumber_irq) +@@ -1230,22 +1208,22 @@ int mp_config_acpi_gsi(unsigned char num + u32 gsi, int triggering, int polarity) + { + #ifdef CONFIG_X86_MPPARSE +- struct mp_config_intsrc mp_irq; ++ struct mpc_intsrc mp_irq; + int ioapic; + + if (!acpi_ioapic) + return 0; + + /* print the entry should happen on mptable identically */ +- mp_irq.mp_type = MP_INTSRC; +- mp_irq.mp_irqtype = mp_INT; +- mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | ++ mp_irq.type = MP_INTSRC; ++ mp_irq.irqtype = mp_INT; ++ mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | + (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); +- mp_irq.mp_srcbus = number; +- mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); ++ mp_irq.srcbus = number; ++ mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); + ioapic = mp_find_ioapic(gsi); +- mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id; +- mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; ++ mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id; ++ mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); + + save_mp_irq(&mp_irq); + #endif +@@ -1372,7 +1350,7 @@ static void __init acpi_process_madt(voi + if (!error) { + acpi_lapic = 1; + +-#ifdef CONFIG_X86_GENERICARCH ++#ifdef CONFIG_X86_BIGSMP + generic_bigsmp_probe(); + #endif + /* +@@ -1384,9 +1362,8 @@ static void __init acpi_process_madt(voi + acpi_ioapic = 1; + + smp_found_config = 1; +-#ifdef CONFIG_X86_32 +- setup_apic_routing(); +-#endif ++ if (apic->setup_apic_routing) ++ apic->setup_apic_routing(); + } + } + if (error == -EINVAL) { +Index: linux-2.6-tip/arch/x86/kernel/acpi/processor.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/acpi/processor.c ++++ linux-2.6-tip/arch/x86/kernel/acpi/processor.c +@@ -43,6 +43,11 @@ static void init_intel_pdc(struct acpi_p + buf[0] = ACPI_PDC_REVISION_ID; + buf[1] = 1; + buf[2] = ACPI_PDC_C_CAPABILITY_SMP; ++ /* ++ * If mwait/monitor is unsupported, C2/C3_FFH will be disabled. 
++ */ ++ if (!cpu_has(c, X86_FEATURE_MWAIT)) ++ buf[2] &= ~ACPI_PDC_C_C2C3_FFH; + + /* + * The default of PDC_SMP_T_SWCOORD bit is set for intel x86 cpu so +Index: linux-2.6-tip/arch/x86/kernel/acpi/realmode/wakeup.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/acpi/realmode/wakeup.S ++++ linux-2.6-tip/arch/x86/kernel/acpi/realmode/wakeup.S +@@ -3,8 +3,8 @@ + */ + #include + #include +-#include +-#include ++#include ++#include + #include + + .code16 +Index: linux-2.6-tip/arch/x86/kernel/acpi/sleep.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/acpi/sleep.c ++++ linux-2.6-tip/arch/x86/kernel/acpi/sleep.c +@@ -101,6 +101,7 @@ int acpi_save_state_mem(void) + stack_start.sp = temp_stack + sizeof(temp_stack); + early_gdt_descr.address = + (unsigned long)get_cpu_gdt_table(smp_processor_id()); ++ initial_gs = per_cpu_offset(smp_processor_id()); + #endif + initial_code = (unsigned long)wakeup_long64; + saved_magic = 0x123456789abcdef0; +Index: linux-2.6-tip/arch/x86/kernel/acpi/wakeup_32.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/acpi/wakeup_32.S ++++ linux-2.6-tip/arch/x86/kernel/acpi/wakeup_32.S +@@ -1,7 +1,7 @@ + .section .text.page_aligned + #include + #include +-#include ++#include + + # Copyright 2003, 2008 Pavel Machek , distribute under GPLv2 + +Index: linux-2.6-tip/arch/x86/kernel/acpi/wakeup_64.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/acpi/wakeup_64.S ++++ linux-2.6-tip/arch/x86/kernel/acpi/wakeup_64.S +@@ -1,8 +1,8 @@ + .text + #include + #include +-#include +-#include ++#include ++#include + #include + #include + +Index: linux-2.6-tip/arch/x86/kernel/alternative.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/alternative.c ++++ linux-2.6-tip/arch/x86/kernel/alternative.c +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -12,7 +13,9 @@ + #include + #include + #include ++#include + #include ++#include + + #define MAX_PATCH_LEN (255-1) + +@@ -226,6 +229,7 @@ static void alternatives_smp_lock(u8 **s + { + u8 **ptr; + ++ mutex_lock(&text_mutex); + for (ptr = start; ptr < end; ptr++) { + if (*ptr < text) + continue; +@@ -234,6 +238,7 @@ static void alternatives_smp_lock(u8 **s + /* turn DS segment override prefix into lock prefix */ + text_poke(*ptr, ((unsigned char []){0xf0}), 1); + }; ++ mutex_unlock(&text_mutex); + } + + static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) +@@ -243,6 +248,7 @@ static void alternatives_smp_unlock(u8 * + if (noreplace_smp) + return; + ++ mutex_lock(&text_mutex); + for (ptr = start; ptr < end; ptr++) { + if (*ptr < text) + continue; +@@ -251,6 +257,7 @@ static void alternatives_smp_unlock(u8 * + /* turn lock prefix into DS segment override prefix */ + text_poke(*ptr, ((unsigned char []){0x3E}), 1); + }; ++ mutex_unlock(&text_mutex); + } + + struct smp_alt_module { +@@ -414,9 +421,17 @@ void __init alternative_instructions(voi + that might execute the to be patched code. + Other CPUs are not running. */ + stop_nmi(); +-#ifdef CONFIG_X86_MCE +- stop_mce(); +-#endif ++ ++ /* ++ * Don't stop machine check exceptions while patching. ++ * MCEs only happen when something got corrupted and in this ++ * case we must do something about the corruption. 
++ * Ignoring it is worse than a unlikely patching race. ++ * Also machine checks tend to be broadcast and if one CPU ++ * goes into machine check the others follow quickly, so we don't ++ * expect a machine check to cause undue problems during to code ++ * patching. ++ */ + + apply_alternatives(__alt_instructions, __alt_instructions_end); + +@@ -456,9 +471,6 @@ void __init alternative_instructions(voi + (unsigned long)__smp_locks_end); + + restart_nmi(); +-#ifdef CONFIG_X86_MCE +- restart_mce(); +-#endif + } + + /** +@@ -495,12 +507,13 @@ void *text_poke_early(void *addr, const + * It means the size must be writable atomically and the address must be aligned + * in a way that permits an atomic write. It also makes sure we fit on a single + * page. ++ * ++ * Note: Must be called under text_mutex. + */ + void *__kprobes text_poke(void *addr, const void *opcode, size_t len) + { + unsigned long flags; + char *vaddr; +- int nr_pages = 2; + struct page *pages[2]; + int i; + +@@ -513,18 +526,21 @@ void *__kprobes text_poke(void *addr, co + pages[1] = virt_to_page(addr + PAGE_SIZE); + } + BUG_ON(!pages[0]); +- if (!pages[1]) +- nr_pages = 1; +- vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); +- BUG_ON(!vaddr); + local_irq_save(flags); ++ set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0])); ++ if (pages[1]) ++ set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1])); ++ vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0); + memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len); +- local_irq_restore(flags); +- vunmap(vaddr); ++ clear_fixmap(FIX_TEXT_POKE0); ++ if (pages[1]) ++ clear_fixmap(FIX_TEXT_POKE1); ++ local_flush_tlb(); + sync_core(); + /* Could also do a CLFLUSH here to speed up CPU recovery; but + that causes hangs on some VIA CPUs. */ + for (i = 0; i < len; i++) + BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]); ++ local_irq_restore(flags); + return addr; + } +Index: linux-2.6-tip/arch/x86/kernel/amd_iommu.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/amd_iommu.c ++++ linux-2.6-tip/arch/x86/kernel/amd_iommu.c +@@ -22,10 +22,9 @@ + #include + #include + #include ++#include + #include +-#ifdef CONFIG_IOMMU_API + #include +-#endif + #include + #include + #include +@@ -1297,8 +1296,10 @@ static void __unmap_single(struct amd_io + /* + * The exported map_single function for dma_ops. + */ +-static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, +- size_t size, int dir) ++static dma_addr_t map_page(struct device *dev, struct page *page, ++ unsigned long offset, size_t size, ++ enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + unsigned long flags; + struct amd_iommu *iommu; +@@ -1306,6 +1307,7 @@ static dma_addr_t map_single(struct devi + u16 devid; + dma_addr_t addr; + u64 dma_mask; ++ phys_addr_t paddr = page_to_phys(page) + offset; + + INC_STATS_COUNTER(cnt_map_single); + +@@ -1340,8 +1342,8 @@ out: + /* + * The exported unmap_single function for dma_ops. + */ +-static void unmap_single(struct device *dev, dma_addr_t dma_addr, +- size_t size, int dir) ++static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, ++ enum dma_data_direction dir, struct dma_attrs *attrs) + { + unsigned long flags; + struct amd_iommu *iommu; +@@ -1390,7 +1392,8 @@ static int map_sg_no_iommu(struct device + * lists). 
+ */ + static int map_sg(struct device *dev, struct scatterlist *sglist, +- int nelems, int dir) ++ int nelems, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + unsigned long flags; + struct amd_iommu *iommu; +@@ -1457,7 +1460,8 @@ unmap: + * lists). + */ + static void unmap_sg(struct device *dev, struct scatterlist *sglist, +- int nelems, int dir) ++ int nelems, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + unsigned long flags; + struct amd_iommu *iommu; +@@ -1644,11 +1648,11 @@ static void prealloc_protection_domains( + } + } + +-static struct dma_mapping_ops amd_iommu_dma_ops = { ++static struct dma_map_ops amd_iommu_dma_ops = { + .alloc_coherent = alloc_coherent, + .free_coherent = free_coherent, +- .map_single = map_single, +- .unmap_single = unmap_single, ++ .map_page = map_page, ++ .unmap_page = unmap_page, + .map_sg = map_sg, + .unmap_sg = unmap_sg, + .dma_supported = amd_iommu_dma_supported, +Index: linux-2.6-tip/arch/x86/kernel/apic.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/apic.c ++++ /dev/null +@@ -1,2223 +0,0 @@ +-/* +- * Local APIC handling, local APIC timers +- * +- * (c) 1999, 2000 Ingo Molnar +- * +- * Fixes +- * Maciej W. Rozycki : Bits for genuine 82489DX APICs; +- * thanks to Eric Gilmore +- * and Rolf G. Tews +- * for testing these extensively. +- * Maciej W. Rozycki : Various updates and fixes. +- * Mikael Pettersson : Power Management for UP-APIC. +- * Pavel Machek and +- * Mikael Pettersson : PM converted to driver model. +- */ +- +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +- +-/* +- * Sanity check +- */ +-#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) +-# error SPURIOUS_APIC_VECTOR definition error +-#endif +- +-#ifdef CONFIG_X86_32 +-/* +- * Knob to control our willingness to enable the local APIC. 
+- * +- * +1=force-enable +- */ +-static int force_enable_local_apic; +-/* +- * APIC command line parameters +- */ +-static int __init parse_lapic(char *arg) +-{ +- force_enable_local_apic = 1; +- return 0; +-} +-early_param("lapic", parse_lapic); +-/* Local APIC was disabled by the BIOS and enabled by the kernel */ +-static int enabled_via_apicbase; +- +-#endif +- +-#ifdef CONFIG_X86_64 +-static int apic_calibrate_pmtmr __initdata; +-static __init int setup_apicpmtimer(char *s) +-{ +- apic_calibrate_pmtmr = 1; +- notsc_setup(NULL); +- return 0; +-} +-__setup("apicpmtimer", setup_apicpmtimer); +-#endif +- +-#ifdef CONFIG_X86_64 +-#define HAVE_X2APIC +-#endif +- +-#ifdef HAVE_X2APIC +-int x2apic; +-/* x2apic enabled before OS handover */ +-static int x2apic_preenabled; +-static int disable_x2apic; +-static __init int setup_nox2apic(char *str) +-{ +- disable_x2apic = 1; +- setup_clear_cpu_cap(X86_FEATURE_X2APIC); +- return 0; +-} +-early_param("nox2apic", setup_nox2apic); +-#endif +- +-unsigned long mp_lapic_addr; +-int disable_apic; +-/* Disable local APIC timer from the kernel commandline or via dmi quirk */ +-static int disable_apic_timer __cpuinitdata; +-/* Local APIC timer works in C2 */ +-int local_apic_timer_c2_ok; +-EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); +- +-int first_system_vector = 0xfe; +- +-/* +- * Debug level, exported for io_apic.c +- */ +-unsigned int apic_verbosity; +- +-int pic_mode; +- +-/* Have we found an MP table */ +-int smp_found_config; +- +-static struct resource lapic_resource = { +- .name = "Local APIC", +- .flags = IORESOURCE_MEM | IORESOURCE_BUSY, +-}; +- +-static unsigned int calibration_result; +- +-static int lapic_next_event(unsigned long delta, +- struct clock_event_device *evt); +-static void lapic_timer_setup(enum clock_event_mode mode, +- struct clock_event_device *evt); +-static void lapic_timer_broadcast(const struct cpumask *mask); +-static void apic_pm_activate(void); +- +-/* +- * The local apic timer can be used for any function which is CPU local. +- */ +-static struct clock_event_device lapic_clockevent = { +- .name = "lapic", +- .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT +- | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, +- .shift = 32, +- .set_mode = lapic_timer_setup, +- .set_next_event = lapic_next_event, +- .broadcast = lapic_timer_broadcast, +- .rating = 100, +- .irq = -1, +-}; +-static DEFINE_PER_CPU(struct clock_event_device, lapic_events); +- +-static unsigned long apic_phys; +- +-/* +- * Get the LAPIC version +- */ +-static inline int lapic_get_version(void) +-{ +- return GET_APIC_VERSION(apic_read(APIC_LVR)); +-} +- +-/* +- * Check, if the APIC is integrated or a separate chip +- */ +-static inline int lapic_is_integrated(void) +-{ +-#ifdef CONFIG_X86_64 +- return 1; +-#else +- return APIC_INTEGRATED(lapic_get_version()); +-#endif +-} +- +-/* +- * Check, whether this is a modern or a first generation APIC +- */ +-static int modern_apic(void) +-{ +- /* AMD systems use old APIC versions, so check the CPU */ +- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && +- boot_cpu_data.x86 >= 0xf) +- return 1; +- return lapic_get_version() >= 0x14; +-} +- +-/* +- * Paravirt kernels also might be using these below ops. So we still +- * use generic apic_read()/apic_write(), which might be pointing to different +- * ops in PARAVIRT case. 
+- */ +-void xapic_wait_icr_idle(void) +-{ +- while (apic_read(APIC_ICR) & APIC_ICR_BUSY) +- cpu_relax(); +-} +- +-u32 safe_xapic_wait_icr_idle(void) +-{ +- u32 send_status; +- int timeout; +- +- timeout = 0; +- do { +- send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; +- if (!send_status) +- break; +- udelay(100); +- } while (timeout++ < 1000); +- +- return send_status; +-} +- +-void xapic_icr_write(u32 low, u32 id) +-{ +- apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); +- apic_write(APIC_ICR, low); +-} +- +-static u64 xapic_icr_read(void) +-{ +- u32 icr1, icr2; +- +- icr2 = apic_read(APIC_ICR2); +- icr1 = apic_read(APIC_ICR); +- +- return icr1 | ((u64)icr2 << 32); +-} +- +-static struct apic_ops xapic_ops = { +- .read = native_apic_mem_read, +- .write = native_apic_mem_write, +- .icr_read = xapic_icr_read, +- .icr_write = xapic_icr_write, +- .wait_icr_idle = xapic_wait_icr_idle, +- .safe_wait_icr_idle = safe_xapic_wait_icr_idle, +-}; +- +-struct apic_ops __read_mostly *apic_ops = &xapic_ops; +-EXPORT_SYMBOL_GPL(apic_ops); +- +-#ifdef HAVE_X2APIC +-static void x2apic_wait_icr_idle(void) +-{ +- /* no need to wait for icr idle in x2apic */ +- return; +-} +- +-static u32 safe_x2apic_wait_icr_idle(void) +-{ +- /* no need to wait for icr idle in x2apic */ +- return 0; +-} +- +-void x2apic_icr_write(u32 low, u32 id) +-{ +- wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); +-} +- +-static u64 x2apic_icr_read(void) +-{ +- unsigned long val; +- +- rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val); +- return val; +-} +- +-static struct apic_ops x2apic_ops = { +- .read = native_apic_msr_read, +- .write = native_apic_msr_write, +- .icr_read = x2apic_icr_read, +- .icr_write = x2apic_icr_write, +- .wait_icr_idle = x2apic_wait_icr_idle, +- .safe_wait_icr_idle = safe_x2apic_wait_icr_idle, +-}; +-#endif +- +-/** +- * enable_NMI_through_LVT0 - enable NMI through local vector table 0 +- */ +-void __cpuinit enable_NMI_through_LVT0(void) +-{ +- unsigned int v; +- +- /* unmask and set to NMI */ +- v = APIC_DM_NMI; +- +- /* Level triggered for 82489DX (32bit mode) */ +- if (!lapic_is_integrated()) +- v |= APIC_LVT_LEVEL_TRIGGER; +- +- apic_write(APIC_LVT0, v); +-} +- +-#ifdef CONFIG_X86_32 +-/** +- * get_physical_broadcast - Get number of physical broadcast IDs +- */ +-int get_physical_broadcast(void) +-{ +- return modern_apic() ? 0xff : 0xf; +-} +-#endif +- +-/** +- * lapic_get_maxlvt - get the maximum number of local vector table entries +- */ +-int lapic_get_maxlvt(void) +-{ +- unsigned int v; +- +- v = apic_read(APIC_LVR); +- /* +- * - we always have APIC integrated on 64bit mode +- * - 82489DXs do not report # of LVT entries +- */ +- return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; +-} +- +-/* +- * Local APIC timer +- */ +- +-/* Clock divisor */ +-#define APIC_DIVISOR 16 +- +-/* +- * This function sets up the local APIC timer, with a timeout of +- * 'clocks' APIC bus clock. During calibration we actually call +- * this function twice on the boot CPU, once with a bogus timeout +- * value, second time for real. The other (noncalibrating) CPUs +- * call this function only once, with the real, calibrated value. +- * +- * We do reads before writes even if unnecessary, to get around the +- * P5 APIC double write bug. 
+- */ +-static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) +-{ +- unsigned int lvtt_value, tmp_value; +- +- lvtt_value = LOCAL_TIMER_VECTOR; +- if (!oneshot) +- lvtt_value |= APIC_LVT_TIMER_PERIODIC; +- if (!lapic_is_integrated()) +- lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); +- +- if (!irqen) +- lvtt_value |= APIC_LVT_MASKED; +- +- apic_write(APIC_LVTT, lvtt_value); +- +- /* +- * Divide PICLK by 16 +- */ +- tmp_value = apic_read(APIC_TDCR); +- apic_write(APIC_TDCR, +- (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | +- APIC_TDR_DIV_16); +- +- if (!oneshot) +- apic_write(APIC_TMICT, clocks / APIC_DIVISOR); +-} +- +-/* +- * Setup extended LVT, AMD specific (K8, family 10h) +- * +- * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and +- * MCE interrupts are supported. Thus MCE offset must be set to 0. +- * +- * If mask=1, the LVT entry does not generate interrupts while mask=0 +- * enables the vector. See also the BKDGs. +- */ +- +-#define APIC_EILVT_LVTOFF_MCE 0 +-#define APIC_EILVT_LVTOFF_IBS 1 +- +-static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) +-{ +- unsigned long reg = (lvt_off << 4) + APIC_EILVT0; +- unsigned int v = (mask << 16) | (msg_type << 8) | vector; +- +- apic_write(reg, v); +-} +- +-u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) +-{ +- setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); +- return APIC_EILVT_LVTOFF_MCE; +-} +- +-u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) +-{ +- setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); +- return APIC_EILVT_LVTOFF_IBS; +-} +-EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); +- +-/* +- * Program the next event, relative to now +- */ +-static int lapic_next_event(unsigned long delta, +- struct clock_event_device *evt) +-{ +- apic_write(APIC_TMICT, delta); +- return 0; +-} +- +-/* +- * Setup the lapic timer in periodic or oneshot mode +- */ +-static void lapic_timer_setup(enum clock_event_mode mode, +- struct clock_event_device *evt) +-{ +- unsigned long flags; +- unsigned int v; +- +- /* Lapic used as dummy for broadcast ? */ +- if (evt->features & CLOCK_EVT_FEAT_DUMMY) +- return; +- +- local_irq_save(flags); +- +- switch (mode) { +- case CLOCK_EVT_MODE_PERIODIC: +- case CLOCK_EVT_MODE_ONESHOT: +- __setup_APIC_LVTT(calibration_result, +- mode != CLOCK_EVT_MODE_PERIODIC, 1); +- break; +- case CLOCK_EVT_MODE_UNUSED: +- case CLOCK_EVT_MODE_SHUTDOWN: +- v = apic_read(APIC_LVTT); +- v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); +- apic_write(APIC_LVTT, v); +- apic_write(APIC_TMICT, 0xffffffff); +- break; +- case CLOCK_EVT_MODE_RESUME: +- /* Nothing to do here */ +- break; +- } +- +- local_irq_restore(flags); +-} +- +-/* +- * Local APIC timer broadcast function +- */ +-static void lapic_timer_broadcast(const struct cpumask *mask) +-{ +-#ifdef CONFIG_SMP +- send_IPI_mask(mask, LOCAL_TIMER_VECTOR); +-#endif +-} +- +-/* +- * Setup the local APIC timer for this CPU. Copy the initilized values +- * of the boot CPU and register the clock event in the framework. +- */ +-static void __cpuinit setup_APIC_timer(void) +-{ +- struct clock_event_device *levt = &__get_cpu_var(lapic_events); +- +- memcpy(levt, &lapic_clockevent, sizeof(*levt)); +- levt->cpumask = cpumask_of(smp_processor_id()); +- +- clockevents_register_device(levt); +-} +- +-/* +- * In this functions we calibrate APIC bus clocks to the external timer. +- * +- * We want to do the calibration only once since we want to have local timer +- * irqs syncron. 
CPUs connected by the same APIC bus have the very same bus +- * frequency. +- * +- * This was previously done by reading the PIT/HPET and waiting for a wrap +- * around to find out, that a tick has elapsed. I have a box, where the PIT +- * readout is broken, so it never gets out of the wait loop again. This was +- * also reported by others. +- * +- * Monitoring the jiffies value is inaccurate and the clockevents +- * infrastructure allows us to do a simple substitution of the interrupt +- * handler. +- * +- * The calibration routine also uses the pm_timer when possible, as the PIT +- * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes +- * back to normal later in the boot process). +- */ +- +-#define LAPIC_CAL_LOOPS (HZ/10) +- +-static __initdata int lapic_cal_loops = -1; +-static __initdata long lapic_cal_t1, lapic_cal_t2; +-static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; +-static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; +-static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; +- +-/* +- * Temporary interrupt handler. +- */ +-static void __init lapic_cal_handler(struct clock_event_device *dev) +-{ +- unsigned long long tsc = 0; +- long tapic = apic_read(APIC_TMCCT); +- unsigned long pm = acpi_pm_read_early(); +- +- if (cpu_has_tsc) +- rdtscll(tsc); +- +- switch (lapic_cal_loops++) { +- case 0: +- lapic_cal_t1 = tapic; +- lapic_cal_tsc1 = tsc; +- lapic_cal_pm1 = pm; +- lapic_cal_j1 = jiffies; +- break; +- +- case LAPIC_CAL_LOOPS: +- lapic_cal_t2 = tapic; +- lapic_cal_tsc2 = tsc; +- if (pm < lapic_cal_pm1) +- pm += ACPI_PM_OVRRUN; +- lapic_cal_pm2 = pm; +- lapic_cal_j2 = jiffies; +- break; +- } +-} +- +-static int __init calibrate_by_pmtimer(long deltapm, long *delta) +-{ +- const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; +- const long pm_thresh = pm_100ms / 100; +- unsigned long mult; +- u64 res; +- +-#ifndef CONFIG_X86_PM_TIMER +- return -1; +-#endif +- +- apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); +- +- /* Check, if the PM timer is available */ +- if (!deltapm) +- return -1; +- +- mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); +- +- if (deltapm > (pm_100ms - pm_thresh) && +- deltapm < (pm_100ms + pm_thresh)) { +- apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); +- } else { +- res = (((u64)deltapm) * mult) >> 22; +- do_div(res, 1000000); +- pr_warning("APIC calibration not consistent " +- "with PM Timer: %ldms instead of 100ms\n", +- (long)res); +- /* Correct the lapic counter value */ +- res = (((u64)(*delta)) * pm_100ms); +- do_div(res, deltapm); +- pr_info("APIC delta adjusted to PM-Timer: " +- "%lu (%ld)\n", (unsigned long)res, *delta); +- *delta = (long)res; +- } +- +- return 0; +-} +- +-static int __init calibrate_APIC_clock(void) +-{ +- struct clock_event_device *levt = &__get_cpu_var(lapic_events); +- void (*real_handler)(struct clock_event_device *dev); +- unsigned long deltaj; +- long delta; +- int pm_referenced = 0; +- +- local_irq_disable(); +- +- /* Replace the global interrupt handler */ +- real_handler = global_clock_event->event_handler; +- global_clock_event->event_handler = lapic_cal_handler; +- +- /* +- * Setup the APIC counter to maximum. 
There is no way the lapic +- * can underflow in the 100ms detection time frame +- */ +- __setup_APIC_LVTT(0xffffffff, 0, 0); +- +- /* Let the interrupts run */ +- local_irq_enable(); +- +- while (lapic_cal_loops <= LAPIC_CAL_LOOPS) +- cpu_relax(); +- +- local_irq_disable(); +- +- /* Restore the real event handler */ +- global_clock_event->event_handler = real_handler; +- +- /* Build delta t1-t2 as apic timer counts down */ +- delta = lapic_cal_t1 - lapic_cal_t2; +- apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); +- +- /* we trust the PM based calibration if possible */ +- pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, +- &delta); +- +- /* Calculate the scaled math multiplication factor */ +- lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, +- lapic_clockevent.shift); +- lapic_clockevent.max_delta_ns = +- clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); +- lapic_clockevent.min_delta_ns = +- clockevent_delta2ns(0xF, &lapic_clockevent); +- +- calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; +- +- apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); +- apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); +- apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", +- calibration_result); +- +- if (cpu_has_tsc) { +- delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); +- apic_printk(APIC_VERBOSE, "..... CPU clock speed is " +- "%ld.%04ld MHz.\n", +- (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), +- (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); +- } +- +- apic_printk(APIC_VERBOSE, "..... host bus clock speed is " +- "%u.%04u MHz.\n", +- calibration_result / (1000000 / HZ), +- calibration_result % (1000000 / HZ)); +- +- /* +- * Do a sanity check on the APIC calibration result +- */ +- if (calibration_result < (1000000 / HZ)) { +- local_irq_enable(); +- pr_warning("APIC frequency too slow, disabling apic timer\n"); +- return -1; +- } +- +- levt->features &= ~CLOCK_EVT_FEAT_DUMMY; +- +- /* +- * PM timer calibration failed or not turned on +- * so lets try APIC timer based calibration +- */ +- if (!pm_referenced) { +- apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); +- +- /* +- * Setup the apic timer manually +- */ +- levt->event_handler = lapic_cal_handler; +- lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt); +- lapic_cal_loops = -1; +- +- /* Let the interrupts run */ +- local_irq_enable(); +- +- while (lapic_cal_loops <= LAPIC_CAL_LOOPS) +- cpu_relax(); +- +- /* Stop the lapic timer */ +- lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); +- +- /* Jiffies delta */ +- deltaj = lapic_cal_j2 - lapic_cal_j1; +- apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); +- +- /* Check, if the jiffies result is consistent */ +- if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) +- apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); +- else +- levt->features |= CLOCK_EVT_FEAT_DUMMY; +- } else +- local_irq_enable(); +- +- if (levt->features & CLOCK_EVT_FEAT_DUMMY) { +- pr_warning("APIC timer disabled due to verification failure\n"); +- return -1; +- } +- +- return 0; +-} +- +-/* +- * Setup the boot APIC +- * +- * Calibrate and verify the result. +- */ +-void __init setup_boot_APIC_clock(void) +-{ +- /* +- * The local apic timer can be disabled via the kernel +- * commandline or from the CPU detection code. Register the lapic +- * timer as a dummy clock event source on SMP systems, so the +- * broadcast mechanism is used. On UP systems simply ignore it. 
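A standalone illustration of the arithmetic performed by calibrate_APIC_clock() above: the timer is armed behind the divide-by-16 prescaler, counts down for LAPIC_CAL_LOOPS ticks (HZ/10, roughly 100 ms), and the observed countdown is converted into bus clocks per tick and a bus frequency. The HZ value and the two counter readings below are invented, and the kernel helpers div_sc()/clockevent_delta2ns() are deliberately not reproduced; this sketches the computation only:

#include <stdio.h>

#define HZ              250          /* assumed tick rate for this example */
#define APIC_DIVISOR    16           /* TDCR was programmed to divide-by-16 */
#define LAPIC_CAL_LOOPS (HZ / 10)    /* calibration window: ~100 ms of ticks */

int main(void)
{
	/* Hypothetical APIC_TMCCT readings at the first and last calibration tick */
	long t1 = 0xffffffffL;               /* counter armed to maximum */
	long t2 = 0xffffffffL - 6250000L;    /* value left after ~100 ms */

	/* The timer counts down, so the delta is t1 - t2 */
	long delta = t1 - t2;

	/* Bus clocks per tick: what the kernel keeps in calibration_result */
	unsigned int per_tick = (unsigned int)((delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS);

	printf("lapic delta          = %ld\n", delta);
	printf("calibration result   = %u\n", per_tick);
	printf("host bus clock speed = %u.%04u MHz\n",
	       per_tick / (1000000 / HZ), per_tick % (1000000 / HZ));
	return 0;
}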
+- */ +- if (disable_apic_timer) { +- pr_info("Disabling APIC timer\n"); +- /* No broadcast on UP ! */ +- if (num_possible_cpus() > 1) { +- lapic_clockevent.mult = 1; +- setup_APIC_timer(); +- } +- return; +- } +- +- apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" +- "calibrating APIC timer ...\n"); +- +- if (calibrate_APIC_clock()) { +- /* No broadcast on UP ! */ +- if (num_possible_cpus() > 1) +- setup_APIC_timer(); +- return; +- } +- +- /* +- * If nmi_watchdog is set to IO_APIC, we need the +- * PIT/HPET going. Otherwise register lapic as a dummy +- * device. +- */ +- if (nmi_watchdog != NMI_IO_APIC) +- lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; +- else +- pr_warning("APIC timer registered as dummy," +- " due to nmi_watchdog=%d!\n", nmi_watchdog); +- +- /* Setup the lapic or request the broadcast */ +- setup_APIC_timer(); +-} +- +-void __cpuinit setup_secondary_APIC_clock(void) +-{ +- setup_APIC_timer(); +-} +- +-/* +- * The guts of the apic timer interrupt +- */ +-static void local_apic_timer_interrupt(void) +-{ +- int cpu = smp_processor_id(); +- struct clock_event_device *evt = &per_cpu(lapic_events, cpu); +- +- /* +- * Normally we should not be here till LAPIC has been initialized but +- * in some cases like kdump, its possible that there is a pending LAPIC +- * timer interrupt from previous kernel's context and is delivered in +- * new kernel the moment interrupts are enabled. +- * +- * Interrupts are enabled early and LAPIC is setup much later, hence +- * its possible that when we get here evt->event_handler is NULL. +- * Check for event_handler being NULL and discard the interrupt as +- * spurious. +- */ +- if (!evt->event_handler) { +- pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu); +- /* Switch it off */ +- lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); +- return; +- } +- +- /* +- * the NMI deadlock-detector uses this. +- */ +- inc_irq_stat(apic_timer_irqs); +- +- evt->event_handler(evt); +-} +- +-/* +- * Local APIC timer interrupt. This is the most natural way for doing +- * local interrupts, but local timer interrupts can be emulated by +- * broadcast interrupts too. [in case the hw doesn't support APIC timers] +- * +- * [ if a single-CPU system runs an SMP kernel then we call the local +- * interrupt as well. Thus we cannot inline the local irq ... ] +- */ +-void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) +-{ +- struct pt_regs *old_regs = set_irq_regs(regs); +- +- /* +- * NOTE! We'd better ACK the irq immediately, +- * because timer handling can be slow. +- */ +- ack_APIC_irq(); +- /* +- * update_process_times() expects us to have done irq_enter(). +- * Besides, if we don't timer interrupts ignore the global +- * interrupt lock, which is the WrongThing (tm) to do. +- */ +- exit_idle(); +- irq_enter(); +- local_apic_timer_interrupt(); +- irq_exit(); +- +- set_irq_regs(old_regs); +-} +- +-int setup_profiling_timer(unsigned int multiplier) +-{ +- return -EINVAL; +-} +- +-/* +- * Local APIC start and shutdown +- */ +- +-/** +- * clear_local_APIC - shutdown the local APIC +- * +- * This is called, when a CPU is disabled and before rebooting, so the state of +- * the local APIC has no dangling leftovers. Also used to cleanout any BIOS +- * leftovers during boot. +- */ +-void clear_local_APIC(void) +-{ +- int maxlvt; +- u32 v; +- +- /* APIC hasn't been mapped yet */ +- if (!apic_phys) +- return; +- +- maxlvt = lapic_get_maxlvt(); +- /* +- * Masking an LVT entry can trigger a local APIC error +- * if the vector is zero. 
Mask LVTERR first to prevent this. +- */ +- if (maxlvt >= 3) { +- v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ +- apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); +- } +- /* +- * Careful: we have to set masks only first to deassert +- * any level-triggered sources. +- */ +- v = apic_read(APIC_LVTT); +- apic_write(APIC_LVTT, v | APIC_LVT_MASKED); +- v = apic_read(APIC_LVT0); +- apic_write(APIC_LVT0, v | APIC_LVT_MASKED); +- v = apic_read(APIC_LVT1); +- apic_write(APIC_LVT1, v | APIC_LVT_MASKED); +- if (maxlvt >= 4) { +- v = apic_read(APIC_LVTPC); +- apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); +- } +- +- /* lets not touch this if we didn't frob it */ +-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) +- if (maxlvt >= 5) { +- v = apic_read(APIC_LVTTHMR); +- apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); +- } +-#endif +- /* +- * Clean APIC state for other OSs: +- */ +- apic_write(APIC_LVTT, APIC_LVT_MASKED); +- apic_write(APIC_LVT0, APIC_LVT_MASKED); +- apic_write(APIC_LVT1, APIC_LVT_MASKED); +- if (maxlvt >= 3) +- apic_write(APIC_LVTERR, APIC_LVT_MASKED); +- if (maxlvt >= 4) +- apic_write(APIC_LVTPC, APIC_LVT_MASKED); +- +- /* Integrated APIC (!82489DX) ? */ +- if (lapic_is_integrated()) { +- if (maxlvt > 3) +- /* Clear ESR due to Pentium errata 3AP and 11AP */ +- apic_write(APIC_ESR, 0); +- apic_read(APIC_ESR); +- } +-} +- +-/** +- * disable_local_APIC - clear and disable the local APIC +- */ +-void disable_local_APIC(void) +-{ +- unsigned int value; +- +- /* APIC hasn't been mapped yet */ +- if (!apic_phys) +- return; +- +- clear_local_APIC(); +- +- /* +- * Disable APIC (implies clearing of registers +- * for 82489DX!). +- */ +- value = apic_read(APIC_SPIV); +- value &= ~APIC_SPIV_APIC_ENABLED; +- apic_write(APIC_SPIV, value); +- +-#ifdef CONFIG_X86_32 +- /* +- * When LAPIC was disabled by the BIOS and enabled by the kernel, +- * restore the disabled state. +- */ +- if (enabled_via_apicbase) { +- unsigned int l, h; +- +- rdmsr(MSR_IA32_APICBASE, l, h); +- l &= ~MSR_IA32_APICBASE_ENABLE; +- wrmsr(MSR_IA32_APICBASE, l, h); +- } +-#endif +-} +- +-/* +- * If Linux enabled the LAPIC against the BIOS default disable it down before +- * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and +- * not power-off. Additionally clear all LVT entries before disable_local_APIC +- * for the case where Linux didn't enable the LAPIC. +- */ +-void lapic_shutdown(void) +-{ +- unsigned long flags; +- +- if (!cpu_has_apic) +- return; +- +- local_irq_save(flags); +- +-#ifdef CONFIG_X86_32 +- if (!enabled_via_apicbase) +- clear_local_APIC(); +- else +-#endif +- disable_local_APIC(); +- +- +- local_irq_restore(flags); +-} +- +-/* +- * This is to verify that we're looking at a real local APIC. +- * Check these against your board if the CPUs aren't getting +- * started for no apparent reason. +- */ +-int __init verify_local_APIC(void) +-{ +- unsigned int reg0, reg1; +- +- /* +- * The version register is read-only in a real APIC. +- */ +- reg0 = apic_read(APIC_LVR); +- apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); +- apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); +- reg1 = apic_read(APIC_LVR); +- apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); +- +- /* +- * The two version reads above should print the same +- * numbers. If the second one is different, then we +- * poke at a non-APIC. +- */ +- if (reg1 != reg0) +- return 0; +- +- /* +- * Check if the version looks reasonably. 
+- */ +- reg1 = GET_APIC_VERSION(reg0); +- if (reg1 == 0x00 || reg1 == 0xff) +- return 0; +- reg1 = lapic_get_maxlvt(); +- if (reg1 < 0x02 || reg1 == 0xff) +- return 0; +- +- /* +- * The ID register is read/write in a real APIC. +- */ +- reg0 = apic_read(APIC_ID); +- apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); +- apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); +- reg1 = apic_read(APIC_ID); +- apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); +- apic_write(APIC_ID, reg0); +- if (reg1 != (reg0 ^ APIC_ID_MASK)) +- return 0; +- +- /* +- * The next two are just to see if we have sane values. +- * They're only really relevant if we're in Virtual Wire +- * compatibility mode, but most boxes are anymore. +- */ +- reg0 = apic_read(APIC_LVT0); +- apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); +- reg1 = apic_read(APIC_LVT1); +- apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); +- +- return 1; +-} +- +-/** +- * sync_Arb_IDs - synchronize APIC bus arbitration IDs +- */ +-void __init sync_Arb_IDs(void) +-{ +- /* +- * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not +- * needed on AMD. +- */ +- if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) +- return; +- +- /* +- * Wait for idle. +- */ +- apic_wait_icr_idle(); +- +- apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); +- apic_write(APIC_ICR, APIC_DEST_ALLINC | +- APIC_INT_LEVELTRIG | APIC_DM_INIT); +-} +- +-/* +- * An initial setup of the virtual wire mode. +- */ +-void __init init_bsp_APIC(void) +-{ +- unsigned int value; +- +- /* +- * Don't do the setup now if we have a SMP BIOS as the +- * through-I/O-APIC virtual wire mode might be active. +- */ +- if (smp_found_config || !cpu_has_apic) +- return; +- +- /* +- * Do not trust the local APIC being empty at bootup. +- */ +- clear_local_APIC(); +- +- /* +- * Enable APIC. +- */ +- value = apic_read(APIC_SPIV); +- value &= ~APIC_VECTOR_MASK; +- value |= APIC_SPIV_APIC_ENABLED; +- +-#ifdef CONFIG_X86_32 +- /* This bit is reserved on P4/Xeon and should be cleared */ +- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && +- (boot_cpu_data.x86 == 15)) +- value &= ~APIC_SPIV_FOCUS_DISABLED; +- else +-#endif +- value |= APIC_SPIV_FOCUS_DISABLED; +- value |= SPURIOUS_APIC_VECTOR; +- apic_write(APIC_SPIV, value); +- +- /* +- * Set up the virtual wire mode. +- */ +- apic_write(APIC_LVT0, APIC_DM_EXTINT); +- value = APIC_DM_NMI; +- if (!lapic_is_integrated()) /* 82489DX */ +- value |= APIC_LVT_LEVEL_TRIGGER; +- apic_write(APIC_LVT1, value); +-} +- +-static void __cpuinit lapic_setup_esr(void) +-{ +- unsigned int oldvalue, value, maxlvt; +- +- if (!lapic_is_integrated()) { +- pr_info("No ESR for 82489DX.\n"); +- return; +- } +- +- if (esr_disable) { +- /* +- * Something untraceable is creating bad interrupts on +- * secondary quads ... for the moment, just leave the +- * ESR disabled - we can't do anything useful with the +- * errors anyway - mbligh +- */ +- pr_info("Leaving ESR disabled.\n"); +- return; +- } +- +- maxlvt = lapic_get_maxlvt(); +- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ +- apic_write(APIC_ESR, 0); +- oldvalue = apic_read(APIC_ESR); +- +- /* enables sending errors */ +- value = ERROR_APIC_VECTOR; +- apic_write(APIC_LVTERR, value); +- +- /* +- * spec says clear errors after enabling vector. 
+- */ +- if (maxlvt > 3) +- apic_write(APIC_ESR, 0); +- value = apic_read(APIC_ESR); +- if (value != oldvalue) +- apic_printk(APIC_VERBOSE, "ESR value before enabling " +- "vector: 0x%08x after: 0x%08x\n", +- oldvalue, value); +-} +- +- +-/** +- * setup_local_APIC - setup the local APIC +- */ +-void __cpuinit setup_local_APIC(void) +-{ +- unsigned int value; +- int i, j; +- +-#ifdef CONFIG_X86_32 +- /* Pound the ESR really hard over the head with a big hammer - mbligh */ +- if (lapic_is_integrated() && esr_disable) { +- apic_write(APIC_ESR, 0); +- apic_write(APIC_ESR, 0); +- apic_write(APIC_ESR, 0); +- apic_write(APIC_ESR, 0); +- } +-#endif +- +- preempt_disable(); +- +- /* +- * Double-check whether this APIC is really registered. +- * This is meaningless in clustered apic mode, so we skip it. +- */ +- if (!apic_id_registered()) +- BUG(); +- +- /* +- * Intel recommends to set DFR, LDR and TPR before enabling +- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel +- * document number 292116). So here it goes... +- */ +- init_apic_ldr(); +- +- /* +- * Set Task Priority to 'accept all'. We never change this +- * later on. +- */ +- value = apic_read(APIC_TASKPRI); +- value &= ~APIC_TPRI_MASK; +- apic_write(APIC_TASKPRI, value); +- +- /* +- * After a crash, we no longer service the interrupts and a pending +- * interrupt from previous kernel might still have ISR bit set. +- * +- * Most probably by now CPU has serviced that pending interrupt and +- * it might not have done the ack_APIC_irq() because it thought, +- * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it +- * does not clear the ISR bit and cpu thinks it has already serivced +- * the interrupt. Hence a vector might get locked. It was noticed +- * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. +- */ +- for (i = APIC_ISR_NR - 1; i >= 0; i--) { +- value = apic_read(APIC_ISR + i*0x10); +- for (j = 31; j >= 0; j--) { +- if (value & (1< 1) || +- (boot_cpu_data.x86 >= 15)) +- break; +- goto no_apic; +- case X86_VENDOR_INTEL: +- if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || +- (boot_cpu_data.x86 == 5 && cpu_has_apic)) +- break; +- goto no_apic; +- default: +- goto no_apic; +- } +- +- if (!cpu_has_apic) { +- /* +- * Over-ride BIOS and try to enable the local APIC only if +- * "lapic" specified. +- */ +- if (!force_enable_local_apic) { +- pr_info("Local APIC disabled by BIOS -- " +- "you can enable it with \"lapic\"\n"); +- return -1; +- } +- /* +- * Some BIOSes disable the local APIC in the APIC_BASE +- * MSR. This can only be done in software for Intel P6 or later +- * and AMD K7 (Model > 1) or later. 
+- */ +- rdmsr(MSR_IA32_APICBASE, l, h); +- if (!(l & MSR_IA32_APICBASE_ENABLE)) { +- pr_info("Local APIC disabled by BIOS -- reenabling.\n"); +- l &= ~MSR_IA32_APICBASE_BASE; +- l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; +- wrmsr(MSR_IA32_APICBASE, l, h); +- enabled_via_apicbase = 1; +- } +- } +- /* +- * The APIC feature bit should now be enabled +- * in `cpuid' +- */ +- features = cpuid_edx(1); +- if (!(features & (1 << X86_FEATURE_APIC))) { +- pr_warning("Could not enable APIC!\n"); +- return -1; +- } +- set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); +- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; +- +- /* The BIOS may have set up the APIC at some other address */ +- rdmsr(MSR_IA32_APICBASE, l, h); +- if (l & MSR_IA32_APICBASE_ENABLE) +- mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; +- +- pr_info("Found and enabled local APIC!\n"); +- +- apic_pm_activate(); +- +- return 0; +- +-no_apic: +- pr_info("No local APIC present or hardware disabled\n"); +- return -1; +-} +-#endif +- +-#ifdef CONFIG_X86_64 +-void __init early_init_lapic_mapping(void) +-{ +- unsigned long phys_addr; +- +- /* +- * If no local APIC can be found then go out +- * : it means there is no mpatable and MADT +- */ +- if (!smp_found_config) +- return; +- +- phys_addr = mp_lapic_addr; +- +- set_fixmap_nocache(FIX_APIC_BASE, phys_addr); +- apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", +- APIC_BASE, phys_addr); +- +- /* +- * Fetch the APIC ID of the BSP in case we have a +- * default configuration (or the MP table is broken). +- */ +- boot_cpu_physical_apicid = read_apic_id(); +-} +-#endif +- +-/** +- * init_apic_mappings - initialize APIC mappings +- */ +-void __init init_apic_mappings(void) +-{ +-#ifdef HAVE_X2APIC +- if (x2apic) { +- boot_cpu_physical_apicid = read_apic_id(); +- return; +- } +-#endif +- +- /* +- * If no local APIC can be found then set up a fake all +- * zeroes page to simulate the local APIC and another +- * one for the IO-APIC. +- */ +- if (!smp_found_config && detect_init_APIC()) { +- apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); +- apic_phys = __pa(apic_phys); +- } else +- apic_phys = mp_lapic_addr; +- +- set_fixmap_nocache(FIX_APIC_BASE, apic_phys); +- apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", +- APIC_BASE, apic_phys); +- +- /* +- * Fetch the APIC ID of the BSP in case we have a +- * default configuration (or the MP table is broken). +- */ +- if (boot_cpu_physical_apicid == -1U) +- boot_cpu_physical_apicid = read_apic_id(); +-} +- +-/* +- * This initializes the IO-APIC and APIC hardware if this is +- * a UP kernel. +- */ +-int apic_version[MAX_APICS]; +- +-int __init APIC_init_uniprocessor(void) +-{ +-#ifdef CONFIG_X86_64 +- if (disable_apic) { +- pr_info("Apic disabled\n"); +- return -1; +- } +- if (!cpu_has_apic) { +- disable_apic = 1; +- pr_info("Apic disabled by BIOS\n"); +- return -1; +- } +-#else +- if (!smp_found_config && !cpu_has_apic) +- return -1; +- +- /* +- * Complain if the BIOS pretends there is one. 
+- */ +- if (!cpu_has_apic && +- APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { +- pr_err("BIOS bug, local APIC 0x%x not detected!...\n", +- boot_cpu_physical_apicid); +- clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); +- return -1; +- } +-#endif +- +-#ifdef HAVE_X2APIC +- enable_IR_x2apic(); +-#endif +-#ifdef CONFIG_X86_64 +- setup_apic_routing(); +-#endif +- +- verify_local_APIC(); +- connect_bsp_APIC(); +- +-#ifdef CONFIG_X86_64 +- apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); +-#else +- /* +- * Hack: In case of kdump, after a crash, kernel might be booting +- * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid +- * might be zero if read from MP tables. Get it from LAPIC. +- */ +-# ifdef CONFIG_CRASH_DUMP +- boot_cpu_physical_apicid = read_apic_id(); +-# endif +-#endif +- physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); +- setup_local_APIC(); +- +-#ifdef CONFIG_X86_64 +- /* +- * Now enable IO-APICs, actually call clear_IO_APIC +- * We need clear_IO_APIC before enabling vector on BP +- */ +- if (!skip_ioapic_setup && nr_ioapics) +- enable_IO_APIC(); +-#endif +- +-#ifdef CONFIG_X86_IO_APIC +- if (!smp_found_config || skip_ioapic_setup || !nr_ioapics) +-#endif +- localise_nmi_watchdog(); +- end_local_APIC_setup(); +- +-#ifdef CONFIG_X86_IO_APIC +- if (smp_found_config && !skip_ioapic_setup && nr_ioapics) +- setup_IO_APIC(); +-# ifdef CONFIG_X86_64 +- else +- nr_ioapics = 0; +-# endif +-#endif +- +-#ifdef CONFIG_X86_64 +- setup_boot_APIC_clock(); +- check_nmi_watchdog(); +-#else +- setup_boot_clock(); +-#endif +- +- return 0; +-} +- +-/* +- * Local APIC interrupts +- */ +- +-/* +- * This interrupt should _never_ happen with our APIC/SMP architecture +- */ +-void smp_spurious_interrupt(struct pt_regs *regs) +-{ +- u32 v; +- +- exit_idle(); +- irq_enter(); +- /* +- * Check if this really is a spurious interrupt and ACK it +- * if it is a vectored one. Just in case... +- * Spurious interrupts should not be ACKed. +- */ +- v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); +- if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) +- ack_APIC_irq(); +- +- inc_irq_stat(irq_spurious_count); +- +- /* see sw-dev-man vol 3, chapter 7.4.13.5 */ +- pr_info("spurious APIC interrupt on CPU#%d, " +- "should never happen.\n", smp_processor_id()); +- irq_exit(); +-} +- +-/* +- * This interrupt should never happen with our APIC/SMP architecture +- */ +-void smp_error_interrupt(struct pt_regs *regs) +-{ +- u32 v, v1; +- +- exit_idle(); +- irq_enter(); +- /* First tickle the hardware, only then report what went on. -- REW */ +- v = apic_read(APIC_ESR); +- apic_write(APIC_ESR, 0); +- v1 = apic_read(APIC_ESR); +- ack_APIC_irq(); +- atomic_inc(&irq_err_count); +- +- /* +- * Here is what the APIC error bits mean: +- * 0: Send CS error +- * 1: Receive CS error +- * 2: Send accept error +- * 3: Receive accept error +- * 4: Reserved +- * 5: Send illegal vector +- * 6: Received illegal vector +- * 7: Illegal register address +- */ +- pr_debug("APIC error on CPU%d: %02x(%02x)\n", +- smp_processor_id(), v , v1); +- irq_exit(); +-} +- +-/** +- * connect_bsp_APIC - attach the APIC to the interrupt system +- */ +-void __init connect_bsp_APIC(void) +-{ +-#ifdef CONFIG_X86_32 +- if (pic_mode) { +- /* +- * Do not trust the local APIC being empty at bootup. +- */ +- clear_local_APIC(); +- /* +- * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's +- * local APIC to INT and NMI lines. 
+- */ +- apic_printk(APIC_VERBOSE, "leaving PIC mode, " +- "enabling APIC mode.\n"); +- outb(0x70, 0x22); +- outb(0x01, 0x23); +- } +-#endif +- enable_apic_mode(); +-} +- +-/** +- * disconnect_bsp_APIC - detach the APIC from the interrupt system +- * @virt_wire_setup: indicates, whether virtual wire mode is selected +- * +- * Virtual wire mode is necessary to deliver legacy interrupts even when the +- * APIC is disabled. +- */ +-void disconnect_bsp_APIC(int virt_wire_setup) +-{ +- unsigned int value; +- +-#ifdef CONFIG_X86_32 +- if (pic_mode) { +- /* +- * Put the board back into PIC mode (has an effect only on +- * certain older boards). Note that APIC interrupts, including +- * IPIs, won't work beyond this point! The only exception are +- * INIT IPIs. +- */ +- apic_printk(APIC_VERBOSE, "disabling APIC mode, " +- "entering PIC mode.\n"); +- outb(0x70, 0x22); +- outb(0x00, 0x23); +- return; +- } +-#endif +- +- /* Go back to Virtual Wire compatibility mode */ +- +- /* For the spurious interrupt use vector F, and enable it */ +- value = apic_read(APIC_SPIV); +- value &= ~APIC_VECTOR_MASK; +- value |= APIC_SPIV_APIC_ENABLED; +- value |= 0xf; +- apic_write(APIC_SPIV, value); +- +- if (!virt_wire_setup) { +- /* +- * For LVT0 make it edge triggered, active high, +- * external and enabled +- */ +- value = apic_read(APIC_LVT0); +- value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | +- APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | +- APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); +- value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; +- value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); +- apic_write(APIC_LVT0, value); +- } else { +- /* Disable LVT0 */ +- apic_write(APIC_LVT0, APIC_LVT_MASKED); +- } +- +- /* +- * For LVT1 make it edge triggered, active high, +- * nmi and enabled +- */ +- value = apic_read(APIC_LVT1); +- value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | +- APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | +- APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); +- value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; +- value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); +- apic_write(APIC_LVT1, value); +-} +- +-void __cpuinit generic_processor_info(int apicid, int version) +-{ +- int cpu; +- +- /* +- * Validate version +- */ +- if (version == 0x0) { +- pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " +- "fixing up to 0x10. (tell your hw vendor)\n", +- version); +- version = 0x10; +- } +- apic_version[apicid] = version; +- +- if (num_processors >= nr_cpu_ids) { +- int max = nr_cpu_ids; +- int thiscpu = max + disabled_cpus; +- +- pr_warning( +- "ACPI: NR_CPUS/possible_cpus limit of %i reached." +- " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); +- +- disabled_cpus++; +- return; +- } +- +- num_processors++; +- cpu = cpumask_next_zero(-1, cpu_present_mask); +- +- if (version != apic_version[boot_cpu_physical_apicid]) +- WARN_ONCE(1, +- "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n", +- apic_version[boot_cpu_physical_apicid], cpu, version); +- +- physid_set(apicid, phys_cpu_present_map); +- if (apicid == boot_cpu_physical_apicid) { +- /* +- * x86_bios_cpu_apicid is required to have processors listed +- * in same order as logical cpu numbers. Hence the first +- * entry is BSP, and so on. 
+- */ +- cpu = 0; +- } +- if (apicid > max_physical_apicid) +- max_physical_apicid = apicid; +- +-#ifdef CONFIG_X86_32 +- /* +- * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y +- * but we need to work other dependencies like SMP_SUSPEND etc +- * before this can be done without some confusion. +- * if (CPU_HOTPLUG_ENABLED || num_processors > 8) +- * - Ashok Raj +- */ +- if (max_physical_apicid >= 8) { +- switch (boot_cpu_data.x86_vendor) { +- case X86_VENDOR_INTEL: +- if (!APIC_XAPIC(version)) { +- def_to_bigsmp = 0; +- break; +- } +- /* If P4 and above fall through */ +- case X86_VENDOR_AMD: +- def_to_bigsmp = 1; +- } +- } +-#endif +- +-#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) +- /* are we being called early in kernel startup? */ +- if (early_per_cpu_ptr(x86_cpu_to_apicid)) { +- u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); +- u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); +- +- cpu_to_apicid[cpu] = apicid; +- bios_cpu_apicid[cpu] = apicid; +- } else { +- per_cpu(x86_cpu_to_apicid, cpu) = apicid; +- per_cpu(x86_bios_cpu_apicid, cpu) = apicid; +- } +-#endif +- +- set_cpu_possible(cpu, true); +- set_cpu_present(cpu, true); +-} +- +-#ifdef CONFIG_X86_64 +-int hard_smp_processor_id(void) +-{ +- return read_apic_id(); +-} +-#endif +- +-/* +- * Power management +- */ +-#ifdef CONFIG_PM +- +-static struct { +- /* +- * 'active' is true if the local APIC was enabled by us and +- * not the BIOS; this signifies that we are also responsible +- * for disabling it before entering apm/acpi suspend +- */ +- int active; +- /* r/w apic fields */ +- unsigned int apic_id; +- unsigned int apic_taskpri; +- unsigned int apic_ldr; +- unsigned int apic_dfr; +- unsigned int apic_spiv; +- unsigned int apic_lvtt; +- unsigned int apic_lvtpc; +- unsigned int apic_lvt0; +- unsigned int apic_lvt1; +- unsigned int apic_lvterr; +- unsigned int apic_tmict; +- unsigned int apic_tdcr; +- unsigned int apic_thmr; +-} apic_pm_state; +- +-static int lapic_suspend(struct sys_device *dev, pm_message_t state) +-{ +- unsigned long flags; +- int maxlvt; +- +- if (!apic_pm_state.active) +- return 0; +- +- maxlvt = lapic_get_maxlvt(); +- +- apic_pm_state.apic_id = apic_read(APIC_ID); +- apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); +- apic_pm_state.apic_ldr = apic_read(APIC_LDR); +- apic_pm_state.apic_dfr = apic_read(APIC_DFR); +- apic_pm_state.apic_spiv = apic_read(APIC_SPIV); +- apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); +- if (maxlvt >= 4) +- apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); +- apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); +- apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); +- apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); +- apic_pm_state.apic_tmict = apic_read(APIC_TMICT); +- apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); +-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) +- if (maxlvt >= 5) +- apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); +-#endif +- +- local_irq_save(flags); +- disable_local_APIC(); +- local_irq_restore(flags); +- return 0; +-} +- +-static int lapic_resume(struct sys_device *dev) +-{ +- unsigned int l, h; +- unsigned long flags; +- int maxlvt; +- +- if (!apic_pm_state.active) +- return 0; +- +- maxlvt = lapic_get_maxlvt(); +- +- local_irq_save(flags); +- +-#ifdef HAVE_X2APIC +- if (x2apic) +- enable_x2apic(); +- else +-#endif +- { +- /* +- * Make sure the APICBASE points to the right address +- * +- * FIXME! This will be wrong if we ever support suspend on +- * SMP! 
We'll need to do this as part of the CPU restore! +- */ +- rdmsr(MSR_IA32_APICBASE, l, h); +- l &= ~MSR_IA32_APICBASE_BASE; +- l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; +- wrmsr(MSR_IA32_APICBASE, l, h); +- } +- +- apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); +- apic_write(APIC_ID, apic_pm_state.apic_id); +- apic_write(APIC_DFR, apic_pm_state.apic_dfr); +- apic_write(APIC_LDR, apic_pm_state.apic_ldr); +- apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); +- apic_write(APIC_SPIV, apic_pm_state.apic_spiv); +- apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); +- apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); +-#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) +- if (maxlvt >= 5) +- apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); +-#endif +- if (maxlvt >= 4) +- apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); +- apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); +- apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); +- apic_write(APIC_TMICT, apic_pm_state.apic_tmict); +- apic_write(APIC_ESR, 0); +- apic_read(APIC_ESR); +- apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); +- apic_write(APIC_ESR, 0); +- apic_read(APIC_ESR); +- +- local_irq_restore(flags); +- +- return 0; +-} +- +-/* +- * This device has no shutdown method - fully functioning local APICs +- * are needed on every CPU up until machine_halt/restart/poweroff. +- */ +- +-static struct sysdev_class lapic_sysclass = { +- .name = "lapic", +- .resume = lapic_resume, +- .suspend = lapic_suspend, +-}; +- +-static struct sys_device device_lapic = { +- .id = 0, +- .cls = &lapic_sysclass, +-}; +- +-static void __cpuinit apic_pm_activate(void) +-{ +- apic_pm_state.active = 1; +-} +- +-static int __init init_lapic_sysfs(void) +-{ +- int error; +- +- if (!cpu_has_apic) +- return 0; +- /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ +- +- error = sysdev_class_register(&lapic_sysclass); +- if (!error) +- error = sysdev_register(&device_lapic); +- return error; +-} +-device_initcall(init_lapic_sysfs); +- +-#else /* CONFIG_PM */ +- +-static void apic_pm_activate(void) { } +- +-#endif /* CONFIG_PM */ +- +-#ifdef CONFIG_X86_64 +-/* +- * apic_is_clustered_box() -- Check if we can expect good TSC +- * +- * Thus far, the major user of this is IBM's Summit2 series: +- * +- * Clustered boxes may have unsynced TSC problems if they are +- * multi-chassis. Use available data to take a good guess. +- * If in doubt, go HPET. +- */ +-__cpuinit int apic_is_clustered_box(void) +-{ +- int i, clusters, zeros; +- unsigned id; +- u16 *bios_cpu_apicid; +- DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); +- +- /* +- * there is not this kind of box with AMD CPU yet. +- * Some AMD box with quadcore cpu and 8 sockets apicid +- * will be [4, 0x23] or [8, 0x27] could be thought to +- * vsmp box still need checking... +- */ +- if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) +- return 0; +- +- bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); +- bitmap_zero(clustermap, NUM_APIC_CLUSTERS); +- +- for (i = 0; i < nr_cpu_ids; i++) { +- /* are we being called early in kernel startup? */ +- if (bios_cpu_apicid) { +- id = bios_cpu_apicid[i]; +- } else if (i < nr_cpu_ids) { +- if (cpu_present(i)) +- id = per_cpu(x86_bios_cpu_apicid, i); +- else +- continue; +- } else +- break; +- +- if (id != BAD_APICID) +- __set_bit(APIC_CLUSTERID(id), clustermap); +- } +- +- /* Problem: Partially populated chassis may not have CPUs in some of +- * the APIC clusters they have been allocated. 
Only present CPUs have +- * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. +- * Since clusters are allocated sequentially, count zeros only if +- * they are bounded by ones. +- */ +- clusters = 0; +- zeros = 0; +- for (i = 0; i < NUM_APIC_CLUSTERS; i++) { +- if (test_bit(i, clustermap)) { +- clusters += 1 + zeros; +- zeros = 0; +- } else +- ++zeros; +- } +- +- /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are +- * not guaranteed to be synced between boards +- */ +- if (is_vsmp_box() && clusters > 1) +- return 1; +- +- /* +- * If clusters > 2, then should be multi-chassis. +- * May have to revisit this when multi-core + hyperthreaded CPUs come +- * out, but AFAIK this will work even for them. +- */ +- return (clusters > 2); +-} +-#endif +- +-/* +- * APIC command line parameters +- */ +-static int __init setup_disableapic(char *arg) +-{ +- disable_apic = 1; +- setup_clear_cpu_cap(X86_FEATURE_APIC); +- return 0; +-} +-early_param("disableapic", setup_disableapic); +- +-/* same as disableapic, for compatibility */ +-static int __init setup_nolapic(char *arg) +-{ +- return setup_disableapic(arg); +-} +-early_param("nolapic", setup_nolapic); +- +-static int __init parse_lapic_timer_c2_ok(char *arg) +-{ +- local_apic_timer_c2_ok = 1; +- return 0; +-} +-early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); +- +-static int __init parse_disable_apic_timer(char *arg) +-{ +- disable_apic_timer = 1; +- return 0; +-} +-early_param("noapictimer", parse_disable_apic_timer); +- +-static int __init parse_nolapic_timer(char *arg) +-{ +- disable_apic_timer = 1; +- return 0; +-} +-early_param("nolapic_timer", parse_nolapic_timer); +- +-static int __init apic_set_verbosity(char *arg) +-{ +- if (!arg) { +-#ifdef CONFIG_X86_64 +- skip_ioapic_setup = 0; +- return 0; +-#endif +- return -EINVAL; +- } +- +- if (strcmp("debug", arg) == 0) +- apic_verbosity = APIC_DEBUG; +- else if (strcmp("verbose", arg) == 0) +- apic_verbosity = APIC_VERBOSE; +- else { +- pr_warning("APIC Verbosity level %s not recognised" +- " use apic=verbose or apic=debug\n", arg); +- return -EINVAL; +- } +- +- return 0; +-} +-early_param("apic", apic_set_verbosity); +- +-static int __init lapic_insert_resource(void) +-{ +- if (!apic_phys) +- return -1; +- +- /* Put local APIC into the resource map. 
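The cluster-counting loop in apic_is_clustered_box() above implements the rule described in its comment: a run of empty clusters is only added to the total once a populated cluster follows it, so trailing gaps are discarded. A small userspace sketch of that rule, using an invented cluster map:

#include <stdio.h>

/* Same counting rule as the loop above, over a plain array instead of a
 * kernel bitmap; the map contents are made up for illustration. */
static int count_clusters(const unsigned char *map, int nbits)
{
	int i, clusters = 0, zeros = 0;

	for (i = 0; i < nbits; i++) {
		if (map[i]) {                  /* cluster i has a known CPU */
			clusters += 1 + zeros; /* pending gap is bounded by ones */
			zeros = 0;
		} else {
			++zeros;               /* trailing gaps never get added */
		}
	}
	return clusters;
}

int main(void)
{
	/* Clusters 0 and 3 populated, 1 and 2 unreported, 4..7 empty */
	unsigned char clustermap[8] = { 1, 0, 0, 1, 0, 0, 0, 0 };

	printf("clusters = %d\n", count_clusters(clustermap, 8));  /* prints 4 */
	return 0;
}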
*/ +- lapic_resource.start = apic_phys; +- lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; +- insert_resource(&iomem_resource, &lapic_resource); +- +- return 0; +-} +- +-/* +- * need call insert after e820_reserve_resources() +- * that is using request_resource +- */ +-late_initcall(lapic_insert_resource); +Index: linux-2.6-tip/arch/x86/kernel/apic/Makefile +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/apic/Makefile +@@ -0,0 +1,19 @@ ++# ++# Makefile for local APIC drivers and for the IO-APIC code ++# ++ ++obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o ++obj-$(CONFIG_X86_IO_APIC) += io_apic.o ++obj-$(CONFIG_SMP) += ipi.o ++ ++ifeq ($(CONFIG_X86_64),y) ++obj-y += apic_flat_64.o ++obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o ++obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o ++obj-$(CONFIG_X86_UV) += x2apic_uv_x.o ++endif ++ ++obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o ++obj-$(CONFIG_X86_NUMAQ) += numaq_32.o ++obj-$(CONFIG_X86_ES7000) += es7000_32.o ++obj-$(CONFIG_X86_SUMMIT) += summit_32.o +Index: linux-2.6-tip/arch/x86/kernel/apic/apic.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/apic/apic.c +@@ -0,0 +1,2219 @@ ++/* ++ * Local APIC handling, local APIC timers ++ * ++ * (c) 1999, 2000, 2009 Ingo Molnar ++ * ++ * Fixes ++ * Maciej W. Rozycki : Bits for genuine 82489DX APICs; ++ * thanks to Eric Gilmore ++ * and Rolf G. Tews ++ * for testing these extensively. ++ * Maciej W. Rozycki : Various updates and fixes. ++ * Mikael Pettersson : Power Management for UP-APIC. ++ * Pavel Machek and ++ * Mikael Pettersson : PM converted to driver model. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++unsigned int num_processors; ++ ++unsigned disabled_cpus __cpuinitdata; ++ ++/* Processor that is doing the boot up */ ++unsigned int boot_cpu_physical_apicid = -1U; ++ ++/* ++ * The highest APIC ID seen during enumeration. ++ * ++ * This determines the messaging protocol we can use: if all APIC IDs ++ * are in the 0 ... 7 range, then we can use logical addressing which ++ * has some performance advantages (better broadcasting). ++ * ++ * If there's an APIC ID above 8, we use physical addressing. ++ */ ++unsigned int max_physical_apicid; ++ ++/* ++ * Bitmask of physically existing CPUs: ++ */ ++physid_mask_t phys_cpu_present_map; ++ ++/* ++ * Map cpu index to physical APIC ID ++ */ ++DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); ++DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); ++EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); ++EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); ++ ++#ifdef CONFIG_X86_32 ++/* ++ * Knob to control our willingness to enable the local APIC. 
++ * ++ * +1=force-enable ++ */ ++static int force_enable_local_apic; ++/* ++ * APIC command line parameters ++ */ ++static int __init parse_lapic(char *arg) ++{ ++ force_enable_local_apic = 1; ++ return 0; ++} ++early_param("lapic", parse_lapic); ++/* Local APIC was disabled by the BIOS and enabled by the kernel */ ++static int enabled_via_apicbase; ++ ++#endif ++ ++#ifdef CONFIG_X86_64 ++static int apic_calibrate_pmtmr __initdata; ++static __init int setup_apicpmtimer(char *s) ++{ ++ apic_calibrate_pmtmr = 1; ++ notsc_setup(NULL); ++ return 0; ++} ++__setup("apicpmtimer", setup_apicpmtimer); ++#endif ++ ++#ifdef CONFIG_X86_X2APIC ++int x2apic; ++/* x2apic enabled before OS handover */ ++static int x2apic_preenabled; ++static int disable_x2apic; ++static __init int setup_nox2apic(char *str) ++{ ++ disable_x2apic = 1; ++ setup_clear_cpu_cap(X86_FEATURE_X2APIC); ++ return 0; ++} ++early_param("nox2apic", setup_nox2apic); ++#endif ++ ++unsigned long mp_lapic_addr; ++int disable_apic; ++/* Disable local APIC timer from the kernel commandline or via dmi quirk */ ++static int disable_apic_timer __cpuinitdata; ++/* Local APIC timer works in C2 */ ++int local_apic_timer_c2_ok; ++EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); ++ ++int first_system_vector = 0xfe; ++ ++/* ++ * Debug level, exported for io_apic.c ++ */ ++unsigned int apic_verbosity; ++ ++int pic_mode; ++ ++/* Have we found an MP table */ ++int smp_found_config; ++ ++static struct resource lapic_resource = { ++ .name = "Local APIC", ++ .flags = IORESOURCE_MEM | IORESOURCE_BUSY, ++}; ++ ++static unsigned int calibration_result; ++ ++static int lapic_next_event(unsigned long delta, ++ struct clock_event_device *evt); ++static void lapic_timer_setup(enum clock_event_mode mode, ++ struct clock_event_device *evt); ++static void lapic_timer_broadcast(const struct cpumask *mask); ++static void apic_pm_activate(void); ++ ++/* ++ * The local apic timer can be used for any function which is CPU local. 
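For reference, the handlers registered in this file are consumed from the kernel command line: lapic (the 32-bit force-enable knob documented in the surrounding comment), apicpmtimer and nox2apic here, plus disableapic/nolapic, noapictimer/nolapic_timer, lapic_timer_c2_ok and apic=verbose|debug as seen earlier in the removed copy. A hypothetical boot entry exercising two of them; the bootloader syntax, kernel path and root device are illustrative only:

	kernel /boot/vmlinuz-2.6.29.5-1-rt root=/dev/sda1 ro apic=verbose noapictimer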
++ */ ++static struct clock_event_device lapic_clockevent = { ++ .name = "lapic", ++ .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT ++ | CLOCK_EVT_FEAT_C3STOP | CLOCK_EVT_FEAT_DUMMY, ++ .shift = 32, ++ .set_mode = lapic_timer_setup, ++ .set_next_event = lapic_next_event, ++ .broadcast = lapic_timer_broadcast, ++ .rating = 100, ++ .irq = -1, ++}; ++static DEFINE_PER_CPU(struct clock_event_device, lapic_events); ++ ++static unsigned long apic_phys; ++ ++/* ++ * Get the LAPIC version ++ */ ++static inline int lapic_get_version(void) ++{ ++ return GET_APIC_VERSION(apic_read(APIC_LVR)); ++} ++ ++/* ++ * Check, if the APIC is integrated or a separate chip ++ */ ++static inline int lapic_is_integrated(void) ++{ ++#ifdef CONFIG_X86_64 ++ return 1; ++#else ++ return APIC_INTEGRATED(lapic_get_version()); ++#endif ++} ++ ++/* ++ * Check, whether this is a modern or a first generation APIC ++ */ ++static int modern_apic(void) ++{ ++ /* AMD systems use old APIC versions, so check the CPU */ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && ++ boot_cpu_data.x86 >= 0xf) ++ return 1; ++ return lapic_get_version() >= 0x14; ++} ++ ++void native_apic_wait_icr_idle(void) ++{ ++ while (apic_read(APIC_ICR) & APIC_ICR_BUSY) ++ cpu_relax(); ++} ++ ++u32 native_safe_apic_wait_icr_idle(void) ++{ ++ u32 send_status; ++ int timeout; ++ ++ timeout = 0; ++ do { ++ send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; ++ if (!send_status) ++ break; ++ udelay(100); ++ } while (timeout++ < 1000); ++ ++ return send_status; ++} ++ ++void native_apic_icr_write(u32 low, u32 id) ++{ ++ apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); ++ apic_write(APIC_ICR, low); ++} ++ ++u64 native_apic_icr_read(void) ++{ ++ u32 icr1, icr2; ++ ++ icr2 = apic_read(APIC_ICR2); ++ icr1 = apic_read(APIC_ICR); ++ ++ return icr1 | ((u64)icr2 << 32); ++} ++ ++/** ++ * enable_NMI_through_LVT0 - enable NMI through local vector table 0 ++ */ ++void __cpuinit enable_NMI_through_LVT0(void) ++{ ++ unsigned int v; ++ ++ /* unmask and set to NMI */ ++ v = APIC_DM_NMI; ++ ++ /* Level triggered for 82489DX (32bit mode) */ ++ if (!lapic_is_integrated()) ++ v |= APIC_LVT_LEVEL_TRIGGER; ++ ++ apic_write(APIC_LVT0, v); ++} ++ ++#ifdef CONFIG_X86_32 ++/** ++ * get_physical_broadcast - Get number of physical broadcast IDs ++ */ ++int get_physical_broadcast(void) ++{ ++ return modern_apic() ? 0xff : 0xf; ++} ++#endif ++ ++/** ++ * lapic_get_maxlvt - get the maximum number of local vector table entries ++ */ ++int lapic_get_maxlvt(void) ++{ ++ unsigned int v; ++ ++ v = apic_read(APIC_LVR); ++ /* ++ * - we always have APIC integrated on 64bit mode ++ * - 82489DXs do not report # of LVT entries ++ */ ++ return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; ++} ++ ++/* ++ * Local APIC timer ++ */ ++ ++/* Clock divisor */ ++#define APIC_DIVISOR 16 ++ ++/* ++ * This function sets up the local APIC timer, with a timeout of ++ * 'clocks' APIC bus clock. During calibration we actually call ++ * this function twice on the boot CPU, once with a bogus timeout ++ * value, second time for real. The other (noncalibrating) CPUs ++ * call this function only once, with the real, calibrated value. ++ * ++ * We do reads before writes even if unnecessary, to get around the ++ * P5 APIC double write bug. 
++ */ ++static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) ++{ ++ unsigned int lvtt_value, tmp_value; ++ ++ lvtt_value = LOCAL_TIMER_VECTOR; ++ if (!oneshot) ++ lvtt_value |= APIC_LVT_TIMER_PERIODIC; ++ if (!lapic_is_integrated()) ++ lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV); ++ ++ if (!irqen) ++ lvtt_value |= APIC_LVT_MASKED; ++ ++ apic_write(APIC_LVTT, lvtt_value); ++ ++ /* ++ * Divide PICLK by 16 ++ */ ++ tmp_value = apic_read(APIC_TDCR); ++ apic_write(APIC_TDCR, ++ (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | ++ APIC_TDR_DIV_16); ++ ++ if (!oneshot) ++ apic_write(APIC_TMICT, clocks / APIC_DIVISOR); ++} ++ ++/* ++ * Setup extended LVT, AMD specific (K8, family 10h) ++ * ++ * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and ++ * MCE interrupts are supported. Thus MCE offset must be set to 0. ++ * ++ * If mask=1, the LVT entry does not generate interrupts while mask=0 ++ * enables the vector. See also the BKDGs. ++ */ ++ ++#define APIC_EILVT_LVTOFF_MCE 0 ++#define APIC_EILVT_LVTOFF_IBS 1 ++ ++static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) ++{ ++ unsigned long reg = (lvt_off << 4) + APIC_EILVT0; ++ unsigned int v = (mask << 16) | (msg_type << 8) | vector; ++ ++ apic_write(reg, v); ++} ++ ++u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask) ++{ ++ setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask); ++ return APIC_EILVT_LVTOFF_MCE; ++} ++ ++u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask) ++{ ++ setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask); ++ return APIC_EILVT_LVTOFF_IBS; ++} ++EXPORT_SYMBOL_GPL(setup_APIC_eilvt_ibs); ++ ++/* ++ * Program the next event, relative to now ++ */ ++static int lapic_next_event(unsigned long delta, ++ struct clock_event_device *evt) ++{ ++ apic_write(APIC_TMICT, delta); ++ return 0; ++} ++ ++/* ++ * Setup the lapic timer in periodic or oneshot mode ++ */ ++static void lapic_timer_setup(enum clock_event_mode mode, ++ struct clock_event_device *evt) ++{ ++ unsigned long flags; ++ unsigned int v; ++ ++ /* Lapic used as dummy for broadcast ? */ ++ if (evt->features & CLOCK_EVT_FEAT_DUMMY) ++ return; ++ ++ local_irq_save(flags); ++ ++ switch (mode) { ++ case CLOCK_EVT_MODE_PERIODIC: ++ case CLOCK_EVT_MODE_ONESHOT: ++ __setup_APIC_LVTT(calibration_result, ++ mode != CLOCK_EVT_MODE_PERIODIC, 1); ++ break; ++ case CLOCK_EVT_MODE_UNUSED: ++ case CLOCK_EVT_MODE_SHUTDOWN: ++ v = apic_read(APIC_LVTT); ++ v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); ++ apic_write(APIC_LVTT, v); ++ apic_write(APIC_TMICT, 0xffffffff); ++ break; ++ case CLOCK_EVT_MODE_RESUME: ++ /* Nothing to do here */ ++ break; ++ } ++ ++ local_irq_restore(flags); ++} ++ ++/* ++ * Local APIC timer broadcast function ++ */ ++static void lapic_timer_broadcast(const struct cpumask *mask) ++{ ++#ifdef CONFIG_SMP ++ apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR); ++#endif ++} ++ ++/* ++ * Setup the local APIC timer for this CPU. Copy the initilized values ++ * of the boot CPU and register the clock event in the framework. ++ */ ++static void __cpuinit setup_APIC_timer(void) ++{ ++ struct clock_event_device *levt = &__get_cpu_var(lapic_events); ++ ++ memcpy(levt, &lapic_clockevent, sizeof(*levt)); ++ levt->cpumask = cpumask_of(smp_processor_id()); ++ ++ clockevents_register_device(levt); ++} ++ ++/* ++ * In this functions we calibrate APIC bus clocks to the external timer. ++ * ++ * We want to do the calibration only once since we want to have local timer ++ * irqs syncron. 
CPUs connected by the same APIC bus have the very same bus ++ * frequency. ++ * ++ * This was previously done by reading the PIT/HPET and waiting for a wrap ++ * around to find out, that a tick has elapsed. I have a box, where the PIT ++ * readout is broken, so it never gets out of the wait loop again. This was ++ * also reported by others. ++ * ++ * Monitoring the jiffies value is inaccurate and the clockevents ++ * infrastructure allows us to do a simple substitution of the interrupt ++ * handler. ++ * ++ * The calibration routine also uses the pm_timer when possible, as the PIT ++ * happens to run way too slow (factor 2.3 on my VAIO CoreDuo, which goes ++ * back to normal later in the boot process). ++ */ ++ ++#define LAPIC_CAL_LOOPS (HZ/10) ++ ++static __initdata int lapic_cal_loops = -1; ++static __initdata long lapic_cal_t1, lapic_cal_t2; ++static __initdata unsigned long long lapic_cal_tsc1, lapic_cal_tsc2; ++static __initdata unsigned long lapic_cal_pm1, lapic_cal_pm2; ++static __initdata unsigned long lapic_cal_j1, lapic_cal_j2; ++ ++/* ++ * Temporary interrupt handler. ++ */ ++static void __init lapic_cal_handler(struct clock_event_device *dev) ++{ ++ unsigned long long tsc = 0; ++ long tapic = apic_read(APIC_TMCCT); ++ unsigned long pm = acpi_pm_read_early(); ++ ++ if (cpu_has_tsc) ++ rdtscll(tsc); ++ ++ switch (lapic_cal_loops++) { ++ case 0: ++ lapic_cal_t1 = tapic; ++ lapic_cal_tsc1 = tsc; ++ lapic_cal_pm1 = pm; ++ lapic_cal_j1 = jiffies; ++ break; ++ ++ case LAPIC_CAL_LOOPS: ++ lapic_cal_t2 = tapic; ++ lapic_cal_tsc2 = tsc; ++ if (pm < lapic_cal_pm1) ++ pm += ACPI_PM_OVRRUN; ++ lapic_cal_pm2 = pm; ++ lapic_cal_j2 = jiffies; ++ break; ++ } ++} ++ ++static int __init ++calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc) ++{ ++ const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; ++ const long pm_thresh = pm_100ms / 100; ++ unsigned long mult; ++ u64 res; ++ ++#ifndef CONFIG_X86_PM_TIMER ++ return -1; ++#endif ++ ++ apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm); ++ ++ /* Check, if the PM timer is available */ ++ if (!deltapm) ++ return -1; ++ ++ mult = clocksource_hz2mult(PMTMR_TICKS_PER_SEC, 22); ++ ++ if (deltapm > (pm_100ms - pm_thresh) && ++ deltapm < (pm_100ms + pm_thresh)) { ++ apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n"); ++ return 0; ++ } ++ ++ res = (((u64)deltapm) * mult) >> 22; ++ do_div(res, 1000000); ++ pr_warning("APIC calibration not consistent " ++ "with PM-Timer: %ldms instead of 100ms\n",(long)res); ++ ++ /* Correct the lapic counter value */ ++ res = (((u64)(*delta)) * pm_100ms); ++ do_div(res, deltapm); ++ pr_info("APIC delta adjusted to PM-Timer: " ++ "%lu (%ld)\n", (unsigned long)res, *delta); ++ *delta = (long)res; ++ ++ /* Correct the tsc counter value */ ++ if (cpu_has_tsc) { ++ res = (((u64)(*deltatsc)) * pm_100ms); ++ do_div(res, deltapm); ++ apic_printk(APIC_VERBOSE, "TSC delta adjusted to " ++ "PM-Timer: %lu (%ld) \n", ++ (unsigned long)res, *deltatsc); ++ *deltatsc = (long)res; ++ } ++ ++ return 0; ++} ++ ++static int __init calibrate_APIC_clock(void) ++{ ++ struct clock_event_device *levt = &__get_cpu_var(lapic_events); ++ void (*real_handler)(struct clock_event_device *dev); ++ unsigned long deltaj; ++ long delta, deltatsc; ++ int pm_referenced = 0; ++ ++ local_irq_disable(); ++ ++ /* Replace the global interrupt handler */ ++ real_handler = global_clock_event->event_handler; ++ global_clock_event->event_handler = lapic_cal_handler; ++ ++ /* ++ * Setup the APIC counter to maximum. 
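The calibrate_by_pmtimer() helper above cross-checks the LAPIC measurement against the ACPI PM timer: if the PM timer says the nominal 100 ms window was off by more than 1%, the LAPIC delta (and, in this version, the TSC delta too) is rescaled to a true 100 ms. A standalone sketch of that correction with invented readings; PMTMR_TICKS_PER_SEC matches the usual 3.579545 MHz rate, everything else is assumed:

#include <stdio.h>
#include <stdint.h>

#define PMTMR_TICKS_PER_SEC 3579545UL   /* ACPI PM timer rate, ~3.58 MHz */

int main(void)
{
	const long pm_100ms  = PMTMR_TICKS_PER_SEC / 10;
	const long pm_thresh = pm_100ms / 100;   /* accept +/- 1% around 100 ms */

	long deltapm = 394000;    /* invented: PM timer saw ~110 ms, not 100 ms */
	long delta   = 4400000;   /* invented LAPIC countdown over the same window */

	if (deltapm > pm_100ms - pm_thresh && deltapm < pm_100ms + pm_thresh) {
		printf("PM-Timer result ok, delta = %ld\n", delta);
	} else {
		/* Rescale the LAPIC delta so it corresponds to a true 100 ms;
		 * the kernel applies the same correction to the TSC delta. */
		uint64_t res = (uint64_t)delta * pm_100ms;

		res /= deltapm;
		printf("APIC delta adjusted to PM-Timer: %lu (%ld)\n",
		       (unsigned long)res, delta);
	}
	return 0;
}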
There is no way the lapic ++ * can underflow in the 100ms detection time frame ++ */ ++ __setup_APIC_LVTT(0xffffffff, 0, 0); ++ ++ /* Let the interrupts run */ ++ local_irq_enable(); ++ ++ while (lapic_cal_loops <= LAPIC_CAL_LOOPS) ++ cpu_relax(); ++ ++ local_irq_disable(); ++ ++ /* Restore the real event handler */ ++ global_clock_event->event_handler = real_handler; ++ ++ /* Build delta t1-t2 as apic timer counts down */ ++ delta = lapic_cal_t1 - lapic_cal_t2; ++ apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); ++ ++ deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1); ++ ++ /* we trust the PM based calibration if possible */ ++ pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, ++ &delta, &deltatsc); ++ ++ /* Calculate the scaled math multiplication factor */ ++ lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, ++ lapic_clockevent.shift); ++ lapic_clockevent.max_delta_ns = ++ clockevent_delta2ns(0x7FFFFF, &lapic_clockevent); ++ lapic_clockevent.min_delta_ns = ++ clockevent_delta2ns(0xF, &lapic_clockevent); ++ ++ calibration_result = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS; ++ ++ apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta); ++ apic_printk(APIC_VERBOSE, "..... mult: %ld\n", lapic_clockevent.mult); ++ apic_printk(APIC_VERBOSE, "..... calibration result: %u\n", ++ calibration_result); ++ ++ if (cpu_has_tsc) { ++ apic_printk(APIC_VERBOSE, "..... CPU clock speed is " ++ "%ld.%04ld MHz.\n", ++ (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ), ++ (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ)); ++ } ++ ++ apic_printk(APIC_VERBOSE, "..... host bus clock speed is " ++ "%u.%04u MHz.\n", ++ calibration_result / (1000000 / HZ), ++ calibration_result % (1000000 / HZ)); ++ ++ /* ++ * Do a sanity check on the APIC calibration result ++ */ ++ if (calibration_result < (1000000 / HZ)) { ++ local_irq_enable(); ++ pr_warning("APIC frequency too slow, disabling apic timer\n"); ++ return -1; ++ } ++ ++ levt->features &= ~CLOCK_EVT_FEAT_DUMMY; ++ ++ /* ++ * PM timer calibration failed or not turned on ++ * so lets try APIC timer based calibration ++ */ ++ if (!pm_referenced) { ++ apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); ++ ++ /* ++ * Setup the apic timer manually ++ */ ++ levt->event_handler = lapic_cal_handler; ++ lapic_timer_setup(CLOCK_EVT_MODE_PERIODIC, levt); ++ lapic_cal_loops = -1; ++ ++ /* Let the interrupts run */ ++ local_irq_enable(); ++ ++ while (lapic_cal_loops <= LAPIC_CAL_LOOPS) ++ cpu_relax(); ++ ++ /* Stop the lapic timer */ ++ lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); ++ ++ /* Jiffies delta */ ++ deltaj = lapic_cal_j2 - lapic_cal_j1; ++ apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); ++ ++ /* Check, if the jiffies result is consistent */ ++ if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) ++ apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); ++ else ++ levt->features |= CLOCK_EVT_FEAT_DUMMY; ++ } else ++ local_irq_enable(); ++ ++ if (levt->features & CLOCK_EVT_FEAT_DUMMY) { ++ pr_warning("APIC timer disabled due to verification failure\n"); ++ return -1; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Setup the boot APIC ++ * ++ * Calibrate and verify the result. ++ */ ++void __init setup_boot_APIC_clock(void) ++{ ++ /* ++ * The local apic timer can be disabled via the kernel ++ * commandline or from the CPU detection code. Register the lapic ++ * timer as a dummy clock event source on SMP systems, so the ++ * broadcast mechanism is used. On UP systems simply ignore it. 
++ */ ++ if (disable_apic_timer) { ++ pr_info("Disabling APIC timer\n"); ++ /* No broadcast on UP ! */ ++ if (num_possible_cpus() > 1) { ++ lapic_clockevent.mult = 1; ++ setup_APIC_timer(); ++ } ++ return; ++ } ++ ++ apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" ++ "calibrating APIC timer ...\n"); ++ ++ if (calibrate_APIC_clock()) { ++ /* No broadcast on UP ! */ ++ if (num_possible_cpus() > 1) ++ setup_APIC_timer(); ++ return; ++ } ++ ++ /* ++ * If nmi_watchdog is set to IO_APIC, we need the ++ * PIT/HPET going. Otherwise register lapic as a dummy ++ * device. ++ */ ++ if (nmi_watchdog != NMI_IO_APIC) ++ lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; ++ else ++ pr_warning("APIC timer registered as dummy," ++ " due to nmi_watchdog=%d!\n", nmi_watchdog); ++ ++ /* Setup the lapic or request the broadcast */ ++ setup_APIC_timer(); ++} ++ ++void __cpuinit setup_secondary_APIC_clock(void) ++{ ++ setup_APIC_timer(); ++} ++ ++/* ++ * The guts of the apic timer interrupt ++ */ ++static void local_apic_timer_interrupt(void) ++{ ++ int cpu = smp_processor_id(); ++ struct clock_event_device *evt = &per_cpu(lapic_events, cpu); ++ ++ /* ++ * Normally we should not be here till LAPIC has been initialized but ++ * in some cases like kdump, its possible that there is a pending LAPIC ++ * timer interrupt from previous kernel's context and is delivered in ++ * new kernel the moment interrupts are enabled. ++ * ++ * Interrupts are enabled early and LAPIC is setup much later, hence ++ * its possible that when we get here evt->event_handler is NULL. ++ * Check for event_handler being NULL and discard the interrupt as ++ * spurious. ++ */ ++ if (!evt->event_handler) { ++ pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu); ++ /* Switch it off */ ++ lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); ++ return; ++ } ++ ++ /* ++ * the NMI deadlock-detector uses this. ++ */ ++ inc_irq_stat(apic_timer_irqs); ++ ++ evt->event_handler(evt); ++ ++ perf_counter_unthrottle(); ++} ++ ++/* ++ * Local APIC timer interrupt. This is the most natural way for doing ++ * local interrupts, but local timer interrupts can be emulated by ++ * broadcast interrupts too. [in case the hw doesn't support APIC timers] ++ * ++ * [ if a single-CPU system runs an SMP kernel then we call the local ++ * interrupt as well. Thus we cannot inline the local irq ... ] ++ */ ++void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) ++{ ++ struct pt_regs *old_regs = set_irq_regs(regs); ++ ++ /* ++ * NOTE! We'd better ACK the irq immediately, ++ * because timer handling can be slow. ++ */ ++ ack_APIC_irq(); ++ /* ++ * update_process_times() expects us to have done irq_enter(). ++ * Besides, if we don't timer interrupts ignore the global ++ * interrupt lock, which is the WrongThing (tm) to do. ++ */ ++ exit_idle(); ++ irq_enter(); ++ local_apic_timer_interrupt(); ++ irq_exit(); ++ ++ set_irq_regs(old_regs); ++} ++ ++int setup_profiling_timer(unsigned int multiplier) ++{ ++ return -EINVAL; ++} ++ ++/* ++ * Local APIC start and shutdown ++ */ ++ ++/** ++ * clear_local_APIC - shutdown the local APIC ++ * ++ * This is called, when a CPU is disabled and before rebooting, so the state of ++ * the local APIC has no dangling leftovers. Also used to cleanout any BIOS ++ * leftovers during boot. 
++ */ ++void clear_local_APIC(void) ++{ ++ int maxlvt; ++ u32 v; ++ ++ /* APIC hasn't been mapped yet */ ++ if (!x2apic && !apic_phys) ++ return; ++ ++ maxlvt = lapic_get_maxlvt(); ++ /* ++ * Masking an LVT entry can trigger a local APIC error ++ * if the vector is zero. Mask LVTERR first to prevent this. ++ */ ++ if (maxlvt >= 3) { ++ v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ ++ apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); ++ } ++ /* ++ * Careful: we have to set masks only first to deassert ++ * any level-triggered sources. ++ */ ++ v = apic_read(APIC_LVTT); ++ apic_write(APIC_LVTT, v | APIC_LVT_MASKED); ++ v = apic_read(APIC_LVT0); ++ apic_write(APIC_LVT0, v | APIC_LVT_MASKED); ++ v = apic_read(APIC_LVT1); ++ apic_write(APIC_LVT1, v | APIC_LVT_MASKED); ++ if (maxlvt >= 4) { ++ v = apic_read(APIC_LVTPC); ++ apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); ++ } ++ ++ /* lets not touch this if we didn't frob it */ ++#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) ++ if (maxlvt >= 5) { ++ v = apic_read(APIC_LVTTHMR); ++ apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); ++ } ++#endif ++#ifdef CONFIG_X86_MCE_INTEL ++ if (maxlvt >= 6) { ++ v = apic_read(APIC_LVTCMCI); ++ if (!(v & APIC_LVT_MASKED)) ++ apic_write(APIC_LVTCMCI, v | APIC_LVT_MASKED); ++ } ++#endif ++ ++ /* ++ * Clean APIC state for other OSs: ++ */ ++ apic_write(APIC_LVTT, APIC_LVT_MASKED); ++ apic_write(APIC_LVT0, APIC_LVT_MASKED); ++ apic_write(APIC_LVT1, APIC_LVT_MASKED); ++ if (maxlvt >= 3) ++ apic_write(APIC_LVTERR, APIC_LVT_MASKED); ++ if (maxlvt >= 4) ++ apic_write(APIC_LVTPC, APIC_LVT_MASKED); ++ ++ /* Integrated APIC (!82489DX) ? */ ++ if (lapic_is_integrated()) { ++ if (maxlvt > 3) ++ /* Clear ESR due to Pentium errata 3AP and 11AP */ ++ apic_write(APIC_ESR, 0); ++ apic_read(APIC_ESR); ++ } ++} ++ ++/** ++ * disable_local_APIC - clear and disable the local APIC ++ */ ++void disable_local_APIC(void) ++{ ++ unsigned int value; ++ ++ /* APIC hasn't been mapped yet */ ++ if (!apic_phys) ++ return; ++ ++ clear_local_APIC(); ++ ++ /* ++ * Disable APIC (implies clearing of registers ++ * for 82489DX!). ++ */ ++ value = apic_read(APIC_SPIV); ++ value &= ~APIC_SPIV_APIC_ENABLED; ++ apic_write(APIC_SPIV, value); ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * When LAPIC was disabled by the BIOS and enabled by the kernel, ++ * restore the disabled state. ++ */ ++ if (enabled_via_apicbase) { ++ unsigned int l, h; ++ ++ rdmsr(MSR_IA32_APICBASE, l, h); ++ l &= ~MSR_IA32_APICBASE_ENABLE; ++ wrmsr(MSR_IA32_APICBASE, l, h); ++ } ++#endif ++} ++ ++/* ++ * If Linux enabled the LAPIC against the BIOS default disable it down before ++ * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and ++ * not power-off. Additionally clear all LVT entries before disable_local_APIC ++ * for the case where Linux didn't enable the LAPIC. ++ */ ++void lapic_shutdown(void) ++{ ++ unsigned long flags; ++ ++ if (!cpu_has_apic) ++ return; ++ ++ local_irq_save(flags); ++ ++#ifdef CONFIG_X86_32 ++ if (!enabled_via_apicbase) ++ clear_local_APIC(); ++ else ++#endif ++ disable_local_APIC(); ++ ++ ++ local_irq_restore(flags); ++} ++ ++/* ++ * This is to verify that we're looking at a real local APIC. ++ * Check these against your board if the CPUs aren't getting ++ * started for no apparent reason. ++ */ ++int __init verify_local_APIC(void) ++{ ++ unsigned int reg0, reg1; ++ ++ /* ++ * The version register is read-only in a real APIC. 
++ */ ++ reg0 = apic_read(APIC_LVR); ++ apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0); ++ apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK); ++ reg1 = apic_read(APIC_LVR); ++ apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1); ++ ++ /* ++ * The two version reads above should print the same ++ * numbers. If the second one is different, then we ++ * poke at a non-APIC. ++ */ ++ if (reg1 != reg0) ++ return 0; ++ ++ /* ++ * Check if the version looks reasonably. ++ */ ++ reg1 = GET_APIC_VERSION(reg0); ++ if (reg1 == 0x00 || reg1 == 0xff) ++ return 0; ++ reg1 = lapic_get_maxlvt(); ++ if (reg1 < 0x02 || reg1 == 0xff) ++ return 0; ++ ++ /* ++ * The ID register is read/write in a real APIC. ++ */ ++ reg0 = apic_read(APIC_ID); ++ apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); ++ apic_write(APIC_ID, reg0 ^ apic->apic_id_mask); ++ reg1 = apic_read(APIC_ID); ++ apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); ++ apic_write(APIC_ID, reg0); ++ if (reg1 != (reg0 ^ apic->apic_id_mask)) ++ return 0; ++ ++ /* ++ * The next two are just to see if we have sane values. ++ * They're only really relevant if we're in Virtual Wire ++ * compatibility mode, but most boxes are anymore. ++ */ ++ reg0 = apic_read(APIC_LVT0); ++ apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0); ++ reg1 = apic_read(APIC_LVT1); ++ apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1); ++ ++ return 1; ++} ++ ++/** ++ * sync_Arb_IDs - synchronize APIC bus arbitration IDs ++ */ ++void __init sync_Arb_IDs(void) ++{ ++ /* ++ * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not ++ * needed on AMD. ++ */ ++ if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) ++ return; ++ ++ /* ++ * Wait for idle. ++ */ ++ apic_wait_icr_idle(); ++ ++ apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); ++ apic_write(APIC_ICR, APIC_DEST_ALLINC | ++ APIC_INT_LEVELTRIG | APIC_DM_INIT); ++} ++ ++/* ++ * An initial setup of the virtual wire mode. ++ */ ++void __init init_bsp_APIC(void) ++{ ++ unsigned int value; ++ ++ /* ++ * Don't do the setup now if we have a SMP BIOS as the ++ * through-I/O-APIC virtual wire mode might be active. ++ */ ++ if (smp_found_config || !cpu_has_apic) ++ return; ++ ++ /* ++ * Do not trust the local APIC being empty at bootup. ++ */ ++ clear_local_APIC(); ++ ++ /* ++ * Enable APIC. ++ */ ++ value = apic_read(APIC_SPIV); ++ value &= ~APIC_VECTOR_MASK; ++ value |= APIC_SPIV_APIC_ENABLED; ++ ++#ifdef CONFIG_X86_32 ++ /* This bit is reserved on P4/Xeon and should be cleared */ ++ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && ++ (boot_cpu_data.x86 == 15)) ++ value &= ~APIC_SPIV_FOCUS_DISABLED; ++ else ++#endif ++ value |= APIC_SPIV_FOCUS_DISABLED; ++ value |= SPURIOUS_APIC_VECTOR; ++ apic_write(APIC_SPIV, value); ++ ++ /* ++ * Set up the virtual wire mode. ++ */ ++ apic_write(APIC_LVT0, APIC_DM_EXTINT); ++ value = APIC_DM_NMI; ++ if (!lapic_is_integrated()) /* 82489DX */ ++ value |= APIC_LVT_LEVEL_TRIGGER; ++ apic_write(APIC_LVT1, value); ++} ++ ++static void __cpuinit lapic_setup_esr(void) ++{ ++ unsigned int oldvalue, value, maxlvt; ++ ++ if (!lapic_is_integrated()) { ++ pr_info("No ESR for 82489DX.\n"); ++ return; ++ } ++ ++ if (apic->disable_esr) { ++ /* ++ * Something untraceable is creating bad interrupts on ++ * secondary quads ... 
for the moment, just leave the ++ * ESR disabled - we can't do anything useful with the ++ * errors anyway - mbligh ++ */ ++ pr_info("Leaving ESR disabled.\n"); ++ return; ++ } ++ ++ maxlvt = lapic_get_maxlvt(); ++ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ ++ apic_write(APIC_ESR, 0); ++ oldvalue = apic_read(APIC_ESR); ++ ++ /* enables sending errors */ ++ value = ERROR_APIC_VECTOR; ++ apic_write(APIC_LVTERR, value); ++ ++ /* ++ * spec says clear errors after enabling vector. ++ */ ++ if (maxlvt > 3) ++ apic_write(APIC_ESR, 0); ++ value = apic_read(APIC_ESR); ++ if (value != oldvalue) ++ apic_printk(APIC_VERBOSE, "ESR value before enabling " ++ "vector: 0x%08x after: 0x%08x\n", ++ oldvalue, value); ++} ++ ++ ++/** ++ * setup_local_APIC - setup the local APIC ++ */ ++void __cpuinit setup_local_APIC(void) ++{ ++ unsigned int value; ++ int i, j; ++ ++ if (disable_apic) { ++ arch_disable_smp_support(); ++ return; ++ } ++ ++#ifdef CONFIG_X86_32 ++ /* Pound the ESR really hard over the head with a big hammer - mbligh */ ++ if (lapic_is_integrated() && apic->disable_esr) { ++ apic_write(APIC_ESR, 0); ++ apic_write(APIC_ESR, 0); ++ apic_write(APIC_ESR, 0); ++ apic_write(APIC_ESR, 0); ++ } ++#endif ++ perf_counters_lapic_init(0); ++ ++ preempt_disable(); ++ ++ /* ++ * Double-check whether this APIC is really registered. ++ * This is meaningless in clustered apic mode, so we skip it. ++ */ ++ if (!apic->apic_id_registered()) ++ BUG(); ++ ++ /* ++ * Intel recommends to set DFR, LDR and TPR before enabling ++ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel ++ * document number 292116). So here it goes... ++ */ ++ apic->init_apic_ldr(); ++ ++ /* ++ * Set Task Priority to 'accept all'. We never change this ++ * later on. ++ */ ++ value = apic_read(APIC_TASKPRI); ++ value &= ~APIC_TPRI_MASK; ++ apic_write(APIC_TASKPRI, value); ++ ++ /* ++ * After a crash, we no longer service the interrupts and a pending ++ * interrupt from previous kernel might still have ISR bit set. ++ * ++ * Most probably by now CPU has serviced that pending interrupt and ++ * it might not have done the ack_APIC_irq() because it thought, ++ * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it ++ * does not clear the ISR bit and cpu thinks it has already serivced ++ * the interrupt. Hence a vector might get locked. It was noticed ++ * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. ++ */ ++ for (i = APIC_ISR_NR - 1; i >= 0; i--) { ++ value = apic_read(APIC_ISR + i*0x10); ++ for (j = 31; j >= 0; j--) { ++ if (value & (1< 1) || ++ (boot_cpu_data.x86 >= 15)) ++ break; ++ goto no_apic; ++ case X86_VENDOR_INTEL: ++ if (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15 || ++ (boot_cpu_data.x86 == 5 && cpu_has_apic)) ++ break; ++ goto no_apic; ++ default: ++ goto no_apic; ++ } ++ ++ if (!cpu_has_apic) { ++ /* ++ * Over-ride BIOS and try to enable the local APIC only if ++ * "lapic" specified. ++ */ ++ if (!force_enable_local_apic) { ++ pr_info("Local APIC disabled by BIOS -- " ++ "you can enable it with \"lapic\"\n"); ++ return -1; ++ } ++ /* ++ * Some BIOSes disable the local APIC in the APIC_BASE ++ * MSR. This can only be done in software for Intel P6 or later ++ * and AMD K7 (Model > 1) or later. 
++ */ ++ rdmsr(MSR_IA32_APICBASE, l, h); ++ if (!(l & MSR_IA32_APICBASE_ENABLE)) { ++ pr_info("Local APIC disabled by BIOS -- reenabling.\n"); ++ l &= ~MSR_IA32_APICBASE_BASE; ++ l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; ++ wrmsr(MSR_IA32_APICBASE, l, h); ++ enabled_via_apicbase = 1; ++ } ++ } ++ /* ++ * The APIC feature bit should now be enabled ++ * in `cpuid' ++ */ ++ features = cpuid_edx(1); ++ if (!(features & (1 << X86_FEATURE_APIC))) { ++ pr_warning("Could not enable APIC!\n"); ++ return -1; ++ } ++ set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); ++ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; ++ ++ /* The BIOS may have set up the APIC at some other address */ ++ rdmsr(MSR_IA32_APICBASE, l, h); ++ if (l & MSR_IA32_APICBASE_ENABLE) ++ mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; ++ ++ pr_info("Found and enabled local APIC!\n"); ++ ++ apic_pm_activate(); ++ ++ return 0; ++ ++no_apic: ++ pr_info("No local APIC present or hardware disabled\n"); ++ return -1; ++} ++#endif ++ ++#ifdef CONFIG_X86_64 ++void __init early_init_lapic_mapping(void) ++{ ++ unsigned long phys_addr; ++ ++ /* ++ * If no local APIC can be found then go out ++ * : it means there is no mpatable and MADT ++ */ ++ if (!smp_found_config) ++ return; ++ ++ phys_addr = mp_lapic_addr; ++ ++ set_fixmap_nocache(FIX_APIC_BASE, phys_addr); ++ apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", ++ APIC_BASE, phys_addr); ++ ++ /* ++ * Fetch the APIC ID of the BSP in case we have a ++ * default configuration (or the MP table is broken). ++ */ ++ boot_cpu_physical_apicid = read_apic_id(); ++} ++#endif ++ ++/** ++ * init_apic_mappings - initialize APIC mappings ++ */ ++void __init init_apic_mappings(void) ++{ ++ if (x2apic) { ++ boot_cpu_physical_apicid = read_apic_id(); ++ return; ++ } ++ ++ /* ++ * If no local APIC can be found then set up a fake all ++ * zeroes page to simulate the local APIC and another ++ * one for the IO-APIC. ++ */ ++ if (!smp_found_config && detect_init_APIC()) { ++ apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); ++ apic_phys = __pa(apic_phys); ++ } else ++ apic_phys = mp_lapic_addr; ++ ++ set_fixmap_nocache(FIX_APIC_BASE, apic_phys); ++ apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", ++ APIC_BASE, apic_phys); ++ ++ /* ++ * Fetch the APIC ID of the BSP in case we have a ++ * default configuration (or the MP table is broken). ++ */ ++ if (boot_cpu_physical_apicid == -1U) ++ boot_cpu_physical_apicid = read_apic_id(); ++} ++ ++/* ++ * This initializes the IO-APIC and APIC hardware if this is ++ * a UP kernel. ++ */ ++int apic_version[MAX_APICS]; ++ ++int __init APIC_init_uniprocessor(void) ++{ ++ if (disable_apic) { ++ pr_info("Apic disabled\n"); ++ return -1; ++ } ++#ifdef CONFIG_X86_64 ++ if (!cpu_has_apic) { ++ disable_apic = 1; ++ pr_info("Apic disabled by BIOS\n"); ++ return -1; ++ } ++#else ++ if (!smp_found_config && !cpu_has_apic) ++ return -1; ++ ++ /* ++ * Complain if the BIOS pretends there is one. 
++ */ ++ if (!cpu_has_apic && ++ APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { ++ pr_err("BIOS bug, local APIC 0x%x not detected!...\n", ++ boot_cpu_physical_apicid); ++ clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); ++ return -1; ++ } ++#endif ++ ++ enable_IR_x2apic(); ++#ifdef CONFIG_X86_64 ++ default_setup_apic_routing(); ++#endif ++ ++ verify_local_APIC(); ++ connect_bsp_APIC(); ++ ++#ifdef CONFIG_X86_64 ++ apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid)); ++#else ++ /* ++ * Hack: In case of kdump, after a crash, kernel might be booting ++ * on a cpu with non-zero lapic id. But boot_cpu_physical_apicid ++ * might be zero if read from MP tables. Get it from LAPIC. ++ */ ++# ifdef CONFIG_CRASH_DUMP ++ boot_cpu_physical_apicid = read_apic_id(); ++# endif ++#endif ++ physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); ++ setup_local_APIC(); ++ ++#ifdef CONFIG_X86_IO_APIC ++ /* ++ * Now enable IO-APICs, actually call clear_IO_APIC ++ * We need clear_IO_APIC before enabling error vector ++ */ ++ if (!skip_ioapic_setup && nr_ioapics) ++ enable_IO_APIC(); ++#endif ++ ++ end_local_APIC_setup(); ++ ++#ifdef CONFIG_X86_IO_APIC ++ if (smp_found_config && !skip_ioapic_setup && nr_ioapics) ++ setup_IO_APIC(); ++ else { ++ nr_ioapics = 0; ++ localise_nmi_watchdog(); ++ } ++#else ++ localise_nmi_watchdog(); ++#endif ++ ++ setup_boot_clock(); ++#ifdef CONFIG_X86_64 ++ check_nmi_watchdog(); ++#endif ++ ++ return 0; ++} ++ ++/* ++ * Local APIC interrupts ++ */ ++ ++/* ++ * This interrupt should _never_ happen with our APIC/SMP architecture ++ */ ++void smp_spurious_interrupt(struct pt_regs *regs) ++{ ++ u32 v; ++ ++ exit_idle(); ++ irq_enter(); ++ /* ++ * Check if this really is a spurious interrupt and ACK it ++ * if it is a vectored one. Just in case... ++ * Spurious interrupts should not be ACKed. ++ */ ++ v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1)); ++ if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) ++ ack_APIC_irq(); ++ ++ inc_irq_stat(irq_spurious_count); ++ ++ /* see sw-dev-man vol 3, chapter 7.4.13.5 */ ++ pr_info("spurious APIC interrupt on CPU#%d, " ++ "should never happen.\n", smp_processor_id()); ++ irq_exit(); ++} ++ ++/* ++ * This interrupt should never happen with our APIC/SMP architecture ++ */ ++void smp_error_interrupt(struct pt_regs *regs) ++{ ++ u32 v, v1; ++ ++ exit_idle(); ++ irq_enter(); ++ /* First tickle the hardware, only then report what went on. -- REW */ ++ v = apic_read(APIC_ESR); ++ apic_write(APIC_ESR, 0); ++ v1 = apic_read(APIC_ESR); ++ ack_APIC_irq(); ++ atomic_inc(&irq_err_count); ++ ++ /* ++ * Here is what the APIC error bits mean: ++ * 0: Send CS error ++ * 1: Receive CS error ++ * 2: Send accept error ++ * 3: Receive accept error ++ * 4: Reserved ++ * 5: Send illegal vector ++ * 6: Received illegal vector ++ * 7: Illegal register address ++ */ ++ pr_debug("APIC error on CPU%d: %02x(%02x)\n", ++ smp_processor_id(), v , v1); ++ irq_exit(); ++} ++ ++/** ++ * connect_bsp_APIC - attach the APIC to the interrupt system ++ */ ++void __init connect_bsp_APIC(void) ++{ ++#ifdef CONFIG_X86_32 ++ if (pic_mode) { ++ /* ++ * Do not trust the local APIC being empty at bootup. ++ */ ++ clear_local_APIC(); ++ /* ++ * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's ++ * local APIC to INT and NMI lines. 
++ */ ++ apic_printk(APIC_VERBOSE, "leaving PIC mode, " ++ "enabling APIC mode.\n"); ++ outb(0x70, 0x22); ++ outb(0x01, 0x23); ++ } ++#endif ++ if (apic->enable_apic_mode) ++ apic->enable_apic_mode(); ++} ++ ++/** ++ * disconnect_bsp_APIC - detach the APIC from the interrupt system ++ * @virt_wire_setup: indicates, whether virtual wire mode is selected ++ * ++ * Virtual wire mode is necessary to deliver legacy interrupts even when the ++ * APIC is disabled. ++ */ ++void disconnect_bsp_APIC(int virt_wire_setup) ++{ ++ unsigned int value; ++ ++#ifdef CONFIG_X86_32 ++ if (pic_mode) { ++ /* ++ * Put the board back into PIC mode (has an effect only on ++ * certain older boards). Note that APIC interrupts, including ++ * IPIs, won't work beyond this point! The only exception are ++ * INIT IPIs. ++ */ ++ apic_printk(APIC_VERBOSE, "disabling APIC mode, " ++ "entering PIC mode.\n"); ++ outb(0x70, 0x22); ++ outb(0x00, 0x23); ++ return; ++ } ++#endif ++ ++ /* Go back to Virtual Wire compatibility mode */ ++ ++ /* For the spurious interrupt use vector F, and enable it */ ++ value = apic_read(APIC_SPIV); ++ value &= ~APIC_VECTOR_MASK; ++ value |= APIC_SPIV_APIC_ENABLED; ++ value |= 0xf; ++ apic_write(APIC_SPIV, value); ++ ++ if (!virt_wire_setup) { ++ /* ++ * For LVT0 make it edge triggered, active high, ++ * external and enabled ++ */ ++ value = apic_read(APIC_LVT0); ++ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | ++ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | ++ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); ++ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; ++ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); ++ apic_write(APIC_LVT0, value); ++ } else { ++ /* Disable LVT0 */ ++ apic_write(APIC_LVT0, APIC_LVT_MASKED); ++ } ++ ++ /* ++ * For LVT1 make it edge triggered, active high, ++ * nmi and enabled ++ */ ++ value = apic_read(APIC_LVT1); ++ value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | ++ APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | ++ APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); ++ value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; ++ value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); ++ apic_write(APIC_LVT1, value); ++} ++ ++void __cpuinit generic_processor_info(int apicid, int version) ++{ ++ int cpu; ++ ++ /* ++ * Validate version ++ */ ++ if (version == 0x0) { ++ pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " ++ "fixing up to 0x10. (tell your hw vendor)\n", ++ version); ++ version = 0x10; ++ } ++ apic_version[apicid] = version; ++ ++ if (num_processors >= nr_cpu_ids) { ++ int max = nr_cpu_ids; ++ int thiscpu = max + disabled_cpus; ++ ++ pr_warning( ++ "ACPI: NR_CPUS/possible_cpus limit of %i reached." ++ " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); ++ ++ disabled_cpus++; ++ return; ++ } ++ ++ num_processors++; ++ cpu = cpumask_next_zero(-1, cpu_present_mask); ++ ++ if (version != apic_version[boot_cpu_physical_apicid]) ++ WARN_ONCE(1, ++ "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n", ++ apic_version[boot_cpu_physical_apicid], cpu, version); ++ ++ physid_set(apicid, phys_cpu_present_map); ++ if (apicid == boot_cpu_physical_apicid) { ++ /* ++ * x86_bios_cpu_apicid is required to have processors listed ++ * in same order as logical cpu numbers. Hence the first ++ * entry is BSP, and so on. 
++ */ ++ cpu = 0; ++ } ++ if (apicid > max_physical_apicid) ++ max_physical_apicid = apicid; ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y ++ * but we need to work other dependencies like SMP_SUSPEND etc ++ * before this can be done without some confusion. ++ * if (CPU_HOTPLUG_ENABLED || num_processors > 8) ++ * - Ashok Raj ++ */ ++ if (max_physical_apicid >= 8) { ++ switch (boot_cpu_data.x86_vendor) { ++ case X86_VENDOR_INTEL: ++ if (!APIC_XAPIC(version)) { ++ def_to_bigsmp = 0; ++ break; ++ } ++ /* If P4 and above fall through */ ++ case X86_VENDOR_AMD: ++ def_to_bigsmp = 1; ++ } ++ } ++#endif ++ ++#if defined(CONFIG_SMP) || defined(CONFIG_X86_64) ++ early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; ++ early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; ++#endif ++ ++ set_cpu_possible(cpu, true); ++ set_cpu_present(cpu, true); ++} ++ ++int hard_smp_processor_id(void) ++{ ++ return read_apic_id(); ++} ++ ++void default_init_apic_ldr(void) ++{ ++ unsigned long val; ++ ++ apic_write(APIC_DFR, APIC_DFR_VALUE); ++ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; ++ val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id()); ++ apic_write(APIC_LDR, val); ++} ++ ++#ifdef CONFIG_X86_32 ++int default_apicid_to_node(int logical_apicid) ++{ ++#ifdef CONFIG_SMP ++ return apicid_2_node[hard_smp_processor_id()]; ++#else ++ return 0; ++#endif ++} ++#endif ++ ++/* ++ * Power management ++ */ ++#ifdef CONFIG_PM ++ ++static struct { ++ /* ++ * 'active' is true if the local APIC was enabled by us and ++ * not the BIOS; this signifies that we are also responsible ++ * for disabling it before entering apm/acpi suspend ++ */ ++ int active; ++ /* r/w apic fields */ ++ unsigned int apic_id; ++ unsigned int apic_taskpri; ++ unsigned int apic_ldr; ++ unsigned int apic_dfr; ++ unsigned int apic_spiv; ++ unsigned int apic_lvtt; ++ unsigned int apic_lvtpc; ++ unsigned int apic_lvt0; ++ unsigned int apic_lvt1; ++ unsigned int apic_lvterr; ++ unsigned int apic_tmict; ++ unsigned int apic_tdcr; ++ unsigned int apic_thmr; ++} apic_pm_state; ++ ++static int lapic_suspend(struct sys_device *dev, pm_message_t state) ++{ ++ unsigned long flags; ++ int maxlvt; ++ ++ if (!apic_pm_state.active) ++ return 0; ++ ++ maxlvt = lapic_get_maxlvt(); ++ ++ apic_pm_state.apic_id = apic_read(APIC_ID); ++ apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); ++ apic_pm_state.apic_ldr = apic_read(APIC_LDR); ++ apic_pm_state.apic_dfr = apic_read(APIC_DFR); ++ apic_pm_state.apic_spiv = apic_read(APIC_SPIV); ++ apic_pm_state.apic_lvtt = apic_read(APIC_LVTT); ++ if (maxlvt >= 4) ++ apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC); ++ apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0); ++ apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1); ++ apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); ++ apic_pm_state.apic_tmict = apic_read(APIC_TMICT); ++ apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); ++#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) ++ if (maxlvt >= 5) ++ apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); ++#endif ++ ++ local_irq_save(flags); ++ disable_local_APIC(); ++ local_irq_restore(flags); ++ return 0; ++} ++ ++static int lapic_resume(struct sys_device *dev) ++{ ++ unsigned int l, h; ++ unsigned long flags; ++ int maxlvt; ++ ++ if (!apic_pm_state.active) ++ return 0; ++ ++ maxlvt = lapic_get_maxlvt(); ++ ++ local_irq_save(flags); ++ ++ if (x2apic) ++ enable_x2apic(); ++ else { ++ /* ++ * Make sure the APICBASE points to the right address ++ * ++ * FIXME! 
This will be wrong if we ever support suspend on ++ * SMP! We'll need to do this as part of the CPU restore! ++ */ ++ rdmsr(MSR_IA32_APICBASE, l, h); ++ l &= ~MSR_IA32_APICBASE_BASE; ++ l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; ++ wrmsr(MSR_IA32_APICBASE, l, h); ++ } ++ ++ apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); ++ apic_write(APIC_ID, apic_pm_state.apic_id); ++ apic_write(APIC_DFR, apic_pm_state.apic_dfr); ++ apic_write(APIC_LDR, apic_pm_state.apic_ldr); ++ apic_write(APIC_TASKPRI, apic_pm_state.apic_taskpri); ++ apic_write(APIC_SPIV, apic_pm_state.apic_spiv); ++ apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); ++ apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); ++#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) ++ if (maxlvt >= 5) ++ apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); ++#endif ++ if (maxlvt >= 4) ++ apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc); ++ apic_write(APIC_LVTT, apic_pm_state.apic_lvtt); ++ apic_write(APIC_TDCR, apic_pm_state.apic_tdcr); ++ apic_write(APIC_TMICT, apic_pm_state.apic_tmict); ++ apic_write(APIC_ESR, 0); ++ apic_read(APIC_ESR); ++ apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); ++ apic_write(APIC_ESR, 0); ++ apic_read(APIC_ESR); ++ ++ local_irq_restore(flags); ++ ++ return 0; ++} ++ ++/* ++ * This device has no shutdown method - fully functioning local APICs ++ * are needed on every CPU up until machine_halt/restart/poweroff. ++ */ ++ ++static struct sysdev_class lapic_sysclass = { ++ .name = "lapic", ++ .resume = lapic_resume, ++ .suspend = lapic_suspend, ++}; ++ ++static struct sys_device device_lapic = { ++ .id = 0, ++ .cls = &lapic_sysclass, ++}; ++ ++static void __cpuinit apic_pm_activate(void) ++{ ++ apic_pm_state.active = 1; ++} ++ ++static int __init init_lapic_sysfs(void) ++{ ++ int error; ++ ++ if (!cpu_has_apic) ++ return 0; ++ /* XXX: remove suspend/resume procs if !apic_pm_state.active? */ ++ ++ error = sysdev_class_register(&lapic_sysclass); ++ if (!error) ++ error = sysdev_register(&device_lapic); ++ return error; ++} ++device_initcall(init_lapic_sysfs); ++ ++#else /* CONFIG_PM */ ++ ++static void apic_pm_activate(void) { } ++ ++#endif /* CONFIG_PM */ ++ ++#ifdef CONFIG_X86_64 ++/* ++ * apic_is_clustered_box() -- Check if we can expect good TSC ++ * ++ * Thus far, the major user of this is IBM's Summit2 series: ++ * ++ * Clustered boxes may have unsynced TSC problems if they are ++ * multi-chassis. Use available data to take a good guess. ++ * If in doubt, go HPET. ++ */ ++__cpuinit int apic_is_clustered_box(void) ++{ ++ int i, clusters, zeros; ++ unsigned id; ++ u16 *bios_cpu_apicid; ++ DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); ++ ++ /* ++ * there is not this kind of box with AMD CPU yet. ++ * Some AMD box with quadcore cpu and 8 sockets apicid ++ * will be [4, 0x23] or [8, 0x27] could be thought to ++ * vsmp box still need checking... ++ */ ++ if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) ++ return 0; ++ ++ bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); ++ bitmap_zero(clustermap, NUM_APIC_CLUSTERS); ++ ++ for (i = 0; i < nr_cpu_ids; i++) { ++ /* are we being called early in kernel startup? 
*/ ++ if (bios_cpu_apicid) { ++ id = bios_cpu_apicid[i]; ++ } else if (i < nr_cpu_ids) { ++ if (cpu_present(i)) ++ id = per_cpu(x86_bios_cpu_apicid, i); ++ else ++ continue; ++ } else ++ break; ++ ++ if (id != BAD_APICID) ++ __set_bit(APIC_CLUSTERID(id), clustermap); ++ } ++ ++ /* Problem: Partially populated chassis may not have CPUs in some of ++ * the APIC clusters they have been allocated. Only present CPUs have ++ * x86_bios_cpu_apicid entries, thus causing zeroes in the bitmap. ++ * Since clusters are allocated sequentially, count zeros only if ++ * they are bounded by ones. ++ */ ++ clusters = 0; ++ zeros = 0; ++ for (i = 0; i < NUM_APIC_CLUSTERS; i++) { ++ if (test_bit(i, clustermap)) { ++ clusters += 1 + zeros; ++ zeros = 0; ++ } else ++ ++zeros; ++ } ++ ++ /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are ++ * not guaranteed to be synced between boards ++ */ ++ if (is_vsmp_box() && clusters > 1) ++ return 1; ++ ++ /* ++ * If clusters > 2, then should be multi-chassis. ++ * May have to revisit this when multi-core + hyperthreaded CPUs come ++ * out, but AFAIK this will work even for them. ++ */ ++ return (clusters > 2); ++} ++#endif ++ ++/* ++ * APIC command line parameters ++ */ ++static int __init setup_disableapic(char *arg) ++{ ++ disable_apic = 1; ++ setup_clear_cpu_cap(X86_FEATURE_APIC); ++ return 0; ++} ++early_param("disableapic", setup_disableapic); ++ ++/* same as disableapic, for compatibility */ ++static int __init setup_nolapic(char *arg) ++{ ++ return setup_disableapic(arg); ++} ++early_param("nolapic", setup_nolapic); ++ ++static int __init parse_lapic_timer_c2_ok(char *arg) ++{ ++ local_apic_timer_c2_ok = 1; ++ return 0; ++} ++early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); ++ ++static int __init parse_disable_apic_timer(char *arg) ++{ ++ disable_apic_timer = 1; ++ return 0; ++} ++early_param("noapictimer", parse_disable_apic_timer); ++ ++static int __init parse_nolapic_timer(char *arg) ++{ ++ disable_apic_timer = 1; ++ return 0; ++} ++early_param("nolapic_timer", parse_nolapic_timer); ++ ++static int __init apic_set_verbosity(char *arg) ++{ ++ if (!arg) { ++#ifdef CONFIG_X86_64 ++ skip_ioapic_setup = 0; ++ return 0; ++#endif ++ return -EINVAL; ++ } ++ ++ if (strcmp("debug", arg) == 0) ++ apic_verbosity = APIC_DEBUG; ++ else if (strcmp("verbose", arg) == 0) ++ apic_verbosity = APIC_VERBOSE; ++ else { ++ pr_warning("APIC Verbosity level %s not recognised" ++ " use apic=verbose or apic=debug\n", arg); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++early_param("apic", apic_set_verbosity); ++ ++static int __init lapic_insert_resource(void) ++{ ++ if (!apic_phys) ++ return -1; ++ ++ /* Put local APIC into the resource map. */ ++ lapic_resource.start = apic_phys; ++ lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1; ++ insert_resource(&iomem_resource, &lapic_resource); ++ ++ return 0; ++} ++ ++/* ++ * need call insert after e820_reserve_resources() ++ * that is using request_resource ++ */ ++late_initcall(lapic_insert_resource); +Index: linux-2.6-tip/arch/x86/kernel/apic/apic_flat_64.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/apic/apic_flat_64.c +@@ -0,0 +1,373 @@ ++/* ++ * Copyright 2004 James Cleverdon, IBM. ++ * Subject to the GNU Public License, v.2 ++ * ++ * Flat APIC subarch code. ++ * ++ * Hacked for x86-64 by James Cleverdon from i386 architecture code by ++ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and ++ * James Cleverdon. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_ACPI ++#include ++#endif ++ ++static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) ++{ ++ return 1; ++} ++ ++static const struct cpumask *flat_target_cpus(void) ++{ ++ return cpu_online_mask; ++} ++ ++static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) ++{ ++ /* Careful. Some cpus do not strictly honor the set of cpus ++ * specified in the interrupt destination when using lowest ++ * priority interrupt delivery mode. ++ * ++ * In particular there was a hyperthreading cpu observed to ++ * deliver interrupts to the wrong hyperthread when only one ++ * hyperthread was specified in the interrupt desitination. ++ */ ++ cpumask_clear(retmask); ++ cpumask_bits(retmask)[0] = APIC_ALL_CPUS; ++} ++ ++/* ++ * Set up the logical destination ID. ++ * ++ * Intel recommends to set DFR, LDR and TPR before enabling ++ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel ++ * document number 292116). So here it goes... ++ */ ++static void flat_init_apic_ldr(void) ++{ ++ unsigned long val; ++ unsigned long num, id; ++ ++ num = smp_processor_id(); ++ id = 1UL << num; ++ apic_write(APIC_DFR, APIC_DFR_FLAT); ++ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; ++ val |= SET_APIC_LOGICAL_ID(id); ++ apic_write(APIC_LDR, val); ++} ++ ++static inline void _flat_send_IPI_mask(unsigned long mask, int vector) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ __default_send_IPI_dest_field(mask, vector, apic->dest_logical); ++ local_irq_restore(flags); ++} ++ ++static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) ++{ ++ unsigned long mask = cpumask_bits(cpumask)[0]; ++ ++ _flat_send_IPI_mask(mask, vector); ++} ++ ++static void ++ flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector) ++{ ++ unsigned long mask = cpumask_bits(cpumask)[0]; ++ int cpu = smp_processor_id(); ++ ++ if (cpu < BITS_PER_LONG) ++ clear_bit(cpu, &mask); ++ ++ _flat_send_IPI_mask(mask, vector); ++} ++ ++static void flat_send_IPI_allbutself(int vector) ++{ ++ int cpu = smp_processor_id(); ++#ifdef CONFIG_HOTPLUG_CPU ++ int hotplug = 1; ++#else ++ int hotplug = 0; ++#endif ++ if (hotplug || vector == NMI_VECTOR) { ++ if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { ++ unsigned long mask = cpumask_bits(cpu_online_mask)[0]; ++ ++ if (cpu < BITS_PER_LONG) ++ clear_bit(cpu, &mask); ++ ++ _flat_send_IPI_mask(mask, vector); ++ } ++ } else if (num_online_cpus() > 1) { ++ __default_send_IPI_shortcut(APIC_DEST_ALLBUT, ++ vector, apic->dest_logical); ++ } ++} ++ ++static void flat_send_IPI_all(int vector) ++{ ++ if (vector == NMI_VECTOR) { ++ flat_send_IPI_mask(cpu_online_mask, vector); ++ } else { ++ __default_send_IPI_shortcut(APIC_DEST_ALLINC, ++ vector, apic->dest_logical); ++ } ++} ++ ++static unsigned int flat_get_apic_id(unsigned long x) ++{ ++ unsigned int id; ++ ++ id = (((x)>>24) & 0xFFu); ++ ++ return id; ++} ++ ++static unsigned long set_apic_id(unsigned int id) ++{ ++ unsigned long x; ++ ++ x = ((id & 0xFFu)<<24); ++ return x; ++} ++ ++static unsigned int read_xapic_id(void) ++{ ++ unsigned int id; ++ ++ id = flat_get_apic_id(apic_read(APIC_ID)); ++ return id; ++} ++ ++static int flat_apic_id_registered(void) ++{ ++ return physid_isset(read_xapic_id(), phys_cpu_present_map); ++} ++ ++static int flat_phys_pkg_id(int initial_apic_id, int index_msb) ++{ ++ return hard_smp_processor_id() >> index_msb; ++} ++ 
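/*
 * Illustrative sketch (editor's aside, not part of the patch): the flat
 * driver above stores the APIC ID in bits 24-31 of the APIC_ID register
 * (flat_get_apic_id()/set_apic_id(), apic_id_mask = 0xFF << 24) and builds
 * the logical destination as one bit per CPU, which is why flat logical
 * mode tops out at 8 CPUs and larger boxes fall back to physflat/bigsmp.
 * The stand-alone userspace program below only mirrors that bit layout
 * with plain C arithmetic; the demo_* names are hypothetical and nothing
 * here touches real APIC registers.
 */
#include <assert.h>
#include <stdio.h>

static unsigned int demo_get_apic_id(unsigned long x)
{
	return (x >> 24) & 0xFFu;		/* bits 24-31 hold the ID */
}

static unsigned long demo_set_apic_id(unsigned int id)
{
	return (unsigned long)(id & 0xFFu) << 24;	/* pack it back */
}

int main(void)
{
	unsigned int cpu, id = 0x2a;
	unsigned long logical_mask = 0;

	/* pack/extract round trip, as apic_flat's get/set_apic_id do */
	assert(demo_get_apic_id(demo_set_apic_id(id)) == id);

	/* flat logical mode: one destination bit per CPU, 8 CPUs max */
	for (cpu = 0; cpu < 8; cpu++)
		logical_mask |= 1UL << cpu;

	printf("id reg = 0x%08lx, logical mask = 0x%02lx\n",
	       demo_set_apic_id(id), logical_mask);
	return 0;
}
/*
 * Build and run (illustration only):
 *   cc -o flat_demo flat_demo.c && ./flat_demo
 */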
++struct apic apic_flat = { ++ .name = "flat", ++ .probe = NULL, ++ .acpi_madt_oem_check = flat_acpi_madt_oem_check, ++ .apic_id_registered = flat_apic_id_registered, ++ ++ .irq_delivery_mode = dest_LowestPrio, ++ .irq_dest_mode = 1, /* logical */ ++ ++ .target_cpus = flat_target_cpus, ++ .disable_esr = 0, ++ .dest_logical = APIC_DEST_LOGICAL, ++ .check_apicid_used = NULL, ++ .check_apicid_present = NULL, ++ ++ .vector_allocation_domain = flat_vector_allocation_domain, ++ .init_apic_ldr = flat_init_apic_ldr, ++ ++ .ioapic_phys_id_map = NULL, ++ .setup_apic_routing = NULL, ++ .multi_timer_check = NULL, ++ .apicid_to_node = NULL, ++ .cpu_to_logical_apicid = NULL, ++ .cpu_present_to_apicid = default_cpu_present_to_apicid, ++ .apicid_to_cpu_present = NULL, ++ .setup_portio_remap = NULL, ++ .check_phys_apicid_present = default_check_phys_apicid_present, ++ .enable_apic_mode = NULL, ++ .phys_pkg_id = flat_phys_pkg_id, ++ .mps_oem_check = NULL, ++ ++ .get_apic_id = flat_get_apic_id, ++ .set_apic_id = set_apic_id, ++ .apic_id_mask = 0xFFu << 24, ++ ++ .cpu_mask_to_apicid = default_cpu_mask_to_apicid, ++ .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, ++ ++ .send_IPI_mask = flat_send_IPI_mask, ++ .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, ++ .send_IPI_allbutself = flat_send_IPI_allbutself, ++ .send_IPI_all = flat_send_IPI_all, ++ .send_IPI_self = apic_send_IPI_self, ++ ++ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, ++ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, ++ .wait_for_init_deassert = NULL, ++ .smp_callin_clear_local_apic = NULL, ++ .inquire_remote_apic = NULL, ++ ++ .read = native_apic_mem_read, ++ .write = native_apic_mem_write, ++ .icr_read = native_apic_icr_read, ++ .icr_write = native_apic_icr_write, ++ .wait_icr_idle = native_apic_wait_icr_idle, ++ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, ++}; ++ ++/* ++ * Physflat mode is used when there are more than 8 CPUs on a AMD system. ++ * We cannot use logical delivery in this case because the mask ++ * overflows, so use physical mode. ++ */ ++static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) ++{ ++#ifdef CONFIG_ACPI ++ /* ++ * Quirk: some x86_64 machines can only use physical APIC mode ++ * regardless of how many processors are present (x86_64 ES7000 ++ * is an example). ++ */ ++ if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && ++ (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { ++ printk(KERN_DEBUG "system APIC only can use physical flat"); ++ return 1; ++ } ++#endif ++ ++ return 0; ++} ++ ++static const struct cpumask *physflat_target_cpus(void) ++{ ++ return cpu_online_mask; ++} ++ ++static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) ++{ ++ cpumask_clear(retmask); ++ cpumask_set_cpu(cpu, retmask); ++} ++ ++static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) ++{ ++ default_send_IPI_mask_sequence_phys(cpumask, vector); ++} ++ ++static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, ++ int vector) ++{ ++ default_send_IPI_mask_allbutself_phys(cpumask, vector); ++} ++ ++static void physflat_send_IPI_allbutself(int vector) ++{ ++ default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); ++} ++ ++static void physflat_send_IPI_all(int vector) ++{ ++ physflat_send_IPI_mask(cpu_online_mask, vector); ++} ++ ++static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) ++{ ++ int cpu; ++ ++ /* ++ * We're using fixed IRQ delivery, can only return one phys APIC ID. 
++ * May as well be the first. ++ */ ++ cpu = cpumask_first(cpumask); ++ if ((unsigned)cpu < nr_cpu_ids) ++ return per_cpu(x86_cpu_to_apicid, cpu); ++ else ++ return BAD_APICID; ++} ++ ++static unsigned int ++physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, ++ const struct cpumask *andmask) ++{ ++ int cpu; ++ ++ /* ++ * We're using fixed IRQ delivery, can only return one phys APIC ID. ++ * May as well be the first. ++ */ ++ for_each_cpu_and(cpu, cpumask, andmask) { ++ if (cpumask_test_cpu(cpu, cpu_online_mask)) ++ break; ++ } ++ if (cpu < nr_cpu_ids) ++ return per_cpu(x86_cpu_to_apicid, cpu); ++ ++ return BAD_APICID; ++} ++ ++struct apic apic_physflat = { ++ ++ .name = "physical flat", ++ .probe = NULL, ++ .acpi_madt_oem_check = physflat_acpi_madt_oem_check, ++ .apic_id_registered = flat_apic_id_registered, ++ ++ .irq_delivery_mode = dest_Fixed, ++ .irq_dest_mode = 0, /* physical */ ++ ++ .target_cpus = physflat_target_cpus, ++ .disable_esr = 0, ++ .dest_logical = 0, ++ .check_apicid_used = NULL, ++ .check_apicid_present = NULL, ++ ++ .vector_allocation_domain = physflat_vector_allocation_domain, ++ /* not needed, but shouldn't hurt: */ ++ .init_apic_ldr = flat_init_apic_ldr, ++ ++ .ioapic_phys_id_map = NULL, ++ .setup_apic_routing = NULL, ++ .multi_timer_check = NULL, ++ .apicid_to_node = NULL, ++ .cpu_to_logical_apicid = NULL, ++ .cpu_present_to_apicid = default_cpu_present_to_apicid, ++ .apicid_to_cpu_present = NULL, ++ .setup_portio_remap = NULL, ++ .check_phys_apicid_present = default_check_phys_apicid_present, ++ .enable_apic_mode = NULL, ++ .phys_pkg_id = flat_phys_pkg_id, ++ .mps_oem_check = NULL, ++ ++ .get_apic_id = flat_get_apic_id, ++ .set_apic_id = set_apic_id, ++ .apic_id_mask = 0xFFu << 24, ++ ++ .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, ++ .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, ++ ++ .send_IPI_mask = physflat_send_IPI_mask, ++ .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, ++ .send_IPI_allbutself = physflat_send_IPI_allbutself, ++ .send_IPI_all = physflat_send_IPI_all, ++ .send_IPI_self = apic_send_IPI_self, ++ ++ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, ++ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, ++ .wait_for_init_deassert = NULL, ++ .smp_callin_clear_local_apic = NULL, ++ .inquire_remote_apic = NULL, ++ ++ .read = native_apic_mem_read, ++ .write = native_apic_mem_write, ++ .icr_read = native_apic_icr_read, ++ .icr_write = native_apic_icr_write, ++ .wait_icr_idle = native_apic_wait_icr_idle, ++ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, ++}; +Index: linux-2.6-tip/arch/x86/kernel/apic/bigsmp_32.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/apic/bigsmp_32.c +@@ -0,0 +1,267 @@ ++/* ++ * APIC driver for "bigsmp" xAPIC machines with more than 8 virtual CPUs. ++ * ++ * Drives the local APIC in "clustered mode". 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++static unsigned bigsmp_get_apic_id(unsigned long x) ++{ ++ return (x >> 24) & 0xFF; ++} ++ ++static int bigsmp_apic_id_registered(void) ++{ ++ return 1; ++} ++ ++static const struct cpumask *bigsmp_target_cpus(void) ++{ ++#ifdef CONFIG_SMP ++ return cpu_online_mask; ++#else ++ return cpumask_of(0); ++#endif ++} ++ ++static unsigned long bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid) ++{ ++ return 0; ++} ++ ++static unsigned long bigsmp_check_apicid_present(int bit) ++{ ++ return 1; ++} ++ ++static inline unsigned long calculate_ldr(int cpu) ++{ ++ unsigned long val, id; ++ ++ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; ++ id = per_cpu(x86_bios_cpu_apicid, cpu); ++ val |= SET_APIC_LOGICAL_ID(id); ++ ++ return val; ++} ++ ++/* ++ * Set up the logical destination ID. ++ * ++ * Intel recommends to set DFR, LDR and TPR before enabling ++ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel ++ * document number 292116). So here it goes... ++ */ ++static void bigsmp_init_apic_ldr(void) ++{ ++ unsigned long val; ++ int cpu = smp_processor_id(); ++ ++ apic_write(APIC_DFR, APIC_DFR_FLAT); ++ val = calculate_ldr(cpu); ++ apic_write(APIC_LDR, val); ++} ++ ++static void bigsmp_setup_apic_routing(void) ++{ ++ printk(KERN_INFO ++ "Enabling APIC mode: Physflat. Using %d I/O APICs\n", ++ nr_ioapics); ++} ++ ++static int bigsmp_apicid_to_node(int logical_apicid) ++{ ++ return apicid_2_node[hard_smp_processor_id()]; ++} ++ ++static int bigsmp_cpu_present_to_apicid(int mps_cpu) ++{ ++ if (mps_cpu < nr_cpu_ids) ++ return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); ++ ++ return BAD_APICID; ++} ++ ++static physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid) ++{ ++ return physid_mask_of_physid(phys_apicid); ++} ++ ++/* Mapping from cpu number to logical apicid */ ++static inline int bigsmp_cpu_to_logical_apicid(int cpu) ++{ ++ if (cpu >= nr_cpu_ids) ++ return BAD_APICID; ++ return cpu_physical_id(cpu); ++} ++ ++static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) ++{ ++ /* For clustered we don't have a good way to do this yet - hack */ ++ return physids_promote(0xFFL); ++} ++ ++static int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) ++{ ++ return 1; ++} ++ ++/* As we are using single CPU as destination, pick only one CPU here */ ++static unsigned int bigsmp_cpu_mask_to_apicid(const struct cpumask *cpumask) ++{ ++ return bigsmp_cpu_to_logical_apicid(cpumask_first(cpumask)); ++} ++ ++static unsigned int bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask, ++ const struct cpumask *andmask) ++{ ++ int cpu; ++ ++ /* ++ * We're using fixed IRQ delivery, can only return one phys APIC ID. ++ * May as well be the first. 
++ */ ++ for_each_cpu_and(cpu, cpumask, andmask) { ++ if (cpumask_test_cpu(cpu, cpu_online_mask)) ++ break; ++ } ++ if (cpu < nr_cpu_ids) ++ return bigsmp_cpu_to_logical_apicid(cpu); ++ ++ return BAD_APICID; ++} ++ ++static int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb) ++{ ++ return cpuid_apic >> index_msb; ++} ++ ++static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector) ++{ ++ default_send_IPI_mask_sequence_phys(mask, vector); ++} ++ ++static void bigsmp_send_IPI_allbutself(int vector) ++{ ++ default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); ++} ++ ++static void bigsmp_send_IPI_all(int vector) ++{ ++ bigsmp_send_IPI_mask(cpu_online_mask, vector); ++} ++ ++static int dmi_bigsmp; /* can be set by dmi scanners */ ++ ++static int hp_ht_bigsmp(const struct dmi_system_id *d) ++{ ++ printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); ++ dmi_bigsmp = 1; ++ ++ return 0; ++} ++ ++ ++static const struct dmi_system_id bigsmp_dmi_table[] = { ++ { hp_ht_bigsmp, "HP ProLiant DL760 G2", ++ { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), ++ DMI_MATCH(DMI_BIOS_VERSION, "P44-"), ++ } ++ }, ++ ++ { hp_ht_bigsmp, "HP ProLiant DL740", ++ { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), ++ DMI_MATCH(DMI_BIOS_VERSION, "P47-"), ++ } ++ }, ++ { } /* NULL entry stops DMI scanning */ ++}; ++ ++static void bigsmp_vector_allocation_domain(int cpu, struct cpumask *retmask) ++{ ++ cpumask_clear(retmask); ++ cpumask_set_cpu(cpu, retmask); ++} ++ ++static int probe_bigsmp(void) ++{ ++ if (def_to_bigsmp) ++ dmi_bigsmp = 1; ++ else ++ dmi_check_system(bigsmp_dmi_table); ++ ++ return dmi_bigsmp; ++} ++ ++struct apic apic_bigsmp = { ++ ++ .name = "bigsmp", ++ .probe = probe_bigsmp, ++ .acpi_madt_oem_check = NULL, ++ .apic_id_registered = bigsmp_apic_id_registered, ++ ++ .irq_delivery_mode = dest_Fixed, ++ /* phys delivery to target CPU: */ ++ .irq_dest_mode = 0, ++ ++ .target_cpus = bigsmp_target_cpus, ++ .disable_esr = 1, ++ .dest_logical = 0, ++ .check_apicid_used = bigsmp_check_apicid_used, ++ .check_apicid_present = bigsmp_check_apicid_present, ++ ++ .vector_allocation_domain = bigsmp_vector_allocation_domain, ++ .init_apic_ldr = bigsmp_init_apic_ldr, ++ ++ .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map, ++ .setup_apic_routing = bigsmp_setup_apic_routing, ++ .multi_timer_check = NULL, ++ .apicid_to_node = bigsmp_apicid_to_node, ++ .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid, ++ .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid, ++ .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present, ++ .setup_portio_remap = NULL, ++ .check_phys_apicid_present = bigsmp_check_phys_apicid_present, ++ .enable_apic_mode = NULL, ++ .phys_pkg_id = bigsmp_phys_pkg_id, ++ .mps_oem_check = NULL, ++ ++ .get_apic_id = bigsmp_get_apic_id, ++ .set_apic_id = NULL, ++ .apic_id_mask = 0xFF << 24, ++ ++ .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid, ++ .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and, ++ ++ .send_IPI_mask = bigsmp_send_IPI_mask, ++ .send_IPI_mask_allbutself = NULL, ++ .send_IPI_allbutself = bigsmp_send_IPI_allbutself, ++ .send_IPI_all = bigsmp_send_IPI_all, ++ .send_IPI_self = default_send_IPI_self, ++ ++ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, ++ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, ++ ++ .wait_for_init_deassert = default_wait_for_init_deassert, ++ ++ .smp_callin_clear_local_apic = NULL, ++ .inquire_remote_apic = default_inquire_remote_apic, ++ ++ .read = native_apic_mem_read, ++ .write = native_apic_mem_write, ++ .icr_read = 
native_apic_icr_read, ++ .icr_write = native_apic_icr_write, ++ .wait_icr_idle = native_apic_wait_icr_idle, ++ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, ++}; +Index: linux-2.6-tip/arch/x86/kernel/apic/es7000_32.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/apic/es7000_32.c +@@ -0,0 +1,781 @@ ++/* ++ * Written by: Garry Forsgren, Unisys Corporation ++ * Natalie Protasevich, Unisys Corporation ++ * ++ * This file contains the code to configure and interface ++ * with Unisys ES7000 series hardware system manager. ++ * ++ * Copyright (c) 2003 Unisys Corporation. ++ * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar ++ * ++ * All Rights Reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of version 2 of the GNU General Public License as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it would be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ++ * ++ * You should have received a copy of the GNU General Public License along ++ * with this program; if not, write the Free Software Foundation, Inc., 59 ++ * Temple Place - Suite 330, Boston MA 02111-1307, USA. ++ * ++ * Contact information: Unisys Corporation, Township Line & Union Meeting ++ * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: ++ * ++ * http://www.unisys.com ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * ES7000 chipsets ++ */ ++ ++#define NON_UNISYS 0 ++#define ES7000_CLASSIC 1 ++#define ES7000_ZORRO 2 ++ ++#define MIP_REG 1 ++#define MIP_PSAI_REG 4 ++ ++#define MIP_BUSY 1 ++#define MIP_SPIN 0xf0000 ++#define MIP_VALID 0x0100000000000000ULL ++#define MIP_SW_APIC 0x1020b ++ ++#define MIP_PORT(val) ((val >> 32) & 0xffff) ++ ++#define MIP_RD_LO(val) (val & 0xffffffff) ++ ++struct mip_reg { ++ unsigned long long off_0x00; ++ unsigned long long off_0x08; ++ unsigned long long off_0x10; ++ unsigned long long off_0x18; ++ unsigned long long off_0x20; ++ unsigned long long off_0x28; ++ unsigned long long off_0x30; ++ unsigned long long off_0x38; ++}; ++ ++struct mip_reg_info { ++ unsigned long long mip_info; ++ unsigned long long delivery_info; ++ unsigned long long host_reg; ++ unsigned long long mip_reg; ++}; ++ ++struct psai { ++ unsigned long long entry_type; ++ unsigned long long addr; ++ unsigned long long bep_addr; ++}; ++ ++#ifdef CONFIG_ACPI ++ ++struct es7000_oem_table { ++ struct acpi_table_header Header; ++ u32 OEMTableAddr; ++ u32 OEMTableSize; ++}; ++ ++static unsigned long oem_addrX; ++static unsigned long oem_size; ++ ++#endif ++ ++/* ++ * ES7000 Globals ++ */ ++ ++static volatile unsigned long *psai; ++static struct mip_reg *mip_reg; ++static struct mip_reg *host_reg; ++static int mip_port; ++static unsigned long mip_addr; ++static unsigned long host_addr; ++ ++int es7000_plat; ++ ++/* ++ * GSI override for ES7000 platforms. 
++ */ ++ ++static unsigned int base; ++ ++static int ++es7000_rename_gsi(int ioapic, int gsi) ++{ ++ if (es7000_plat == ES7000_ZORRO) ++ return gsi; ++ ++ if (!base) { ++ int i; ++ for (i = 0; i < nr_ioapics; i++) ++ base += nr_ioapic_registers[i]; ++ } ++ ++ if (!ioapic && (gsi < 16)) ++ gsi += base; ++ ++ return gsi; ++} ++ ++static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) ++{ ++ unsigned long vect = 0, psaival = 0; ++ ++ if (psai == NULL) ++ return -1; ++ ++ vect = ((unsigned long)__pa(eip)/0x1000) << 16; ++ psaival = (0x1000000 | vect | cpu); ++ ++ while (*psai & 0x1000000) ++ ; ++ ++ *psai = psaival; ++ ++ return 0; ++} ++ ++static int es7000_apic_is_cluster(void) ++{ ++ /* MPENTIUMIII */ ++ if (boot_cpu_data.x86 == 6 && ++ (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) ++ return 1; ++ ++ return 0; ++} ++ ++static void setup_unisys(void) ++{ ++ /* ++ * Determine the generation of the ES7000 currently running. ++ * ++ * es7000_plat = 1 if the machine is a 5xx ES7000 box ++ * es7000_plat = 2 if the machine is a x86_64 ES7000 box ++ * ++ */ ++ if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) ++ es7000_plat = ES7000_ZORRO; ++ else ++ es7000_plat = ES7000_CLASSIC; ++ ioapic_renumber_irq = es7000_rename_gsi; ++} ++ ++/* ++ * Parse the OEM Table: ++ */ ++static int parse_unisys_oem(char *oemptr) ++{ ++ int i; ++ int success = 0; ++ unsigned char type, size; ++ unsigned long val; ++ char *tp = NULL; ++ struct psai *psaip = NULL; ++ struct mip_reg_info *mi; ++ struct mip_reg *host, *mip; ++ ++ tp = oemptr; ++ ++ tp += 8; ++ ++ for (i = 0; i <= 6; i++) { ++ type = *tp++; ++ size = *tp++; ++ tp -= 2; ++ switch (type) { ++ case MIP_REG: ++ mi = (struct mip_reg_info *)tp; ++ val = MIP_RD_LO(mi->host_reg); ++ host_addr = val; ++ host = (struct mip_reg *)val; ++ host_reg = __va(host); ++ val = MIP_RD_LO(mi->mip_reg); ++ mip_port = MIP_PORT(mi->mip_info); ++ mip_addr = val; ++ mip = (struct mip_reg *)val; ++ mip_reg = __va(mip); ++ pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", ++ (unsigned long)host_reg); ++ pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", ++ (unsigned long)mip_reg); ++ success++; ++ break; ++ case MIP_PSAI_REG: ++ psaip = (struct psai *)tp; ++ if (tp != NULL) { ++ if (psaip->addr) ++ psai = __va(psaip->addr); ++ else ++ psai = NULL; ++ success++; ++ } ++ break; ++ default: ++ break; ++ } ++ tp += size; ++ } ++ ++ if (success < 2) ++ es7000_plat = NON_UNISYS; ++ else ++ setup_unisys(); ++ ++ return es7000_plat; ++} ++ ++#ifdef CONFIG_ACPI ++static int find_unisys_acpi_oem_table(unsigned long *oem_addr) ++{ ++ struct acpi_table_header *header = NULL; ++ struct es7000_oem_table *table; ++ acpi_size tbl_size; ++ acpi_status ret; ++ int i = 0; ++ ++ for (;;) { ++ ret = acpi_get_table_with_size("OEM1", i++, &header, &tbl_size); ++ if (!ACPI_SUCCESS(ret)) ++ return -1; ++ ++ if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) ++ break; ++ ++ early_acpi_os_unmap_memory(header, tbl_size); ++ } ++ ++ table = (void *)header; ++ ++ oem_addrX = table->OEMTableAddr; ++ oem_size = table->OEMTableSize; ++ ++ early_acpi_os_unmap_memory(header, tbl_size); ++ ++ *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size); ++ ++ return 0; ++} ++ ++static void unmap_unisys_acpi_oem_table(unsigned long oem_addr) ++{ ++ if (!oem_addr) ++ return; ++ ++ __acpi_unmap_table((char *)oem_addr, oem_size); ++} ++ ++static int es7000_check_dsdt(void) ++{ ++ struct acpi_table_header header; ++ ++ if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, 
&header)) && ++ !strncmp(header.oem_id, "UNISYS", 6)) ++ return 1; ++ return 0; ++} ++ ++static int es7000_acpi_ret; ++ ++/* Hook from generic ACPI tables.c */ ++static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) ++{ ++ unsigned long oem_addr = 0; ++ int check_dsdt; ++ int ret = 0; ++ ++ /* check dsdt at first to avoid clear fix_map for oem_addr */ ++ check_dsdt = es7000_check_dsdt(); ++ ++ if (!find_unisys_acpi_oem_table(&oem_addr)) { ++ if (check_dsdt) { ++ ret = parse_unisys_oem((char *)oem_addr); ++ } else { ++ setup_unisys(); ++ ret = 1; ++ } ++ /* ++ * we need to unmap it ++ */ ++ unmap_unisys_acpi_oem_table(oem_addr); ++ } ++ ++ es7000_acpi_ret = ret; ++ ++ return ret && !es7000_apic_is_cluster(); ++} ++ ++static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) ++{ ++ int ret = es7000_acpi_ret; ++ ++ return ret && es7000_apic_is_cluster(); ++} ++ ++#else /* !CONFIG_ACPI: */ ++static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) ++{ ++ return 0; ++} ++ ++static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) ++{ ++ return 0; ++} ++#endif /* !CONFIG_ACPI */ ++ ++static void es7000_spin(int n) ++{ ++ int i = 0; ++ ++ while (i++ < n) ++ rep_nop(); ++} ++ ++static int es7000_mip_write(struct mip_reg *mip_reg) ++{ ++ int status = 0; ++ int spin; ++ ++ spin = MIP_SPIN; ++ while ((host_reg->off_0x38 & MIP_VALID) != 0) { ++ if (--spin <= 0) { ++ WARN(1, "Timeout waiting for Host Valid Flag\n"); ++ return -1; ++ } ++ es7000_spin(MIP_SPIN); ++ } ++ ++ memcpy(host_reg, mip_reg, sizeof(struct mip_reg)); ++ outb(1, mip_port); ++ ++ spin = MIP_SPIN; ++ ++ while ((mip_reg->off_0x38 & MIP_VALID) == 0) { ++ if (--spin <= 0) { ++ WARN(1, "Timeout waiting for MIP Valid Flag\n"); ++ return -1; ++ } ++ es7000_spin(MIP_SPIN); ++ } ++ ++ status = (mip_reg->off_0x00 & 0xffff0000000000ULL) >> 48; ++ mip_reg->off_0x38 &= ~MIP_VALID; ++ ++ return status; ++} ++ ++static void es7000_enable_apic_mode(void) ++{ ++ struct mip_reg es7000_mip_reg; ++ int mip_status; ++ ++ if (!es7000_plat) ++ return; ++ ++ printk(KERN_INFO "ES7000: Enabling APIC mode.\n"); ++ memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); ++ es7000_mip_reg.off_0x00 = MIP_SW_APIC; ++ es7000_mip_reg.off_0x38 = MIP_VALID; ++ ++ while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) ++ WARN(1, "Command failed, status = %x\n", mip_status); ++} ++ ++static void es7000_vector_allocation_domain(int cpu, struct cpumask *retmask) ++{ ++ /* Careful. Some cpus do not strictly honor the set of cpus ++ * specified in the interrupt destination when using lowest ++ * priority interrupt delivery mode. ++ * ++ * In particular there was a hyperthreading cpu observed to ++ * deliver interrupts to the wrong hyperthread when only one ++ * hyperthread was specified in the interrupt desitination. 
++ */ ++ cpumask_clear(retmask); ++ cpumask_bits(retmask)[0] = APIC_ALL_CPUS; ++} ++ ++ ++static void es7000_wait_for_init_deassert(atomic_t *deassert) ++{ ++ while (!atomic_read(deassert)) ++ cpu_relax(); ++} ++ ++static unsigned int es7000_get_apic_id(unsigned long x) ++{ ++ return (x >> 24) & 0xFF; ++} ++ ++static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) ++{ ++ default_send_IPI_mask_sequence_phys(mask, vector); ++} ++ ++static void es7000_send_IPI_allbutself(int vector) ++{ ++ default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); ++} ++ ++static void es7000_send_IPI_all(int vector) ++{ ++ es7000_send_IPI_mask(cpu_online_mask, vector); ++} ++ ++static int es7000_apic_id_registered(void) ++{ ++ return 1; ++} ++ ++static const struct cpumask *target_cpus_cluster(void) ++{ ++ return cpu_all_mask; ++} ++ ++static const struct cpumask *es7000_target_cpus(void) ++{ ++ return cpumask_of(smp_processor_id()); ++} ++ ++static unsigned long ++es7000_check_apicid_used(physid_mask_t bitmap, int apicid) ++{ ++ return 0; ++} ++static unsigned long es7000_check_apicid_present(int bit) ++{ ++ return physid_isset(bit, phys_cpu_present_map); ++} ++ ++static unsigned long calculate_ldr(int cpu) ++{ ++ unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu); ++ ++ return SET_APIC_LOGICAL_ID(id); ++} ++ ++/* ++ * Set up the logical destination ID. ++ * ++ * Intel recommends to set DFR, LdR and TPR before enabling ++ * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel ++ * document number 292116). So here it goes... ++ */ ++static void es7000_init_apic_ldr_cluster(void) ++{ ++ unsigned long val; ++ int cpu = smp_processor_id(); ++ ++ apic_write(APIC_DFR, APIC_DFR_CLUSTER); ++ val = calculate_ldr(cpu); ++ apic_write(APIC_LDR, val); ++} ++ ++static void es7000_init_apic_ldr(void) ++{ ++ unsigned long val; ++ int cpu = smp_processor_id(); ++ ++ apic_write(APIC_DFR, APIC_DFR_FLAT); ++ val = calculate_ldr(cpu); ++ apic_write(APIC_LDR, val); ++} ++ ++static void es7000_setup_apic_routing(void) ++{ ++ int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); ++ ++ printk(KERN_INFO ++ "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", ++ (apic_version[apic] == 0x14) ? 
++ "Physical Cluster" : "Logical Cluster", ++ nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); ++} ++ ++static int es7000_apicid_to_node(int logical_apicid) ++{ ++ return 0; ++} ++ ++ ++static int es7000_cpu_present_to_apicid(int mps_cpu) ++{ ++ if (!mps_cpu) ++ return boot_cpu_physical_apicid; ++ else if (mps_cpu < nr_cpu_ids) ++ return per_cpu(x86_bios_cpu_apicid, mps_cpu); ++ else ++ return BAD_APICID; ++} ++ ++static int cpu_id; ++ ++static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid) ++{ ++ physid_mask_t mask; ++ ++ mask = physid_mask_of_physid(cpu_id); ++ ++cpu_id; ++ ++ return mask; ++} ++ ++/* Mapping from cpu number to logical apicid */ ++static int es7000_cpu_to_logical_apicid(int cpu) ++{ ++#ifdef CONFIG_SMP ++ if (cpu >= nr_cpu_ids) ++ return BAD_APICID; ++ return cpu_2_logical_apicid[cpu]; ++#else ++ return logical_smp_processor_id(); ++#endif ++} ++ ++static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map) ++{ ++ /* For clustered we don't have a good way to do this yet - hack */ ++ return physids_promote(0xff); ++} ++ ++static int es7000_check_phys_apicid_present(int cpu_physical_apicid) ++{ ++ boot_cpu_physical_apicid = read_apic_id(); ++ return 1; ++} ++ ++static unsigned int es7000_cpu_mask_to_apicid(const struct cpumask *cpumask) ++{ ++ unsigned int round = 0; ++ int cpu, uninitialized_var(apicid); ++ ++ /* ++ * The cpus in the mask must all be on the apic cluster. ++ */ ++ for_each_cpu(cpu, cpumask) { ++ int new_apicid = es7000_cpu_to_logical_apicid(cpu); ++ ++ if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { ++ WARN(1, "Not a valid mask!"); ++ ++ return BAD_APICID; ++ } ++ apicid = new_apicid; ++ round++; ++ } ++ return apicid; ++} ++ ++static unsigned int ++es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, ++ const struct cpumask *andmask) ++{ ++ int apicid = es7000_cpu_to_logical_apicid(0); ++ cpumask_var_t cpumask; ++ ++ if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) ++ return apicid; ++ ++ cpumask_and(cpumask, inmask, andmask); ++ cpumask_and(cpumask, cpumask, cpu_online_mask); ++ apicid = es7000_cpu_mask_to_apicid(cpumask); ++ ++ free_cpumask_var(cpumask); ++ ++ return apicid; ++} ++ ++static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) ++{ ++ return cpuid_apic >> index_msb; ++} ++ ++static int probe_es7000(void) ++{ ++ /* probed later in mptable/ACPI hooks */ ++ return 0; ++} ++ ++static int es7000_mps_ret; ++static int es7000_mps_oem_check(struct mpc_table *mpc, char *oem, ++ char *productid) ++{ ++ int ret = 0; ++ ++ if (mpc->oemptr) { ++ struct mpc_oemtable *oem_table = ++ (struct mpc_oemtable *)mpc->oemptr; ++ ++ if (!strncmp(oem, "UNISYS", 6)) ++ ret = parse_unisys_oem((char *)oem_table); ++ } ++ ++ es7000_mps_ret = ret; ++ ++ return ret && !es7000_apic_is_cluster(); ++} ++ ++static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem, ++ char *productid) ++{ ++ int ret = es7000_mps_ret; ++ ++ return ret && es7000_apic_is_cluster(); ++} ++ ++struct apic apic_es7000_cluster = { ++ ++ .name = "es7000", ++ .probe = probe_es7000, ++ .acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster, ++ .apic_id_registered = es7000_apic_id_registered, ++ ++ .irq_delivery_mode = dest_LowestPrio, ++ /* logical delivery broadcast to all procs: */ ++ .irq_dest_mode = 1, ++ ++ .target_cpus = target_cpus_cluster, ++ .disable_esr = 1, ++ .dest_logical = 0, ++ .check_apicid_used = es7000_check_apicid_used, ++ .check_apicid_present = es7000_check_apicid_present, ++ ++ .vector_allocation_domain = 
es7000_vector_allocation_domain, ++ .init_apic_ldr = es7000_init_apic_ldr_cluster, ++ ++ .ioapic_phys_id_map = es7000_ioapic_phys_id_map, ++ .setup_apic_routing = es7000_setup_apic_routing, ++ .multi_timer_check = NULL, ++ .apicid_to_node = es7000_apicid_to_node, ++ .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, ++ .cpu_present_to_apicid = es7000_cpu_present_to_apicid, ++ .apicid_to_cpu_present = es7000_apicid_to_cpu_present, ++ .setup_portio_remap = NULL, ++ .check_phys_apicid_present = es7000_check_phys_apicid_present, ++ .enable_apic_mode = es7000_enable_apic_mode, ++ .phys_pkg_id = es7000_phys_pkg_id, ++ .mps_oem_check = es7000_mps_oem_check_cluster, ++ ++ .get_apic_id = es7000_get_apic_id, ++ .set_apic_id = NULL, ++ .apic_id_mask = 0xFF << 24, ++ ++ .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, ++ .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, ++ ++ .send_IPI_mask = es7000_send_IPI_mask, ++ .send_IPI_mask_allbutself = NULL, ++ .send_IPI_allbutself = es7000_send_IPI_allbutself, ++ .send_IPI_all = es7000_send_IPI_all, ++ .send_IPI_self = default_send_IPI_self, ++ ++ .wakeup_secondary_cpu = wakeup_secondary_cpu_via_mip, ++ ++ .trampoline_phys_low = 0x467, ++ .trampoline_phys_high = 0x469, ++ ++ .wait_for_init_deassert = NULL, ++ ++ /* Nothing to do for most platforms, since cleared by the INIT cycle: */ ++ .smp_callin_clear_local_apic = NULL, ++ .inquire_remote_apic = default_inquire_remote_apic, ++ ++ .read = native_apic_mem_read, ++ .write = native_apic_mem_write, ++ .icr_read = native_apic_icr_read, ++ .icr_write = native_apic_icr_write, ++ .wait_icr_idle = native_apic_wait_icr_idle, ++ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, ++}; ++ ++struct apic apic_es7000 = { ++ ++ .name = "es7000", ++ .probe = probe_es7000, ++ .acpi_madt_oem_check = es7000_acpi_madt_oem_check, ++ .apic_id_registered = es7000_apic_id_registered, ++ ++ .irq_delivery_mode = dest_Fixed, ++ /* phys delivery to target CPUs: */ ++ .irq_dest_mode = 0, ++ ++ .target_cpus = es7000_target_cpus, ++ .disable_esr = 1, ++ .dest_logical = 0, ++ .check_apicid_used = es7000_check_apicid_used, ++ .check_apicid_present = es7000_check_apicid_present, ++ ++ .vector_allocation_domain = es7000_vector_allocation_domain, ++ .init_apic_ldr = es7000_init_apic_ldr, ++ ++ .ioapic_phys_id_map = es7000_ioapic_phys_id_map, ++ .setup_apic_routing = es7000_setup_apic_routing, ++ .multi_timer_check = NULL, ++ .apicid_to_node = es7000_apicid_to_node, ++ .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid, ++ .cpu_present_to_apicid = es7000_cpu_present_to_apicid, ++ .apicid_to_cpu_present = es7000_apicid_to_cpu_present, ++ .setup_portio_remap = NULL, ++ .check_phys_apicid_present = es7000_check_phys_apicid_present, ++ .enable_apic_mode = es7000_enable_apic_mode, ++ .phys_pkg_id = es7000_phys_pkg_id, ++ .mps_oem_check = es7000_mps_oem_check, ++ ++ .get_apic_id = es7000_get_apic_id, ++ .set_apic_id = NULL, ++ .apic_id_mask = 0xFF << 24, ++ ++ .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid, ++ .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, ++ ++ .send_IPI_mask = es7000_send_IPI_mask, ++ .send_IPI_mask_allbutself = NULL, ++ .send_IPI_allbutself = es7000_send_IPI_allbutself, ++ .send_IPI_all = es7000_send_IPI_all, ++ .send_IPI_self = default_send_IPI_self, ++ ++ .trampoline_phys_low = 0x467, ++ .trampoline_phys_high = 0x469, ++ ++ .wait_for_init_deassert = es7000_wait_for_init_deassert, ++ ++ /* Nothing to do for most platforms, since cleared by the INIT cycle: */ ++ .smp_callin_clear_local_apic = NULL, ++ 
.inquire_remote_apic = default_inquire_remote_apic, ++ ++ .read = native_apic_mem_read, ++ .write = native_apic_mem_write, ++ .icr_read = native_apic_icr_read, ++ .icr_write = native_apic_icr_write, ++ .wait_icr_idle = native_apic_wait_icr_idle, ++ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, ++}; +Index: linux-2.6-tip/arch/x86/kernel/apic/io_apic.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/apic/io_apic.c +@@ -0,0 +1,4166 @@ ++/* ++ * Intel IO-APIC support for multi-Pentium hosts. ++ * ++ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo ++ * ++ * Many thanks to Stig Venaas for trying out countless experimental ++ * patches and reporting/debugging problems patiently! ++ * ++ * (c) 1999, Multiple IO-APIC support, developed by ++ * Ken-ichi Yaku and ++ * Hidemi Kishimoto , ++ * further tested and cleaned up by Zach Brown ++ * and Ingo Molnar ++ * ++ * Fixes ++ * Maciej W. Rozycki : Bits for genuine 82489DX APICs; ++ * thanks to Eric Gilmore ++ * and Rolf G. Tews ++ * for testing these extensively ++ * Paul Diefenbaugh : Added full ACPI support ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include /* time_after() */ ++#ifdef CONFIG_ACPI ++#include ++#endif ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#define __apicdebuginit(type) static type __init ++ ++/* ++ * Is the SiS APIC rmw bug present ? ++ * -1 = don't know, 0 = no, 1 = yes ++ */ ++int sis_apic_bug = -1; ++ ++static DEFINE_RAW_SPINLOCK(ioapic_lock); ++static DEFINE_RAW_SPINLOCK(vector_lock); ++ ++/* ++ * # of IRQ routing registers ++ */ ++int nr_ioapic_registers[MAX_IO_APICS]; ++ ++/* I/O APIC entries */ ++struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; ++int nr_ioapics; ++ ++/* MP IRQ source entries */ ++struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; ++ ++/* # of MP IRQ source entries */ ++int mp_irq_entries; ++ ++#if defined (CONFIG_MCA) || defined (CONFIG_EISA) ++int mp_bus_id_to_type[MAX_MP_BUSSES]; ++#endif ++ ++DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); ++ ++int skip_ioapic_setup; ++ ++void arch_disable_smp_support(void) ++{ ++#ifdef CONFIG_PCI ++ noioapicquirk = 1; ++ noioapicreroute = -1; ++#endif ++ skip_ioapic_setup = 1; ++} ++ ++static int __init parse_noapic(char *str) ++{ ++ /* disable IO-APIC */ ++ arch_disable_smp_support(); ++ return 0; ++} ++early_param("noapic", parse_noapic); ++ ++struct irq_pin_list; ++ ++/* ++ * This is performance-critical, we want to do it O(1) ++ * ++ * the indexing order of this array favors 1:1 mappings ++ * between pins and IRQs. 
++ */ ++ ++struct irq_pin_list { ++ int apic, pin; ++ struct irq_pin_list *next; ++}; ++ ++static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) ++{ ++ struct irq_pin_list *pin; ++ int node; ++ ++ node = cpu_to_node(cpu); ++ ++ pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); ++ ++ return pin; ++} ++ ++struct irq_cfg { ++ struct irq_pin_list *irq_2_pin; ++ cpumask_var_t domain; ++ cpumask_var_t old_domain; ++ unsigned move_cleanup_count; ++ u8 vector; ++ u8 move_in_progress : 1; ++#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC ++ u8 move_desc_pending : 1; ++#endif ++}; ++ ++/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ ++#ifdef CONFIG_SPARSE_IRQ ++static struct irq_cfg irq_cfgx[] = { ++#else ++static struct irq_cfg irq_cfgx[NR_IRQS] = { ++#endif ++ [0] = { .vector = IRQ0_VECTOR, }, ++ [1] = { .vector = IRQ1_VECTOR, }, ++ [2] = { .vector = IRQ2_VECTOR, }, ++ [3] = { .vector = IRQ3_VECTOR, }, ++ [4] = { .vector = IRQ4_VECTOR, }, ++ [5] = { .vector = IRQ5_VECTOR, }, ++ [6] = { .vector = IRQ6_VECTOR, }, ++ [7] = { .vector = IRQ7_VECTOR, }, ++ [8] = { .vector = IRQ8_VECTOR, }, ++ [9] = { .vector = IRQ9_VECTOR, }, ++ [10] = { .vector = IRQ10_VECTOR, }, ++ [11] = { .vector = IRQ11_VECTOR, }, ++ [12] = { .vector = IRQ12_VECTOR, }, ++ [13] = { .vector = IRQ13_VECTOR, }, ++ [14] = { .vector = IRQ14_VECTOR, }, ++ [15] = { .vector = IRQ15_VECTOR, }, ++}; ++ ++int __init arch_early_irq_init(void) ++{ ++ struct irq_cfg *cfg; ++ struct irq_desc *desc; ++ int count; ++ int i; ++ ++ cfg = irq_cfgx; ++ count = ARRAY_SIZE(irq_cfgx); ++ ++ for (i = 0; i < count; i++) { ++ desc = irq_to_desc(i); ++ desc->chip_data = &cfg[i]; ++ alloc_bootmem_cpumask_var(&cfg[i].domain); ++ alloc_bootmem_cpumask_var(&cfg[i].old_domain); ++ if (i < NR_IRQS_LEGACY) ++ cpumask_setall(cfg[i].domain); ++ } ++ ++ return 0; ++} ++ ++#ifdef CONFIG_SPARSE_IRQ ++static struct irq_cfg *irq_cfg(unsigned int irq) ++{ ++ struct irq_cfg *cfg = NULL; ++ struct irq_desc *desc; ++ ++ desc = irq_to_desc(irq); ++ if (desc) ++ cfg = desc->chip_data; ++ ++ return cfg; ++} ++ ++static struct irq_cfg *get_one_free_irq_cfg(int cpu) ++{ ++ struct irq_cfg *cfg; ++ int node; ++ ++ node = cpu_to_node(cpu); ++ ++ cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); ++ if (cfg) { ++ if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { ++ kfree(cfg); ++ cfg = NULL; ++ } else if (!alloc_cpumask_var_node(&cfg->old_domain, ++ GFP_ATOMIC, node)) { ++ free_cpumask_var(cfg->domain); ++ kfree(cfg); ++ cfg = NULL; ++ } else { ++ cpumask_clear(cfg->domain); ++ cpumask_clear(cfg->old_domain); ++ } ++ } ++ ++ return cfg; ++} ++ ++int arch_init_chip_data(struct irq_desc *desc, int cpu) ++{ ++ struct irq_cfg *cfg; ++ ++ cfg = desc->chip_data; ++ if (!cfg) { ++ desc->chip_data = get_one_free_irq_cfg(cpu); ++ if (!desc->chip_data) { ++ printk(KERN_ERR "can not alloc irq_cfg\n"); ++ BUG_ON(1); ++ } ++ } ++ ++ return 0; ++} ++ ++#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC ++ ++static void ++init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) ++{ ++ struct irq_pin_list *old_entry, *head, *tail, *entry; ++ ++ cfg->irq_2_pin = NULL; ++ old_entry = old_cfg->irq_2_pin; ++ if (!old_entry) ++ return; ++ ++ entry = get_one_free_irq_2_pin(cpu); ++ if (!entry) ++ return; ++ ++ entry->apic = old_entry->apic; ++ entry->pin = old_entry->pin; ++ head = entry; ++ tail = entry; ++ old_entry = old_entry->next; ++ while (old_entry) { ++ entry = get_one_free_irq_2_pin(cpu); ++ if (!entry) { ++ entry = head; ++ while (entry) { ++ head = entry->next; ++ 
kfree(entry); ++ entry = head; ++ } ++ /* still use the old one */ ++ return; ++ } ++ entry->apic = old_entry->apic; ++ entry->pin = old_entry->pin; ++ tail->next = entry; ++ tail = entry; ++ old_entry = old_entry->next; ++ } ++ ++ tail->next = NULL; ++ cfg->irq_2_pin = head; ++} ++ ++static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) ++{ ++ struct irq_pin_list *entry, *next; ++ ++ if (old_cfg->irq_2_pin == cfg->irq_2_pin) ++ return; ++ ++ entry = old_cfg->irq_2_pin; ++ ++ while (entry) { ++ next = entry->next; ++ kfree(entry); ++ entry = next; ++ } ++ old_cfg->irq_2_pin = NULL; ++} ++ ++void arch_init_copy_chip_data(struct irq_desc *old_desc, ++ struct irq_desc *desc, int cpu) ++{ ++ struct irq_cfg *cfg; ++ struct irq_cfg *old_cfg; ++ ++ cfg = get_one_free_irq_cfg(cpu); ++ ++ if (!cfg) ++ return; ++ ++ desc->chip_data = cfg; ++ ++ old_cfg = old_desc->chip_data; ++ ++ memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); ++ ++ init_copy_irq_2_pin(old_cfg, cfg, cpu); ++} ++ ++static void free_irq_cfg(struct irq_cfg *old_cfg) ++{ ++ kfree(old_cfg); ++} ++ ++void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) ++{ ++ struct irq_cfg *old_cfg, *cfg; ++ ++ old_cfg = old_desc->chip_data; ++ cfg = desc->chip_data; ++ ++ if (old_cfg == cfg) ++ return; ++ ++ if (old_cfg) { ++ free_irq_2_pin(old_cfg, cfg); ++ free_irq_cfg(old_cfg); ++ old_desc->chip_data = NULL; ++ } ++} ++ ++static void ++set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) ++{ ++ struct irq_cfg *cfg = desc->chip_data; ++ ++ if (!cfg->move_in_progress) { ++ /* it means that domain is not changed */ ++ if (!cpumask_intersects(desc->affinity, mask)) ++ cfg->move_desc_pending = 1; ++ } ++} ++#endif ++ ++#else ++static struct irq_cfg *irq_cfg(unsigned int irq) ++{ ++ return irq < nr_irqs ? irq_cfgx + irq : NULL; ++} ++ ++#endif ++ ++#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC ++static inline void ++set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) ++{ ++} ++#endif ++ ++struct io_apic { ++ unsigned int index; ++ unsigned int unused[3]; ++ unsigned int data; ++ unsigned int unused2[11]; ++ unsigned int eoi; ++}; ++ ++static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) ++{ ++ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) ++ + (mp_ioapics[idx].apicaddr & ~PAGE_MASK); ++} ++ ++static inline void io_apic_eoi(unsigned int apic, unsigned int vector) ++{ ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ writel(vector, &io_apic->eoi); ++} ++ ++static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) ++{ ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ writel(reg, &io_apic->index); ++ return readl(&io_apic->data); ++} ++ ++static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) ++{ ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ writel(reg, &io_apic->index); ++ writel(value, &io_apic->data); ++} ++ ++/* ++ * Re-write a value: to be used for read-modify-write ++ * cycles where the read already set up the index register. 
++ * ++ * Older SiS APIC requires we rewrite the index register ++ */ ++static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) ++{ ++ struct io_apic __iomem *io_apic = io_apic_base(apic); ++ ++ if (sis_apic_bug) ++ writel(reg, &io_apic->index); ++ writel(value, &io_apic->data); ++} ++ ++static bool io_apic_level_ack_pending(struct irq_cfg *cfg) ++{ ++ struct irq_pin_list *entry; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ entry = cfg->irq_2_pin; ++ for (;;) { ++ unsigned int reg; ++ int pin; ++ ++ if (!entry) ++ break; ++ pin = entry->pin; ++ reg = io_apic_read(entry->apic, 0x10 + pin*2); ++ /* Is the remote IRR bit set? */ ++ if (reg & IO_APIC_REDIR_REMOTE_IRR) { ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ return true; ++ } ++ if (!entry->next) ++ break; ++ entry = entry->next; ++ } ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return false; ++} ++ ++union entry_union { ++ struct { u32 w1, w2; }; ++ struct IO_APIC_route_entry entry; ++}; ++ ++static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) ++{ ++ union entry_union eu; ++ unsigned long flags; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); ++ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ return eu.entry; ++} ++ ++/* ++ * When we write a new IO APIC routing entry, we need to write the high ++ * word first! If the mask bit in the low word is clear, we will enable ++ * the interrupt, and we need to make sure the entry is fully populated ++ * before that happens. ++ */ ++static void ++__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) ++{ ++ union entry_union eu; ++ eu.entry = e; ++ io_apic_write(apic, 0x11 + 2*pin, eu.w2); ++ io_apic_write(apic, 0x10 + 2*pin, eu.w1); ++} ++ ++void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) ++{ ++ unsigned long flags; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __ioapic_write_entry(apic, pin, e); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++/* ++ * When we mask an IO APIC routing entry, we need to write the low ++ * word first, in order to set the mask bit before we change the ++ * high bits! 
++ */ ++static void ioapic_mask_entry(int apic, int pin) ++{ ++ unsigned long flags; ++ union entry_union eu = { .entry.mask = 1 }; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic, 0x10 + 2*pin, eu.w1); ++ io_apic_write(apic, 0x11 + 2*pin, eu.w2); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++#ifdef CONFIG_SMP ++static void send_cleanup_vector(struct irq_cfg *cfg) ++{ ++ cpumask_var_t cleanup_mask; ++ ++ if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { ++ unsigned int i; ++ cfg->move_cleanup_count = 0; ++ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) ++ cfg->move_cleanup_count++; ++ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) ++ apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); ++ } else { ++ cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); ++ cfg->move_cleanup_count = cpumask_weight(cleanup_mask); ++ apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); ++ free_cpumask_var(cleanup_mask); ++ } ++ cfg->move_in_progress = 0; ++} ++ ++static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) ++{ ++ int apic, pin; ++ struct irq_pin_list *entry; ++ u8 vector = cfg->vector; ++ ++ entry = cfg->irq_2_pin; ++ for (;;) { ++ unsigned int reg; ++ ++ if (!entry) ++ break; ++ ++ apic = entry->apic; ++ pin = entry->pin; ++ /* ++ * With interrupt-remapping, destination information comes ++ * from interrupt-remapping table entry. ++ */ ++ if (!irq_remapped(irq)) ++ io_apic_write(apic, 0x11 + pin*2, dest); ++ reg = io_apic_read(apic, 0x10 + pin*2); ++ reg &= ~IO_APIC_REDIR_VECTOR_MASK; ++ reg |= vector; ++ io_apic_modify(apic, 0x10 + pin*2, reg); ++ if (!entry->next) ++ break; ++ entry = entry->next; ++ } ++} ++ ++static int ++assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); ++ ++/* ++ * Either sets desc->affinity to a valid value, and returns ++ * ->cpu_mask_to_apicid of that, or returns BAD_APICID and ++ * leaves desc->affinity untouched. ++ */ ++static unsigned int ++set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) ++{ ++ struct irq_cfg *cfg; ++ unsigned int irq; ++ ++ if (!cpumask_intersects(mask, cpu_online_mask)) ++ return BAD_APICID; ++ ++ irq = desc->irq; ++ cfg = desc->chip_data; ++ if (assign_irq_vector(irq, cfg, mask)) ++ return BAD_APICID; ++ ++ /* check that before desc->addinity get updated */ ++ set_extra_move_desc(desc, mask); ++ ++ cpumask_copy(desc->affinity, mask); ++ ++ return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); ++} ++ ++static void ++set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) ++{ ++ struct irq_cfg *cfg; ++ unsigned long flags; ++ unsigned int dest; ++ unsigned int irq; ++ ++ irq = desc->irq; ++ cfg = desc->chip_data; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ dest = set_desc_affinity(desc, mask); ++ if (dest != BAD_APICID) { ++ /* Only the high 8 bits are valid. */ ++ dest = SET_APIC_LOGICAL_ID(dest); ++ __target_IO_APIC_irq(irq, dest, cfg); ++ } ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void ++set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) ++{ ++ struct irq_desc *desc; ++ ++ desc = irq_to_desc(irq); ++ ++ set_ioapic_affinity_irq_desc(desc, mask); ++} ++#endif /* CONFIG_SMP */ ++ ++/* ++ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are ++ * shared ISA-space IRQs, so we have to support them. We are super ++ * fast in the common case, and fast for shared ISA-space IRQs. 
++ */ ++static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) ++{ ++ struct irq_pin_list *entry; ++ ++ entry = cfg->irq_2_pin; ++ if (!entry) { ++ entry = get_one_free_irq_2_pin(cpu); ++ if (!entry) { ++ printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", ++ apic, pin); ++ return; ++ } ++ cfg->irq_2_pin = entry; ++ entry->apic = apic; ++ entry->pin = pin; ++ return; ++ } ++ ++ while (entry->next) { ++ /* not again, please */ ++ if (entry->apic == apic && entry->pin == pin) ++ return; ++ ++ entry = entry->next; ++ } ++ ++ entry->next = get_one_free_irq_2_pin(cpu); ++ entry = entry->next; ++ entry->apic = apic; ++ entry->pin = pin; ++} ++ ++/* ++ * Reroute an IRQ to a different pin. ++ */ ++static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, ++ int oldapic, int oldpin, ++ int newapic, int newpin) ++{ ++ struct irq_pin_list *entry = cfg->irq_2_pin; ++ int replaced = 0; ++ ++ while (entry) { ++ if (entry->apic == oldapic && entry->pin == oldpin) { ++ entry->apic = newapic; ++ entry->pin = newpin; ++ replaced = 1; ++ /* every one is different, right? */ ++ break; ++ } ++ entry = entry->next; ++ } ++ ++ /* why? call replace before add? */ ++ if (!replaced) ++ add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); ++} ++ ++static inline void io_apic_modify_irq(struct irq_cfg *cfg, ++ int mask_and, int mask_or, ++ void (*final)(struct irq_pin_list *entry)) ++{ ++ int pin; ++ struct irq_pin_list *entry; ++ ++ for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { ++ unsigned int reg; ++ pin = entry->pin; ++ reg = io_apic_read(entry->apic, 0x10 + pin * 2); ++ reg &= mask_and; ++ reg |= mask_or; ++ io_apic_modify(entry->apic, 0x10 + pin * 2, reg); ++ if (final) ++ final(entry); ++ } ++} ++ ++static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) ++{ ++ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); ++} ++ ++#ifdef CONFIG_X86_64 ++static void io_apic_sync(struct irq_pin_list *entry) ++{ ++ /* ++ * Synchronize the IO-APIC and the CPU by doing ++ * a dummy read from the IO-APIC ++ */ ++ struct io_apic __iomem *io_apic; ++ io_apic = io_apic_base(entry->apic); ++ readl(&io_apic->data); ++} ++ ++static void __mask_IO_APIC_irq(struct irq_cfg *cfg) ++{ ++ io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); ++} ++#else /* CONFIG_X86_32 */ ++static void __mask_IO_APIC_irq(struct irq_cfg *cfg) ++{ ++ io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); ++} ++ ++static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) ++{ ++ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, ++ IO_APIC_REDIR_MASKED, NULL); ++} ++ ++static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) ++{ ++ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, ++ IO_APIC_REDIR_LEVEL_TRIGGER, NULL); ++} ++#endif /* CONFIG_X86_32 */ ++ ++static void mask_IO_APIC_irq_desc(struct irq_desc *desc) ++{ ++ struct irq_cfg *cfg = desc->chip_data; ++ unsigned long flags; ++ ++ BUG_ON(!cfg); ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __mask_IO_APIC_irq(cfg); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) ++{ ++ struct irq_cfg *cfg = desc->chip_data; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __unmask_IO_APIC_irq(cfg); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void mask_IO_APIC_irq(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ mask_IO_APIC_irq_desc(desc); ++} ++static void unmask_IO_APIC_irq(unsigned int irq) ++{ 
++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ unmask_IO_APIC_irq_desc(desc); ++} ++ ++static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) ++{ ++ struct IO_APIC_route_entry entry; ++ ++ /* Check delivery_mode to be sure we're not clearing an SMI pin */ ++ entry = ioapic_read_entry(apic, pin); ++ if (entry.delivery_mode == dest_SMI) ++ return; ++ /* ++ * Disable it in the IO-APIC irq-routing table: ++ */ ++ ioapic_mask_entry(apic, pin); ++} ++ ++static void clear_IO_APIC (void) ++{ ++ int apic, pin; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) ++ clear_IO_APIC_pin(apic, pin); ++} ++ ++#ifdef CONFIG_X86_32 ++/* ++ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to ++ * specific CPU-side IRQs. ++ */ ++ ++#define MAX_PIRQS 8 ++static int pirq_entries[MAX_PIRQS] = { ++ [0 ... MAX_PIRQS - 1] = -1 ++}; ++ ++static int __init ioapic_pirq_setup(char *str) ++{ ++ int i, max; ++ int ints[MAX_PIRQS+1]; ++ ++ get_options(str, ARRAY_SIZE(ints), ints); ++ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "PIRQ redirection, working around broken MP-BIOS.\n"); ++ max = MAX_PIRQS; ++ if (ints[0] < MAX_PIRQS) ++ max = ints[0]; ++ ++ for (i = 0; i < max; i++) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); ++ /* ++ * PIRQs are mapped upside down, usually. ++ */ ++ pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; ++ } ++ return 1; ++} ++ ++__setup("pirq=", ioapic_pirq_setup); ++#endif /* CONFIG_X86_32 */ ++ ++#ifdef CONFIG_INTR_REMAP ++/* I/O APIC RTE contents at the OS boot up */ ++static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; ++ ++/* ++ * Saves all the IO-APIC RTE's ++ */ ++int save_IO_APIC_setup(void) ++{ ++ union IO_APIC_reg_01 reg_01; ++ unsigned long flags; ++ int apic, pin; ++ ++ /* ++ * The number of IO-APIC IRQ registers (== #pins): ++ */ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(apic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ nr_ioapic_registers[apic] = reg_01.bits.entries+1; ++ } ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ early_ioapic_entries[apic] = ++ kzalloc(sizeof(struct IO_APIC_route_entry) * ++ nr_ioapic_registers[apic], GFP_KERNEL); ++ if (!early_ioapic_entries[apic]) ++ goto nomem; ++ } ++ ++ for (apic = 0; apic < nr_ioapics; apic++) ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) ++ early_ioapic_entries[apic][pin] = ++ ioapic_read_entry(apic, pin); ++ ++ return 0; ++ ++nomem: ++ while (apic >= 0) ++ kfree(early_ioapic_entries[apic--]); ++ memset(early_ioapic_entries, 0, ++ ARRAY_SIZE(early_ioapic_entries)); ++ ++ return -ENOMEM; ++} ++ ++void mask_IO_APIC_setup(void) ++{ ++ int apic, pin; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ if (!early_ioapic_entries[apic]) ++ break; ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ struct IO_APIC_route_entry entry; ++ ++ entry = early_ioapic_entries[apic][pin]; ++ if (!entry.mask) { ++ entry.mask = 1; ++ ioapic_write_entry(apic, pin, entry); ++ } ++ } ++ } ++} ++ ++void restore_IO_APIC_setup(void) ++{ ++ int apic, pin; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ if (!early_ioapic_entries[apic]) ++ break; ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) ++ ioapic_write_entry(apic, pin, ++ early_ioapic_entries[apic][pin]); ++ kfree(early_ioapic_entries[apic]); ++ early_ioapic_entries[apic] = NULL; ++ } ++} ++ ++void reinit_intr_remapped_IO_APIC(int intr_remapping) 
++{ ++ /* ++ * for now plain restore of previous settings. ++ * TBD: In the case of OS enabling interrupt-remapping, ++ * IO-APIC RTE's need to be setup to point to interrupt-remapping ++ * table entries. for now, do a plain restore, and wait for ++ * the setup_IO_APIC_irqs() to do proper initialization. ++ */ ++ restore_IO_APIC_setup(); ++} ++#endif ++ ++/* ++ * Find the IRQ entry number of a certain pin. ++ */ ++static int find_irq_entry(int apic, int pin, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) ++ if (mp_irqs[i].irqtype == type && ++ (mp_irqs[i].dstapic == mp_ioapics[apic].apicid || ++ mp_irqs[i].dstapic == MP_APIC_ALL) && ++ mp_irqs[i].dstirq == pin) ++ return i; ++ ++ return -1; ++} ++ ++/* ++ * Find the pin to which IRQ[irq] (ISA) is connected ++ */ ++static int __init find_isa_irq_pin(int irq, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].srcbus; ++ ++ if (test_bit(lbus, mp_bus_not_pci) && ++ (mp_irqs[i].irqtype == type) && ++ (mp_irqs[i].srcbusirq == irq)) ++ ++ return mp_irqs[i].dstirq; ++ } ++ return -1; ++} ++ ++static int __init find_isa_irq_apic(int irq, int type) ++{ ++ int i; ++ ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].srcbus; ++ ++ if (test_bit(lbus, mp_bus_not_pci) && ++ (mp_irqs[i].irqtype == type) && ++ (mp_irqs[i].srcbusirq == irq)) ++ break; ++ } ++ if (i < mp_irq_entries) { ++ int apic; ++ for(apic = 0; apic < nr_ioapics; apic++) { ++ if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic) ++ return apic; ++ } ++ } ++ ++ return -1; ++} ++ ++/* ++ * Find a specific PCI IRQ entry. ++ * Not an __init, possibly needed by modules ++ */ ++static int pin_2_irq(int idx, int apic, int pin); ++ ++int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) ++{ ++ int apic, i, best_guess = -1; ++ ++ apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", ++ bus, slot, pin); ++ if (test_bit(bus, mp_bus_not_pci)) { ++ apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); ++ return -1; ++ } ++ for (i = 0; i < mp_irq_entries; i++) { ++ int lbus = mp_irqs[i].srcbus; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) ++ if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || ++ mp_irqs[i].dstapic == MP_APIC_ALL) ++ break; ++ ++ if (!test_bit(lbus, mp_bus_not_pci) && ++ !mp_irqs[i].irqtype && ++ (bus == lbus) && ++ (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { ++ int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); ++ ++ if (!(apic || IO_APIC_IRQ(irq))) ++ continue; ++ ++ if (pin == (mp_irqs[i].srcbusirq & 3)) ++ return irq; ++ /* ++ * Use the first all-but-pin matching entry as a ++ * best-guess fuzzy result for broken mptables. ++ */ ++ if (best_guess < 0) ++ best_guess = irq; ++ } ++ } ++ return best_guess; ++} ++ ++EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); ++ ++#if defined(CONFIG_EISA) || defined(CONFIG_MCA) ++/* ++ * EISA Edge/Level control register, ELCR ++ */ ++static int EISA_ELCR(unsigned int irq) ++{ ++ if (irq < NR_IRQS_LEGACY) { ++ unsigned int port = 0x4d0 + (irq >> 3); ++ return (inb(port) >> (irq & 7)) & 1; ++ } ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "Broken MPtable reports ISA irq %d\n", irq); ++ return 0; ++} ++ ++#endif ++ ++/* ISA interrupts are always polarity zero edge triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_ISA_trigger(idx) (0) ++#define default_ISA_polarity(idx) (0) ++ ++/* EISA interrupts are always polarity zero and can be edge or level ++ * trigger depending on the ELCR value. 
If an interrupt is listed as ++ * EISA conforming in the MP table, that means its trigger type must ++ * be read in from the ELCR */ ++ ++#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq)) ++#define default_EISA_polarity(idx) default_ISA_polarity(idx) ++ ++/* PCI interrupts are always polarity one level triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_PCI_trigger(idx) (1) ++#define default_PCI_polarity(idx) (1) ++ ++/* MCA interrupts are always polarity zero level triggered, ++ * when listed as conforming in the MP table. */ ++ ++#define default_MCA_trigger(idx) (1) ++#define default_MCA_polarity(idx) default_ISA_polarity(idx) ++ ++static int MPBIOS_polarity(int idx) ++{ ++ int bus = mp_irqs[idx].srcbus; ++ int polarity; ++ ++ /* ++ * Determine IRQ line polarity (high active or low active): ++ */ ++ switch (mp_irqs[idx].irqflag & 3) ++ { ++ case 0: /* conforms, ie. bus-type dependent polarity */ ++ if (test_bit(bus, mp_bus_not_pci)) ++ polarity = default_ISA_polarity(idx); ++ else ++ polarity = default_PCI_polarity(idx); ++ break; ++ case 1: /* high active */ ++ { ++ polarity = 0; ++ break; ++ } ++ case 2: /* reserved */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ polarity = 1; ++ break; ++ } ++ case 3: /* low active */ ++ { ++ polarity = 1; ++ break; ++ } ++ default: /* invalid */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ polarity = 1; ++ break; ++ } ++ } ++ return polarity; ++} ++ ++static int MPBIOS_trigger(int idx) ++{ ++ int bus = mp_irqs[idx].srcbus; ++ int trigger; ++ ++ /* ++ * Determine IRQ trigger mode (edge or level sensitive): ++ */ ++ switch ((mp_irqs[idx].irqflag>>2) & 3) ++ { ++ case 0: /* conforms, ie. bus-type dependent */ ++ if (test_bit(bus, mp_bus_not_pci)) ++ trigger = default_ISA_trigger(idx); ++ else ++ trigger = default_PCI_trigger(idx); ++#if defined(CONFIG_EISA) || defined(CONFIG_MCA) ++ switch (mp_bus_id_to_type[bus]) { ++ case MP_BUS_ISA: /* ISA pin */ ++ { ++ /* set before the switch */ ++ break; ++ } ++ case MP_BUS_EISA: /* EISA pin */ ++ { ++ trigger = default_EISA_trigger(idx); ++ break; ++ } ++ case MP_BUS_PCI: /* PCI pin */ ++ { ++ /* set before the switch */ ++ break; ++ } ++ case MP_BUS_MCA: /* MCA pin */ ++ { ++ trigger = default_MCA_trigger(idx); ++ break; ++ } ++ default: ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ trigger = 1; ++ break; ++ } ++ } ++#endif ++ break; ++ case 1: /* edge */ ++ { ++ trigger = 0; ++ break; ++ } ++ case 2: /* reserved */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ trigger = 1; ++ break; ++ } ++ case 3: /* level */ ++ { ++ trigger = 1; ++ break; ++ } ++ default: /* invalid */ ++ { ++ printk(KERN_WARNING "broken BIOS!!\n"); ++ trigger = 0; ++ break; ++ } ++ } ++ return trigger; ++} ++ ++static inline int irq_polarity(int idx) ++{ ++ return MPBIOS_polarity(idx); ++} ++ ++static inline int irq_trigger(int idx) ++{ ++ return MPBIOS_trigger(idx); ++} ++ ++int (*ioapic_renumber_irq)(int ioapic, int irq); ++static int pin_2_irq(int idx, int apic, int pin) ++{ ++ int irq, i; ++ int bus = mp_irqs[idx].srcbus; ++ ++ /* ++ * Debugging check, we are in big trouble if this message pops up! 
++ */ ++ if (mp_irqs[idx].dstirq != pin) ++ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); ++ ++ if (test_bit(bus, mp_bus_not_pci)) { ++ irq = mp_irqs[idx].srcbusirq; ++ } else { ++ /* ++ * PCI IRQs are mapped in order ++ */ ++ i = irq = 0; ++ while (i < apic) ++ irq += nr_ioapic_registers[i++]; ++ irq += pin; ++ /* ++ * For MPS mode, so far only needed by ES7000 platform ++ */ ++ if (ioapic_renumber_irq) ++ irq = ioapic_renumber_irq(apic, irq); ++ } ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * PCI IRQ command line redirection. Yes, limits are hardcoded. ++ */ ++ if ((pin >= 16) && (pin <= 23)) { ++ if (pirq_entries[pin-16] != -1) { ++ if (!pirq_entries[pin-16]) { ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "disabling PIRQ%d\n", pin-16); ++ } else { ++ irq = pirq_entries[pin-16]; ++ apic_printk(APIC_VERBOSE, KERN_DEBUG ++ "using PIRQ%d -> IRQ %d\n", ++ pin-16, irq); ++ } ++ } ++ } ++#endif ++ ++ return irq; ++} ++ ++void lock_vector_lock(void) ++{ ++ /* Used to the online set of cpus does not change ++ * during assign_irq_vector. ++ */ ++ spin_lock(&vector_lock); ++} ++ ++void unlock_vector_lock(void) ++{ ++ spin_unlock(&vector_lock); ++} ++ ++static int ++__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) ++{ ++ /* ++ * NOTE! The local APIC isn't very good at handling ++ * multiple interrupts at the same interrupt level. ++ * As the interrupt level is determined by taking the ++ * vector number and shifting that right by 4, we ++ * want to spread these out a bit so that they don't ++ * all fall in the same interrupt level. ++ * ++ * Also, we've got to be careful not to trash gate ++ * 0x80, because int 0x80 is hm, kind of importantish. ;) ++ */ ++ static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; ++ unsigned int old_vector; ++ int cpu, err; ++ cpumask_var_t tmp_mask; ++ ++ if ((cfg->move_in_progress) || cfg->move_cleanup_count) ++ return -EBUSY; ++ ++ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) ++ return -ENOMEM; ++ ++ old_vector = cfg->vector; ++ if (old_vector) { ++ cpumask_and(tmp_mask, mask, cpu_online_mask); ++ cpumask_and(tmp_mask, cfg->domain, tmp_mask); ++ if (!cpumask_empty(tmp_mask)) { ++ free_cpumask_var(tmp_mask); ++ return 0; ++ } ++ } ++ ++ /* Only try and allocate irqs on cpus that are present */ ++ err = -ENOSPC; ++ for_each_cpu_and(cpu, mask, cpu_online_mask) { ++ int new_cpu; ++ int vector, offset; ++ ++ apic->vector_allocation_domain(cpu, tmp_mask); ++ ++ vector = current_vector; ++ offset = current_offset; ++next: ++ vector += 8; ++ if (vector >= first_system_vector) { ++ /* If out of vectors on large boxen, must share them. */ ++ offset = (offset + 1) % 8; ++ vector = FIRST_DEVICE_VECTOR + offset; ++ } ++ if (unlikely(current_vector == vector)) ++ continue; ++ ++ if (test_bit(vector, used_vectors)) ++ goto next; ++ ++ for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) ++ if (per_cpu(vector_irq, new_cpu)[vector] != -1) ++ goto next; ++ /* Found one! 
*/ ++ current_vector = vector; ++ current_offset = offset; ++ if (old_vector) { ++ cfg->move_in_progress = 1; ++ cpumask_copy(cfg->old_domain, cfg->domain); ++ } ++ for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) ++ per_cpu(vector_irq, new_cpu)[vector] = irq; ++ cfg->vector = vector; ++ cpumask_copy(cfg->domain, tmp_mask); ++ err = 0; ++ break; ++ } ++ free_cpumask_var(tmp_mask); ++ return err; ++} ++ ++static int ++assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) ++{ ++ int err; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&vector_lock, flags); ++ err = __assign_irq_vector(irq, cfg, mask); ++ spin_unlock_irqrestore(&vector_lock, flags); ++ return err; ++} ++ ++static void __clear_irq_vector(int irq, struct irq_cfg *cfg) ++{ ++ int cpu, vector; ++ ++ BUG_ON(!cfg->vector); ++ ++ vector = cfg->vector; ++ for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) ++ per_cpu(vector_irq, cpu)[vector] = -1; ++ ++ cfg->vector = 0; ++ cpumask_clear(cfg->domain); ++ ++ if (likely(!cfg->move_in_progress)) ++ return; ++ for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { ++ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; ++ vector++) { ++ if (per_cpu(vector_irq, cpu)[vector] != irq) ++ continue; ++ per_cpu(vector_irq, cpu)[vector] = -1; ++ break; ++ } ++ } ++ cfg->move_in_progress = 0; ++} ++ ++void __setup_vector_irq(int cpu) ++{ ++ /* Initialize vector_irq on a new cpu */ ++ /* This function must be called with vector_lock held */ ++ int irq, vector; ++ struct irq_cfg *cfg; ++ struct irq_desc *desc; ++ ++ /* Mark the inuse vectors */ ++ for_each_irq_desc(irq, desc) { ++ cfg = desc->chip_data; ++ if (!cpumask_test_cpu(cpu, cfg->domain)) ++ continue; ++ vector = cfg->vector; ++ per_cpu(vector_irq, cpu)[vector] = irq; ++ } ++ /* Mark the free vectors */ ++ for (vector = 0; vector < NR_VECTORS; ++vector) { ++ irq = per_cpu(vector_irq, cpu)[vector]; ++ if (irq < 0) ++ continue; ++ ++ cfg = irq_cfg(irq); ++ if (!cpumask_test_cpu(cpu, cfg->domain)) ++ per_cpu(vector_irq, cpu)[vector] = -1; ++ } ++} ++ ++static struct irq_chip ioapic_chip; ++static struct irq_chip ir_ioapic_chip; ++ ++#define IOAPIC_AUTO -1 ++#define IOAPIC_EDGE 0 ++#define IOAPIC_LEVEL 1 ++ ++#ifdef CONFIG_X86_32 ++static inline int IO_APIC_irq_trigger(int irq) ++{ ++ int apic, idx, pin; ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ idx = find_irq_entry(apic, pin, mp_INT); ++ if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) ++ return irq_trigger(idx); ++ } ++ } ++ /* ++ * nonexistent IRQs are edge default ++ */ ++ return 0; ++} ++#else ++static inline int IO_APIC_irq_trigger(int irq) ++{ ++ return 1; ++} ++#endif ++ ++static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) ++{ ++ ++ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || ++ trigger == IOAPIC_LEVEL) ++ desc->status |= IRQ_LEVEL; ++ else ++ desc->status &= ~IRQ_LEVEL; ++ ++ if (irq_remapped(irq)) { ++ desc->status |= IRQ_MOVE_PCNTXT; ++ if (trigger) ++ set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, ++ handle_fasteoi_irq, ++ "fasteoi"); ++ else ++ set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, ++ handle_edge_irq, "edge"); ++ return; ++ } ++ ++ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || ++ trigger == IOAPIC_LEVEL) ++ set_irq_chip_and_handler_name(irq, &ioapic_chip, ++ handle_fasteoi_irq, ++ "fasteoi"); ++ else ++ set_irq_chip_and_handler_name(irq, &ioapic_chip, ++ handle_edge_irq, "edge"); ++} ++ 
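The vector allocator in __assign_irq_vector() above steps candidate vectors by 8 and rotates an offset when it wraps, because the local APIC derives an interrupt's priority class from bits 7:4 of the vector number; stepping by 8 keeps successive allocations from piling into a single 16-vector priority class. The standalone user-space sketch below is illustrative only and not part of the patch: the FIRST_DEVICE_VECTOR/FIRST_SYSTEM_VECTOR values are assumptions rather than the kernel's definitions, and the real allocator additionally skips vectors that are already in use on the target CPUs. It simply prints the candidate sequence to show the spreading effect.

#include <stdio.h>

#define FIRST_DEVICE_VECTOR   0x41   /* assumed value, for illustration only */
#define FIRST_SYSTEM_VECTOR   0xef   /* assumed value, for illustration only */

int main(void)
{
	int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
	int i;

	for (i = 0; i < 10; i++) {
		int vector = current_vector + 8;

		if (vector >= FIRST_SYSTEM_VECTOR) {
			/* out of vectors in this pass: rotate the offset and wrap */
			current_offset = (current_offset + 1) % 8;
			vector = FIRST_DEVICE_VECTOR + current_offset;
		}
		current_vector = vector;

		/* the local APIC priority class is the high nibble of the vector */
		printf("allocation %2d: vector 0x%02x -> priority class %u\n",
		       i, (unsigned)vector, (unsigned)vector >> 4);
	}
	return 0;
}

Compiled and run on its own, the sketch shows the priority class advancing every couple of allocations instead of sixteen consecutive vectors sharing one class, which is the behaviour the in-kernel comment about "spreading these out a bit" is describing.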
++int setup_ioapic_entry(int apic_id, int irq, ++ struct IO_APIC_route_entry *entry, ++ unsigned int destination, int trigger, ++ int polarity, int vector, int pin) ++{ ++ /* ++ * add it to the IO-APIC irq-routing table: ++ */ ++ memset(entry,0,sizeof(*entry)); ++ ++ if (intr_remapping_enabled) { ++ struct intel_iommu *iommu = map_ioapic_to_ir(apic_id); ++ struct irte irte; ++ struct IR_IO_APIC_route_entry *ir_entry = ++ (struct IR_IO_APIC_route_entry *) entry; ++ int index; ++ ++ if (!iommu) ++ panic("No mapping iommu for ioapic %d\n", apic_id); ++ ++ index = alloc_irte(iommu, irq, 1); ++ if (index < 0) ++ panic("Failed to allocate IRTE for ioapic %d\n", apic_id); ++ ++ memset(&irte, 0, sizeof(irte)); ++ ++ irte.present = 1; ++ irte.dst_mode = apic->irq_dest_mode; ++ /* ++ * Trigger mode in the IRTE will always be edge, and the ++ * actual level or edge trigger will be setup in the IO-APIC ++ * RTE. This will help simplify level triggered irq migration. ++ * For more details, see the comments above explainig IO-APIC ++ * irq migration in the presence of interrupt-remapping. ++ */ ++ irte.trigger_mode = 0; ++ irte.dlvry_mode = apic->irq_delivery_mode; ++ irte.vector = vector; ++ irte.dest_id = IRTE_DEST(destination); ++ ++ modify_irte(irq, &irte); ++ ++ ir_entry->index2 = (index >> 15) & 0x1; ++ ir_entry->zero = 0; ++ ir_entry->format = 1; ++ ir_entry->index = (index & 0x7fff); ++ /* ++ * IO-APIC RTE will be configured with virtual vector. ++ * irq handler will do the explicit EOI to the io-apic. ++ */ ++ ir_entry->vector = pin; ++ } else { ++ entry->delivery_mode = apic->irq_delivery_mode; ++ entry->dest_mode = apic->irq_dest_mode; ++ entry->dest = destination; ++ entry->vector = vector; ++ } ++ ++ entry->mask = 0; /* enable IRQ */ ++ entry->trigger = trigger; ++ entry->polarity = polarity; ++ ++ /* Mask level triggered irqs. ++ * Use IRQ_DELAYED_DISABLE for edge triggered irqs. 
++ */ ++ if (trigger) ++ entry->mask = 1; ++ return 0; ++} ++ ++static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc, ++ int trigger, int polarity) ++{ ++ struct irq_cfg *cfg; ++ struct IO_APIC_route_entry entry; ++ unsigned int dest; ++ ++ if (!IO_APIC_IRQ(irq)) ++ return; ++ ++ cfg = desc->chip_data; ++ ++ if (assign_irq_vector(irq, cfg, apic->target_cpus())) ++ return; ++ ++ dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); ++ ++ apic_printk(APIC_VERBOSE,KERN_DEBUG ++ "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " ++ "IRQ %d Mode:%i Active:%i)\n", ++ apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector, ++ irq, trigger, polarity); ++ ++ ++ if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry, ++ dest, trigger, polarity, cfg->vector, pin)) { ++ printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", ++ mp_ioapics[apic_id].apicid, pin); ++ __clear_irq_vector(irq, cfg); ++ return; ++ } ++ ++ ioapic_register_intr(irq, desc, trigger); ++ if (irq < NR_IRQS_LEGACY) ++ disable_8259A_irq(irq); ++ ++ ioapic_write_entry(apic_id, pin, entry); ++} ++ ++static void __init setup_IO_APIC_irqs(void) ++{ ++ int apic_id, pin, idx, irq; ++ int notcon = 0; ++ struct irq_desc *desc; ++ struct irq_cfg *cfg; ++ int cpu = boot_cpu_id; ++ ++ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); ++ ++ for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { ++ for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { ++ ++ idx = find_irq_entry(apic_id, pin, mp_INT); ++ if (idx == -1) { ++ if (!notcon) { ++ notcon = 1; ++ apic_printk(APIC_VERBOSE, ++ KERN_DEBUG " %d-%d", ++ mp_ioapics[apic_id].apicid, pin); ++ } else ++ apic_printk(APIC_VERBOSE, " %d-%d", ++ mp_ioapics[apic_id].apicid, pin); ++ continue; ++ } ++ if (notcon) { ++ apic_printk(APIC_VERBOSE, ++ " (apicid-pin) not connected\n"); ++ notcon = 0; ++ } ++ ++ irq = pin_2_irq(idx, apic_id, pin); ++ ++ /* ++ * Skip the timer IRQ if there's a quirk handler ++ * installed and if it returns 1: ++ */ ++ if (apic->multi_timer_check && ++ apic->multi_timer_check(apic_id, irq)) ++ continue; ++ ++ desc = irq_to_desc_alloc_cpu(irq, cpu); ++ if (!desc) { ++ printk(KERN_INFO "can not get irq_desc for %d\n", irq); ++ continue; ++ } ++ cfg = desc->chip_data; ++ add_pin_to_irq_cpu(cfg, cpu, apic_id, pin); ++ ++ setup_IO_APIC_irq(apic_id, pin, irq, desc, ++ irq_trigger(idx), irq_polarity(idx)); ++ } ++ } ++ ++ if (notcon) ++ apic_printk(APIC_VERBOSE, ++ " (apicid-pin) not connected\n"); ++} ++ ++/* ++ * Set up the timer pin, possibly with the 8259A-master behind. ++ */ ++static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, ++ int vector) ++{ ++ struct IO_APIC_route_entry entry; ++ ++ if (intr_remapping_enabled) ++ return; ++ ++ memset(&entry, 0, sizeof(entry)); ++ ++ /* ++ * We use logical delivery to get the timer IRQ ++ * to the first CPU. ++ */ ++ entry.dest_mode = apic->irq_dest_mode; ++ entry.mask = 0; /* don't mask IRQ for edge */ ++ entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus()); ++ entry.delivery_mode = apic->irq_delivery_mode; ++ entry.polarity = 0; ++ entry.trigger = 0; ++ entry.vector = vector; ++ ++ /* ++ * The timer IRQ doesn't have to know that behind the ++ * scene we may have a 8259A-master in AEOI mode ... 
++ */ ++ set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); ++ ++ /* ++ * Add it to the IO-APIC irq-routing table: ++ */ ++ ioapic_write_entry(apic_id, pin, entry); ++} ++ ++ ++__apicdebuginit(void) print_IO_APIC(void) ++{ ++ int apic, i; ++ union IO_APIC_reg_00 reg_00; ++ union IO_APIC_reg_01 reg_01; ++ union IO_APIC_reg_02 reg_02; ++ union IO_APIC_reg_03 reg_03; ++ unsigned long flags; ++ struct irq_cfg *cfg; ++ struct irq_desc *desc; ++ unsigned int irq; ++ ++ if (apic_verbosity == APIC_QUIET) ++ return; ++ ++ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); ++ for (i = 0; i < nr_ioapics; i++) ++ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", ++ mp_ioapics[i].apicid, nr_ioapic_registers[i]); ++ ++ /* ++ * We are a bit conservative about what we expect. We have to ++ * know about every hardware change ASAP. ++ */ ++ printk(KERN_INFO "testing the IO APIC.......................\n"); ++ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(apic, 0); ++ reg_01.raw = io_apic_read(apic, 1); ++ if (reg_01.bits.version >= 0x10) ++ reg_02.raw = io_apic_read(apic, 2); ++ if (reg_01.bits.version >= 0x20) ++ reg_03.raw = io_apic_read(apic, 3); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ printk("\n"); ++ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); ++ printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); ++ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); ++ printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); ++ printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); ++ ++ printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); ++ printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); ++ ++ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); ++ printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); ++ ++ /* ++ * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, ++ * but the value of reg_02 is read as the previous read register ++ * value, so ignore it if reg_02 == reg_01. ++ */ ++ if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { ++ printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); ++ printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); ++ } ++ ++ /* ++ * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 ++ * or reg_03, but the value of reg_0[23] is read as the previous read ++ * register value, so ignore it if reg_03 == reg_0[12]. ++ */ ++ if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && ++ reg_03.raw != reg_01.raw) { ++ printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); ++ printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); ++ } ++ ++ printk(KERN_DEBUG ".... 
IRQ redirection table:\n"); ++ ++ printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" ++ " Stat Dmod Deli Vect: \n"); ++ ++ for (i = 0; i <= reg_01.bits.entries; i++) { ++ struct IO_APIC_route_entry entry; ++ ++ entry = ioapic_read_entry(apic, i); ++ ++ printk(KERN_DEBUG " %02x %03X ", ++ i, ++ entry.dest ++ ); ++ ++ printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", ++ entry.mask, ++ entry.trigger, ++ entry.irr, ++ entry.polarity, ++ entry.delivery_status, ++ entry.dest_mode, ++ entry.delivery_mode, ++ entry.vector ++ ); ++ } ++ } ++ printk(KERN_DEBUG "IRQ to pin mappings:\n"); ++ for_each_irq_desc(irq, desc) { ++ struct irq_pin_list *entry; ++ ++ cfg = desc->chip_data; ++ entry = cfg->irq_2_pin; ++ if (!entry) ++ continue; ++ printk(KERN_DEBUG "IRQ%d ", irq); ++ for (;;) { ++ printk("-> %d:%d", entry->apic, entry->pin); ++ if (!entry->next) ++ break; ++ entry = entry->next; ++ } ++ printk("\n"); ++ } ++ ++ printk(KERN_INFO ".................................... done.\n"); ++ ++ return; ++} ++ ++__apicdebuginit(void) print_APIC_bitfield(int base) ++{ ++ unsigned int v; ++ int i, j; ++ ++ if (apic_verbosity == APIC_QUIET) ++ return; ++ ++ printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); ++ for (i = 0; i < 8; i++) { ++ v = apic_read(base + i*0x10); ++ for (j = 0; j < 32; j++) { ++ if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ ++ apic_write(APIC_ESR, 0); ++ ++ v = apic_read(APIC_ESR); ++ printk(KERN_DEBUG "... APIC ESR: %08x\n", v); ++ } ++ ++ icr = apic_icr_read(); ++ printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); ++ printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32)); ++ ++ v = apic_read(APIC_LVTT); ++ printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); ++ ++ if (maxlvt > 3) { /* PC is LVT#4. */ ++ v = apic_read(APIC_LVTPC); ++ printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); ++ } ++ v = apic_read(APIC_LVT0); ++ printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); ++ v = apic_read(APIC_LVT1); ++ printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); ++ ++ if (maxlvt > 2) { /* ERR is LVT#3. */ ++ v = apic_read(APIC_LVTERR); ++ printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); ++ } ++ ++ v = apic_read(APIC_TMICT); ++ printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); ++ v = apic_read(APIC_TMCCT); ++ printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); ++ v = apic_read(APIC_TDCR); ++ printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); ++ printk("\n"); ++} ++ ++__apicdebuginit(void) print_all_local_APICs(void) ++{ ++ int cpu; ++ ++ preempt_disable(); ++ for_each_online_cpu(cpu) ++ smp_call_function_single(cpu, print_local_APIC, NULL, 1); ++ preempt_enable(); ++} ++ ++__apicdebuginit(void) print_PIC(void) ++{ ++ unsigned int v; ++ unsigned long flags; ++ ++ if (apic_verbosity == APIC_QUIET) ++ return; ++ ++ printk(KERN_DEBUG "\nprinting PIC contents\n"); ++ ++ spin_lock_irqsave(&i8259A_lock, flags); ++ ++ v = inb(0xa1) << 8 | inb(0x21); ++ printk(KERN_DEBUG "... PIC IMR: %04x\n", v); ++ ++ v = inb(0xa0) << 8 | inb(0x20); ++ printk(KERN_DEBUG "... PIC IRR: %04x\n", v); ++ ++ outb(0x0b,0xa0); ++ outb(0x0b,0x20); ++ v = inb(0xa0) << 8 | inb(0x20); ++ outb(0x0a,0xa0); ++ outb(0x0a,0x20); ++ ++ spin_unlock_irqrestore(&i8259A_lock, flags); ++ ++ printk(KERN_DEBUG "... PIC ISR: %04x\n", v); ++ ++ v = inb(0x4d1) << 8 | inb(0x4d0); ++ printk(KERN_DEBUG "... 
PIC ELCR: %04x\n", v); ++} ++ ++__apicdebuginit(int) print_all_ICs(void) ++{ ++ print_PIC(); ++ print_all_local_APICs(); ++ print_IO_APIC(); ++ ++ return 0; ++} ++ ++fs_initcall(print_all_ICs); ++ ++ ++/* Where if anywhere is the i8259 connect in external int mode */ ++static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; ++ ++void __init enable_IO_APIC(void) ++{ ++ union IO_APIC_reg_01 reg_01; ++ int i8259_apic, i8259_pin; ++ int apic; ++ unsigned long flags; ++ ++ /* ++ * The number of IO-APIC IRQ registers (== #pins): ++ */ ++ for (apic = 0; apic < nr_ioapics; apic++) { ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(apic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ nr_ioapic_registers[apic] = reg_01.bits.entries+1; ++ } ++ for(apic = 0; apic < nr_ioapics; apic++) { ++ int pin; ++ /* See if any of the pins is in ExtINT mode */ ++ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { ++ struct IO_APIC_route_entry entry; ++ entry = ioapic_read_entry(apic, pin); ++ ++ /* If the interrupt line is enabled and in ExtInt mode ++ * I have found the pin where the i8259 is connected. ++ */ ++ if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { ++ ioapic_i8259.apic = apic; ++ ioapic_i8259.pin = pin; ++ goto found_i8259; ++ } ++ } ++ } ++ found_i8259: ++ /* Look to see what if the MP table has reported the ExtINT */ ++ /* If we could not find the appropriate pin by looking at the ioapic ++ * the i8259 probably is not connected the ioapic but give the ++ * mptable a chance anyway. ++ */ ++ i8259_pin = find_isa_irq_pin(0, mp_ExtINT); ++ i8259_apic = find_isa_irq_apic(0, mp_ExtINT); ++ /* Trust the MP table if nothing is setup in the hardware */ ++ if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { ++ printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); ++ ioapic_i8259.pin = i8259_pin; ++ ioapic_i8259.apic = i8259_apic; ++ } ++ /* Complain if the MP table and the hardware disagree */ ++ if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && ++ (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) ++ { ++ printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); ++ } ++ ++ /* ++ * Do not trust the IO-APIC being empty at bootup ++ */ ++ clear_IO_APIC(); ++} ++ ++/* ++ * Not an __init, needed by the reboot code ++ */ ++void disable_IO_APIC(void) ++{ ++ /* ++ * Clear the IO-APIC before rebooting: ++ */ ++ clear_IO_APIC(); ++ ++ /* ++ * If the i8259 is routed through an IOAPIC ++ * Put that IOAPIC in virtual wire mode ++ * so legacy interrupts can be delivered. ++ * ++ * With interrupt-remapping, for now we will use virtual wire A mode, ++ * as virtual wire B is little complex (need to configure both ++ * IOAPIC RTE aswell as interrupt-remapping table entry). ++ * As this gets called during crash dump, keep this simple for now. ++ */ ++ if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) { ++ struct IO_APIC_route_entry entry; ++ ++ memset(&entry, 0, sizeof(entry)); ++ entry.mask = 0; /* Enabled */ ++ entry.trigger = 0; /* Edge */ ++ entry.irr = 0; ++ entry.polarity = 0; /* High */ ++ entry.delivery_status = 0; ++ entry.dest_mode = 0; /* Physical */ ++ entry.delivery_mode = dest_ExtINT; /* ExtInt */ ++ entry.vector = 0; ++ entry.dest = read_apic_id(); ++ ++ /* ++ * Add it to the IO-APIC irq-routing table: ++ */ ++ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); ++ } ++ ++ /* ++ * Use virtual wire A mode when interrupt remapping is enabled. 
++ */ ++ disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1); ++} ++ ++#ifdef CONFIG_X86_32 ++/* ++ * function to set the IO-APIC physical IDs based on the ++ * values stored in the MPC table. ++ * ++ * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 ++ */ ++ ++static void __init setup_ioapic_ids_from_mpc(void) ++{ ++ union IO_APIC_reg_00 reg_00; ++ physid_mask_t phys_id_present_map; ++ int apic_id; ++ int i; ++ unsigned char old_id; ++ unsigned long flags; ++ ++ if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) ++ return; ++ ++ /* ++ * Don't check I/O APIC IDs for xAPIC systems. They have ++ * no meaning without the serial APIC bus. ++ */ ++ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) ++ return; ++ /* ++ * This is broken; anything with a real cpu count has to ++ * circumvent this idiocy regardless. ++ */ ++ phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map); ++ ++ /* ++ * Set the IOAPIC ID to the value stored in the MPC table. ++ */ ++ for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { ++ ++ /* Read the register 0 value */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(apic_id, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ old_id = mp_ioapics[apic_id].apicid; ++ ++ if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) { ++ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", ++ apic_id, mp_ioapics[apic_id].apicid); ++ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", ++ reg_00.bits.ID); ++ mp_ioapics[apic_id].apicid = reg_00.bits.ID; ++ } ++ ++ /* ++ * Sanity check, is the ID really free? Every APIC in a ++ * system must have a unique ID or we get lots of nice ++ * 'stuck on smp_invalidate_needed IPI wait' messages. ++ */ ++ if (apic->check_apicid_used(phys_id_present_map, ++ mp_ioapics[apic_id].apicid)) { ++ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", ++ apic_id, mp_ioapics[apic_id].apicid); ++ for (i = 0; i < get_physical_broadcast(); i++) ++ if (!physid_isset(i, phys_id_present_map)) ++ break; ++ if (i >= get_physical_broadcast()) ++ panic("Max APIC ID exceeded!\n"); ++ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", ++ i); ++ physid_set(i, phys_id_present_map); ++ mp_ioapics[apic_id].apicid = i; ++ } else { ++ physid_mask_t tmp; ++ tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid); ++ apic_printk(APIC_VERBOSE, "Setting %d in the " ++ "phys_id_present_map\n", ++ mp_ioapics[apic_id].apicid); ++ physids_or(phys_id_present_map, phys_id_present_map, tmp); ++ } ++ ++ ++ /* ++ * We need to adjust the IRQ routing table ++ * if the ID changed. ++ */ ++ if (old_id != mp_ioapics[apic_id].apicid) ++ for (i = 0; i < mp_irq_entries; i++) ++ if (mp_irqs[i].dstapic == old_id) ++ mp_irqs[i].dstapic ++ = mp_ioapics[apic_id].apicid; ++ ++ /* ++ * Read the right value from the MPC table and ++ * write it into the ID register. 
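++		 * (mp_ioapics[apic_id].apicid may already have been fixed up
++		 * above if the BIOS-supplied ID was out of range or in use.)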
++ */ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "...changing IO-APIC physical APIC ID to %d ...", ++ mp_ioapics[apic_id].apicid); ++ ++ reg_00.bits.ID = mp_ioapics[apic_id].apicid; ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(apic_id, 0, reg_00.raw); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ /* ++ * Sanity check ++ */ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(apic_id, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) ++ printk("could not set ID!\n"); ++ else ++ apic_printk(APIC_VERBOSE, " ok.\n"); ++ } ++} ++#endif ++ ++int no_timer_check __initdata; ++ ++static int __init notimercheck(char *s) ++{ ++ no_timer_check = 1; ++ return 1; ++} ++__setup("no_timer_check", notimercheck); ++ ++/* ++ * There is a nasty bug in some older SMP boards, their mptable lies ++ * about the timer IRQ. We do the following to work around the situation: ++ * ++ * - timer IRQ defaults to IO-APIC IRQ ++ * - if this function detects that timer IRQs are defunct, then we fall ++ * back to ISA timer IRQs ++ */ ++static int __init timer_irq_works(void) ++{ ++ unsigned long t1 = jiffies; ++ unsigned long flags; ++ ++ if (no_timer_check) ++ return 1; ++ ++ local_save_flags(flags); ++ local_irq_enable(); ++ /* Let ten ticks pass... */ ++ mdelay((10 * 1000) / HZ); ++ local_irq_restore(flags); ++ ++ /* ++ * Expect a few ticks at least, to be sure some possible ++ * glue logic does not lock up after one or two first ++ * ticks in a non-ExtINT mode. Also the local APIC ++ * might have cached one ExtINT interrupt. Finally, at ++ * least one tick may be lost due to delays. ++ */ ++ ++ /* jiffies wrap? */ ++ if (time_after(jiffies, t1 + 4) && ++ time_before(jiffies, t1 + 16)) ++ return 1; ++ ++ return 0; ++} ++ ++/* ++ * In the SMP+IOAPIC case it might happen that there are an unspecified ++ * number of pending IRQ events unhandled. These cases are very rare, ++ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much ++ * better to do it this way as thus we do not have to be aware of ++ * 'pending' interrupts in the IRQ path, except at this point. ++ */ ++/* ++ * Edge triggered needs to resend any interrupt ++ * that was delayed but this is now handled in the device ++ * independent code. ++ */ ++ ++/* ++ * Starting up a edge-triggered IO-APIC interrupt is ++ * nasty - we need to make sure that we get the edge. ++ * If it is already asserted for some reason, we need ++ * return 1 to indicate that is was pending. ++ * ++ * This is not complete - we should be able to fake ++ * an edge even if it isn't on the 8259A... 
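++ * (For now, startup_ioapic_irq() below only checks the 8259A for a
++ * pending interrupt, and only for the legacy IRQs.)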
++ */ ++ ++static unsigned int startup_ioapic_irq(unsigned int irq) ++{ ++ int was_pending = 0; ++ unsigned long flags; ++ struct irq_cfg *cfg; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ if (irq < NR_IRQS_LEGACY) { ++ disable_8259A_irq(irq); ++ if (i8259A_irq_pending(irq)) ++ was_pending = 1; ++ } ++ cfg = irq_cfg(irq); ++ __unmask_IO_APIC_irq(cfg); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return was_pending; ++} ++ ++#ifdef CONFIG_X86_64 ++static int ioapic_retrigger_irq(unsigned int irq) ++{ ++ ++ struct irq_cfg *cfg = irq_cfg(irq); ++ unsigned long flags; ++ ++ spin_lock_irqsave(&vector_lock, flags); ++ apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); ++ spin_unlock_irqrestore(&vector_lock, flags); ++ ++ return 1; ++} ++#else ++static int ioapic_retrigger_irq(unsigned int irq) ++{ ++ apic->send_IPI_self(irq_cfg(irq)->vector); ++ ++ return 1; ++} ++#endif ++ ++/* ++ * Level and edge triggered IO-APIC interrupts need different handling, ++ * so we use two separate IRQ descriptors. Edge triggered IRQs can be ++ * handled with the level-triggered descriptor, but that one has slightly ++ * more overhead. Level-triggered interrupts cannot be handled with the ++ * edge-triggered handler, without risking IRQ storms and other ugly ++ * races. ++ */ ++ ++#ifdef CONFIG_SMP ++ ++#ifdef CONFIG_INTR_REMAP ++ ++/* ++ * Migrate the IO-APIC irq in the presence of intr-remapping. ++ * ++ * For both level and edge triggered, irq migration is a simple atomic ++ * update(of vector and cpu destination) of IRTE and flush the hardware cache. ++ * ++ * For level triggered, we eliminate the io-apic RTE modification (with the ++ * updated vector information), by using a virtual vector (io-apic pin number). ++ * Real vector that is used for interrupting cpu will be coming from ++ * the interrupt-remapping table entry. ++ */ ++static void ++migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) ++{ ++ struct irq_cfg *cfg; ++ struct irte irte; ++ unsigned int dest; ++ unsigned int irq; ++ ++ if (!cpumask_intersects(mask, cpu_online_mask)) ++ return; ++ ++ irq = desc->irq; ++ if (get_irte(irq, &irte)) ++ return; ++ ++ cfg = desc->chip_data; ++ if (assign_irq_vector(irq, cfg, mask)) ++ return; ++ ++ set_extra_move_desc(desc, mask); ++ ++ dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); ++ ++ irte.vector = cfg->vector; ++ irte.dest_id = IRTE_DEST(dest); ++ ++ /* ++ * Modified the IRTE and flushes the Interrupt entry cache. ++ */ ++ modify_irte(irq, &irte); ++ ++ if (cfg->move_in_progress) ++ send_cleanup_vector(cfg); ++ ++ cpumask_copy(desc->affinity, mask); ++} ++ ++/* ++ * Migrates the IRQ destination in the process context. 
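++ * (These are thin wrappers around migrate_ioapic_irq_desc() above and are
++ * used as the ->set_affinity hooks of the remapped IO-APIC irq_chip.)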
++ */ ++static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, ++ const struct cpumask *mask) ++{ ++ migrate_ioapic_irq_desc(desc, mask); ++} ++static void set_ir_ioapic_affinity_irq(unsigned int irq, ++ const struct cpumask *mask) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ set_ir_ioapic_affinity_irq_desc(desc, mask); ++} ++#else ++static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, ++ const struct cpumask *mask) ++{ ++} ++#endif ++ ++asmlinkage void smp_irq_move_cleanup_interrupt(void) ++{ ++ unsigned vector, me; ++ ++ ack_APIC_irq(); ++ exit_idle(); ++ irq_enter(); ++ ++ me = smp_processor_id(); ++ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { ++ unsigned int irq; ++ unsigned int irr; ++ struct irq_desc *desc; ++ struct irq_cfg *cfg; ++ irq = __get_cpu_var(vector_irq)[vector]; ++ ++ if (irq == -1) ++ continue; ++ ++ desc = irq_to_desc(irq); ++ if (!desc) ++ continue; ++ ++ cfg = irq_cfg(irq); ++ spin_lock(&desc->lock); ++ if (!cfg->move_cleanup_count) ++ goto unlock; ++ ++ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) ++ goto unlock; ++ ++ irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); ++ /* ++ * Check if the vector that needs to be cleanedup is ++ * registered at the cpu's IRR. If so, then this is not ++ * the best time to clean it up. Lets clean it up in the ++ * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR ++ * to myself. ++ */ ++ if (irr & (1 << (vector % 32))) { ++ apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); ++ goto unlock; ++ } ++ __get_cpu_var(vector_irq)[vector] = -1; ++ cfg->move_cleanup_count--; ++unlock: ++ spin_unlock(&desc->lock); ++ } ++ ++ irq_exit(); ++} ++ ++static void irq_complete_move(struct irq_desc **descp) ++{ ++ struct irq_desc *desc = *descp; ++ struct irq_cfg *cfg = desc->chip_data; ++ unsigned vector, me; ++ ++ if (likely(!cfg->move_in_progress)) { ++#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC ++ if (likely(!cfg->move_desc_pending)) ++ return; ++ ++ /* domain has not changed, but affinity did */ ++ me = smp_processor_id(); ++ if (cpumask_test_cpu(me, desc->affinity)) { ++ *descp = desc = move_irq_desc(desc, me); ++ /* get the new one */ ++ cfg = desc->chip_data; ++ cfg->move_desc_pending = 0; ++ } ++#endif ++ return; ++ } ++ ++ vector = ~get_irq_regs()->orig_ax; ++ me = smp_processor_id(); ++ ++ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) { ++#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC ++ *descp = desc = move_irq_desc(desc, me); ++ /* get the new one */ ++ cfg = desc->chip_data; ++#endif ++ send_cleanup_vector(cfg); ++ } ++} ++#else ++static inline void irq_complete_move(struct irq_desc **descp) {} ++#endif ++ ++#ifdef CONFIG_INTR_REMAP ++static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) ++{ ++ int apic, pin; ++ struct irq_pin_list *entry; ++ ++ entry = cfg->irq_2_pin; ++ for (;;) { ++ ++ if (!entry) ++ break; ++ ++ apic = entry->apic; ++ pin = entry->pin; ++ io_apic_eoi(apic, pin); ++ entry = entry->next; ++ } ++} ++ ++static void ++eoi_ioapic_irq(struct irq_desc *desc) ++{ ++ struct irq_cfg *cfg; ++ unsigned long flags; ++ unsigned int irq; ++ ++ irq = desc->irq; ++ cfg = desc->chip_data; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ __eoi_ioapic_irq(irq, cfg); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++} ++ ++static void ack_x2apic_level(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ack_x2APIC_irq(); ++ eoi_ioapic_irq(desc); ++} ++ ++static void ack_x2apic_edge(unsigned int irq) ++{ ++ 
ack_x2APIC_irq(); ++} ++ ++#endif ++ ++static void ack_apic_edge(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ irq_complete_move(&desc); ++ move_native_irq(irq); ++ ack_APIC_irq(); ++} ++ ++atomic_t irq_mis_count; ++ ++static void ack_apic_level(unsigned int irq) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++#ifdef CONFIG_X86_32 ++ unsigned long v; ++ int i; ++#endif ++ struct irq_cfg *cfg; ++ int do_unmask_irq = 0; ++ ++ irq_complete_move(&desc); ++#ifdef CONFIG_GENERIC_PENDING_IRQ ++ /* If we are moving the irq we need to mask it */ ++ if (unlikely(desc->status & IRQ_MOVE_PENDING) && ++ !(desc->status & IRQ_INPROGRESS)) { ++ do_unmask_irq = 1; ++ mask_IO_APIC_irq_desc(desc); ++ } ++#endif ++ ++#ifdef CONFIG_X86_32 ++ /* ++ * It appears there is an erratum which affects at least version 0x11 ++ * of I/O APIC (that's the 82093AA and cores integrated into various ++ * chipsets). Under certain conditions a level-triggered interrupt is ++ * erroneously delivered as edge-triggered one but the respective IRR ++ * bit gets set nevertheless. As a result the I/O unit expects an EOI ++ * message but it will never arrive and further interrupts are blocked ++ * from the source. The exact reason is so far unknown, but the ++ * phenomenon was observed when two consecutive interrupt requests ++ * from a given source get delivered to the same CPU and the source is ++ * temporarily disabled in between. ++ * ++ * A workaround is to simulate an EOI message manually. We achieve it ++ * by setting the trigger mode to edge and then to level when the edge ++ * trigger mode gets detected in the TMR of a local APIC for a ++ * level-triggered interrupt. We mask the source for the time of the ++ * operation to prevent an edge-triggered interrupt escaping meanwhile. ++ * The idea is from Manfred Spraul. --macro ++ */ ++ cfg = desc->chip_data; ++ i = cfg->vector; ++ ++ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); ++#endif ++ ++ /* ++ * We must acknowledge the irq before we move it or the acknowledge will ++ * not propagate properly. ++ */ ++ ack_APIC_irq(); ++ ++ /* Now we can move and renable the irq */ ++ if (unlikely(do_unmask_irq)) { ++ /* Only migrate the irq if the ack has been received. ++ * ++ * On rare occasions the broadcast level triggered ack gets ++ * delayed going to ioapics, and if we reprogram the ++ * vector while Remote IRR is still set the irq will never ++ * fire again. ++ * ++ * To prevent this scenario we read the Remote IRR bit ++ * of the ioapic. This has two effects. ++ * - On any sane system the read of the ioapic will ++ * flush writes (and acks) going to the ioapic from ++ * this cpu. ++ * - We get to see if the ACK has actually been delivered. ++ * ++ * Based on failed experiments of reprogramming the ++ * ioapic entry from outside of irq context starting ++ * with masking the ioapic entry and then polling until ++ * Remote IRR was clear before reprogramming the ++ * ioapic I don't trust the Remote IRR bit to be ++ * completey accurate. ++ * ++ * However there appears to be no other way to plug ++ * this race, so if the Remote IRR bit is not ++ * accurate and is causing problems then it is a hardware bug ++ * and you can go talk to the chipset vendor about it. 
++ */ ++ cfg = desc->chip_data; ++ if (!io_apic_level_ack_pending(cfg)) ++ move_masked_irq(irq); ++ unmask_IO_APIC_irq_desc(desc); ++ } ++#if (defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)) && \ ++ defined(CONFIG_PREEMPT_HARDIRQS) ++ /* ++ * With threaded interrupts, we always have IRQ_INPROGRESS ++ * when acking. ++ */ ++ else if (unlikely(desc->status & IRQ_MOVE_PENDING)) ++ move_masked_irq(irq); ++#endif ++ ++#ifdef CONFIG_X86_32 ++ if (!(v & (1 << (i & 0x1f)))) { ++ atomic_inc(&irq_mis_count); ++ spin_lock(&ioapic_lock); ++ __mask_and_edge_IO_APIC_irq(cfg); ++ __unmask_and_level_IO_APIC_irq(cfg); ++ spin_unlock(&ioapic_lock); ++ } ++#endif ++} ++ ++static struct irq_chip ioapic_chip __read_mostly = { ++ .name = "IO-APIC", ++ .startup = startup_ioapic_irq, ++ .mask = mask_IO_APIC_irq, ++ .unmask = unmask_IO_APIC_irq, ++ .ack = ack_apic_edge, ++ .eoi = ack_apic_level, ++#ifdef CONFIG_SMP ++ .set_affinity = set_ioapic_affinity_irq, ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++static struct irq_chip ir_ioapic_chip __read_mostly = { ++ .name = "IR-IO-APIC", ++ .startup = startup_ioapic_irq, ++ .mask = mask_IO_APIC_irq, ++ .unmask = unmask_IO_APIC_irq, ++#ifdef CONFIG_INTR_REMAP ++ .ack = ack_x2apic_edge, ++ .eoi = ack_x2apic_level, ++#ifdef CONFIG_SMP ++ .set_affinity = set_ir_ioapic_affinity_irq, ++#endif ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++static inline void init_IO_APIC_traps(void) ++{ ++ int irq; ++ struct irq_desc *desc; ++ struct irq_cfg *cfg; ++ ++ /* ++ * NOTE! The local APIC isn't very good at handling ++ * multiple interrupts at the same interrupt level. ++ * As the interrupt level is determined by taking the ++ * vector number and shifting that right by 4, we ++ * want to spread these out a bit so that they don't ++ * all fall in the same interrupt level. ++ * ++ * Also, we've got to be careful not to trash gate ++ * 0x80, because int 0x80 is hm, kind of importantish. ;) ++ */ ++ for_each_irq_desc(irq, desc) { ++ cfg = desc->chip_data; ++ if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { ++ /* ++ * Hmm.. We don't have an entry for this, ++ * so default to an old-fashioned 8259 ++ * interrupt if we can.. ++ */ ++ if (irq < NR_IRQS_LEGACY) ++ make_8259A_irq(irq); ++ else ++ /* Strange. Oh, well.. */ ++ desc->chip = &no_irq_chip; ++ } ++ } ++} ++ ++/* ++ * The local APIC irq-chip implementation: ++ */ ++ ++static void mask_lapic_irq(unsigned int irq) ++{ ++ unsigned long v; ++ ++ v = apic_read(APIC_LVT0); ++ apic_write(APIC_LVT0, v | APIC_LVT_MASKED); ++} ++ ++static void unmask_lapic_irq(unsigned int irq) ++{ ++ unsigned long v; ++ ++ v = apic_read(APIC_LVT0); ++ apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); ++} ++ ++static void ack_lapic_irq(unsigned int irq) ++{ ++ ack_APIC_irq(); ++} ++ ++static struct irq_chip lapic_chip __read_mostly = { ++ .name = "local-APIC", ++ .mask = mask_lapic_irq, ++ .unmask = unmask_lapic_irq, ++ .ack = ack_lapic_irq, ++}; ++ ++static void lapic_register_intr(int irq, struct irq_desc *desc) ++{ ++ desc->status &= ~IRQ_LEVEL; ++ set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, ++ "edge"); ++} ++ ++static void __init setup_nmi(void) ++{ ++ /* ++ * Dirty trick to enable the NMI watchdog ... ++ * We put the 8259A master into AEOI mode and ++ * unmask on all local APICs LVT0 as NMI. ++ * ++ * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') ++ * is from Maciej W. Rozycki - so we do not have to EOI from ++ * the NMI handler or the timer interrupt. 
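++ * (enable_NMI_through_LVT0() below does the actual LVT0 reprogramming.)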
++ */ ++ apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); ++ ++ enable_NMI_through_LVT0(); ++ ++ apic_printk(APIC_VERBOSE, " done.\n"); ++} ++ ++/* ++ * This looks a bit hackish but it's about the only one way of sending ++ * a few INTA cycles to 8259As and any associated glue logic. ICR does ++ * not support the ExtINT mode, unfortunately. We need to send these ++ * cycles as some i82489DX-based boards have glue logic that keeps the ++ * 8259A interrupt line asserted until INTA. --macro ++ */ ++static inline void __init unlock_ExtINT_logic(void) ++{ ++ int apic, pin, i; ++ struct IO_APIC_route_entry entry0, entry1; ++ unsigned char save_control, save_freq_select; ++ ++ pin = find_isa_irq_pin(8, mp_INT); ++ if (pin == -1) { ++ WARN_ON_ONCE(1); ++ return; ++ } ++ apic = find_isa_irq_apic(8, mp_INT); ++ if (apic == -1) { ++ WARN_ON_ONCE(1); ++ return; ++ } ++ ++ entry0 = ioapic_read_entry(apic, pin); ++ clear_IO_APIC_pin(apic, pin); ++ ++ memset(&entry1, 0, sizeof(entry1)); ++ ++ entry1.dest_mode = 0; /* physical delivery */ ++ entry1.mask = 0; /* unmask IRQ now */ ++ entry1.dest = hard_smp_processor_id(); ++ entry1.delivery_mode = dest_ExtINT; ++ entry1.polarity = entry0.polarity; ++ entry1.trigger = 0; ++ entry1.vector = 0; ++ ++ ioapic_write_entry(apic, pin, entry1); ++ ++ save_control = CMOS_READ(RTC_CONTROL); ++ save_freq_select = CMOS_READ(RTC_FREQ_SELECT); ++ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, ++ RTC_FREQ_SELECT); ++ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); ++ ++ i = 100; ++ while (i-- > 0) { ++ mdelay(10); ++ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) ++ i -= 10; ++ } ++ ++ CMOS_WRITE(save_control, RTC_CONTROL); ++ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); ++ clear_IO_APIC_pin(apic, pin); ++ ++ ioapic_write_entry(apic, pin, entry0); ++} ++ ++static int disable_timer_pin_1 __initdata; ++/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ ++static int __init disable_timer_pin_setup(char *arg) ++{ ++ disable_timer_pin_1 = 1; ++ return 0; ++} ++early_param("disable_timer_pin_1", disable_timer_pin_setup); ++ ++int timer_through_8259 __initdata; ++ ++/* ++ * This code may look a bit paranoid, but it's supposed to cooperate with ++ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ ++ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast ++ * fanatically on his truly buggy board. ++ * ++ * FIXME: really need to revamp this for all platforms. ++ */ ++static inline void __init check_timer(void) ++{ ++ struct irq_desc *desc = irq_to_desc(0); ++ struct irq_cfg *cfg = desc->chip_data; ++ int cpu = boot_cpu_id; ++ int apic1, pin1, apic2, pin2; ++ unsigned long flags; ++ int no_pin1 = 0; ++ ++ local_irq_save(flags); ++ ++ /* ++ * get/set the timer IRQ vector: ++ */ ++ disable_8259A_irq(0); ++ assign_irq_vector(0, cfg, apic->target_cpus()); ++ ++ /* ++ * As IRQ0 is to be enabled in the 8259A, the virtual ++ * wire has to be disabled in the local APIC. Also ++ * timer interrupts need to be acknowledged manually in ++ * the 8259A for the i82489DX when using the NMI ++ * watchdog as that APIC treats NMIs as level-triggered. ++ * The AEOI mode will finish them in the 8259A ++ * automatically. 
++ */ ++ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); ++ init_8259A(1); ++#ifdef CONFIG_X86_32 ++ { ++ unsigned int ver; ++ ++ ver = apic_read(APIC_LVR); ++ ver = GET_APIC_VERSION(ver); ++ timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); ++ } ++#endif ++ ++ pin1 = find_isa_irq_pin(0, mp_INT); ++ apic1 = find_isa_irq_apic(0, mp_INT); ++ pin2 = ioapic_i8259.pin; ++ apic2 = ioapic_i8259.apic; ++ ++ apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " ++ "apic1=%d pin1=%d apic2=%d pin2=%d\n", ++ cfg->vector, apic1, pin1, apic2, pin2); ++ ++ /* ++ * Some BIOS writers are clueless and report the ExtINTA ++ * I/O APIC input from the cascaded 8259A as the timer ++ * interrupt input. So just in case, if only one pin ++ * was found above, try it both directly and through the ++ * 8259A. ++ */ ++ if (pin1 == -1) { ++ if (intr_remapping_enabled) ++ panic("BIOS bug: timer not connected to IO-APIC"); ++ pin1 = pin2; ++ apic1 = apic2; ++ no_pin1 = 1; ++ } else if (pin2 == -1) { ++ pin2 = pin1; ++ apic2 = apic1; ++ } ++ ++ if (pin1 != -1) { ++ /* ++ * Ok, does IRQ0 through the IOAPIC work? ++ */ ++ if (no_pin1) { ++ add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); ++ setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); ++ } else { ++ /* for edge trigger, setup_IO_APIC_irq already ++ * leave it unmasked. ++ * so only need to unmask if it is level-trigger ++ * do we really have level trigger timer? ++ */ ++ int idx; ++ idx = find_irq_entry(apic1, pin1, mp_INT); ++ if (idx != -1 && irq_trigger(idx)) ++ unmask_IO_APIC_irq_desc(desc); ++ } ++ if (timer_irq_works()) { ++ if (nmi_watchdog == NMI_IO_APIC) { ++ setup_nmi(); ++ enable_8259A_irq(0); ++ } ++ if (disable_timer_pin_1 > 0) ++ clear_IO_APIC_pin(0, pin1); ++ goto out; ++ } ++ if (intr_remapping_enabled) ++ panic("timer doesn't work through Interrupt-remapped IO-APIC"); ++ local_irq_disable(); ++ clear_IO_APIC_pin(apic1, pin1); ++ if (!no_pin1) ++ apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " ++ "8254 timer not connected to IO-APIC\n"); ++ ++ apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " ++ "(IRQ0) through the 8259A ...\n"); ++ apic_printk(APIC_QUIET, KERN_INFO ++ "..... (found apic %d pin %d) ...\n", apic2, pin2); ++ /* ++ * legacy devices should be connected to IO APIC #0 ++ */ ++ replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); ++ setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); ++ enable_8259A_irq(0); ++ if (timer_irq_works()) { ++ apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); ++ timer_through_8259 = 1; ++ if (nmi_watchdog == NMI_IO_APIC) { ++ disable_8259A_irq(0); ++ setup_nmi(); ++ enable_8259A_irq(0); ++ } ++ goto out; ++ } ++ /* ++ * Cleanup, just in case ... ++ */ ++ local_irq_disable(); ++ disable_8259A_irq(0); ++ clear_IO_APIC_pin(apic2, pin2); ++ apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); ++ } ++ ++ if (nmi_watchdog == NMI_IO_APIC) { ++ apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " ++ "through the IO-APIC - disabling NMI Watchdog!\n"); ++ nmi_watchdog = NMI_NONE; ++ } ++#ifdef CONFIG_X86_32 ++ timer_ack = 0; ++#endif ++ ++ apic_printk(APIC_QUIET, KERN_INFO ++ "...trying to set up timer as Virtual Wire IRQ...\n"); ++ ++ lapic_register_intr(0, desc); ++ apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ ++ enable_8259A_irq(0); ++ ++ if (timer_irq_works()) { ++ apic_printk(APIC_QUIET, KERN_INFO "..... 
works.\n"); ++ goto out; ++ } ++ local_irq_disable(); ++ disable_8259A_irq(0); ++ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); ++ apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); ++ ++ apic_printk(APIC_QUIET, KERN_INFO ++ "...trying to set up timer as ExtINT IRQ...\n"); ++ ++ init_8259A(0); ++ make_8259A_irq(0); ++ apic_write(APIC_LVT0, APIC_DM_EXTINT); ++ ++ unlock_ExtINT_logic(); ++ ++ if (timer_irq_works()) { ++ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); ++ goto out; ++ } ++ local_irq_disable(); ++ apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); ++ panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " ++ "report. Then try booting with the 'noapic' option.\n"); ++out: ++ local_irq_restore(flags); ++} ++ ++/* ++ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available ++ * to devices. However there may be an I/O APIC pin available for ++ * this interrupt regardless. The pin may be left unconnected, but ++ * typically it will be reused as an ExtINT cascade interrupt for ++ * the master 8259A. In the MPS case such a pin will normally be ++ * reported as an ExtINT interrupt in the MP table. With ACPI ++ * there is no provision for ExtINT interrupts, and in the absence ++ * of an override it would be treated as an ordinary ISA I/O APIC ++ * interrupt, that is edge-triggered and unmasked by default. We ++ * used to do this, but it caused problems on some systems because ++ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using ++ * the same ExtINT cascade interrupt to drive the local APIC of the ++ * bootstrap processor. Therefore we refrain from routing IRQ2 to ++ * the I/O APIC in all cases now. No actual device should request ++ * it anyway. --macro ++ */ ++#define PIC_IRQS (1 << PIC_CASCADE_IR) ++ ++void __init setup_IO_APIC(void) ++{ ++ ++ /* ++ * calling enable_IO_APIC() is moved to setup_local_APIC for BP ++ */ ++ ++ io_apic_irqs = ~PIC_IRQS; ++ ++ apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); ++ /* ++ * Set up IO-APIC IRQ routing. ++ */ ++#ifdef CONFIG_X86_32 ++ if (!acpi_ioapic) ++ setup_ioapic_ids_from_mpc(); ++#endif ++ sync_Arb_IDs(); ++ setup_IO_APIC_irqs(); ++ init_IO_APIC_traps(); ++ check_timer(); ++} ++ ++/* ++ * Called after all the initialization is done. 
If we didnt find any ++ * APIC bugs then we can allow the modify fast path ++ */ ++ ++static int __init io_apic_bug_finalize(void) ++{ ++ if (sis_apic_bug == -1) ++ sis_apic_bug = 0; ++ return 0; ++} ++ ++late_initcall(io_apic_bug_finalize); ++ ++struct sysfs_ioapic_data { ++ struct sys_device dev; ++ struct IO_APIC_route_entry entry[0]; ++}; ++static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; ++ ++static int ioapic_suspend(struct sys_device *dev, pm_message_t state) ++{ ++ struct IO_APIC_route_entry *entry; ++ struct sysfs_ioapic_data *data; ++ int i; ++ ++ data = container_of(dev, struct sysfs_ioapic_data, dev); ++ entry = data->entry; ++ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) ++ *entry = ioapic_read_entry(dev->id, i); ++ ++ return 0; ++} ++ ++static int ioapic_resume(struct sys_device *dev) ++{ ++ struct IO_APIC_route_entry *entry; ++ struct sysfs_ioapic_data *data; ++ unsigned long flags; ++ union IO_APIC_reg_00 reg_00; ++ int i; ++ ++ data = container_of(dev, struct sysfs_ioapic_data, dev); ++ entry = data->entry; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(dev->id, 0); ++ if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { ++ reg_00.bits.ID = mp_ioapics[dev->id].apicid; ++ io_apic_write(dev->id, 0, reg_00.raw); ++ } ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ for (i = 0; i < nr_ioapic_registers[dev->id]; i++) ++ ioapic_write_entry(dev->id, i, entry[i]); ++ ++ return 0; ++} ++ ++static struct sysdev_class ioapic_sysdev_class = { ++ .name = "ioapic", ++ .suspend = ioapic_suspend, ++ .resume = ioapic_resume, ++}; ++ ++static int __init ioapic_init_sysfs(void) ++{ ++ struct sys_device * dev; ++ int i, size, error; ++ ++ error = sysdev_class_register(&ioapic_sysdev_class); ++ if (error) ++ return error; ++ ++ for (i = 0; i < nr_ioapics; i++ ) { ++ size = sizeof(struct sys_device) + nr_ioapic_registers[i] ++ * sizeof(struct IO_APIC_route_entry); ++ mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); ++ if (!mp_ioapic_data[i]) { ++ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); ++ continue; ++ } ++ dev = &mp_ioapic_data[i]->dev; ++ dev->id = i; ++ dev->cls = &ioapic_sysdev_class; ++ error = sysdev_register(dev); ++ if (error) { ++ kfree(mp_ioapic_data[i]); ++ mp_ioapic_data[i] = NULL; ++ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); ++ continue; ++ } ++ } ++ ++ return 0; ++} ++ ++device_initcall(ioapic_init_sysfs); ++ ++static int nr_irqs_gsi = NR_IRQS_LEGACY; ++/* ++ * Dynamic irq allocate and deallocation ++ */ ++unsigned int create_irq_nr(unsigned int irq_want) ++{ ++ /* Allocate an unused irq */ ++ unsigned int irq; ++ unsigned int new; ++ unsigned long flags; ++ struct irq_cfg *cfg_new = NULL; ++ int cpu = boot_cpu_id; ++ struct irq_desc *desc_new = NULL; ++ ++ irq = 0; ++ if (irq_want < nr_irqs_gsi) ++ irq_want = nr_irqs_gsi; ++ ++ for (new = irq_want; new < nr_irqs; new++) { ++ desc_new = irq_to_desc_alloc_cpu(new, cpu); ++ if (!desc_new) { ++ printk(KERN_INFO "can not get irq_desc for %d\n", new); ++ continue; ++ } ++ cfg_new = desc_new->chip_data; ++ ++ spin_lock_irqsave(&vector_lock, flags); ++ if (cfg_new->vector != 0) { ++ spin_unlock_irqrestore(&vector_lock, flags); ++ continue; ++ } ++ if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) ++ irq = new; ++ spin_unlock_irqrestore(&vector_lock, flags); ++ break; ++ } ++ ++ if (irq > 0) { ++ dynamic_irq_init(irq); ++ /* restore it, in case dynamic_irq_init clear it */ ++ if (desc_new) ++ desc_new->chip_data = cfg_new; ++ } ++ 
return irq; ++} ++ ++int create_irq(void) ++{ ++ unsigned int irq_want; ++ int irq; ++ ++ irq_want = nr_irqs_gsi; ++ irq = create_irq_nr(irq_want); ++ ++ if (irq == 0) ++ irq = -1; ++ ++ return irq; ++} ++ ++void destroy_irq(unsigned int irq) ++{ ++ unsigned long flags; ++ struct irq_cfg *cfg; ++ struct irq_desc *desc; ++ ++ /* store it, in case dynamic_irq_cleanup clear it */ ++ desc = irq_to_desc(irq); ++ cfg = desc->chip_data; ++ dynamic_irq_cleanup(irq); ++ /* connect back irq_cfg */ ++ if (desc) ++ desc->chip_data = cfg; ++ ++ free_irte(irq); ++ spin_lock_irqsave(&vector_lock, flags); ++ __clear_irq_vector(irq, cfg); ++ spin_unlock_irqrestore(&vector_lock, flags); ++} ++ ++/* ++ * MSI message composition ++ */ ++#ifdef CONFIG_PCI_MSI ++static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) ++{ ++ struct irq_cfg *cfg; ++ int err; ++ unsigned dest; ++ ++ if (disable_apic) ++ return -ENXIO; ++ ++ cfg = irq_cfg(irq); ++ err = assign_irq_vector(irq, cfg, apic->target_cpus()); ++ if (err) ++ return err; ++ ++ dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus()); ++ ++ if (irq_remapped(irq)) { ++ struct irte irte; ++ int ir_index; ++ u16 sub_handle; ++ ++ ir_index = map_irq_to_irte_handle(irq, &sub_handle); ++ BUG_ON(ir_index == -1); ++ ++ memset (&irte, 0, sizeof(irte)); ++ ++ irte.present = 1; ++ irte.dst_mode = apic->irq_dest_mode; ++ irte.trigger_mode = 0; /* edge */ ++ irte.dlvry_mode = apic->irq_delivery_mode; ++ irte.vector = cfg->vector; ++ irte.dest_id = IRTE_DEST(dest); ++ ++ modify_irte(irq, &irte); ++ ++ msg->address_hi = MSI_ADDR_BASE_HI; ++ msg->data = sub_handle; ++ msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | ++ MSI_ADDR_IR_SHV | ++ MSI_ADDR_IR_INDEX1(ir_index) | ++ MSI_ADDR_IR_INDEX2(ir_index); ++ } else { ++ if (x2apic_enabled()) ++ msg->address_hi = MSI_ADDR_BASE_HI | ++ MSI_ADDR_EXT_DEST_ID(dest); ++ else ++ msg->address_hi = MSI_ADDR_BASE_HI; ++ ++ msg->address_lo = ++ MSI_ADDR_BASE_LO | ++ ((apic->irq_dest_mode == 0) ? ++ MSI_ADDR_DEST_MODE_PHYSICAL: ++ MSI_ADDR_DEST_MODE_LOGICAL) | ++ ((apic->irq_delivery_mode != dest_LowestPrio) ? ++ MSI_ADDR_REDIRECTION_CPU: ++ MSI_ADDR_REDIRECTION_LOWPRI) | ++ MSI_ADDR_DEST_ID(dest); ++ ++ msg->data = ++ MSI_DATA_TRIGGER_EDGE | ++ MSI_DATA_LEVEL_ASSERT | ++ ((apic->irq_delivery_mode != dest_LowestPrio) ? ++ MSI_DATA_DELIVERY_FIXED: ++ MSI_DATA_DELIVERY_LOWPRI) | ++ MSI_DATA_VECTOR(cfg->vector); ++ } ++ return err; ++} ++ ++#ifdef CONFIG_SMP ++static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irq_cfg *cfg; ++ struct msi_msg msg; ++ unsigned int dest; ++ ++ dest = set_desc_affinity(desc, mask); ++ if (dest == BAD_APICID) ++ return; ++ ++ cfg = desc->chip_data; ++ ++ read_msi_msg_desc(desc, &msg); ++ ++ msg.data &= ~MSI_DATA_VECTOR_MASK; ++ msg.data |= MSI_DATA_VECTOR(cfg->vector); ++ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; ++ msg.address_lo |= MSI_ADDR_DEST_ID(dest); ++ ++ write_msi_msg_desc(desc, &msg); ++} ++#ifdef CONFIG_INTR_REMAP ++/* ++ * Migrate the MSI irq to another cpumask. This migration is ++ * done in the process context using interrupt-remapping hardware. 
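++ * (Unlike set_msi_irq_affinity() above, only the IRTE is rewritten; the
++ * MSI address/data registers of the device are left untouched.)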
++ */ ++static void ++ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irq_cfg *cfg = desc->chip_data; ++ unsigned int dest; ++ struct irte irte; ++ ++ if (get_irte(irq, &irte)) ++ return; ++ ++ dest = set_desc_affinity(desc, mask); ++ if (dest == BAD_APICID) ++ return; ++ ++ irte.vector = cfg->vector; ++ irte.dest_id = IRTE_DEST(dest); ++ ++ /* ++ * atomically update the IRTE with the new destination and vector. ++ */ ++ modify_irte(irq, &irte); ++ ++ /* ++ * After this point, all the interrupts will start arriving ++ * at the new destination. So, time to cleanup the previous ++ * vector allocation. ++ */ ++ if (cfg->move_in_progress) ++ send_cleanup_vector(cfg); ++} ++ ++#endif ++#endif /* CONFIG_SMP */ ++ ++/* ++ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, ++ * which implement the MSI or MSI-X Capability Structure. ++ */ ++static struct irq_chip msi_chip = { ++ .name = "PCI-MSI", ++ .unmask = unmask_msi_irq, ++ .mask = mask_msi_irq, ++ .ack = ack_apic_edge, ++#ifdef CONFIG_SMP ++ .set_affinity = set_msi_irq_affinity, ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++static struct irq_chip msi_ir_chip = { ++ .name = "IR-PCI-MSI", ++ .unmask = unmask_msi_irq, ++ .mask = mask_msi_irq, ++#ifdef CONFIG_INTR_REMAP ++ .ack = ack_x2apic_edge, ++#ifdef CONFIG_SMP ++ .set_affinity = ir_set_msi_irq_affinity, ++#endif ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++/* ++ * Map the PCI dev to the corresponding remapping hardware unit ++ * and allocate 'nvec' consecutive interrupt-remapping table entries ++ * in it. ++ */ ++static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) ++{ ++ struct intel_iommu *iommu; ++ int index; ++ ++ iommu = map_dev_to_ir(dev); ++ if (!iommu) { ++ printk(KERN_ERR ++ "Unable to map PCI %s to iommu\n", pci_name(dev)); ++ return -ENOENT; ++ } ++ ++ index = alloc_irte(iommu, irq, nvec); ++ if (index < 0) { ++ printk(KERN_ERR ++ "Unable to allocate %d IRTE for PCI %s\n", nvec, ++ pci_name(dev)); ++ return -ENOSPC; ++ } ++ return index; ++} ++ ++static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) ++{ ++ int ret; ++ struct msi_msg msg; ++ ++ ret = msi_compose_msg(dev, irq, &msg); ++ if (ret < 0) ++ return ret; ++ ++ set_irq_msi(irq, msidesc); ++ write_msi_msg(irq, &msg); ++ ++ if (irq_remapped(irq)) { ++ struct irq_desc *desc = irq_to_desc(irq); ++ /* ++ * irq migration in process context ++ */ ++ desc->status |= IRQ_MOVE_PCNTXT; ++ set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); ++ } else ++ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); ++ ++ dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); ++ ++ return 0; ++} ++ ++int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) ++{ ++ unsigned int irq; ++ int ret, sub_handle; ++ struct msi_desc *msidesc; ++ unsigned int irq_want; ++ struct intel_iommu *iommu = NULL; ++ int index = 0; ++ ++ irq_want = nr_irqs_gsi; ++ sub_handle = 0; ++ list_for_each_entry(msidesc, &dev->msi_list, list) { ++ irq = create_irq_nr(irq_want); ++ if (irq == 0) ++ return -1; ++ irq_want = irq + 1; ++ if (!intr_remapping_enabled) ++ goto no_ir; ++ ++ if (!sub_handle) { ++ /* ++ * allocate the consecutive block of IRTE's ++ * for 'nvec' ++ */ ++ index = msi_alloc_irte(dev, irq, nvec); ++ if (index < 0) { ++ ret = index; ++ goto error; ++ } ++ } else { ++ iommu = map_dev_to_ir(dev); ++ if (!iommu) { ++ ret = -ENOENT; ++ goto error; ++ } ++ /* 
++ * setup the mapping between the irq and the IRTE ++ * base index, the sub_handle pointing to the ++ * appropriate interrupt remap table entry. ++ */ ++ set_irte_irq(irq, iommu, index, sub_handle); ++ } ++no_ir: ++ ret = setup_msi_irq(dev, msidesc, irq); ++ if (ret < 0) ++ goto error; ++ sub_handle++; ++ } ++ return 0; ++ ++error: ++ destroy_irq(irq); ++ return ret; ++} ++ ++void arch_teardown_msi_irq(unsigned int irq) ++{ ++ destroy_irq(irq); ++} ++ ++#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) ++#ifdef CONFIG_SMP ++static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irq_cfg *cfg; ++ struct msi_msg msg; ++ unsigned int dest; ++ ++ dest = set_desc_affinity(desc, mask); ++ if (dest == BAD_APICID) ++ return; ++ ++ cfg = desc->chip_data; ++ ++ dmar_msi_read(irq, &msg); ++ ++ msg.data &= ~MSI_DATA_VECTOR_MASK; ++ msg.data |= MSI_DATA_VECTOR(cfg->vector); ++ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; ++ msg.address_lo |= MSI_ADDR_DEST_ID(dest); ++ ++ dmar_msi_write(irq, &msg); ++} ++ ++#endif /* CONFIG_SMP */ ++ ++struct irq_chip dmar_msi_type = { ++ .name = "DMAR_MSI", ++ .unmask = dmar_msi_unmask, ++ .mask = dmar_msi_mask, ++ .ack = ack_apic_edge, ++#ifdef CONFIG_SMP ++ .set_affinity = dmar_msi_set_affinity, ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++int arch_setup_dmar_msi(unsigned int irq) ++{ ++ int ret; ++ struct msi_msg msg; ++ ++ ret = msi_compose_msg(NULL, irq, &msg); ++ if (ret < 0) ++ return ret; ++ dmar_msi_write(irq, &msg); ++ set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, ++ "edge"); ++ return 0; ++} ++#endif ++ ++#ifdef CONFIG_HPET_TIMER ++ ++#ifdef CONFIG_SMP ++static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irq_cfg *cfg; ++ struct msi_msg msg; ++ unsigned int dest; ++ ++ dest = set_desc_affinity(desc, mask); ++ if (dest == BAD_APICID) ++ return; ++ ++ cfg = desc->chip_data; ++ ++ hpet_msi_read(irq, &msg); ++ ++ msg.data &= ~MSI_DATA_VECTOR_MASK; ++ msg.data |= MSI_DATA_VECTOR(cfg->vector); ++ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; ++ msg.address_lo |= MSI_ADDR_DEST_ID(dest); ++ ++ hpet_msi_write(irq, &msg); ++} ++ ++#endif /* CONFIG_SMP */ ++ ++static struct irq_chip hpet_msi_type = { ++ .name = "HPET_MSI", ++ .unmask = hpet_msi_unmask, ++ .mask = hpet_msi_mask, ++ .ack = ack_apic_edge, ++#ifdef CONFIG_SMP ++ .set_affinity = hpet_msi_set_affinity, ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++int arch_setup_hpet_msi(unsigned int irq) ++{ ++ int ret; ++ struct msi_msg msg; ++ ++ ret = msi_compose_msg(NULL, irq, &msg); ++ if (ret < 0) ++ return ret; ++ ++ hpet_msi_write(irq, &msg); ++ set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq, ++ "edge"); ++ ++ return 0; ++} ++#endif ++ ++#endif /* CONFIG_PCI_MSI */ ++/* ++ * Hypertransport interrupt support ++ */ ++#ifdef CONFIG_HT_IRQ ++ ++#ifdef CONFIG_SMP ++ ++static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) ++{ ++ struct ht_irq_msg msg; ++ fetch_ht_irq_msg(irq, &msg); ++ ++ msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); ++ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); ++ ++ msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); ++ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); ++ ++ write_ht_irq_msg(irq, &msg); ++} ++ ++static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) ++{ ++ struct 
irq_desc *desc = irq_to_desc(irq); ++ struct irq_cfg *cfg; ++ unsigned int dest; ++ ++ dest = set_desc_affinity(desc, mask); ++ if (dest == BAD_APICID) ++ return; ++ ++ cfg = desc->chip_data; ++ ++ target_ht_irq(irq, dest, cfg->vector); ++} ++ ++#endif ++ ++static struct irq_chip ht_irq_chip = { ++ .name = "PCI-HT", ++ .mask = mask_ht_irq, ++ .unmask = unmask_ht_irq, ++ .ack = ack_apic_edge, ++#ifdef CONFIG_SMP ++ .set_affinity = set_ht_irq_affinity, ++#endif ++ .retrigger = ioapic_retrigger_irq, ++}; ++ ++int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) ++{ ++ struct irq_cfg *cfg; ++ int err; ++ ++ if (disable_apic) ++ return -ENXIO; ++ ++ cfg = irq_cfg(irq); ++ err = assign_irq_vector(irq, cfg, apic->target_cpus()); ++ if (!err) { ++ struct ht_irq_msg msg; ++ unsigned dest; ++ ++ dest = apic->cpu_mask_to_apicid_and(cfg->domain, ++ apic->target_cpus()); ++ ++ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); ++ ++ msg.address_lo = ++ HT_IRQ_LOW_BASE | ++ HT_IRQ_LOW_DEST_ID(dest) | ++ HT_IRQ_LOW_VECTOR(cfg->vector) | ++ ((apic->irq_dest_mode == 0) ? ++ HT_IRQ_LOW_DM_PHYSICAL : ++ HT_IRQ_LOW_DM_LOGICAL) | ++ HT_IRQ_LOW_RQEOI_EDGE | ++ ((apic->irq_delivery_mode != dest_LowestPrio) ? ++ HT_IRQ_LOW_MT_FIXED : ++ HT_IRQ_LOW_MT_ARBITRATED) | ++ HT_IRQ_LOW_IRQ_MASKED; ++ ++ write_ht_irq_msg(irq, &msg); ++ ++ set_irq_chip_and_handler_name(irq, &ht_irq_chip, ++ handle_edge_irq, "edge"); ++ ++ dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); ++ } ++ return err; ++} ++#endif /* CONFIG_HT_IRQ */ ++ ++#ifdef CONFIG_X86_UV ++/* ++ * Re-target the irq to the specified CPU and enable the specified MMR located ++ * on the specified blade to allow the sending of MSIs to the specified CPU. ++ */ ++int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, ++ unsigned long mmr_offset) ++{ ++ const struct cpumask *eligible_cpu = cpumask_of(cpu); ++ struct irq_cfg *cfg; ++ int mmr_pnode; ++ unsigned long mmr_value; ++ struct uv_IO_APIC_route_entry *entry; ++ unsigned long flags; ++ int err; ++ ++ cfg = irq_cfg(irq); ++ ++ err = assign_irq_vector(irq, cfg, eligible_cpu); ++ if (err != 0) ++ return err; ++ ++ spin_lock_irqsave(&vector_lock, flags); ++ set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, ++ irq_name); ++ spin_unlock_irqrestore(&vector_lock, flags); ++ ++ mmr_value = 0; ++ entry = (struct uv_IO_APIC_route_entry *)&mmr_value; ++ BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); ++ ++ entry->vector = cfg->vector; ++ entry->delivery_mode = apic->irq_delivery_mode; ++ entry->dest_mode = apic->irq_dest_mode; ++ entry->polarity = 0; ++ entry->trigger = 0; ++ entry->mask = 0; ++ entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); ++ ++ mmr_pnode = uv_blade_to_pnode(mmr_blade); ++ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); ++ ++ return irq; ++} ++ ++/* ++ * Disable the specified MMR located on the specified blade so that MSIs are ++ * longer allowed to be sent. 
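++ * (Only the mask bit of the routing entry is set; the remaining fields of
++ * the MMR value are left cleared.)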
++ */ ++void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) ++{ ++ unsigned long mmr_value; ++ struct uv_IO_APIC_route_entry *entry; ++ int mmr_pnode; ++ ++ mmr_value = 0; ++ entry = (struct uv_IO_APIC_route_entry *)&mmr_value; ++ BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); ++ ++ entry->mask = 1; ++ ++ mmr_pnode = uv_blade_to_pnode(mmr_blade); ++ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); ++} ++#endif /* CONFIG_X86_64 */ ++ ++int __init io_apic_get_redir_entries (int ioapic) ++{ ++ union IO_APIC_reg_01 reg_01; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(ioapic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return reg_01.bits.entries; ++} ++ ++void __init probe_nr_irqs_gsi(void) ++{ ++ int nr = 0; ++ ++ nr = acpi_probe_gsi(); ++ if (nr > nr_irqs_gsi) { ++ nr_irqs_gsi = nr; ++ } else { ++ /* for acpi=off or acpi is not compiled in */ ++ int idx; ++ ++ nr = 0; ++ for (idx = 0; idx < nr_ioapics; idx++) ++ nr += io_apic_get_redir_entries(idx) + 1; ++ ++ if (nr > nr_irqs_gsi) ++ nr_irqs_gsi = nr; ++ } ++ ++ printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); ++} ++ ++#ifdef CONFIG_SPARSE_IRQ ++int __init arch_probe_nr_irqs(void) ++{ ++ int nr; ++ ++ if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) ++ nr_irqs = NR_VECTORS * nr_cpu_ids; ++ ++ nr = nr_irqs_gsi + 8 * nr_cpu_ids; ++#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) ++ /* ++ * for MSI and HT dyn irq ++ */ ++ nr += nr_irqs_gsi * 16; ++#endif ++ if (nr < nr_irqs) ++ nr_irqs = nr; ++ ++ return 0; ++} ++#endif ++ ++/* -------------------------------------------------------------------------- ++ ACPI-based IOAPIC Configuration ++ -------------------------------------------------------------------------- */ ++ ++#ifdef CONFIG_ACPI ++ ++#ifdef CONFIG_X86_32 ++int __init io_apic_get_unique_id(int ioapic, int apic_id) ++{ ++ union IO_APIC_reg_00 reg_00; ++ static physid_mask_t apic_id_map = PHYSID_MASK_NONE; ++ physid_mask_t tmp; ++ unsigned long flags; ++ int i = 0; ++ ++ /* ++ * The P4 platform supports up to 256 APIC IDs on two separate APIC ++ * buses (one for LAPICs, one for IOAPICs), where predecessors only ++ * supports up to 16 on one shared APIC bus. ++ * ++ * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full ++ * advantage of new APIC bus architecture. ++ */ ++ ++ if (physids_empty(apic_id_map)) ++ apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_00.raw = io_apic_read(ioapic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ if (apic_id >= get_physical_broadcast()) { ++ printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " ++ "%d\n", ioapic, apic_id, reg_00.bits.ID); ++ apic_id = reg_00.bits.ID; ++ } ++ ++ /* ++ * Every APIC in a system must have a unique ID or we get lots of nice ++ * 'stuck on smp_invalidate_needed IPI wait' messages. 
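++	 * (If the requested ID is already in use, fall back to the first
++	 * unused physical ID, mirroring setup_ioapic_ids_from_mpc() above.)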
++ */ ++ if (apic->check_apicid_used(apic_id_map, apic_id)) { ++ ++ for (i = 0; i < get_physical_broadcast(); i++) { ++ if (!apic->check_apicid_used(apic_id_map, i)) ++ break; ++ } ++ ++ if (i == get_physical_broadcast()) ++ panic("Max apic_id exceeded!\n"); ++ ++ printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " ++ "trying %d\n", ioapic, apic_id, i); ++ ++ apic_id = i; ++ } ++ ++ tmp = apic->apicid_to_cpu_present(apic_id); ++ physids_or(apic_id_map, apic_id_map, tmp); ++ ++ if (reg_00.bits.ID != apic_id) { ++ reg_00.bits.ID = apic_id; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ io_apic_write(ioapic, 0, reg_00.raw); ++ reg_00.raw = io_apic_read(ioapic, 0); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ /* Sanity check */ ++ if (reg_00.bits.ID != apic_id) { ++ printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); ++ return -1; ++ } ++ } ++ ++ apic_printk(APIC_VERBOSE, KERN_INFO ++ "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); ++ ++ return apic_id; ++} ++ ++int __init io_apic_get_version(int ioapic) ++{ ++ union IO_APIC_reg_01 reg_01; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&ioapic_lock, flags); ++ reg_01.raw = io_apic_read(ioapic, 1); ++ spin_unlock_irqrestore(&ioapic_lock, flags); ++ ++ return reg_01.bits.version; ++} ++#endif ++ ++int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) ++{ ++ struct irq_desc *desc; ++ struct irq_cfg *cfg; ++ int cpu = boot_cpu_id; ++ ++ if (!IO_APIC_IRQ(irq)) { ++ apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", ++ ioapic); ++ return -EINVAL; ++ } ++ ++ desc = irq_to_desc_alloc_cpu(irq, cpu); ++ if (!desc) { ++ printk(KERN_INFO "can not get irq_desc %d\n", irq); ++ return 0; ++ } ++ ++ /* ++ * IRQs < 16 are already in the irq_2_pin[] map ++ */ ++ if (irq >= NR_IRQS_LEGACY) { ++ cfg = desc->chip_data; ++ add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); ++ } ++ ++ setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); ++ ++ return 0; ++} ++ ++ ++int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) ++{ ++ int i; ++ ++ if (skip_ioapic_setup) ++ return -1; ++ ++ for (i = 0; i < mp_irq_entries; i++) ++ if (mp_irqs[i].irqtype == mp_INT && ++ mp_irqs[i].srcbusirq == bus_irq) ++ break; ++ if (i >= mp_irq_entries) ++ return -1; ++ ++ *trigger = irq_trigger(i); ++ *polarity = irq_polarity(i); ++ return 0; ++} ++ ++#endif /* CONFIG_ACPI */ ++ ++/* ++ * This function currently is only a helper for the i386 smp boot process where ++ * we need to reprogram the ioredtbls to cater for the cpus which have come online ++ * so mask in all cases should simply be apic->target_cpus() ++ */ ++#ifdef CONFIG_SMP ++void __init setup_ioapic_dest(void) ++{ ++ int pin, ioapic, irq, irq_entry; ++ struct irq_desc *desc; ++ struct irq_cfg *cfg; ++ const struct cpumask *mask; ++ ++ if (skip_ioapic_setup == 1) ++ return; ++ ++ for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { ++ for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { ++ irq_entry = find_irq_entry(ioapic, pin, mp_INT); ++ if (irq_entry == -1) ++ continue; ++ irq = pin_2_irq(irq_entry, ioapic, pin); ++ ++ /* setup_IO_APIC_irqs could fail to get vector for some device ++ * when you have too many devices, because at that time only boot ++ * cpu is online. 
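++			 * (Such entries still have cfg->vector == 0 and are
++			 * set up again below, now that the other cpus are online.)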
++ */ ++ desc = irq_to_desc(irq); ++ cfg = desc->chip_data; ++ if (!cfg->vector) { ++ setup_IO_APIC_irq(ioapic, pin, irq, desc, ++ irq_trigger(irq_entry), ++ irq_polarity(irq_entry)); ++ continue; ++ ++ } ++ ++ /* ++ * Honour affinities which have been set in early boot ++ */ ++ if (desc->status & ++ (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) ++ mask = desc->affinity; ++ else ++ mask = apic->target_cpus(); ++ ++ if (intr_remapping_enabled) ++ set_ir_ioapic_affinity_irq_desc(desc, mask); ++ else ++ set_ioapic_affinity_irq_desc(desc, mask); ++ } ++ ++ } ++} ++#endif ++ ++#define IOAPIC_RESOURCE_NAME_SIZE 11 ++ ++static struct resource *ioapic_resources; ++ ++static struct resource * __init ioapic_setup_resources(void) ++{ ++ unsigned long n; ++ struct resource *res; ++ char *mem; ++ int i; ++ ++ if (nr_ioapics <= 0) ++ return NULL; ++ ++ n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); ++ n *= nr_ioapics; ++ ++ mem = alloc_bootmem(n); ++ res = (void *)mem; ++ ++ if (mem != NULL) { ++ mem += sizeof(struct resource) * nr_ioapics; ++ ++ for (i = 0; i < nr_ioapics; i++) { ++ res[i].name = mem; ++ res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; ++ sprintf(mem, "IOAPIC %u", i); ++ mem += IOAPIC_RESOURCE_NAME_SIZE; ++ } ++ } ++ ++ ioapic_resources = res; ++ ++ return res; ++} ++ ++void __init ioapic_init_mappings(void) ++{ ++ unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; ++ struct resource *ioapic_res; ++ int i; ++ ++ ioapic_res = ioapic_setup_resources(); ++ for (i = 0; i < nr_ioapics; i++) { ++ if (smp_found_config) { ++ ioapic_phys = mp_ioapics[i].apicaddr; ++#ifdef CONFIG_X86_32 ++ if (!ioapic_phys) { ++ printk(KERN_ERR ++ "WARNING: bogus zero IO-APIC " ++ "address found in MPTABLE, " ++ "disabling IO/APIC support!\n"); ++ smp_found_config = 0; ++ skip_ioapic_setup = 1; ++ goto fake_ioapic_page; ++ } ++#endif ++ } else { ++#ifdef CONFIG_X86_32 ++fake_ioapic_page: ++#endif ++ ioapic_phys = (unsigned long) ++ alloc_bootmem_pages(PAGE_SIZE); ++ ioapic_phys = __pa(ioapic_phys); ++ } ++ set_fixmap_nocache(idx, ioapic_phys); ++ apic_printk(APIC_VERBOSE, ++ "mapped IOAPIC to %08lx (%08lx)\n", ++ __fix_to_virt(idx), ioapic_phys); ++ idx++; ++ ++ if (ioapic_res != NULL) { ++ ioapic_res->start = ioapic_phys; ++ ioapic_res->end = ioapic_phys + (4 * 1024) - 1; ++ ioapic_res++; ++ } ++ } ++} ++ ++static int __init ioapic_insert_resources(void) ++{ ++ int i; ++ struct resource *r = ioapic_resources; ++ ++ if (!r) { ++ if (nr_ioapics > 0) { ++ printk(KERN_ERR ++ "IO APIC resources couldn't be allocated.\n"); ++ return -1; ++ } ++ return 0; ++ } ++ ++ for (i = 0; i < nr_ioapics; i++) { ++ insert_resource(&iomem_resource, r); ++ r++; ++ } ++ ++ return 0; ++} ++ ++/* Insert the IO APIC resources after PCI initialization has occured to handle ++ * IO APICS that are mapped in on a BAR in PCI space. */ ++late_initcall(ioapic_insert_resources); +Index: linux-2.6-tip/arch/x86/kernel/apic/ipi.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/apic/ipi.c +@@ -0,0 +1,164 @@ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector) ++{ ++ unsigned long query_cpu; ++ unsigned long flags; ++ ++ /* ++ * Hack. 
The clustered APIC addressing mode doesn't allow us to send ++ * to an arbitrary mask, so I do a unicast to each CPU instead. ++ * - mbligh ++ */ ++ local_irq_save(flags); ++ for_each_cpu(query_cpu, mask) { ++ __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, ++ query_cpu), vector, APIC_DEST_PHYSICAL); ++ } ++ local_irq_restore(flags); ++} ++ ++void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask, ++ int vector) ++{ ++ unsigned int this_cpu = smp_processor_id(); ++ unsigned int query_cpu; ++ unsigned long flags; ++ ++ /* See Hack comment above */ ++ ++ local_irq_save(flags); ++ for_each_cpu(query_cpu, mask) { ++ if (query_cpu == this_cpu) ++ continue; ++ __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, ++ query_cpu), vector, APIC_DEST_PHYSICAL); ++ } ++ local_irq_restore(flags); ++} ++ ++void default_send_IPI_mask_sequence_logical(const struct cpumask *mask, ++ int vector) ++{ ++ unsigned long flags; ++ unsigned int query_cpu; ++ ++ /* ++ * Hack. The clustered APIC addressing mode doesn't allow us to send ++ * to an arbitrary mask, so I do a unicasts to each CPU instead. This ++ * should be modified to do 1 message per cluster ID - mbligh ++ */ ++ ++ local_irq_save(flags); ++ for_each_cpu(query_cpu, mask) ++ __default_send_IPI_dest_field( ++ apic->cpu_to_logical_apicid(query_cpu), vector, ++ apic->dest_logical); ++ local_irq_restore(flags); ++} ++ ++void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask, ++ int vector) ++{ ++ unsigned long flags; ++ unsigned int query_cpu; ++ unsigned int this_cpu = smp_processor_id(); ++ ++ /* See Hack comment above */ ++ ++ local_irq_save(flags); ++ for_each_cpu(query_cpu, mask) { ++ if (query_cpu == this_cpu) ++ continue; ++ __default_send_IPI_dest_field( ++ apic->cpu_to_logical_apicid(query_cpu), vector, ++ apic->dest_logical); ++ } ++ local_irq_restore(flags); ++} ++ ++#ifdef CONFIG_X86_32 ++ ++/* ++ * This is only used on smaller machines. ++ */ ++void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) ++{ ++ unsigned long mask = cpumask_bits(cpumask)[0]; ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); ++ __default_send_IPI_dest_field(mask, vector, apic->dest_logical); ++ local_irq_restore(flags); ++} ++ ++void default_send_IPI_allbutself(int vector) ++{ ++ /* ++ * if there are no other CPUs in the system then we get an APIC send ++ * error if we try to broadcast, thus avoid sending IPIs in this case. ++ */ ++ if (!(num_online_cpus() > 1)) ++ return; ++ ++ __default_local_send_IPI_allbutself(vector); ++} ++ ++void default_send_IPI_all(int vector) ++{ ++ __default_local_send_IPI_all(vector); ++} ++ ++void default_send_IPI_self(int vector) ++{ ++ __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical); ++} ++ ++/* must come after the send_IPI functions above for inlining */ ++static int convert_apicid_to_cpu(int apic_id) ++{ ++ int i; ++ ++ for_each_possible_cpu(i) { ++ if (per_cpu(x86_cpu_to_apicid, i) == apic_id) ++ return i; ++ } ++ return -1; ++} ++ ++int safe_smp_processor_id(void) ++{ ++ int apicid, cpuid; ++ ++ if (!boot_cpu_has(X86_FEATURE_APIC)) ++ return 0; ++ ++ apicid = hard_smp_processor_id(); ++ if (apicid == BAD_APICID) ++ return 0; ++ ++ cpuid = convert_apicid_to_cpu(apicid); ++ ++ return cpuid >= 0 ? 
cpuid : 0; ++} ++#endif +Index: linux-2.6-tip/arch/x86/kernel/apic/nmi.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/apic/nmi.c +@@ -0,0 +1,567 @@ ++/* ++ * NMI watchdog support on APIC systems ++ * ++ * Started by Ingo Molnar ++ * ++ * Fixes: ++ * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. ++ * Mikael Pettersson : Power Management for local APIC NMI watchdog. ++ * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. ++ * Pavel Machek and ++ * Mikael Pettersson : PM converted to driver model. Disable/enable API. ++ */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include ++ ++int unknown_nmi_panic; ++int nmi_watchdog_enabled; ++ ++static cpumask_var_t backtrace_mask; ++ ++/* nmi_active: ++ * >0: the lapic NMI watchdog is active, but can be disabled ++ * <0: the lapic NMI watchdog has not been set up, and cannot ++ * be enabled ++ * 0: the lapic NMI watchdog is disabled, but can be enabled ++ */ ++atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ ++EXPORT_SYMBOL(nmi_active); ++ ++unsigned int nmi_watchdog = NMI_NONE; ++EXPORT_SYMBOL(nmi_watchdog); ++ ++static int panic_on_timeout; ++ ++static unsigned int nmi_hz = HZ; ++static DEFINE_PER_CPU(short, wd_enabled); ++static int endflag __initdata; ++ ++static inline unsigned int get_nmi_count(int cpu) ++{ ++ return per_cpu(irq_stat, cpu).__nmi_count; ++} ++ ++static inline int mce_in_progress(void) ++{ ++#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) ++ return atomic_read(&mce_entry) > 0; ++#endif ++ return 0; ++} ++ ++/* ++ * Take the local apic timer and PIT/HPET into account. We don't ++ * know which one is active, when we have highres/dyntick on ++ */ ++static inline unsigned int get_timer_irqs(int cpu) ++{ ++ return per_cpu(irq_stat, cpu).apic_timer_irqs + ++ per_cpu(irq_stat, cpu).irq0_irqs; ++} ++ ++#ifdef CONFIG_SMP ++/* ++ * The performance counters used by NMI_LOCAL_APIC don't trigger when ++ * the CPU is idle. To make sure the NMI watchdog really ticks on all ++ * CPUs during the test make them busy. ++ */ ++static __init void nmi_cpu_busy(void *data) ++{ ++#ifndef CONFIG_PREEMPT_RT ++ local_irq_enable_in_hardirq(); ++#endif ++ /* ++ * Intentionally don't use cpu_relax here. This is ++ * to make sure that the performance counter really ticks, ++ * even if there is a simulator or similar that catches the ++ * pause instruction. On a real HT machine this is fine because ++ * all other CPUs are busy with "useless" delay loops and don't ++ * care if they get somewhat less cycles. 
++ */ ++ while (endflag == 0) ++ mb(); ++} ++#endif ++ ++static void report_broken_nmi(int cpu, int *prev_nmi_count) ++{ ++ printk(KERN_CONT "\n"); ++ ++ printk(KERN_WARNING ++ "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", ++ cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); ++ ++ printk(KERN_WARNING ++ "Please report this to bugzilla.kernel.org,\n"); ++ printk(KERN_WARNING ++ "and attach the output of the 'dmesg' command.\n"); ++ ++ per_cpu(wd_enabled, cpu) = 0; ++ atomic_dec(&nmi_active); ++} ++ ++static void __acpi_nmi_disable(void *__unused) ++{ ++ apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); ++} ++ ++int __init check_nmi_watchdog(void) ++{ ++ unsigned int *prev_nmi_count; ++ int cpu; ++ ++ if (!nmi_watchdog_active() || !atomic_read(&nmi_active)) ++ return 0; ++ ++ prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); ++ if (!prev_nmi_count) ++ goto error; ++ ++ alloc_cpumask_var(&backtrace_mask, GFP_KERNEL); ++ printk(KERN_INFO "Testing NMI watchdog ... "); ++ ++#ifdef CONFIG_SMP ++ if (nmi_watchdog == NMI_LOCAL_APIC) ++ smp_call_function(nmi_cpu_busy, (void *)&endflag, 0); ++#endif ++ ++ for_each_possible_cpu(cpu) ++ prev_nmi_count[cpu] = get_nmi_count(cpu); ++ local_irq_enable(); ++ mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ ++ ++ for_each_online_cpu(cpu) { ++ if (!per_cpu(wd_enabled, cpu)) ++ continue; ++ if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) ++ report_broken_nmi(cpu, prev_nmi_count); ++ } ++ endflag = 1; ++ if (!atomic_read(&nmi_active)) { ++ kfree(prev_nmi_count); ++ atomic_set(&nmi_active, -1); ++ goto error; ++ } ++ printk("OK.\n"); ++ ++ /* ++ * now that we know it works we can reduce NMI frequency to ++ * something more reasonable; makes a difference in some configs ++ */ ++ if (nmi_watchdog == NMI_LOCAL_APIC) ++ nmi_hz = lapic_adjust_nmi_hz(1); ++ ++ kfree(prev_nmi_count); ++ return 0; ++error: ++ if (nmi_watchdog == NMI_IO_APIC) { ++ if (!timer_through_8259) ++ disable_8259A_irq(0); ++ on_each_cpu(__acpi_nmi_disable, NULL, 1); ++ } ++ ++#ifdef CONFIG_X86_32 ++ timer_ack = 0; ++#endif ++ return -1; ++} ++ ++static int __init setup_nmi_watchdog(char *str) ++{ ++ unsigned int nmi; ++ ++ if (!strncmp(str, "panic", 5)) { ++ panic_on_timeout = 1; ++ str = strchr(str, ','); ++ if (!str) ++ return 1; ++ ++str; ++ } ++ ++ if (!strncmp(str, "lapic", 5)) ++ nmi_watchdog = NMI_LOCAL_APIC; ++ else if (!strncmp(str, "ioapic", 6)) ++ nmi_watchdog = NMI_IO_APIC; ++ else { ++ get_option(&str, &nmi); ++ if (nmi >= NMI_INVALID) ++ return 0; ++ nmi_watchdog = nmi; ++ } ++ ++ return 1; ++} ++__setup("nmi_watchdog=", setup_nmi_watchdog); ++ ++/* ++ * Suspend/resume support ++ */ ++#ifdef CONFIG_PM ++ ++static int nmi_pm_active; /* nmi_active before suspend */ ++ ++static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) ++{ ++ /* only CPU0 goes here, other CPUs should be offline */ ++ nmi_pm_active = atomic_read(&nmi_active); ++ stop_apic_nmi_watchdog(NULL); ++ BUG_ON(atomic_read(&nmi_active) != 0); ++ return 0; ++} ++ ++static int lapic_nmi_resume(struct sys_device *dev) ++{ ++ /* only CPU0 goes here, other CPUs should be offline */ ++ if (nmi_pm_active > 0) { ++ setup_apic_nmi_watchdog(NULL); ++ touch_nmi_watchdog(); ++ } ++ return 0; ++} ++ ++static struct sysdev_class nmi_sysclass = { ++ .name = "lapic_nmi", ++ .resume = lapic_nmi_resume, ++ .suspend = lapic_nmi_suspend, ++}; ++ ++static struct sys_device device_lapic_nmi = { ++ .id = 0, ++ .cls = &nmi_sysclass, ++}; ++ ++static int __init init_lapic_nmi_sysfs(void) ++{ ++ int 
error; ++ ++ /* ++ * should really be a BUG_ON but b/c this is an ++ * init call, it just doesn't work. -dcz ++ */ ++ if (nmi_watchdog != NMI_LOCAL_APIC) ++ return 0; ++ ++ if (atomic_read(&nmi_active) < 0) ++ return 0; ++ ++ error = sysdev_class_register(&nmi_sysclass); ++ if (!error) ++ error = sysdev_register(&device_lapic_nmi); ++ return error; ++} ++ ++/* must come after the local APIC's device_initcall() */ ++late_initcall(init_lapic_nmi_sysfs); ++ ++#endif /* CONFIG_PM */ ++ ++static void __acpi_nmi_enable(void *__unused) ++{ ++ apic_write(APIC_LVT0, APIC_DM_NMI); ++} ++ ++/* ++ * Enable timer based NMIs on all CPUs: ++ */ ++void acpi_nmi_enable(void) ++{ ++ if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) ++ on_each_cpu(__acpi_nmi_enable, NULL, 1); ++} ++ ++/* ++ * Disable timer based NMIs on all CPUs: ++ */ ++void acpi_nmi_disable(void) ++{ ++ if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) ++ on_each_cpu(__acpi_nmi_disable, NULL, 1); ++} ++ ++/* ++ * This function is called as soon the LAPIC NMI watchdog driver has everything ++ * in place and it's ready to check if the NMIs belong to the NMI watchdog ++ */ ++void cpu_nmi_set_wd_enabled(void) ++{ ++ __get_cpu_var(wd_enabled) = 1; ++} ++ ++void setup_apic_nmi_watchdog(void *unused) ++{ ++ if (__get_cpu_var(wd_enabled)) ++ return; ++ ++ /* cheap hack to support suspend/resume */ ++ /* if cpu0 is not active neither should the other cpus */ ++ if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) ++ return; ++ ++ switch (nmi_watchdog) { ++ case NMI_LOCAL_APIC: ++ if (lapic_watchdog_init(nmi_hz) < 0) { ++ __get_cpu_var(wd_enabled) = 0; ++ return; ++ } ++ /* FALL THROUGH */ ++ case NMI_IO_APIC: ++ __get_cpu_var(wd_enabled) = 1; ++ atomic_inc(&nmi_active); ++ } ++} ++ ++void stop_apic_nmi_watchdog(void *unused) ++{ ++ /* only support LOCAL and IO APICs for now */ ++ if (!nmi_watchdog_active()) ++ return; ++ if (__get_cpu_var(wd_enabled) == 0) ++ return; ++ if (nmi_watchdog == NMI_LOCAL_APIC) ++ lapic_watchdog_stop(); ++ else ++ __acpi_nmi_disable(NULL); ++ __get_cpu_var(wd_enabled) = 0; ++ atomic_dec(&nmi_active); ++} ++ ++/* ++ * the best way to detect whether a CPU has a 'hard lockup' problem ++ * is to check it's local APIC timer IRQ counts. If they are not ++ * changing then that CPU has some problem. ++ * ++ * as these watchdog NMI IRQs are generated on every CPU, we only ++ * have to check the current processor. ++ * ++ * since NMIs don't listen to _any_ locks, we have to be extremely ++ * careful not to rely on unsafe variables. The printk might lock ++ * up though, so we have to break up any console locks first ... ++ * [when there will be more tty-related locks, break them up here too!] ++ */ ++ ++static DEFINE_PER_CPU(unsigned, last_irq_sum); ++static DEFINE_PER_CPU(local_t, alert_counter); ++static DEFINE_PER_CPU(int, nmi_touch); ++ ++void touch_nmi_watchdog(void) ++{ ++ if (nmi_watchdog_active()) { ++ unsigned cpu; ++ ++ /* ++ * Tell other CPUs to reset their alert counters. We cannot ++ * do it ourselves because the alert count increase is not ++ * atomic. 
++ */ ++ for_each_present_cpu(cpu) { ++ if (per_cpu(nmi_touch, cpu) != 1) ++ per_cpu(nmi_touch, cpu) = 1; ++ } ++ } ++ ++ /* ++ * Tickle the softlockup detector too: ++ */ ++ touch_softlockup_watchdog(); ++} ++EXPORT_SYMBOL(touch_nmi_watchdog); ++ ++notrace __kprobes int ++nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) ++{ ++ /* ++ * Since current_thread_info()-> is always on the stack, and we ++ * always switch the stack NMI-atomically, it's safe to use ++ * smp_processor_id(). ++ */ ++ unsigned int sum; ++ int touched = 0; ++ int cpu = smp_processor_id(); ++ int rc = 0; ++ ++ /* check for other users first */ ++ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) ++ == NOTIFY_STOP) { ++ rc = 1; ++ touched = 1; ++ } ++ ++ sum = get_timer_irqs(cpu); ++ ++ if (__get_cpu_var(nmi_touch)) { ++ __get_cpu_var(nmi_touch) = 0; ++ touched = 1; ++ } ++ ++ if (cpumask_test_cpu(cpu, backtrace_mask)) { ++ static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */ ++ ++ spin_lock(&lock); ++ printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); ++ dump_stack(); ++ spin_unlock(&lock); ++ cpumask_clear_cpu(cpu, backtrace_mask); ++ } ++ ++ /* Could check oops_in_progress here too, but it's safer not to */ ++ if (mce_in_progress()) ++ touched = 1; ++ ++ /* if the none of the timers isn't firing, this cpu isn't doing much */ ++ if (!touched && __get_cpu_var(last_irq_sum) == sum) { ++ /* ++ * Ayiee, looks like this CPU is stuck ... ++ * wait a few IRQs (5 seconds) before doing the oops ... ++ */ ++ local_inc(&__get_cpu_var(alert_counter)); ++ if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz) ++ /* ++ * die_nmi will return ONLY if NOTIFY_STOP happens.. ++ */ ++ die_nmi("BUG: NMI Watchdog detected LOCKUP", ++ regs, panic_on_timeout); ++ } else { ++ __get_cpu_var(last_irq_sum) = sum; ++ local_set(&__get_cpu_var(alert_counter), 0); ++ } ++ ++ /* see if the nmi watchdog went off */ ++ if (!__get_cpu_var(wd_enabled)) ++ return rc; ++ switch (nmi_watchdog) { ++ case NMI_LOCAL_APIC: ++ rc |= lapic_wd_event(nmi_hz); ++ break; ++ case NMI_IO_APIC: ++ /* ++ * don't know how to accurately check for this. ++ * just assume it was a watchdog timer interrupt ++ * This matches the old behaviour. ++ */ ++ rc = 1; ++ break; ++ } ++ return rc; ++} ++ ++#ifdef CONFIG_SYSCTL ++ ++static void enable_ioapic_nmi_watchdog_single(void *unused) ++{ ++ __get_cpu_var(wd_enabled) = 1; ++ atomic_inc(&nmi_active); ++ __acpi_nmi_enable(NULL); ++} ++ ++static void enable_ioapic_nmi_watchdog(void) ++{ ++ on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1); ++ touch_nmi_watchdog(); ++} ++ ++static void disable_ioapic_nmi_watchdog(void) ++{ ++ on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); ++} ++ ++static int __init setup_unknown_nmi_panic(char *str) ++{ ++ unknown_nmi_panic = 1; ++ return 1; ++} ++__setup("unknown_nmi_panic", setup_unknown_nmi_panic); ++ ++static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) ++{ ++ unsigned char reason = get_nmi_reason(); ++ char buf[64]; ++ ++ sprintf(buf, "NMI received for unknown reason %02x\n", reason); ++ die_nmi(buf, regs, 1); /* Always panic here */ ++ return 0; ++} ++ ++/* ++ * proc handler for /proc/sys/kernel/nmi ++ */ ++int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, ++ void __user *buffer, size_t *length, loff_t *ppos) ++{ ++ int old_state; ++ ++ nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 
1 : 0; ++ old_state = nmi_watchdog_enabled; ++ proc_dointvec(table, write, file, buffer, length, ppos); ++ if (!!old_state == !!nmi_watchdog_enabled) ++ return 0; ++ ++ if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) { ++ printk(KERN_WARNING ++ "NMI watchdog is permanently disabled\n"); ++ return -EIO; ++ } ++ ++ if (nmi_watchdog == NMI_LOCAL_APIC) { ++ if (nmi_watchdog_enabled) ++ enable_lapic_nmi_watchdog(); ++ else ++ disable_lapic_nmi_watchdog(); ++ } else if (nmi_watchdog == NMI_IO_APIC) { ++ if (nmi_watchdog_enabled) ++ enable_ioapic_nmi_watchdog(); ++ else ++ disable_ioapic_nmi_watchdog(); ++ } else { ++ printk(KERN_WARNING ++ "NMI watchdog doesn't know what hardware to touch\n"); ++ return -EIO; ++ } ++ return 0; ++} ++ ++#endif /* CONFIG_SYSCTL */ ++ ++int do_nmi_callback(struct pt_regs *regs, int cpu) ++{ ++#ifdef CONFIG_SYSCTL ++ if (unknown_nmi_panic) ++ return unknown_nmi_panic_callback(regs, cpu); ++#endif ++ return 0; ++} ++ ++void __trigger_all_cpu_backtrace(void) ++{ ++ int i; ++ ++ cpumask_copy(backtrace_mask, cpu_online_mask); ++ /* Wait for up to 10 seconds for all CPUs to do the backtrace */ ++ for (i = 0; i < 10 * 1000; i++) { ++ if (cpumask_empty(backtrace_mask)) ++ break; ++ mdelay(1); ++ } ++} +Index: linux-2.6-tip/arch/x86/kernel/apic/numaq_32.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/apic/numaq_32.c +@@ -0,0 +1,558 @@ ++/* ++ * Written by: Patricia Gaughen, IBM Corporation ++ * ++ * Copyright (C) 2002, IBM Corp. ++ * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar ++ * ++ * All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or ++ * NON INFRINGEMENT. See the GNU General Public License for more ++ * details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ * Send feedback to ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) ++ ++int found_numaq; ++ ++/* ++ * Have to match translation table entries to main table entries by counter ++ * hence the mpc_record variable .... can't see a less disgusting way of ++ * doing this .... 
++ */ ++struct mpc_trans { ++ unsigned char mpc_type; ++ unsigned char trans_len; ++ unsigned char trans_type; ++ unsigned char trans_quad; ++ unsigned char trans_global; ++ unsigned char trans_local; ++ unsigned short trans_reserved; ++}; ++ ++/* x86_quirks member */ ++static int mpc_record; ++ ++static struct mpc_trans *translation_table[MAX_MPC_ENTRY]; ++ ++int mp_bus_id_to_node[MAX_MP_BUSSES]; ++int mp_bus_id_to_local[MAX_MP_BUSSES]; ++int quad_local_to_mp_bus_id[NR_CPUS/4][4]; ++ ++ ++static inline void numaq_register_node(int node, struct sys_cfg_data *scd) ++{ ++ struct eachquadmem *eq = scd->eq + node; ++ ++ node_set_online(node); ++ ++ /* Convert to pages */ ++ node_start_pfn[node] = ++ MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size); ++ ++ node_end_pfn[node] = ++ MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); ++ ++ e820_register_active_regions(node, node_start_pfn[node], ++ node_end_pfn[node]); ++ ++ memory_present(node, node_start_pfn[node], node_end_pfn[node]); ++ ++ node_remap_size[node] = node_memmap_size_bytes(node, ++ node_start_pfn[node], ++ node_end_pfn[node]); ++} ++ ++/* ++ * Function: smp_dump_qct() ++ * ++ * Description: gets memory layout from the quad config table. This ++ * function also updates node_online_map with the nodes (quads) present. ++ */ ++static void __init smp_dump_qct(void) ++{ ++ struct sys_cfg_data *scd; ++ int node; ++ ++ scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR); ++ ++ nodes_clear(node_online_map); ++ for_each_node(node) { ++ if (scd->quads_present31_0 & (1 << node)) ++ numaq_register_node(node, scd); ++ } ++} ++ ++void __cpuinit numaq_tsc_disable(void) ++{ ++ if (!found_numaq) ++ return; ++ ++ if (num_online_nodes() > 1) { ++ printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); ++ setup_clear_cpu_cap(X86_FEATURE_TSC); ++ } ++} ++ ++static int __init numaq_pre_time_init(void) ++{ ++ numaq_tsc_disable(); ++ return 0; ++} ++ ++static inline int generate_logical_apicid(int quad, int phys_apicid) ++{ ++ return (quad << 4) + (phys_apicid ? 
phys_apicid << 1 : 1); ++} ++ ++/* x86_quirks member */ ++static int mpc_apic_id(struct mpc_cpu *m) ++{ ++ int quad = translation_table[mpc_record]->trans_quad; ++ int logical_apicid = generate_logical_apicid(quad, m->apicid); ++ ++ printk(KERN_DEBUG ++ "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", ++ m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, ++ (m->cpufeature & CPU_MODEL_MASK) >> 4, ++ m->apicver, quad, logical_apicid); ++ ++ return logical_apicid; ++} ++ ++/* x86_quirks member */ ++static void mpc_oem_bus_info(struct mpc_bus *m, char *name) ++{ ++ int quad = translation_table[mpc_record]->trans_quad; ++ int local = translation_table[mpc_record]->trans_local; ++ ++ mp_bus_id_to_node[m->busid] = quad; ++ mp_bus_id_to_local[m->busid] = local; ++ ++ printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad); ++} ++ ++/* x86_quirks member */ ++static void mpc_oem_pci_bus(struct mpc_bus *m) ++{ ++ int quad = translation_table[mpc_record]->trans_quad; ++ int local = translation_table[mpc_record]->trans_local; ++ ++ quad_local_to_mp_bus_id[quad][local] = m->busid; ++} ++ ++static void __init MP_translation_info(struct mpc_trans *m) ++{ ++ printk(KERN_INFO ++ "Translation: record %d, type %d, quad %d, global %d, local %d\n", ++ mpc_record, m->trans_type, m->trans_quad, m->trans_global, ++ m->trans_local); ++ ++ if (mpc_record >= MAX_MPC_ENTRY) ++ printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); ++ else ++ translation_table[mpc_record] = m; /* stash this for later */ ++ ++ if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) ++ node_set_online(m->trans_quad); ++} ++ ++static int __init mpf_checksum(unsigned char *mp, int len) ++{ ++ int sum = 0; ++ ++ while (len--) ++ sum += *mp++; ++ ++ return sum & 0xFF; ++} ++ ++/* ++ * Read/parse the MPC oem tables ++ */ ++static void __init ++ smp_read_mpc_oem(struct mpc_oemtable *oemtable, unsigned short oemsize) ++{ ++ int count = sizeof(*oemtable); /* the header size */ ++ unsigned char *oemptr = ((unsigned char *)oemtable) + count; ++ ++ mpc_record = 0; ++ printk(KERN_INFO ++ "Found an OEM MPC table at %8p - parsing it ... \n", oemtable); ++ ++ if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { ++ printk(KERN_WARNING ++ "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", ++ oemtable->signature[0], oemtable->signature[1], ++ oemtable->signature[2], oemtable->signature[3]); ++ return; ++ } ++ ++ if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) { ++ printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); ++ return; ++ } ++ ++ while (count < oemtable->length) { ++ switch (*oemptr) { ++ case MP_TRANSLATION: ++ { ++ struct mpc_trans *m = (void *)oemptr; ++ ++ MP_translation_info(m); ++ oemptr += sizeof(*m); ++ count += sizeof(*m); ++ ++mpc_record; ++ break; ++ } ++ default: ++ printk(KERN_WARNING ++ "Unrecognised OEM table entry type! 
- %d\n", ++ (int)*oemptr); ++ return; ++ } ++ } ++} ++ ++static int __init numaq_setup_ioapic_ids(void) ++{ ++ /* so can skip it */ ++ return 1; ++} ++ ++static struct x86_quirks numaq_x86_quirks __initdata = { ++ .arch_pre_time_init = numaq_pre_time_init, ++ .arch_time_init = NULL, ++ .arch_pre_intr_init = NULL, ++ .arch_memory_setup = NULL, ++ .arch_intr_init = NULL, ++ .arch_trap_init = NULL, ++ .mach_get_smp_config = NULL, ++ .mach_find_smp_config = NULL, ++ .mpc_record = &mpc_record, ++ .mpc_apic_id = mpc_apic_id, ++ .mpc_oem_bus_info = mpc_oem_bus_info, ++ .mpc_oem_pci_bus = mpc_oem_pci_bus, ++ .smp_read_mpc_oem = smp_read_mpc_oem, ++ .setup_ioapic_ids = numaq_setup_ioapic_ids, ++}; ++ ++static __init void early_check_numaq(void) ++{ ++ /* ++ * Find possible boot-time SMP configuration: ++ */ ++ early_find_smp_config(); ++ ++ /* ++ * get boot-time SMP configuration: ++ */ ++ if (smp_found_config) ++ early_get_smp_config(); ++ ++ if (found_numaq) ++ x86_quirks = &numaq_x86_quirks; ++} ++ ++int __init get_memcfg_numaq(void) ++{ ++ early_check_numaq(); ++ if (!found_numaq) ++ return 0; ++ smp_dump_qct(); ++ ++ return 1; ++} ++ ++#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) ++ ++static inline unsigned int numaq_get_apic_id(unsigned long x) ++{ ++ return (x >> 24) & 0x0F; ++} ++ ++static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) ++{ ++ default_send_IPI_mask_sequence_logical(mask, vector); ++} ++ ++static inline void numaq_send_IPI_allbutself(int vector) ++{ ++ default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); ++} ++ ++static inline void numaq_send_IPI_all(int vector) ++{ ++ numaq_send_IPI_mask(cpu_online_mask, vector); ++} ++ ++#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) ++#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) ++ ++/* ++ * Because we use NMIs rather than the INIT-STARTUP sequence to ++ * bootstrap the CPUs, the APIC may be in a weird state. Kick it: ++ */ ++static inline void numaq_smp_callin_clear_local_apic(void) ++{ ++ clear_local_APIC(); ++} ++ ++static inline const struct cpumask *numaq_target_cpus(void) ++{ ++ return cpu_all_mask; ++} ++ ++static inline unsigned long ++numaq_check_apicid_used(physid_mask_t bitmap, int apicid) ++{ ++ return physid_isset(apicid, bitmap); ++} ++ ++static inline unsigned long numaq_check_apicid_present(int bit) ++{ ++ return physid_isset(bit, phys_cpu_present_map); ++} ++ ++static inline int numaq_apic_id_registered(void) ++{ ++ return 1; ++} ++ ++static inline void numaq_init_apic_ldr(void) ++{ ++ /* Already done in NUMA-Q firmware */ ++} ++ ++static inline void numaq_setup_apic_routing(void) ++{ ++ printk(KERN_INFO ++ "Enabling APIC mode: NUMA-Q. Using %d I/O APICs\n", ++ nr_ioapics); ++} ++ ++/* ++ * Skip adding the timer int on secondary nodes, which causes ++ * a small but painful rift in the time-space continuum. ++ */ ++static inline int numaq_multi_timer_check(int apic, int irq) ++{ ++ return apic != 0 && irq == 0; ++} ++ ++static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map) ++{ ++ /* We don't have a good way to do this yet - hack */ ++ return physids_promote(0xFUL); ++} ++ ++static inline int numaq_cpu_to_logical_apicid(int cpu) ++{ ++ if (cpu >= nr_cpu_ids) ++ return BAD_APICID; ++ return cpu_2_logical_apicid[cpu]; ++} ++ ++/* ++ * Supporting over 60 cpus on NUMA-Q requires a locality-dependent ++ * cpu to APIC ID relation to properly interact with the intelligent ++ * mode of the cluster controller. 
++ */ ++static inline int numaq_cpu_present_to_apicid(int mps_cpu) ++{ ++ if (mps_cpu < 60) ++ return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); ++ else ++ return BAD_APICID; ++} ++ ++static inline int numaq_apicid_to_node(int logical_apicid) ++{ ++ return logical_apicid >> 4; ++} ++ ++static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) ++{ ++ int node = numaq_apicid_to_node(logical_apicid); ++ int cpu = __ffs(logical_apicid & 0xf); ++ ++ return physid_mask_of_physid(cpu + 4*node); ++} ++ ++/* Where the IO area was mapped on multiquad, always 0 otherwise */ ++void *xquad_portio; ++ ++static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) ++{ ++ return 1; ++} ++ ++/* ++ * We use physical apicids here, not logical, so just return the default ++ * physical broadcast to stop people from breaking us ++ */ ++static unsigned int numaq_cpu_mask_to_apicid(const struct cpumask *cpumask) ++{ ++ return 0x0F; ++} ++ ++static inline unsigned int ++numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, ++ const struct cpumask *andmask) ++{ ++ return 0x0F; ++} ++ ++/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ ++static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) ++{ ++ return cpuid_apic >> index_msb; ++} ++ ++static int ++numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) ++{ ++ if (strncmp(oem, "IBM NUMA", 8)) ++ printk(KERN_ERR "Warning! Not a NUMA-Q system!\n"); ++ else ++ found_numaq = 1; ++ ++ return found_numaq; ++} ++ ++static int probe_numaq(void) ++{ ++ /* already know from get_memcfg_numaq() */ ++ return found_numaq; ++} ++ ++static void numaq_vector_allocation_domain(int cpu, struct cpumask *retmask) ++{ ++ /* Careful. Some cpus do not strictly honor the set of cpus ++ * specified in the interrupt destination when using lowest ++ * priority interrupt delivery mode. ++ * ++ * In particular there was a hyperthreading cpu observed to ++ * deliver interrupts to the wrong hyperthread when only one ++ * hyperthread was specified in the interrupt desitination. 
++ */ ++ cpumask_clear(retmask); ++ cpumask_bits(retmask)[0] = APIC_ALL_CPUS; ++} ++ ++static void numaq_setup_portio_remap(void) ++{ ++ int num_quads = num_online_nodes(); ++ ++ if (num_quads <= 1) ++ return; ++ ++ printk(KERN_INFO ++ "Remapping cross-quad port I/O for %d quads\n", num_quads); ++ ++ xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); ++ ++ printk(KERN_INFO ++ "xquad_portio vaddr 0x%08lx, len %08lx\n", ++ (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); ++} ++ ++struct apic apic_numaq = { ++ ++ .name = "NUMAQ", ++ .probe = probe_numaq, ++ .acpi_madt_oem_check = NULL, ++ .apic_id_registered = numaq_apic_id_registered, ++ ++ .irq_delivery_mode = dest_LowestPrio, ++ /* physical delivery on LOCAL quad: */ ++ .irq_dest_mode = 0, ++ ++ .target_cpus = numaq_target_cpus, ++ .disable_esr = 1, ++ .dest_logical = APIC_DEST_LOGICAL, ++ .check_apicid_used = numaq_check_apicid_used, ++ .check_apicid_present = numaq_check_apicid_present, ++ ++ .vector_allocation_domain = numaq_vector_allocation_domain, ++ .init_apic_ldr = numaq_init_apic_ldr, ++ ++ .ioapic_phys_id_map = numaq_ioapic_phys_id_map, ++ .setup_apic_routing = numaq_setup_apic_routing, ++ .multi_timer_check = numaq_multi_timer_check, ++ .apicid_to_node = numaq_apicid_to_node, ++ .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid, ++ .cpu_present_to_apicid = numaq_cpu_present_to_apicid, ++ .apicid_to_cpu_present = numaq_apicid_to_cpu_present, ++ .setup_portio_remap = numaq_setup_portio_remap, ++ .check_phys_apicid_present = numaq_check_phys_apicid_present, ++ .enable_apic_mode = NULL, ++ .phys_pkg_id = numaq_phys_pkg_id, ++ .mps_oem_check = numaq_mps_oem_check, ++ ++ .get_apic_id = numaq_get_apic_id, ++ .set_apic_id = NULL, ++ .apic_id_mask = 0x0F << 24, ++ ++ .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid, ++ .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, ++ ++ .send_IPI_mask = numaq_send_IPI_mask, ++ .send_IPI_mask_allbutself = NULL, ++ .send_IPI_allbutself = numaq_send_IPI_allbutself, ++ .send_IPI_all = numaq_send_IPI_all, ++ .send_IPI_self = default_send_IPI_self, ++ ++ .wakeup_secondary_cpu = wakeup_secondary_cpu_via_nmi, ++ .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, ++ .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, ++ ++ /* We don't do anything here because we use NMI's to boot instead */ ++ .wait_for_init_deassert = NULL, ++ ++ .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, ++ .inquire_remote_apic = NULL, ++ ++ .read = native_apic_mem_read, ++ .write = native_apic_mem_write, ++ .icr_read = native_apic_icr_read, ++ .icr_write = native_apic_icr_write, ++ .wait_icr_idle = native_apic_wait_icr_idle, ++ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, ++}; +Index: linux-2.6-tip/arch/x86/kernel/apic/probe_32.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/apic/probe_32.c +@@ -0,0 +1,285 @@ ++/* ++ * Default generic APIC driver. This handles up to 8 CPUs. ++ * ++ * Copyright 2003 Andi Kleen, SuSE Labs. ++ * Subject to the GNU Public License, v.2 ++ * ++ * Generic x86 APIC driver probe layer. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef CONFIG_HOTPLUG_CPU ++#define DEFAULT_SEND_IPI (1) ++#else ++#define DEFAULT_SEND_IPI (0) ++#endif ++ ++int no_broadcast = DEFAULT_SEND_IPI; ++ ++static __init int no_ipi_broadcast(char *str) ++{ ++ get_option(&str, &no_broadcast); ++ pr_info("Using %s mode\n", ++ no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); ++ return 1; ++} ++__setup("no_ipi_broadcast=", no_ipi_broadcast); ++ ++static int __init print_ipi_mode(void) ++{ ++ pr_info("Using IPI %s mode\n", ++ no_broadcast ? "No-Shortcut" : "Shortcut"); ++ return 0; ++} ++late_initcall(print_ipi_mode); ++ ++void default_setup_apic_routing(void) ++{ ++#ifdef CONFIG_X86_IO_APIC ++ printk(KERN_INFO ++ "Enabling APIC mode: Flat. Using %d I/O APICs\n", ++ nr_ioapics); ++#endif ++} ++ ++static void default_vector_allocation_domain(int cpu, struct cpumask *retmask) ++{ ++ /* ++ * Careful. Some cpus do not strictly honor the set of cpus ++ * specified in the interrupt destination when using lowest ++ * priority interrupt delivery mode. ++ * ++ * In particular there was a hyperthreading cpu observed to ++ * deliver interrupts to the wrong hyperthread when only one ++ * hyperthread was specified in the interrupt desitination. ++ */ ++ cpumask_clear(retmask); ++ cpumask_bits(retmask)[0] = APIC_ALL_CPUS; ++} ++ ++/* should be called last. */ ++static int probe_default(void) ++{ ++ return 1; ++} ++ ++struct apic apic_default = { ++ ++ .name = "default", ++ .probe = probe_default, ++ .acpi_madt_oem_check = NULL, ++ .apic_id_registered = default_apic_id_registered, ++ ++ .irq_delivery_mode = dest_LowestPrio, ++ /* logical delivery broadcast to all CPUs: */ ++ .irq_dest_mode = 1, ++ ++ .target_cpus = default_target_cpus, ++ .disable_esr = 0, ++ .dest_logical = APIC_DEST_LOGICAL, ++ .check_apicid_used = default_check_apicid_used, ++ .check_apicid_present = default_check_apicid_present, ++ ++ .vector_allocation_domain = default_vector_allocation_domain, ++ .init_apic_ldr = default_init_apic_ldr, ++ ++ .ioapic_phys_id_map = default_ioapic_phys_id_map, ++ .setup_apic_routing = default_setup_apic_routing, ++ .multi_timer_check = NULL, ++ .apicid_to_node = default_apicid_to_node, ++ .cpu_to_logical_apicid = default_cpu_to_logical_apicid, ++ .cpu_present_to_apicid = default_cpu_present_to_apicid, ++ .apicid_to_cpu_present = default_apicid_to_cpu_present, ++ .setup_portio_remap = NULL, ++ .check_phys_apicid_present = default_check_phys_apicid_present, ++ .enable_apic_mode = NULL, ++ .phys_pkg_id = default_phys_pkg_id, ++ .mps_oem_check = NULL, ++ ++ .get_apic_id = default_get_apic_id, ++ .set_apic_id = NULL, ++ .apic_id_mask = 0x0F << 24, ++ ++ .cpu_mask_to_apicid = default_cpu_mask_to_apicid, ++ .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and, ++ ++ .send_IPI_mask = default_send_IPI_mask_logical, ++ .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical, ++ .send_IPI_allbutself = default_send_IPI_allbutself, ++ .send_IPI_all = default_send_IPI_all, ++ .send_IPI_self = default_send_IPI_self, ++ ++ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, ++ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, ++ ++ .wait_for_init_deassert = default_wait_for_init_deassert, ++ ++ 
.smp_callin_clear_local_apic = NULL, ++ .inquire_remote_apic = default_inquire_remote_apic, ++ ++ .read = native_apic_mem_read, ++ .write = native_apic_mem_write, ++ .icr_read = native_apic_icr_read, ++ .icr_write = native_apic_icr_write, ++ .wait_icr_idle = native_apic_wait_icr_idle, ++ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, ++}; ++ ++extern struct apic apic_numaq; ++extern struct apic apic_summit; ++extern struct apic apic_bigsmp; ++extern struct apic apic_es7000; ++extern struct apic apic_es7000_cluster; ++extern struct apic apic_default; ++ ++struct apic *apic = &apic_default; ++EXPORT_SYMBOL_GPL(apic); ++ ++static struct apic *apic_probe[] __initdata = { ++#ifdef CONFIG_X86_NUMAQ ++ &apic_numaq, ++#endif ++#ifdef CONFIG_X86_SUMMIT ++ &apic_summit, ++#endif ++#ifdef CONFIG_X86_BIGSMP ++ &apic_bigsmp, ++#endif ++#ifdef CONFIG_X86_ES7000 ++ &apic_es7000, ++ &apic_es7000_cluster, ++#endif ++ &apic_default, /* must be last */ ++ NULL, ++}; ++ ++static int cmdline_apic __initdata; ++static int __init parse_apic(char *arg) ++{ ++ int i; ++ ++ if (!arg) ++ return -EINVAL; ++ ++ for (i = 0; apic_probe[i]; i++) { ++ if (!strcmp(apic_probe[i]->name, arg)) { ++ apic = apic_probe[i]; ++ cmdline_apic = 1; ++ return 0; ++ } ++ } ++ ++ /* Parsed again by __setup for debug/verbose */ ++ return 0; ++} ++early_param("apic", parse_apic); ++ ++void __init generic_bigsmp_probe(void) ++{ ++#ifdef CONFIG_X86_BIGSMP ++ /* ++ * This routine is used to switch to bigsmp mode when ++ * - There is no apic= option specified by the user ++ * - generic_apic_probe() has chosen apic_default as the sub_arch ++ * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support ++ */ ++ ++ if (!cmdline_apic && apic == &apic_default) { ++ if (apic_bigsmp.probe()) { ++ apic = &apic_bigsmp; ++ printk(KERN_INFO "Overriding APIC driver with %s\n", ++ apic->name); ++ } ++ } ++#endif ++} ++ ++void __init generic_apic_probe(void) ++{ ++ if (!cmdline_apic) { ++ int i; ++ for (i = 0; apic_probe[i]; i++) { ++ if (apic_probe[i]->probe()) { ++ apic = apic_probe[i]; ++ break; ++ } ++ } ++ /* Not visible without early console */ ++ if (!apic_probe[i]) ++ panic("Didn't find an APIC driver"); ++ } ++ printk(KERN_INFO "Using APIC driver %s\n", apic->name); ++} ++ ++/* These functions can switch the APIC even after the initial ->probe() */ ++ ++int __init ++generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) ++{ ++ int i; ++ ++ for (i = 0; apic_probe[i]; ++i) { ++ if (!apic_probe[i]->mps_oem_check) ++ continue; ++ if (!apic_probe[i]->mps_oem_check(mpc, oem, productid)) ++ continue; ++ ++ if (!cmdline_apic) { ++ apic = apic_probe[i]; ++ printk(KERN_INFO "Switched to APIC driver `%s'.\n", ++ apic->name); ++ } ++ return 1; ++ } ++ return 0; ++} ++ ++int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) ++{ ++ int i; ++ ++ for (i = 0; apic_probe[i]; ++i) { ++ if (!apic_probe[i]->acpi_madt_oem_check) ++ continue; ++ if (!apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) ++ continue; ++ ++ if (!cmdline_apic) { ++ apic = apic_probe[i]; ++ printk(KERN_INFO "Switched to APIC driver `%s'.\n", ++ apic->name); ++ } ++ return 1; ++ } ++ return 0; ++} +Index: linux-2.6-tip/arch/x86/kernel/apic/probe_64.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/apic/probe_64.c +@@ -0,0 +1,100 @@ ++/* ++ * Copyright 2004 James Cleverdon, IBM. ++ * Subject to the GNU Public License, v.2 ++ * ++ * Generic APIC sub-arch probe layer. 
++ * ++ * Hacked for x86-64 by James Cleverdon from i386 architecture code by ++ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and ++ * James Cleverdon. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++extern struct apic apic_flat; ++extern struct apic apic_physflat; ++extern struct apic apic_x2xpic_uv_x; ++extern struct apic apic_x2apic_phys; ++extern struct apic apic_x2apic_cluster; ++ ++struct apic __read_mostly *apic = &apic_flat; ++EXPORT_SYMBOL_GPL(apic); ++ ++static struct apic *apic_probe[] __initdata = { ++#ifdef CONFIG_X86_UV ++ &apic_x2apic_uv_x, ++#endif ++#ifdef CONFIG_X86_X2APIC ++ &apic_x2apic_phys, ++ &apic_x2apic_cluster, ++#endif ++ &apic_physflat, ++ NULL, ++}; ++ ++/* ++ * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. ++ */ ++void __init default_setup_apic_routing(void) ++{ ++#ifdef CONFIG_X86_X2APIC ++ if (x2apic && (apic != &apic_x2apic_phys && ++#ifdef CONFIG_X86_UV ++ apic != &apic_x2apic_uv_x && ++#endif ++ apic != &apic_x2apic_cluster)) { ++ if (x2apic_phys) ++ apic = &apic_x2apic_phys; ++ else ++ apic = &apic_x2apic_cluster; ++ printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); ++ } ++#endif ++ ++ if (apic == &apic_flat) { ++ if (max_physical_apicid >= 8) ++ apic = &apic_physflat; ++ printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); ++ } ++ ++ /* ++ * Now that apic routing model is selected, configure the ++ * fault handling for intr remapping. ++ */ ++ if (intr_remapping_enabled) ++ enable_drhd_fault_handling(); ++} ++ ++/* Same for both flat and physical. */ ++ ++void apic_send_IPI_self(int vector) ++{ ++ __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); ++} ++ ++int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id) ++{ ++ int i; ++ ++ for (i = 0; apic_probe[i]; ++i) { ++ if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { ++ apic = apic_probe[i]; ++ printk(KERN_INFO "Setting APIC routing to %s.\n", ++ apic->name); ++ return 1; ++ } ++ } ++ return 0; ++} +Index: linux-2.6-tip/arch/x86/kernel/apic/summit_32.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/apic/summit_32.c +@@ -0,0 +1,576 @@ ++/* ++ * IBM Summit-Specific Code ++ * ++ * Written By: Matthew Dobson, IBM Corporation ++ * ++ * Copyright (c) 2003 IBM Corp. ++ * ++ * All rights reserved. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or (at ++ * your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or ++ * NON INFRINGEMENT. See the GNU General Public License for more ++ * details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ * ++ * Send feedback to ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++/* ++ * APIC driver for the IBM "Summit" chipset. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static unsigned summit_get_apic_id(unsigned long x) ++{ ++ return (x >> 24) & 0xFF; ++} ++ ++static inline void summit_send_IPI_mask(const struct cpumask *mask, int vector) ++{ ++ default_send_IPI_mask_sequence_logical(mask, vector); ++} ++ ++static void summit_send_IPI_allbutself(int vector) ++{ ++ default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); ++} ++ ++static void summit_send_IPI_all(int vector) ++{ ++ summit_send_IPI_mask(cpu_online_mask, vector); ++} ++ ++#include ++ ++extern int use_cyclone; ++ ++#ifdef CONFIG_X86_SUMMIT_NUMA ++static void setup_summit(void); ++#else ++static inline void setup_summit(void) {} ++#endif ++ ++static int summit_mps_oem_check(struct mpc_table *mpc, char *oem, ++ char *productid) ++{ ++ if (!strncmp(oem, "IBM ENSW", 8) && ++ (!strncmp(productid, "VIGIL SMP", 9) ++ || !strncmp(productid, "EXA", 3) ++ || !strncmp(productid, "RUTHLESS SMP", 12))){ ++ mark_tsc_unstable("Summit based system"); ++ use_cyclone = 1; /*enable cyclone-timer*/ ++ setup_summit(); ++ return 1; ++ } ++ return 0; ++} ++ ++/* Hook from generic ACPI tables.c */ ++static int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) ++{ ++ if (!strncmp(oem_id, "IBM", 3) && ++ (!strncmp(oem_table_id, "SERVIGIL", 8) ++ || !strncmp(oem_table_id, "EXA", 3))){ ++ mark_tsc_unstable("Summit based system"); ++ use_cyclone = 1; /*enable cyclone-timer*/ ++ setup_summit(); ++ return 1; ++ } ++ return 0; ++} ++ ++struct rio_table_hdr { ++ unsigned char version; /* Version number of this data structure */ ++ /* Version 3 adds chassis_num & WP_index */ ++ unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */ ++ unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */ ++} __attribute__((packed)); ++ ++struct scal_detail { ++ unsigned char node_id; /* Scalability Node ID */ ++ unsigned long CBAR; /* Address of 1MB register space */ ++ unsigned char port0node; /* Node ID port connected to: 0xFF=None */ ++ unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ ++ unsigned char port1node; /* Node ID port connected to: 0xFF = None */ ++ unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ ++ unsigned char port2node; /* Node ID port connected to: 0xFF = None */ ++ unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */ ++ unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */ ++} __attribute__((packed)); ++ ++struct rio_detail { ++ unsigned char node_id; /* RIO Node ID */ ++ unsigned long BBAR; /* Address of 1MB register space */ ++ unsigned char type; /* Type of device */ ++ unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/ ++ /* For CYC: Node ID of Twister that owns this CYC */ ++ unsigned char port0node; /* Node ID port connected to: 0xFF=None */ ++ unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ ++ unsigned char port1node; /* Node ID port connected to: 0xFF=None */ ++ unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ ++ unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */ ++ /* For CYC: 0 */ ++ unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */ ++ /* = 0 : the XAPIC is not used, ie:*/ ++ /* ints fwded to another XAPIC */ ++ /* Bits1:7 Reserved */ ++ /* For CYC: Bits0:7 
Reserved */ ++ unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */ ++ /* lower slot numbers/PCI bus numbers */ ++ /* For CYC: No meaning */ ++ unsigned char chassis_num; /* 1 based Chassis number */ ++ /* For LookOut WPEGs this field indicates the */ ++ /* Expansion Chassis #, enumerated from Boot */ ++ /* Node WPEG external port, then Boot Node CYC */ ++ /* external port, then Next Vigil chassis WPEG */ ++ /* external port, etc. */ ++ /* Shared Lookouts have only 1 chassis number (the */ ++ /* first one assigned) */ ++} __attribute__((packed)); ++ ++ ++typedef enum { ++ CompatTwister = 0, /* Compatibility Twister */ ++ AltTwister = 1, /* Alternate Twister of internal 8-way */ ++ CompatCyclone = 2, /* Compatibility Cyclone */ ++ AltCyclone = 3, /* Alternate Cyclone of internal 8-way */ ++ CompatWPEG = 4, /* Compatibility WPEG */ ++ AltWPEG = 5, /* Second Planar WPEG */ ++ LookOutAWPEG = 6, /* LookOut WPEG */ ++ LookOutBWPEG = 7, /* LookOut WPEG */ ++} node_type; ++ ++static inline int is_WPEG(struct rio_detail *rio){ ++ return (rio->type == CompatWPEG || rio->type == AltWPEG || ++ rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); ++} ++ ++ ++/* In clustered mode, the high nibble of APIC ID is a cluster number. ++ * The low nibble is a 4-bit bitmap. */ ++#define XAPIC_DEST_CPUS_SHIFT 4 ++#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) ++#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) ++ ++#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) ++ ++static const struct cpumask *summit_target_cpus(void) ++{ ++ /* CPU_MASK_ALL (0xff) has undefined behaviour with ++ * dest_LowestPrio mode logical clustered apic interrupt routing ++ * Just start on cpu 0. IRQ balancing will spread load ++ */ ++ return cpumask_of(0); ++} ++ ++static unsigned long summit_check_apicid_used(physid_mask_t bitmap, int apicid) ++{ ++ return 0; ++} ++ ++/* we don't use the phys_cpu_present_map to indicate apicid presence */ ++static unsigned long summit_check_apicid_present(int bit) ++{ ++ return 1; ++} ++ ++static void summit_init_apic_ldr(void) ++{ ++ unsigned long val, id; ++ int count = 0; ++ u8 my_id = (u8)hard_smp_processor_id(); ++ u8 my_cluster = APIC_CLUSTER(my_id); ++#ifdef CONFIG_SMP ++ u8 lid; ++ int i; ++ ++ /* Create logical APIC IDs by counting CPUs already in cluster. */ ++ for (count = 0, i = nr_cpu_ids; --i >= 0; ) { ++ lid = cpu_2_logical_apicid[i]; ++ if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster) ++ ++count; ++ } ++#endif ++ /* We only have a 4 wide bitmap in cluster mode. If a deranged ++ * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ ++ BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); ++ id = my_cluster | (1UL << count); ++ apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE); ++ val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; ++ val |= SET_APIC_LOGICAL_ID(id); ++ apic_write(APIC_LDR, val); ++} ++ ++static int summit_apic_id_registered(void) ++{ ++ return 1; ++} ++ ++static void summit_setup_apic_routing(void) ++{ ++ printk("Enabling APIC mode: Summit. 
Using %d I/O APICs\n", ++ nr_ioapics); ++} ++ ++static int summit_apicid_to_node(int logical_apicid) ++{ ++#ifdef CONFIG_SMP ++ return apicid_2_node[hard_smp_processor_id()]; ++#else ++ return 0; ++#endif ++} ++ ++/* Mapping from cpu number to logical apicid */ ++static inline int summit_cpu_to_logical_apicid(int cpu) ++{ ++#ifdef CONFIG_SMP ++ if (cpu >= nr_cpu_ids) ++ return BAD_APICID; ++ return cpu_2_logical_apicid[cpu]; ++#else ++ return logical_smp_processor_id(); ++#endif ++} ++ ++static int summit_cpu_present_to_apicid(int mps_cpu) ++{ ++ if (mps_cpu < nr_cpu_ids) ++ return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); ++ else ++ return BAD_APICID; ++} ++ ++static physid_mask_t summit_ioapic_phys_id_map(physid_mask_t phys_id_map) ++{ ++ /* For clustered we don't have a good way to do this yet - hack */ ++ return physids_promote(0x0F); ++} ++ ++static physid_mask_t summit_apicid_to_cpu_present(int apicid) ++{ ++ return physid_mask_of_physid(0); ++} ++ ++static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) ++{ ++ return 1; ++} ++ ++static unsigned int summit_cpu_mask_to_apicid(const struct cpumask *cpumask) ++{ ++ unsigned int round = 0; ++ int cpu, apicid = 0; ++ ++ /* ++ * The cpus in the mask must all be on the apic cluster. ++ */ ++ for_each_cpu(cpu, cpumask) { ++ int new_apicid = summit_cpu_to_logical_apicid(cpu); ++ ++ if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { ++ printk("%s: Not a valid mask!\n", __func__); ++ return BAD_APICID; ++ } ++ apicid |= new_apicid; ++ round++; ++ } ++ return apicid; ++} ++ ++static unsigned int summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, ++ const struct cpumask *andmask) ++{ ++ int apicid = summit_cpu_to_logical_apicid(0); ++ cpumask_var_t cpumask; ++ ++ if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) ++ return apicid; ++ ++ cpumask_and(cpumask, inmask, andmask); ++ cpumask_and(cpumask, cpumask, cpu_online_mask); ++ apicid = summit_cpu_mask_to_apicid(cpumask); ++ ++ free_cpumask_var(cpumask); ++ ++ return apicid; ++} ++ ++/* ++ * cpuid returns the value latched in the HW at reset, not the APIC ID ++ * register's value. For any box whose BIOS changes APIC IDs, like ++ * clustered APIC systems, we must use hard_smp_processor_id. ++ * ++ * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. ++ */ ++static int summit_phys_pkg_id(int cpuid_apic, int index_msb) ++{ ++ return hard_smp_processor_id() >> index_msb; ++} ++ ++static int probe_summit(void) ++{ ++ /* probed later in mptable/ACPI hooks */ ++ return 0; ++} ++ ++static void summit_vector_allocation_domain(int cpu, struct cpumask *retmask) ++{ ++ /* Careful. Some cpus do not strictly honor the set of cpus ++ * specified in the interrupt destination when using lowest ++ * priority interrupt delivery mode. ++ * ++ * In particular there was a hyperthreading cpu observed to ++ * deliver interrupts to the wrong hyperthread when only one ++ * hyperthread was specified in the interrupt desitination. 
++ */ ++ cpumask_clear(retmask); ++ cpumask_bits(retmask)[0] = APIC_ALL_CPUS; ++} ++ ++#ifdef CONFIG_X86_SUMMIT_NUMA ++static struct rio_table_hdr *rio_table_hdr; ++static struct scal_detail *scal_devs[MAX_NUMNODES]; ++static struct rio_detail *rio_devs[MAX_NUMNODES*4]; ++ ++#ifndef CONFIG_X86_NUMAQ ++static int mp_bus_id_to_node[MAX_MP_BUSSES]; ++#endif ++ ++static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) ++{ ++ int twister = 0, node = 0; ++ int i, bus, num_buses; ++ ++ for (i = 0; i < rio_table_hdr->num_rio_dev; i++) { ++ if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) { ++ twister = rio_devs[i]->owner_id; ++ break; ++ } ++ } ++ if (i == rio_table_hdr->num_rio_dev) { ++ printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__); ++ return last_bus; ++ } ++ ++ for (i = 0; i < rio_table_hdr->num_scal_dev; i++) { ++ if (scal_devs[i]->node_id == twister) { ++ node = scal_devs[i]->node_id; ++ break; ++ } ++ } ++ if (i == rio_table_hdr->num_scal_dev) { ++ printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__); ++ return last_bus; ++ } ++ ++ switch (rio_devs[wpeg_num]->type) { ++ case CompatWPEG: ++ /* ++ * The Compatibility Winnipeg controls the 2 legacy buses, ++ * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case ++ * a PCI-PCI bridge card is used in either slot: total 5 buses. ++ */ ++ num_buses = 5; ++ break; ++ case AltWPEG: ++ /* ++ * The Alternate Winnipeg controls the 2 133MHz buses [1 slot ++ * each], their 2 "extra" buses, the 100MHz bus [2 slots] and ++ * the "extra" buses for each of those slots: total 7 buses. ++ */ ++ num_buses = 7; ++ break; ++ case LookOutAWPEG: ++ case LookOutBWPEG: ++ /* ++ * A Lookout Winnipeg controls 3 100MHz buses [2 slots each] ++ * & the "extra" buses for each of those slots: total 9 buses. ++ */ ++ num_buses = 9; ++ break; ++ default: ++ printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__); ++ return last_bus; ++ } ++ ++ for (bus = last_bus; bus < last_bus + num_buses; bus++) ++ mp_bus_id_to_node[bus] = node; ++ return bus; ++} ++ ++static int build_detail_arrays(void) ++{ ++ unsigned long ptr; ++ int i, scal_detail_size, rio_detail_size; ++ ++ if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { ++ printk(KERN_WARNING "%s: MAX_NUMNODES too low! 
Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev); ++ return 0; ++ } ++ ++ switch (rio_table_hdr->version) { ++ default: ++ printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version); ++ return 0; ++ case 2: ++ scal_detail_size = 11; ++ rio_detail_size = 13; ++ break; ++ case 3: ++ scal_detail_size = 12; ++ rio_detail_size = 15; ++ break; ++ } ++ ++ ptr = (unsigned long)rio_table_hdr + 3; ++ for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size) ++ scal_devs[i] = (struct scal_detail *)ptr; ++ ++ for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size) ++ rio_devs[i] = (struct rio_detail *)ptr; ++ ++ return 1; ++} ++ ++void setup_summit(void) ++{ ++ unsigned long ptr; ++ unsigned short offset; ++ int i, next_wpeg, next_bus = 0; ++ ++ /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */ ++ ptr = get_bios_ebda(); ++ ptr = (unsigned long)phys_to_virt(ptr); ++ ++ rio_table_hdr = NULL; ++ offset = 0x180; ++ while (offset) { ++ /* The block id is stored in the 2nd word */ ++ if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) { ++ /* set the pointer past the offset & block id */ ++ rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4); ++ break; ++ } ++ /* The next offset is stored in the 1st word. 0 means no more */ ++ offset = *((unsigned short *)(ptr + offset)); ++ } ++ if (!rio_table_hdr) { ++ printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__); ++ return; ++ } ++ ++ if (!build_detail_arrays()) ++ return; ++ ++ /* The first Winnipeg we're looking for has an index of 0 */ ++ next_wpeg = 0; ++ do { ++ for (i = 0; i < rio_table_hdr->num_rio_dev; i++) { ++ if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg) { ++ /* It's the Winnipeg we're looking for! */ ++ next_bus = setup_pci_node_map_for_wpeg(i, next_bus); ++ next_wpeg++; ++ break; ++ } ++ } ++ /* ++ * If we go through all Rio devices and don't find one with ++ * the next index, it means we've found all the Winnipegs, ++ * and thus all the PCI buses. 
++ */ ++ if (i == rio_table_hdr->num_rio_dev) ++ next_wpeg = 0; ++ } while (next_wpeg != 0); ++} ++#endif ++ ++struct apic apic_summit = { ++ ++ .name = "summit", ++ .probe = probe_summit, ++ .acpi_madt_oem_check = summit_acpi_madt_oem_check, ++ .apic_id_registered = summit_apic_id_registered, ++ ++ .irq_delivery_mode = dest_LowestPrio, ++ /* logical delivery broadcast to all CPUs: */ ++ .irq_dest_mode = 1, ++ ++ .target_cpus = summit_target_cpus, ++ .disable_esr = 1, ++ .dest_logical = APIC_DEST_LOGICAL, ++ .check_apicid_used = summit_check_apicid_used, ++ .check_apicid_present = summit_check_apicid_present, ++ ++ .vector_allocation_domain = summit_vector_allocation_domain, ++ .init_apic_ldr = summit_init_apic_ldr, ++ ++ .ioapic_phys_id_map = summit_ioapic_phys_id_map, ++ .setup_apic_routing = summit_setup_apic_routing, ++ .multi_timer_check = NULL, ++ .apicid_to_node = summit_apicid_to_node, ++ .cpu_to_logical_apicid = summit_cpu_to_logical_apicid, ++ .cpu_present_to_apicid = summit_cpu_present_to_apicid, ++ .apicid_to_cpu_present = summit_apicid_to_cpu_present, ++ .setup_portio_remap = NULL, ++ .check_phys_apicid_present = summit_check_phys_apicid_present, ++ .enable_apic_mode = NULL, ++ .phys_pkg_id = summit_phys_pkg_id, ++ .mps_oem_check = summit_mps_oem_check, ++ ++ .get_apic_id = summit_get_apic_id, ++ .set_apic_id = NULL, ++ .apic_id_mask = 0xFF << 24, ++ ++ .cpu_mask_to_apicid = summit_cpu_mask_to_apicid, ++ .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, ++ ++ .send_IPI_mask = summit_send_IPI_mask, ++ .send_IPI_mask_allbutself = NULL, ++ .send_IPI_allbutself = summit_send_IPI_allbutself, ++ .send_IPI_all = summit_send_IPI_all, ++ .send_IPI_self = default_send_IPI_self, ++ ++ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, ++ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, ++ ++ .wait_for_init_deassert = default_wait_for_init_deassert, ++ ++ .smp_callin_clear_local_apic = NULL, ++ .inquire_remote_apic = default_inquire_remote_apic, ++ ++ .read = native_apic_mem_read, ++ .write = native_apic_mem_write, ++ .icr_read = native_apic_icr_read, ++ .icr_write = native_apic_icr_write, ++ .wait_icr_idle = native_apic_wait_icr_idle, ++ .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, ++}; +Index: linux-2.6-tip/arch/x86/kernel/apic/x2apic_cluster.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/apic/x2apic_cluster.c +@@ -0,0 +1,245 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); ++ ++static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) ++{ ++ return x2apic_enabled(); ++} ++ ++/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ ++ ++static const struct cpumask *x2apic_target_cpus(void) ++{ ++ return cpumask_of(0); ++} ++ ++/* ++ * for now each logical cpu is in its own vector allocation domain. ++ */ ++static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) ++{ ++ cpumask_clear(retmask); ++ cpumask_set_cpu(cpu, retmask); ++} ++ ++static void ++ __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest) ++{ ++ unsigned long cfg; ++ ++ cfg = __prepare_ICR(0, vector, dest); ++ ++ /* ++ * send the IPI. ++ */ ++ native_x2apic_icr_write(cfg, apicid); ++} ++ ++/* ++ * for now, we send the IPI's one by one in the cpumask. 
++ * TBD: Based on the cpu mask, we can send the IPI's to the cluster group ++ * at once. We have 16 cpu's in a cluster. This will minimize IPI register ++ * writes. ++ */ ++static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) ++{ ++ unsigned long query_cpu; ++ unsigned long flags; ++ ++ x2apic_wrmsr_fence(); ++ ++ local_irq_save(flags); ++ for_each_cpu(query_cpu, mask) { ++ __x2apic_send_IPI_dest( ++ per_cpu(x86_cpu_to_logical_apicid, query_cpu), ++ vector, apic->dest_logical); ++ } ++ local_irq_restore(flags); ++} ++ ++static void ++ x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) ++{ ++ unsigned long this_cpu = smp_processor_id(); ++ unsigned long query_cpu; ++ unsigned long flags; ++ ++ x2apic_wrmsr_fence(); ++ ++ local_irq_save(flags); ++ for_each_cpu(query_cpu, mask) { ++ if (query_cpu == this_cpu) ++ continue; ++ __x2apic_send_IPI_dest( ++ per_cpu(x86_cpu_to_logical_apicid, query_cpu), ++ vector, apic->dest_logical); ++ } ++ local_irq_restore(flags); ++} ++ ++static void x2apic_send_IPI_allbutself(int vector) ++{ ++ unsigned long this_cpu = smp_processor_id(); ++ unsigned long query_cpu; ++ unsigned long flags; ++ ++ x2apic_wrmsr_fence(); ++ ++ local_irq_save(flags); ++ for_each_online_cpu(query_cpu) { ++ if (query_cpu == this_cpu) ++ continue; ++ __x2apic_send_IPI_dest( ++ per_cpu(x86_cpu_to_logical_apicid, query_cpu), ++ vector, apic->dest_logical); ++ } ++ local_irq_restore(flags); ++} ++ ++static void x2apic_send_IPI_all(int vector) ++{ ++ x2apic_send_IPI_mask(cpu_online_mask, vector); ++} ++ ++static int x2apic_apic_id_registered(void) ++{ ++ return 1; ++} ++ ++static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) ++{ ++ /* ++ * We're using fixed IRQ delivery, can only return one logical APIC ID. ++ * May as well be the first. ++ */ ++ int cpu = cpumask_first(cpumask); ++ ++ if ((unsigned)cpu < nr_cpu_ids) ++ return per_cpu(x86_cpu_to_logical_apicid, cpu); ++ else ++ return BAD_APICID; ++} ++ ++static unsigned int ++x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, ++ const struct cpumask *andmask) ++{ ++ int cpu; ++ ++ /* ++ * We're using fixed IRQ delivery, can only return one logical APIC ID. ++ * May as well be the first. 
++ */ ++ for_each_cpu_and(cpu, cpumask, andmask) { ++ if (cpumask_test_cpu(cpu, cpu_online_mask)) ++ break; ++ } ++ ++ if (cpu < nr_cpu_ids) ++ return per_cpu(x86_cpu_to_logical_apicid, cpu); ++ ++ return BAD_APICID; ++} ++ ++static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x) ++{ ++ unsigned int id; ++ ++ id = x; ++ return id; ++} ++ ++static unsigned long set_apic_id(unsigned int id) ++{ ++ unsigned long x; ++ ++ x = id; ++ return x; ++} ++ ++static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb) ++{ ++ return current_cpu_data.initial_apicid >> index_msb; ++} ++ ++static void x2apic_send_IPI_self(int vector) ++{ ++ apic_write(APIC_SELF_IPI, vector); ++} ++ ++static void init_x2apic_ldr(void) ++{ ++ int cpu = smp_processor_id(); ++ ++ per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); ++} ++ ++struct apic apic_x2apic_cluster = { ++ ++ .name = "cluster x2apic", ++ .probe = NULL, ++ .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, ++ .apic_id_registered = x2apic_apic_id_registered, ++ ++ .irq_delivery_mode = dest_LowestPrio, ++ .irq_dest_mode = 1, /* logical */ ++ ++ .target_cpus = x2apic_target_cpus, ++ .disable_esr = 0, ++ .dest_logical = APIC_DEST_LOGICAL, ++ .check_apicid_used = NULL, ++ .check_apicid_present = NULL, ++ ++ .vector_allocation_domain = x2apic_vector_allocation_domain, ++ .init_apic_ldr = init_x2apic_ldr, ++ ++ .ioapic_phys_id_map = NULL, ++ .setup_apic_routing = NULL, ++ .multi_timer_check = NULL, ++ .apicid_to_node = NULL, ++ .cpu_to_logical_apicid = NULL, ++ .cpu_present_to_apicid = default_cpu_present_to_apicid, ++ .apicid_to_cpu_present = NULL, ++ .setup_portio_remap = NULL, ++ .check_phys_apicid_present = default_check_phys_apicid_present, ++ .enable_apic_mode = NULL, ++ .phys_pkg_id = x2apic_cluster_phys_pkg_id, ++ .mps_oem_check = NULL, ++ ++ .get_apic_id = x2apic_cluster_phys_get_apic_id, ++ .set_apic_id = set_apic_id, ++ .apic_id_mask = 0xFFFFFFFFu, ++ ++ .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, ++ .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, ++ ++ .send_IPI_mask = x2apic_send_IPI_mask, ++ .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, ++ .send_IPI_allbutself = x2apic_send_IPI_allbutself, ++ .send_IPI_all = x2apic_send_IPI_all, ++ .send_IPI_self = x2apic_send_IPI_self, ++ ++ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, ++ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, ++ .wait_for_init_deassert = NULL, ++ .smp_callin_clear_local_apic = NULL, ++ .inquire_remote_apic = NULL, ++ ++ .read = native_apic_msr_read, ++ .write = native_apic_msr_write, ++ .icr_read = native_x2apic_icr_read, ++ .icr_write = native_x2apic_icr_write, ++ .wait_icr_idle = native_x2apic_wait_icr_idle, ++ .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, ++}; +Index: linux-2.6-tip/arch/x86/kernel/apic/x2apic_phys.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/apic/x2apic_phys.c +@@ -0,0 +1,234 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++int x2apic_phys; ++ ++static int set_x2apic_phys_mode(char *arg) ++{ ++ x2apic_phys = 1; ++ return 0; ++} ++early_param("x2apic_phys", set_x2apic_phys_mode); ++ ++static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) ++{ ++ if (x2apic_phys) ++ return x2apic_enabled(); ++ else ++ return 0; ++} ++ ++/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. 
*/ ++ ++static const struct cpumask *x2apic_target_cpus(void) ++{ ++ return cpumask_of(0); ++} ++ ++static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) ++{ ++ cpumask_clear(retmask); ++ cpumask_set_cpu(cpu, retmask); ++} ++ ++static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, ++ unsigned int dest) ++{ ++ unsigned long cfg; ++ ++ cfg = __prepare_ICR(0, vector, dest); ++ ++ /* ++ * send the IPI. ++ */ ++ native_x2apic_icr_write(cfg, apicid); ++} ++ ++static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) ++{ ++ unsigned long query_cpu; ++ unsigned long flags; ++ ++ x2apic_wrmsr_fence(); ++ ++ local_irq_save(flags); ++ for_each_cpu(query_cpu, mask) { ++ __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), ++ vector, APIC_DEST_PHYSICAL); ++ } ++ local_irq_restore(flags); ++} ++ ++static void ++ x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) ++{ ++ unsigned long this_cpu = smp_processor_id(); ++ unsigned long query_cpu; ++ unsigned long flags; ++ ++ x2apic_wrmsr_fence(); ++ ++ local_irq_save(flags); ++ for_each_cpu(query_cpu, mask) { ++ if (query_cpu != this_cpu) ++ __x2apic_send_IPI_dest( ++ per_cpu(x86_cpu_to_apicid, query_cpu), ++ vector, APIC_DEST_PHYSICAL); ++ } ++ local_irq_restore(flags); ++} ++ ++static void x2apic_send_IPI_allbutself(int vector) ++{ ++ unsigned long this_cpu = smp_processor_id(); ++ unsigned long query_cpu; ++ unsigned long flags; ++ ++ x2apic_wrmsr_fence(); ++ ++ local_irq_save(flags); ++ for_each_online_cpu(query_cpu) { ++ if (query_cpu == this_cpu) ++ continue; ++ __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), ++ vector, APIC_DEST_PHYSICAL); ++ } ++ local_irq_restore(flags); ++} ++ ++static void x2apic_send_IPI_all(int vector) ++{ ++ x2apic_send_IPI_mask(cpu_online_mask, vector); ++} ++ ++static int x2apic_apic_id_registered(void) ++{ ++ return 1; ++} ++ ++static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) ++{ ++ /* ++ * We're using fixed IRQ delivery, can only return one phys APIC ID. ++ * May as well be the first. ++ */ ++ int cpu = cpumask_first(cpumask); ++ ++ if ((unsigned)cpu < nr_cpu_ids) ++ return per_cpu(x86_cpu_to_apicid, cpu); ++ else ++ return BAD_APICID; ++} ++ ++static unsigned int ++x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, ++ const struct cpumask *andmask) ++{ ++ int cpu; ++ ++ /* ++ * We're using fixed IRQ delivery, can only return one phys APIC ID. ++ * May as well be the first. 
++ */ ++ for_each_cpu_and(cpu, cpumask, andmask) { ++ if (cpumask_test_cpu(cpu, cpu_online_mask)) ++ break; ++ } ++ ++ if (cpu < nr_cpu_ids) ++ return per_cpu(x86_cpu_to_apicid, cpu); ++ ++ return BAD_APICID; ++} ++ ++static unsigned int x2apic_phys_get_apic_id(unsigned long x) ++{ ++ return x; ++} ++ ++static unsigned long set_apic_id(unsigned int id) ++{ ++ return id; ++} ++ ++static int x2apic_phys_pkg_id(int initial_apicid, int index_msb) ++{ ++ return current_cpu_data.initial_apicid >> index_msb; ++} ++ ++static void x2apic_send_IPI_self(int vector) ++{ ++ apic_write(APIC_SELF_IPI, vector); ++} ++ ++static void init_x2apic_ldr(void) ++{ ++} ++ ++struct apic apic_x2apic_phys = { ++ ++ .name = "physical x2apic", ++ .probe = NULL, ++ .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, ++ .apic_id_registered = x2apic_apic_id_registered, ++ ++ .irq_delivery_mode = dest_Fixed, ++ .irq_dest_mode = 0, /* physical */ ++ ++ .target_cpus = x2apic_target_cpus, ++ .disable_esr = 0, ++ .dest_logical = 0, ++ .check_apicid_used = NULL, ++ .check_apicid_present = NULL, ++ ++ .vector_allocation_domain = x2apic_vector_allocation_domain, ++ .init_apic_ldr = init_x2apic_ldr, ++ ++ .ioapic_phys_id_map = NULL, ++ .setup_apic_routing = NULL, ++ .multi_timer_check = NULL, ++ .apicid_to_node = NULL, ++ .cpu_to_logical_apicid = NULL, ++ .cpu_present_to_apicid = default_cpu_present_to_apicid, ++ .apicid_to_cpu_present = NULL, ++ .setup_portio_remap = NULL, ++ .check_phys_apicid_present = default_check_phys_apicid_present, ++ .enable_apic_mode = NULL, ++ .phys_pkg_id = x2apic_phys_pkg_id, ++ .mps_oem_check = NULL, ++ ++ .get_apic_id = x2apic_phys_get_apic_id, ++ .set_apic_id = set_apic_id, ++ .apic_id_mask = 0xFFFFFFFFu, ++ ++ .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, ++ .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, ++ ++ .send_IPI_mask = x2apic_send_IPI_mask, ++ .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, ++ .send_IPI_allbutself = x2apic_send_IPI_allbutself, ++ .send_IPI_all = x2apic_send_IPI_all, ++ .send_IPI_self = x2apic_send_IPI_self, ++ ++ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, ++ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, ++ .wait_for_init_deassert = NULL, ++ .smp_callin_clear_local_apic = NULL, ++ .inquire_remote_apic = NULL, ++ ++ .read = native_apic_msr_read, ++ .write = native_apic_msr_write, ++ .icr_read = native_x2apic_icr_read, ++ .icr_write = native_x2apic_icr_write, ++ .wait_icr_idle = native_x2apic_wait_icr_idle, ++ .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, ++}; +Index: linux-2.6-tip/arch/x86/kernel/apic/x2apic_uv_x.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/apic/x2apic_uv_x.c +@@ -0,0 +1,648 @@ ++/* ++ * This file is subject to the terms and conditions of the GNU General Public ++ * License. See the file "COPYING" in the main directory of this archive ++ * for more details. ++ * ++ * SGI UV APIC functions (note: not an Intel compatible APIC) ++ * ++ * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. 
++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++DEFINE_PER_CPU(int, x2apic_extra_bits); ++ ++static enum uv_system_type uv_system_type; ++ ++static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) ++{ ++ if (!strcmp(oem_id, "SGI")) { ++ if (!strcmp(oem_table_id, "UVL")) ++ uv_system_type = UV_LEGACY_APIC; ++ else if (!strcmp(oem_table_id, "UVX")) ++ uv_system_type = UV_X2APIC; ++ else if (!strcmp(oem_table_id, "UVH")) { ++ uv_system_type = UV_NON_UNIQUE_APIC; ++ return 1; ++ } ++ } ++ return 0; ++} ++ ++enum uv_system_type get_uv_system_type(void) ++{ ++ return uv_system_type; ++} ++ ++int is_uv_system(void) ++{ ++ return uv_system_type != UV_NONE; ++} ++EXPORT_SYMBOL_GPL(is_uv_system); ++ ++DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); ++EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); ++ ++struct uv_blade_info *uv_blade_info; ++EXPORT_SYMBOL_GPL(uv_blade_info); ++ ++short *uv_node_to_blade; ++EXPORT_SYMBOL_GPL(uv_node_to_blade); ++ ++short *uv_cpu_to_blade; ++EXPORT_SYMBOL_GPL(uv_cpu_to_blade); ++ ++short uv_possible_blades; ++EXPORT_SYMBOL_GPL(uv_possible_blades); ++ ++unsigned long sn_rtc_cycles_per_second; ++EXPORT_SYMBOL(sn_rtc_cycles_per_second); ++ ++/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ ++ ++static const struct cpumask *uv_target_cpus(void) ++{ ++ return cpumask_of(0); ++} ++ ++static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) ++{ ++ cpumask_clear(retmask); ++ cpumask_set_cpu(cpu, retmask); ++} ++ ++static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) ++{ ++#ifdef CONFIG_SMP ++ unsigned long val; ++ int pnode; ++ ++ pnode = uv_apicid_to_pnode(phys_apicid); ++ val = (1UL << UVH_IPI_INT_SEND_SHFT) | ++ (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | ++ ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | ++ APIC_DM_INIT; ++ uv_write_global_mmr64(pnode, UVH_IPI_INT, val); ++ mdelay(10); ++ ++ val = (1UL << UVH_IPI_INT_SEND_SHFT) | ++ (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | ++ ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | ++ APIC_DM_STARTUP; ++ uv_write_global_mmr64(pnode, UVH_IPI_INT, val); ++ ++ atomic_set(&init_deasserted, 1); ++#endif ++ return 0; ++} ++ ++static void uv_send_IPI_one(int cpu, int vector) ++{ ++ unsigned long val, apicid; ++ int pnode; ++ ++ apicid = per_cpu(x86_cpu_to_apicid, cpu); ++ pnode = uv_apicid_to_pnode(apicid); ++ ++ val = (1UL << UVH_IPI_INT_SEND_SHFT) | ++ (apicid << UVH_IPI_INT_APIC_ID_SHFT) | ++ (vector << UVH_IPI_INT_VECTOR_SHFT); ++ ++ uv_write_global_mmr64(pnode, UVH_IPI_INT, val); ++} ++ ++static void uv_send_IPI_mask(const struct cpumask *mask, int vector) ++{ ++ unsigned int cpu; ++ ++ for_each_cpu(cpu, mask) ++ uv_send_IPI_one(cpu, vector); ++} ++ ++static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) ++{ ++ unsigned int this_cpu = smp_processor_id(); ++ unsigned int cpu; ++ ++ for_each_cpu(cpu, mask) { ++ if (cpu != this_cpu) ++ uv_send_IPI_one(cpu, vector); ++ } ++} ++ ++static void uv_send_IPI_allbutself(int vector) ++{ ++ unsigned int this_cpu = smp_processor_id(); ++ unsigned int cpu; ++ ++ for_each_online_cpu(cpu) { ++ if (cpu != this_cpu) ++ uv_send_IPI_one(cpu, vector); ++ } ++} ++ ++static void uv_send_IPI_all(int vector) ++{ ++ uv_send_IPI_mask(cpu_online_mask, vector); ++} ++ ++static int uv_apic_id_registered(void) ++{ 
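/*
 * [Editorial note, not part of the upstream patch.] The ->apic_id_registered()
 * hook traditionally checks that the boot CPU's APIC ID shows up in the
 * physical APIC ID present map; on SGI UV there appears to be no such
 * table-based check to perform, so the function below simply reports success.
 */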
++ return 1; ++} ++ ++static void uv_init_apic_ldr(void) ++{ ++} ++ ++static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) ++{ ++ /* ++ * We're using fixed IRQ delivery, can only return one phys APIC ID. ++ * May as well be the first. ++ */ ++ int cpu = cpumask_first(cpumask); ++ ++ if ((unsigned)cpu < nr_cpu_ids) ++ return per_cpu(x86_cpu_to_apicid, cpu); ++ else ++ return BAD_APICID; ++} ++ ++static unsigned int ++uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, ++ const struct cpumask *andmask) ++{ ++ int cpu; ++ ++ /* ++ * We're using fixed IRQ delivery, can only return one phys APIC ID. ++ * May as well be the first. ++ */ ++ for_each_cpu_and(cpu, cpumask, andmask) { ++ if (cpumask_test_cpu(cpu, cpu_online_mask)) ++ break; ++ } ++ if (cpu < nr_cpu_ids) ++ return per_cpu(x86_cpu_to_apicid, cpu); ++ ++ return BAD_APICID; ++} ++ ++static unsigned int x2apic_get_apic_id(unsigned long x) ++{ ++ unsigned int id; ++ ++ WARN_ON(preemptible() && num_online_cpus() > 1); ++ id = x | __get_cpu_var(x2apic_extra_bits); ++ ++ return id; ++} ++ ++static unsigned long set_apic_id(unsigned int id) ++{ ++ unsigned long x; ++ ++ /* maskout x2apic_extra_bits ? */ ++ x = id; ++ return x; ++} ++ ++static unsigned int uv_read_apic_id(void) ++{ ++ ++ return x2apic_get_apic_id(apic_read(APIC_ID)); ++} ++ ++static int uv_phys_pkg_id(int initial_apicid, int index_msb) ++{ ++ return uv_read_apic_id() >> index_msb; ++} ++ ++static void uv_send_IPI_self(int vector) ++{ ++ apic_write(APIC_SELF_IPI, vector); ++} ++ ++struct apic apic_x2apic_uv_x = { ++ ++ .name = "UV large system", ++ .probe = NULL, ++ .acpi_madt_oem_check = uv_acpi_madt_oem_check, ++ .apic_id_registered = uv_apic_id_registered, ++ ++ .irq_delivery_mode = dest_Fixed, ++ .irq_dest_mode = 1, /* logical */ ++ ++ .target_cpus = uv_target_cpus, ++ .disable_esr = 0, ++ .dest_logical = APIC_DEST_LOGICAL, ++ .check_apicid_used = NULL, ++ .check_apicid_present = NULL, ++ ++ .vector_allocation_domain = uv_vector_allocation_domain, ++ .init_apic_ldr = uv_init_apic_ldr, ++ ++ .ioapic_phys_id_map = NULL, ++ .setup_apic_routing = NULL, ++ .multi_timer_check = NULL, ++ .apicid_to_node = NULL, ++ .cpu_to_logical_apicid = NULL, ++ .cpu_present_to_apicid = default_cpu_present_to_apicid, ++ .apicid_to_cpu_present = NULL, ++ .setup_portio_remap = NULL, ++ .check_phys_apicid_present = default_check_phys_apicid_present, ++ .enable_apic_mode = NULL, ++ .phys_pkg_id = uv_phys_pkg_id, ++ .mps_oem_check = NULL, ++ ++ .get_apic_id = x2apic_get_apic_id, ++ .set_apic_id = set_apic_id, ++ .apic_id_mask = 0xFFFFFFFFu, ++ ++ .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, ++ .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, ++ ++ .send_IPI_mask = uv_send_IPI_mask, ++ .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, ++ .send_IPI_allbutself = uv_send_IPI_allbutself, ++ .send_IPI_all = uv_send_IPI_all, ++ .send_IPI_self = uv_send_IPI_self, ++ ++ .wakeup_secondary_cpu = uv_wakeup_secondary, ++ .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, ++ .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, ++ .wait_for_init_deassert = NULL, ++ .smp_callin_clear_local_apic = NULL, ++ .inquire_remote_apic = NULL, ++ ++ .read = native_apic_msr_read, ++ .write = native_apic_msr_write, ++ .icr_read = native_x2apic_icr_read, ++ .icr_write = native_x2apic_icr_write, ++ .wait_icr_idle = native_x2apic_wait_icr_idle, ++ .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle, ++}; ++ ++static __cpuinit void set_x2apic_extra_bits(int pnode) ++{ ++ 
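/*
 * [Editorial note, not part of the upstream patch.] On UV systems of type
 * UV_NON_UNIQUE_APIC (see uv_cpu_init() further down), the hub's pnode is
 * stashed per-CPU here and OR-ed back into the hardware value by
 * x2apic_get_apic_id() above, keeping APIC IDs unique across blades. The
 * shift of 6 presumably leaves room for up to 64 local APIC IDs per pnode.
 */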
__get_cpu_var(x2apic_extra_bits) = (pnode << 6); ++} ++ ++/* ++ * Called on boot cpu. ++ */ ++static __init int boot_pnode_to_blade(int pnode) ++{ ++ int blade; ++ ++ for (blade = 0; blade < uv_num_possible_blades(); blade++) ++ if (pnode == uv_blade_info[blade].pnode) ++ return blade; ++ ++ panic("x2apic_uv: bad pnode!"); ++} ++ ++struct redir_addr { ++ unsigned long redirect; ++ unsigned long alias; ++}; ++ ++#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT ++ ++static __initdata struct redir_addr redir_addrs[] = { ++ {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG}, ++ {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG}, ++ {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG}, ++}; ++ ++static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) ++{ ++ union uvh_si_alias0_overlay_config_u alias; ++ union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect; ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { ++ alias.v = uv_read_local_mmr(redir_addrs[i].alias); ++ if (alias.s.base == 0) { ++ *size = (1UL << alias.s.m_alias); ++ redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); ++ *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; ++ return; ++ } ++ } ++ panic("get_lowmem_redirect: no match!"); ++} ++ ++static __init void map_low_mmrs(void) ++{ ++ init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); ++ init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); ++} ++ ++enum map_type {map_wb, map_uc}; ++ ++static __init void map_high(char *id, unsigned long base, int shift, ++ int max_pnode, enum map_type map_type) ++{ ++ unsigned long bytes, paddr; ++ ++ paddr = base << shift; ++ bytes = (1UL << shift) * (max_pnode + 1); ++ printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, ++ paddr + bytes); ++ if (map_type == map_uc) ++ init_extra_mapping_uc(paddr, bytes); ++ else ++ init_extra_mapping_wb(paddr, bytes); ++ ++} ++static __init void map_gru_high(int max_pnode) ++{ ++ union uvh_rh_gam_gru_overlay_config_mmr_u gru; ++ int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; ++ ++ gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); ++ if (gru.s.enable) ++ map_high("GRU", gru.s.base, shift, max_pnode, map_wb); ++} ++ ++static __init void map_config_high(int max_pnode) ++{ ++ union uvh_rh_gam_cfg_overlay_config_mmr_u cfg; ++ int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT; ++ ++ cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); ++ if (cfg.s.enable) ++ map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc); ++} ++ ++static __init void map_mmr_high(int max_pnode) ++{ ++ union uvh_rh_gam_mmr_overlay_config_mmr_u mmr; ++ int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT; ++ ++ mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); ++ if (mmr.s.enable) ++ map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); ++} ++ ++static __init void map_mmioh_high(int max_pnode) ++{ ++ union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; ++ int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; ++ ++ mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); ++ if (mmioh.s.enable) ++ map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); ++} ++ ++static __init void uv_rtc_init(void) ++{ ++ long status; ++ u64 ticks_per_sec; ++ ++ status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, ++ &ticks_per_sec); ++ if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) { ++ 
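/*
 * [Editorial note, not part of the upstream patch.] The fallback below works
 * out to 1000000000000 / 30000 = 33,333,333 cycles per second, i.e. the RTC
 * is assumed to tick at roughly 33.3 MHz whenever the BIOS query fails or
 * reports an implausibly low rate (< 100000 ticks/sec per the check above).
 */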
printk(KERN_WARNING ++ "unable to determine platform RTC clock frequency, " ++ "guessing.\n"); ++ /* BIOS gives wrong value for clock freq. so guess */ ++ sn_rtc_cycles_per_second = 1000000000000UL / 30000UL; ++ } else ++ sn_rtc_cycles_per_second = ticks_per_sec; ++} ++ ++/* ++ * percpu heartbeat timer ++ */ ++static void uv_heartbeat(unsigned long ignored) ++{ ++ struct timer_list *timer = &uv_hub_info->scir.timer; ++ unsigned char bits = uv_hub_info->scir.state; ++ ++ /* flip heartbeat bit */ ++ bits ^= SCIR_CPU_HEARTBEAT; ++ ++ /* is this cpu idle? */ ++ if (idle_cpu(raw_smp_processor_id())) ++ bits &= ~SCIR_CPU_ACTIVITY; ++ else ++ bits |= SCIR_CPU_ACTIVITY; ++ ++ /* update system controller interface reg */ ++ uv_set_scir_bits(bits); ++ ++ /* enable next timer period */ ++ mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL); ++} ++ ++static void __cpuinit uv_heartbeat_enable(int cpu) ++{ ++ if (!uv_cpu_hub_info(cpu)->scir.enabled) { ++ struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; ++ ++ uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); ++ setup_timer(timer, uv_heartbeat, cpu); ++ timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; ++ add_timer_on(timer, cpu); ++ uv_cpu_hub_info(cpu)->scir.enabled = 1; ++ } ++ ++ /* check boot cpu */ ++ if (!uv_cpu_hub_info(0)->scir.enabled) ++ uv_heartbeat_enable(0); ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++static void __cpuinit uv_heartbeat_disable(int cpu) ++{ ++ if (uv_cpu_hub_info(cpu)->scir.enabled) { ++ uv_cpu_hub_info(cpu)->scir.enabled = 0; ++ del_timer(&uv_cpu_hub_info(cpu)->scir.timer); ++ } ++ uv_set_cpu_scir_bits(cpu, 0xff); ++} ++ ++/* ++ * cpu hotplug notifier ++ */ ++static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self, ++ unsigned long action, void *hcpu) ++{ ++ long cpu = (long)hcpu; ++ ++ switch (action) { ++ case CPU_ONLINE: ++ uv_heartbeat_enable(cpu); ++ break; ++ case CPU_DOWN_PREPARE: ++ uv_heartbeat_disable(cpu); ++ break; ++ default: ++ break; ++ } ++ return NOTIFY_OK; ++} ++ ++static __init void uv_scir_register_cpu_notifier(void) ++{ ++ hotcpu_notifier(uv_scir_cpu_notify, 0); ++} ++ ++#else /* !CONFIG_HOTPLUG_CPU */ ++ ++static __init void uv_scir_register_cpu_notifier(void) ++{ ++} ++ ++static __init int uv_init_heartbeat(void) ++{ ++ int cpu; ++ ++ if (is_uv_system()) ++ for_each_online_cpu(cpu) ++ uv_heartbeat_enable(cpu); ++ return 0; ++} ++ ++late_initcall(uv_init_heartbeat); ++ ++#endif /* !CONFIG_HOTPLUG_CPU */ ++ ++/* ++ * Called on each cpu to initialize the per_cpu UV data area. ++ * FIXME: hotplug not supported yet ++ */ ++void __cpuinit uv_cpu_init(void) ++{ ++ /* CPU 0 initilization will be done via uv_system_init. 
*/ ++ if (!uv_blade_info) ++ return; ++ ++ uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; ++ ++ if (get_uv_system_type() == UV_NON_UNIQUE_APIC) ++ set_x2apic_extra_bits(uv_hub_info->pnode); ++} ++ ++ ++void __init uv_system_init(void) ++{ ++ union uvh_si_addr_map_config_u m_n_config; ++ union uvh_node_id_u node_id; ++ unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; ++ int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; ++ int max_pnode = 0; ++ unsigned long mmr_base, present; ++ ++ map_low_mmrs(); ++ ++ m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); ++ m_val = m_n_config.s.m_skt; ++ n_val = m_n_config.s.n_skt; ++ mmr_base = ++ uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & ++ ~UV_MMR_ENABLE; ++ printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); ++ ++ for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) ++ uv_possible_blades += ++ hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); ++ printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); ++ ++ bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); ++ uv_blade_info = kmalloc(bytes, GFP_KERNEL); ++ ++ get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); ++ ++ bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); ++ uv_node_to_blade = kmalloc(bytes, GFP_KERNEL); ++ memset(uv_node_to_blade, 255, bytes); ++ ++ bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus(); ++ uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL); ++ memset(uv_cpu_to_blade, 255, bytes); ++ ++ blade = 0; ++ for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) { ++ present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8); ++ for (j = 0; j < 64; j++) { ++ if (!test_bit(j, &present)) ++ continue; ++ uv_blade_info[blade].pnode = (i * 64 + j); ++ uv_blade_info[blade].nr_possible_cpus = 0; ++ uv_blade_info[blade].nr_online_cpus = 0; ++ blade++; ++ } ++ } ++ ++ node_id.v = uv_read_local_mmr(UVH_NODE_ID); ++ gnode_upper = (((unsigned long)node_id.s.node_id) & ++ ~((1 << n_val) - 1)) << m_val; ++ ++ uv_bios_init(); ++ uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, ++ &sn_coherency_id, &sn_region_size); ++ uv_rtc_init(); ++ ++ for_each_present_cpu(cpu) { ++ nid = cpu_to_node(cpu); ++ pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); ++ blade = boot_pnode_to_blade(pnode); ++ lcpu = uv_blade_info[blade].nr_possible_cpus; ++ uv_blade_info[blade].nr_possible_cpus++; ++ ++ uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; ++ uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; ++ uv_cpu_hub_info(cpu)->m_val = m_val; ++ uv_cpu_hub_info(cpu)->n_val = m_val; ++ uv_cpu_hub_info(cpu)->numa_blade_id = blade; ++ uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; ++ uv_cpu_hub_info(cpu)->pnode = pnode; ++ uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) - 1; ++ uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; ++ uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; ++ uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; ++ uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; ++ uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; ++ uv_node_to_blade[nid] = blade; ++ uv_cpu_to_blade[cpu] = blade; ++ max_pnode = max(pnode, max_pnode); ++ ++ printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, " ++ "lcpu %d, blade %d\n", ++ cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, ++ lcpu, blade); ++ } ++ ++ map_gru_high(max_pnode); ++ map_mmr_high(max_pnode); ++ map_config_high(max_pnode); ++ map_mmioh_high(max_pnode); ++ ++ uv_cpu_init(); 
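/*
 * [Editorial note, not part of the upstream patch.] uv_cpu_init() bails out
 * early while uv_blade_info is still NULL, so the boot CPU's slot is only
 * accounted for by this explicit call, made once the blade tables above have
 * been populated; secondary CPUs take the normal "called on each cpu" path
 * described at uv_cpu_init().
 */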
++ uv_scir_register_cpu_notifier(); ++ proc_mkdir("sgi_uv", NULL); ++} +Index: linux-2.6-tip/arch/x86/kernel/apm_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/apm_32.c ++++ linux-2.6-tip/arch/x86/kernel/apm_32.c +@@ -301,7 +301,7 @@ extern int (*console_blank_hook)(int); + */ + #define APM_ZERO_SEGS + +-#include "apm.h" ++#include + + /* + * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. +@@ -466,7 +466,7 @@ static const lookup_t error_table[] = { + * @err: APM BIOS return code + * + * Write a meaningful log entry to the kernel log in the event of +- * an APM error. ++ * an APM error. Note that this also handles (negative) kernel errors. + */ + + static void apm_error(char *str, int err) +@@ -478,43 +478,14 @@ static void apm_error(char *str, int err + break; + if (i < ERROR_COUNT) + printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg); ++ else if (err < 0) ++ printk(KERN_NOTICE "apm: %s: linux error code %i\n", str, err); + else + printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n", + str, err); + } + + /* +- * Lock APM functionality to physical CPU 0 +- */ +- +-#ifdef CONFIG_SMP +- +-static cpumask_t apm_save_cpus(void) +-{ +- cpumask_t x = current->cpus_allowed; +- /* Some bioses don't like being called from CPU != 0 */ +- set_cpus_allowed(current, cpumask_of_cpu(0)); +- BUG_ON(smp_processor_id() != 0); +- return x; +-} +- +-static inline void apm_restore_cpus(cpumask_t mask) +-{ +- set_cpus_allowed(current, mask); +-} +- +-#else +- +-/* +- * No CPU lockdown needed on a uniprocessor +- */ +- +-#define apm_save_cpus() (current->cpus_allowed) +-#define apm_restore_cpus(x) (void)(x) +- +-#endif +- +-/* + * These are the actual BIOS calls. Depending on APM_ZERO_SEGS and + * apm_info.allow_ints, we are being really paranoid here! Not only + * are interrupts disabled, but all the segment registers (except SS) +@@ -568,16 +539,23 @@ static inline void apm_irq_restore(unsig + # define APM_DO_RESTORE_SEGS + #endif + ++struct apm_bios_call { ++ u32 func; ++ /* In and out */ ++ u32 ebx; ++ u32 ecx; ++ /* Out only */ ++ u32 eax; ++ u32 edx; ++ u32 esi; ++ ++ /* Error: -ENOMEM, or bits 8-15 of eax */ ++ int err; ++}; ++ + /** +- * apm_bios_call - Make an APM BIOS 32bit call +- * @func: APM function to execute +- * @ebx_in: EBX register for call entry +- * @ecx_in: ECX register for call entry +- * @eax: EAX register return +- * @ebx: EBX register return +- * @ecx: ECX register return +- * @edx: EDX register return +- * @esi: ESI register return ++ * __apm_bios_call - Make an APM BIOS 32bit call ++ * @_call: pointer to struct apm_bios_call. + * + * Make an APM call using the 32bit protected mode interface. The + * caller is responsible for knowing if APM BIOS is configured and +@@ -586,80 +564,142 @@ static inline void apm_irq_restore(unsig + * flag is loaded into AL. If there is an error, then the error + * code is returned in AH (bits 8-15 of eax) and this function + * returns non-zero. ++ * ++ * Note: this makes the call on the current CPU. 
+ */ +- +-static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, +- u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi) ++static long __apm_bios_call(void *_call) + { + APM_DECL_SEGS + unsigned long flags; +- cpumask_t cpus; + int cpu; + struct desc_struct save_desc_40; + struct desc_struct *gdt; +- +- cpus = apm_save_cpus(); ++ struct apm_bios_call *call = _call; + + cpu = get_cpu(); ++ BUG_ON(cpu != 0); + gdt = get_cpu_gdt_table(cpu); + save_desc_40 = gdt[0x40 / 8]; + gdt[0x40 / 8] = bad_bios_desc; + + apm_irq_save(flags); + APM_DO_SAVE_SEGS; +- apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); ++ apm_bios_call_asm(call->func, call->ebx, call->ecx, ++ &call->eax, &call->ebx, &call->ecx, &call->edx, ++ &call->esi); + APM_DO_RESTORE_SEGS; + apm_irq_restore(flags); + gdt[0x40 / 8] = save_desc_40; + put_cpu(); +- apm_restore_cpus(cpus); + +- return *eax & 0xff; ++ return call->eax & 0xff; ++} ++ ++/* Run __apm_bios_call or __apm_bios_call_simple on CPU 0 */ ++static int on_cpu0(long (*fn)(void *), struct apm_bios_call *call) ++{ ++ int ret; ++ ++ /* Don't bother with work_on_cpu in the common case, so we don't ++ * have to worry about OOM or overhead. */ ++ if (get_cpu() == 0) { ++ ret = fn(call); ++ put_cpu(); ++ } else { ++ put_cpu(); ++ ret = work_on_cpu(0, fn, call); ++ } ++ ++ /* work_on_cpu can fail with -ENOMEM */ ++ if (ret < 0) ++ call->err = ret; ++ else ++ call->err = (call->eax >> 8) & 0xff; ++ ++ return ret; + } + + /** +- * apm_bios_call_simple - make a simple APM BIOS 32bit call +- * @func: APM function to invoke +- * @ebx_in: EBX register value for BIOS call +- * @ecx_in: ECX register value for BIOS call +- * @eax: EAX register on return from the BIOS call ++ * apm_bios_call - Make an APM BIOS 32bit call (on CPU 0) ++ * @call: the apm_bios_call registers. ++ * ++ * If there is an error, it is returned in @call.err. ++ */ ++static int apm_bios_call(struct apm_bios_call *call) ++{ ++ return on_cpu0(__apm_bios_call, call); ++} ++ ++/** ++ * __apm_bios_call_simple - Make an APM BIOS 32bit call (on CPU 0) ++ * @_call: pointer to struct apm_bios_call. + * + * Make a BIOS call that returns one value only, or just status. + * If there is an error, then the error code is returned in AH +- * (bits 8-15 of eax) and this function returns non-zero. This is +- * used for simpler BIOS operations. This call may hold interrupts +- * off for a long time on some laptops. ++ * (bits 8-15 of eax) and this function returns non-zero (it can ++ * also return -ENOMEM). This is used for simpler BIOS operations. ++ * This call may hold interrupts off for a long time on some laptops. ++ * ++ * Note: this makes the call on the current CPU. 
+ */ +- +-static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax) ++static long __apm_bios_call_simple(void *_call) + { + u8 error; + APM_DECL_SEGS + unsigned long flags; +- cpumask_t cpus; + int cpu; + struct desc_struct save_desc_40; + struct desc_struct *gdt; +- +- cpus = apm_save_cpus(); ++ struct apm_bios_call *call = _call; + + cpu = get_cpu(); ++ BUG_ON(cpu != 0); + gdt = get_cpu_gdt_table(cpu); + save_desc_40 = gdt[0x40 / 8]; + gdt[0x40 / 8] = bad_bios_desc; + + apm_irq_save(flags); + APM_DO_SAVE_SEGS; +- error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); ++ error = apm_bios_call_simple_asm(call->func, call->ebx, call->ecx, ++ &call->eax); + APM_DO_RESTORE_SEGS; + apm_irq_restore(flags); + gdt[0x40 / 8] = save_desc_40; + put_cpu(); +- apm_restore_cpus(cpus); + return error; + } + + /** ++ * apm_bios_call_simple - make a simple APM BIOS 32bit call ++ * @func: APM function to invoke ++ * @ebx_in: EBX register value for BIOS call ++ * @ecx_in: ECX register value for BIOS call ++ * @eax: EAX register on return from the BIOS call ++ * @err: bits ++ * ++ * Make a BIOS call that returns one value only, or just status. ++ * If there is an error, then the error code is returned in @err ++ * and this function returns non-zero. This is used for simpler ++ * BIOS operations. This call may hold interrupts off for a long ++ * time on some laptops. ++ */ ++static int apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax, ++ int *err) ++{ ++ struct apm_bios_call call; ++ int ret; ++ ++ call.func = func; ++ call.ebx = ebx_in; ++ call.ecx = ecx_in; ++ ++ ret = on_cpu0(__apm_bios_call_simple, &call); ++ *eax = call.eax; ++ *err = call.err; ++ return ret; ++} ++ ++/** + * apm_driver_version - APM driver version + * @val: loaded with the APM version on return + * +@@ -678,9 +718,10 @@ static u8 apm_bios_call_simple(u32 func, + static int apm_driver_version(u_short *val) + { + u32 eax; ++ int err; + +- if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax)) +- return (eax >> 8) & 0xff; ++ if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax, &err)) ++ return err; + *val = eax; + return APM_SUCCESS; + } +@@ -701,22 +742,21 @@ static int apm_driver_version(u_short *v + * that APM 1.2 is in use. If no messges are pending the value 0x80 + * is returned (No power management events pending). 
+ */ +- + static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info) + { +- u32 eax; +- u32 ebx; +- u32 ecx; +- u32 dummy; ++ struct apm_bios_call call; + +- if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx, +- &dummy, &dummy)) +- return (eax >> 8) & 0xff; +- *event = ebx; ++ call.func = APM_FUNC_GET_EVENT; ++ call.ebx = call.ecx = 0; ++ ++ if (apm_bios_call(&call)) ++ return call.err; ++ ++ *event = call.ebx; + if (apm_info.connection_version < 0x0102) + *info = ~0; /* indicate info not valid */ + else +- *info = ecx; ++ *info = call.ecx; + return APM_SUCCESS; + } + +@@ -737,9 +777,10 @@ static int apm_get_event(apm_event_t *ev + static int set_power_state(u_short what, u_short state) + { + u32 eax; ++ int err; + +- if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax)) +- return (eax >> 8) & 0xff; ++ if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax, &err)) ++ return err; + return APM_SUCCESS; + } + +@@ -770,6 +811,7 @@ static int apm_do_idle(void) + u8 ret = 0; + int idled = 0; + int polling; ++ int err; + + polling = !!(current_thread_info()->status & TS_POLLING); + if (polling) { +@@ -782,7 +824,7 @@ static int apm_do_idle(void) + } + if (!need_resched()) { + idled = 1; +- ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); ++ ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax, &err); + } + if (polling) + current_thread_info()->status |= TS_POLLING; +@@ -797,8 +839,7 @@ static int apm_do_idle(void) + * Only report the failure the first 5 times. + */ + if (++t < 5) { +- printk(KERN_DEBUG "apm_do_idle failed (%d)\n", +- (eax >> 8) & 0xff); ++ printk(KERN_DEBUG "apm_do_idle failed (%d)\n", err); + t = jiffies; + } + return -1; +@@ -816,9 +857,10 @@ static int apm_do_idle(void) + static void apm_do_busy(void) + { + u32 dummy; ++ int err; + + if (clock_slowed || ALWAYS_CALL_BUSY) { +- (void)apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy); ++ (void)apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy, &err); + clock_slowed = 0; + } + } +@@ -937,7 +979,7 @@ static void apm_power_off(void) + + /* Some bioses don't like being called from CPU != 0 */ + if (apm_info.realmode_power_off) { +- (void)apm_save_cpus(); ++ set_cpus_allowed_ptr(current, cpumask_of(0)); + machine_real_restart(po_bios_call, sizeof(po_bios_call)); + } else { + (void)set_system_power_state(APM_STATE_OFF); +@@ -956,12 +998,13 @@ static void apm_power_off(void) + static int apm_enable_power_management(int enable) + { + u32 eax; ++ int err; + + if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED)) + return APM_NOT_ENGAGED; + if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL, +- enable, &eax)) +- return (eax >> 8) & 0xff; ++ enable, &eax, &err)) ++ return err; + if (enable) + apm_info.bios.flags &= ~APM_BIOS_DISABLED; + else +@@ -986,24 +1029,23 @@ static int apm_enable_power_management(i + + static int apm_get_power_status(u_short *status, u_short *bat, u_short *life) + { +- u32 eax; +- u32 ebx; +- u32 ecx; +- u32 edx; +- u32 dummy; ++ struct apm_bios_call call; ++ ++ call.func = APM_FUNC_GET_STATUS; ++ call.ebx = APM_DEVICE_ALL; ++ call.ecx = 0; + + if (apm_info.get_power_status_broken) + return APM_32_UNSUPPORTED; +- if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0, +- &eax, &ebx, &ecx, &edx, &dummy)) +- return (eax >> 8) & 0xff; +- *status = ebx; +- *bat = ecx; ++ if (apm_bios_call(&call)) ++ return call.err; ++ *status = call.ebx; ++ *bat = call.ecx; + if (apm_info.get_power_status_swabinminutes) { +- *life = swab16((u16)edx); ++ *life = 
swab16((u16)call.edx); + *life |= 0x8000; + } else +- *life = edx; ++ *life = call.edx; + return APM_SUCCESS; + } + +@@ -1048,12 +1090,14 @@ static int apm_get_battery_status(u_shor + static int apm_engage_power_management(u_short device, int enable) + { + u32 eax; ++ int err; + + if ((enable == 0) && (device == APM_DEVICE_ALL) + && (apm_info.bios.flags & APM_BIOS_DISABLED)) + return APM_DISABLED; +- if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, &eax)) +- return (eax >> 8) & 0xff; ++ if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, ++ &eax, &err)) ++ return err; + if (device == APM_DEVICE_ALL) { + if (enable) + apm_info.bios.flags &= ~APM_BIOS_DISENGAGED; +@@ -1682,16 +1726,14 @@ static int apm(void *unused) + char *power_stat; + char *bat_stat; + +-#ifdef CONFIG_SMP + /* 2002/08/01 - WT + * This is to avoid random crashes at boot time during initialization + * on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D. + * Some bioses don't like being called from CPU != 0. + * Method suggested by Ingo Molnar. + */ +- set_cpus_allowed(current, cpumask_of_cpu(0)); ++ set_cpus_allowed_ptr(current, cpumask_of(0)); + BUG_ON(smp_processor_id() != 0); +-#endif + + if (apm_info.connection_version == 0) { + apm_info.connection_version = apm_info.bios.version; +Index: linux-2.6-tip/arch/x86/kernel/asm-offsets_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/asm-offsets_32.c ++++ linux-2.6-tip/arch/x86/kernel/asm-offsets_32.c +@@ -75,6 +75,7 @@ void foo(void) + OFFSET(PT_DS, pt_regs, ds); + OFFSET(PT_ES, pt_regs, es); + OFFSET(PT_FS, pt_regs, fs); ++ OFFSET(PT_GS, pt_regs, gs); + OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); + OFFSET(PT_EIP, pt_regs, ip); + OFFSET(PT_CS, pt_regs, cs); +Index: linux-2.6-tip/arch/x86/kernel/asm-offsets_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/asm-offsets_64.c ++++ linux-2.6-tip/arch/x86/kernel/asm-offsets_64.c +@@ -11,7 +11,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -48,16 +47,6 @@ int main(void) + #endif + BLANK(); + #undef ENTRY +-#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) +- ENTRY(kernelstack); +- ENTRY(oldrsp); +- ENTRY(pcurrent); +- ENTRY(irqcount); +- ENTRY(cpunumber); +- ENTRY(irqstackptr); +- ENTRY(data_offset); +- BLANK(); +-#undef ENTRY + #ifdef CONFIG_PARAVIRT + BLANK(); + OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); +Index: linux-2.6-tip/arch/x86/kernel/check.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/check.c ++++ linux-2.6-tip/arch/x86/kernel/check.c +@@ -83,7 +83,7 @@ void __init setup_bios_corruption_check( + u64 size; + addr = find_e820_area_size(addr, &size, PAGE_SIZE); + +- if (addr == 0) ++ if (!(addr + 1)) + break; + + if (addr >= corruption_check_size) +Index: linux-2.6-tip/arch/x86/kernel/cpu/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/Makefile ++++ linux-2.6-tip/arch/x86/kernel/cpu/Makefile +@@ -1,5 +1,5 @@ + # +-# Makefile for x86-compatible CPU details and quirks ++# Makefile for x86-compatible CPU details, features and quirks + # + + # Don't trace early stages of a secondary CPU boot +@@ -14,19 +14,22 @@ obj-y += vmware.o hypervisor.o + obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o + obj-$(CONFIG_X86_64) += bugs_64.o + ++obj-$(CONFIG_X86_CPU_DEBUG) += 
cpu_debug.o ++ + obj-$(CONFIG_CPU_SUP_INTEL) += intel.o + obj-$(CONFIG_CPU_SUP_AMD) += amd.o + obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o +-obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o +-obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o ++obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o + obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o + obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o + +-obj-$(CONFIG_X86_MCE) += mcheck/ +-obj-$(CONFIG_MTRR) += mtrr/ +-obj-$(CONFIG_CPU_FREQ) += cpufreq/ ++obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o ++ ++obj-$(CONFIG_X86_MCE) += mcheck/ ++obj-$(CONFIG_MTRR) += mtrr/ ++obj-$(CONFIG_CPU_FREQ) += cpufreq/ + +-obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o ++obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o + + quiet_cmd_mkcapflags = MKCAP $@ + cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ +Index: linux-2.6-tip/arch/x86/kernel/cpu/addon_cpuid_features.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/addon_cpuid_features.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/addon_cpuid_features.c +@@ -7,7 +7,7 @@ + #include + #include + +-#include ++#include + + struct cpuid_bit { + u16 feature; +@@ -29,7 +29,7 @@ void __cpuinit init_scattered_cpuid_feat + u32 regs[4]; + const struct cpuid_bit *cb; + +- static const struct cpuid_bit cpuid_bits[] = { ++ static const struct cpuid_bit __cpuinitconst cpuid_bits[] = { + { X86_FEATURE_IDA, CR_EAX, 1, 0x00000006 }, + { 0, 0, 0, 0 } + }; +@@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_feat + */ + void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) + { +-#ifdef CONFIG_X86_SMP ++#ifdef CONFIG_SMP + unsigned int eax, ebx, ecx, edx, sub_index; + unsigned int ht_mask_width, core_plus_mask_width; + unsigned int core_select_mask, core_level_siblings; +@@ -116,22 +116,14 @@ void __cpuinit detect_extended_topology( + + core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; + +-#ifdef CONFIG_X86_32 +- c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) ++ c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width) + & core_select_mask; +- c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); ++ c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width); + /* + * Reinit the apicid, now that we have extended initial_apicid. + */ +- c->apicid = phys_pkg_id(c->initial_apicid, 0); +-#else +- c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; +- c->phys_proc_id = phys_pkg_id(core_plus_mask_width); +- /* +- * Reinit the apicid, now that we have extended initial_apicid. +- */ +- c->apicid = phys_pkg_id(0); +-#endif ++ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); ++ + c->x86_max_cores = (core_level_siblings / smp_num_siblings); + + +@@ -143,37 +135,3 @@ void __cpuinit detect_extended_topology( + return; + #endif + } +- +-#ifdef CONFIG_X86_PAT +-void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) +-{ +- if (!cpu_has_pat) +- pat_disable("PAT not supported by CPU."); +- +- switch (c->x86_vendor) { +- case X86_VENDOR_INTEL: +- /* +- * There is a known erratum on Pentium III and Core Solo +- * and Core Duo CPUs. +- * " Page with PAT set to WC while associated MTRR is UC +- * may consolidate to UC " +- * Because of this erratum, it is better to stick with +- * setting WC in MTRR rather than using PAT on these CPUs. +- * +- * Enable PAT WC only on P4, Core 2 or later CPUs. 
+- */ +- if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15)) +- return; +- +- pat_disable("PAT WC disabled due to known CPU erratum."); +- return; +- +- case X86_VENDOR_AMD: +- case X86_VENDOR_CENTAUR: +- case X86_VENDOR_TRANSMETA: +- return; +- } +- +- pat_disable("PAT disabled. Not yet verified on this CPU type."); +-} +-#endif +Index: linux-2.6-tip/arch/x86/kernel/cpu/amd.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/amd.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/amd.c +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_X86_64 + # include +@@ -12,8 +13,6 @@ + # include + #endif + +-#include +- + #include "cpu.h" + + #ifdef CONFIG_X86_32 +@@ -143,6 +142,55 @@ static void __cpuinit init_amd_k6(struct + } + } + ++static void __cpuinit amd_k7_smp_check(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_SMP ++ /* calling is from identify_secondary_cpu() ? */ ++ if (c->cpu_index == boot_cpu_id) ++ return; ++ ++ /* ++ * Certain Athlons might work (for various values of 'work') in SMP ++ * but they are not certified as MP capable. ++ */ ++ /* Athlon 660/661 is valid. */ ++ if ((c->x86_model == 6) && ((c->x86_mask == 0) || ++ (c->x86_mask == 1))) ++ goto valid_k7; ++ ++ /* Duron 670 is valid */ ++ if ((c->x86_model == 7) && (c->x86_mask == 0)) ++ goto valid_k7; ++ ++ /* ++ * Athlon 662, Duron 671, and Athlon >model 7 have capability ++ * bit. It's worth noting that the A5 stepping (662) of some ++ * Athlon XP's have the MP bit set. ++ * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for ++ * more. ++ */ ++ if (((c->x86_model == 6) && (c->x86_mask >= 2)) || ++ ((c->x86_model == 7) && (c->x86_mask >= 1)) || ++ (c->x86_model > 7)) ++ if (cpu_has_mp) ++ goto valid_k7; ++ ++ /* If we get here, not a certified SMP capable AMD system. */ ++ ++ /* ++ * Don't taint if we are running SMP kernel on a single non-MP ++ * approved Athlon ++ */ ++ WARN_ONCE(1, "WARNING: This combination of AMD" ++ "processors is not suitable for SMP.\n"); ++ if (!test_taint(TAINT_UNSAFE_SMP)) ++ add_taint(TAINT_UNSAFE_SMP); ++ ++valid_k7: ++ ; ++#endif ++} ++ + static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) + { + u32 l, h; +@@ -177,6 +225,8 @@ static void __cpuinit init_amd_k7(struct + } + + set_cpu_cap(c, X86_FEATURE_K7); ++ ++ amd_k7_smp_check(c); + } + #endif + +@@ -370,6 +420,10 @@ static void __cpuinit init_amd(struct cp + if (c->x86 >= 6) + set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); + ++ /* Enable Performance counter for K7 and later */ ++ if (c->x86 > 6 && c->x86 <= 0x11) ++ set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); ++ + if (!c->x86_model_id[0]) { + switch (c->x86) { + case 0xf: +@@ -452,7 +506,7 @@ static unsigned int __cpuinit amd_size_c + } + #endif + +-static struct cpu_dev amd_cpu_dev __cpuinitdata = { ++static const struct cpu_dev __cpuinitconst amd_cpu_dev = { + .c_vendor = "AMD", + .c_ident = { "AuthenticAMD" }, + #ifdef CONFIG_X86_32 +Index: linux-2.6-tip/arch/x86/kernel/cpu/centaur.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/centaur.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/centaur.c +@@ -1,11 +1,11 @@ ++#include + #include + #include +-#include + + #include +-#include + #include + #include ++#include + + #include "cpu.h" + +@@ -276,7 +276,7 @@ static void __cpuinit init_c3(struct cpu + */ + c->x86_capability[5] = cpuid_edx(0xC0000001); + } +- ++#ifdef CONFIG_X86_32 + /* Cyrix III family needs CX8 & PGE explicitly enabled. 
*/ + if (c->x86_model >= 6 && c->x86_model <= 9) { + rdmsr(MSR_VIA_FCR, lo, hi); +@@ -288,6 +288,11 @@ static void __cpuinit init_c3(struct cpu + /* Before Nehemiah, the C3's had 3dNOW! */ + if (c->x86_model >= 6 && c->x86_model < 9) + set_cpu_cap(c, X86_FEATURE_3DNOW); ++#endif ++ if (c->x86 == 0x6 && c->x86_model >= 0xf) { ++ c->x86_cache_alignment = c->x86_clflush_size * 2; ++ set_cpu_cap(c, X86_FEATURE_REP_GOOD); ++ } + + display_cacheinfo(c); + } +@@ -316,16 +321,25 @@ enum { + static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) + { + switch (c->x86) { ++#ifdef CONFIG_X86_32 + case 5: + /* Emulate MTRRs using Centaur's MCR. */ + set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); + break; ++#endif ++ case 6: ++ if (c->x86_model >= 0xf) ++ set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); ++ break; + } ++#ifdef CONFIG_X86_64 ++ set_cpu_cap(c, X86_FEATURE_SYSENTER32); ++#endif + } + + static void __cpuinit init_centaur(struct cpuinfo_x86 *c) + { +- ++#ifdef CONFIG_X86_32 + char *name; + u32 fcr_set = 0; + u32 fcr_clr = 0; +@@ -337,8 +351,10 @@ static void __cpuinit init_centaur(struc + * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway + */ + clear_cpu_cap(c, 0*32+31); +- ++#endif ++ early_init_centaur(c); + switch (c->x86) { ++#ifdef CONFIG_X86_32 + case 5: + switch (c->x86_model) { + case 4: +@@ -442,16 +458,20 @@ static void __cpuinit init_centaur(struc + } + sprintf(c->x86_model_id, "WinChip %s", name); + break; +- ++#endif + case 6: + init_c3(c); + break; + } ++#ifdef CONFIG_X86_64 ++ set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); ++#endif + } + + static unsigned int __cpuinit + centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) + { ++#ifdef CONFIG_X86_32 + /* VIA C3 CPUs (670-68F) need further shifting. */ + if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) + size >>= 8; +@@ -464,11 +484,11 @@ centaur_size_cache(struct cpuinfo_x86 *c + if ((c->x86 == 6) && (c->x86_model == 9) && + (c->x86_mask == 1) && (size == 65)) + size -= 1; +- ++#endif + return size; + } + +-static struct cpu_dev centaur_cpu_dev __cpuinitdata = { ++static const struct cpu_dev __cpuinitconst centaur_cpu_dev = { + .c_vendor = "Centaur", + .c_ident = { "CentaurHauls" }, + .c_early_init = early_init_centaur, +Index: linux-2.6-tip/arch/x86/kernel/cpu/centaur_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/centaur_64.c ++++ /dev/null +@@ -1,37 +0,0 @@ +-#include +-#include +- +-#include +-#include +- +-#include "cpu.h" +- +-static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) +-{ +- if (c->x86 == 0x6 && c->x86_model >= 0xf) +- set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); +- +- set_cpu_cap(c, X86_FEATURE_SYSENTER32); +-} +- +-static void __cpuinit init_centaur(struct cpuinfo_x86 *c) +-{ +- early_init_centaur(c); +- +- if (c->x86 == 0x6 && c->x86_model >= 0xf) { +- c->x86_cache_alignment = c->x86_clflush_size * 2; +- set_cpu_cap(c, X86_FEATURE_REP_GOOD); +- } +- set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); +-} +- +-static struct cpu_dev centaur_cpu_dev __cpuinitdata = { +- .c_vendor = "Centaur", +- .c_ident = { "CentaurHauls" }, +- .c_early_init = early_init_centaur, +- .c_init = init_centaur, +- .c_x86_vendor = X86_VENDOR_CENTAUR, +-}; +- +-cpu_dev_register(centaur_cpu_dev); +- +Index: linux-2.6-tip/arch/x86/kernel/cpu/common.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/common.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/common.c +@@ -1,118 +1,118 @@ 
+-#include +-#include +-#include +-#include + #include ++#include + #include ++#include + #include +-#include +-#include ++#include ++#include + #include ++#include ++#include ++#include + #include +-#include +-#include +-#include +-#include +-#include ++#include ++ ++#include ++#include + #include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + #include ++#include ++#include ++#include + #include ++#include + #include +-#include +-#include + #include ++ + #ifdef CONFIG_X86_LOCAL_APIC +-#include +-#include +-#include +-#include ++#include + #endif + +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- + #include "cpu.h" + +-#ifdef CONFIG_X86_64 +- + /* all of these masks are initialized in setup_cpu_local_masks() */ +-cpumask_var_t cpu_callin_mask; +-cpumask_var_t cpu_callout_mask; + cpumask_var_t cpu_initialized_mask; ++cpumask_var_t cpu_callout_mask; ++cpumask_var_t cpu_callin_mask; + + /* representing cpus for which sibling maps can be computed */ + cpumask_var_t cpu_sibling_setup_mask; + +-#else /* CONFIG_X86_32 */ +- +-cpumask_t cpu_callin_map; +-cpumask_t cpu_callout_map; +-cpumask_t cpu_initialized; +-cpumask_t cpu_sibling_setup_map; +- +-#endif /* CONFIG_X86_32 */ +- ++/* correctly size the local cpu masks */ ++void __init setup_cpu_local_masks(void) ++{ ++ alloc_bootmem_cpumask_var(&cpu_initialized_mask); ++ alloc_bootmem_cpumask_var(&cpu_callin_mask); ++ alloc_bootmem_cpumask_var(&cpu_callout_mask); ++ alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); ++} + +-static struct cpu_dev *this_cpu __cpuinitdata; ++static const struct cpu_dev *this_cpu __cpuinitdata; + ++DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { + #ifdef CONFIG_X86_64 +-/* We need valid kernel segments for data and code in long mode too +- * IRET will check the segment types kkeil 2000/10/28 +- * Also sysret mandates a special GDT layout +- */ +-/* The TLS descriptors are currently at a different place compared to i386. +- Hopefully nobody expects them at a fixed place (Wine?) */ +-DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { +- [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, +- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, +- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, +- [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, +- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, +- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, +-} }; ++ /* ++ * We need valid kernel segments for data and code in long mode too ++ * IRET will check the segment types kkeil 2000/10/28 ++ * Also sysret mandates a special GDT layout ++ * ++ * TLS descriptors are currently at a different place compared to i386. ++ * Hopefully nobody expects them at a fixed place (Wine?) 
++ */ ++ [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, ++ [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, ++ [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, ++ [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, ++ [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, ++ [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, + #else +-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { +- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, +- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, +- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, +- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, ++ [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, ++ [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, ++ [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, ++ [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, + /* + * Segments used for calling PnP BIOS have byte granularity. + * They code segments and data segments have fixed 64k limits, + * the transfer segment sizes are set at run time. + */ + /* 32-bit code */ +- [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, ++ [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, + /* 16-bit code */ +- [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, ++ [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, + /* 16-bit data */ +- [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, ++ [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, + /* 16-bit data */ +- [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, ++ [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, + /* 16-bit data */ +- [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, ++ [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, + /* + * The APM segments have byte granularity and their bases + * are set at run time. All have 64k limits. + */ + /* 32-bit code */ +- [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, ++ [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, + /* 16-bit code */ +- [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, ++ [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, + /* data */ +- [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, ++ [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, + +- [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, +- [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, +-} }; ++ [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, ++ [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, ++ GDT_STACK_CANARY_INIT + #endif ++} }; + EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); + + #ifdef CONFIG_X86_32 +@@ -153,16 +153,17 @@ static inline int flag_is_changeable_p(u + * the CPUID. Add "volatile" to not allow gcc to + * optimize the subsequent calls to this function. 
+ */ +- asm volatile ("pushfl\n\t" +- "pushfl\n\t" +- "popl %0\n\t" +- "movl %0,%1\n\t" +- "xorl %2,%0\n\t" +- "pushl %0\n\t" +- "popfl\n\t" +- "pushfl\n\t" +- "popl %0\n\t" +- "popfl\n\t" ++ asm volatile ("pushfl \n\t" ++ "pushfl \n\t" ++ "popl %0 \n\t" ++ "movl %0, %1 \n\t" ++ "xorl %2, %0 \n\t" ++ "pushl %0 \n\t" ++ "popfl \n\t" ++ "pushfl \n\t" ++ "popl %0 \n\t" ++ "popfl \n\t" ++ + : "=&r" (f1), "=&r" (f2) + : "ir" (flag)); + +@@ -177,18 +178,22 @@ static int __cpuinit have_cpuid_p(void) + + static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) + { +- if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) { +- /* Disable processor serial number */ +- unsigned long lo, hi; +- rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); +- lo |= 0x200000; +- wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); +- printk(KERN_NOTICE "CPU serial number disabled.\n"); +- clear_cpu_cap(c, X86_FEATURE_PN); ++ unsigned long lo, hi; + +- /* Disabling the serial number may affect the cpuid level */ +- c->cpuid_level = cpuid_eax(0); +- } ++ if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr) ++ return; ++ ++ /* Disable processor serial number: */ ++ ++ rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi); ++ lo |= 0x200000; ++ wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi); ++ ++ printk(KERN_NOTICE "CPU serial number disabled.\n"); ++ clear_cpu_cap(c, X86_FEATURE_PN); ++ ++ /* Disabling the serial number may affect the cpuid level */ ++ c->cpuid_level = cpuid_eax(0); + } + + static int __init x86_serial_nr_setup(char *s) +@@ -213,16 +218,64 @@ static inline void squash_the_stupid_ser + #endif + + /* ++ * Some CPU features depend on higher CPUID levels, which may not always ++ * be available due to CPUID level capping or broken virtualization ++ * software. Add those features to this table to auto-disable them. ++ */ ++struct cpuid_dependent_feature { ++ u32 feature; ++ u32 level; ++}; ++ ++static const struct cpuid_dependent_feature __cpuinitconst ++cpuid_dependent_features[] = { ++ { X86_FEATURE_MWAIT, 0x00000005 }, ++ { X86_FEATURE_DCA, 0x00000009 }, ++ { X86_FEATURE_XSAVE, 0x0000000d }, ++ { 0, 0 } ++}; ++ ++static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn) ++{ ++ const struct cpuid_dependent_feature *df; ++ ++ for (df = cpuid_dependent_features; df->feature; df++) { ++ ++ if (!cpu_has(c, df->feature)) ++ continue; ++ /* ++ * Note: cpuid_level is set to -1 if unavailable, but ++ * extended_extended_level is set to 0 if unavailable ++ * and the legitimate extended levels are all negative ++ * when signed; hence the weird messing around with ++ * signs here... ++ */ ++ if (!((s32)df->level < 0 ? ++ (u32)df->level > (u32)c->extended_cpuid_level : ++ (s32)df->level > (s32)c->cpuid_level)) ++ continue; ++ ++ clear_cpu_cap(c, df->feature); ++ if (!warn) ++ continue; ++ ++ printk(KERN_WARNING ++ "CPU: CPU feature %s disabled, no CPUID level 0x%x\n", ++ x86_cap_flags[df->feature], df->level); ++ } ++} ++ ++/* + * Naming convention should be: [()] + * This table only is used unless init_() below doesn't set it; +- * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used +- * ++ * in particular, if CPUID levels 0x80000002..4 are supported, this ++ * isn't used + */ + + /* Look up CPU names by table lookup. 
*/ +-static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) ++static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c) + { +- struct cpu_model_info *info; ++ const struct cpu_model_info *info; + + if (c->x86_model >= 16) + return NULL; /* Range check */ +@@ -242,21 +295,34 @@ static char __cpuinit *table_lookup_mode + + __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; + +-/* Current gdt points %fs at the "master" per-cpu area: after this, +- * it's on the real one. */ +-void switch_to_new_gdt(void) ++void load_percpu_segment(int cpu) ++{ ++#ifdef CONFIG_X86_32 ++ loadsegment(fs, __KERNEL_PERCPU); ++#else ++ loadsegment(gs, 0); ++ wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)); ++#endif ++ load_stack_canary_segment(); ++} ++ ++/* ++ * Current gdt points %fs at the "master" per-cpu area: after this, ++ * it's on the real one. ++ */ ++void switch_to_new_gdt(int cpu) + { + struct desc_ptr gdt_descr; + +- gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); ++ gdt_descr.address = (long)get_cpu_gdt_table(cpu); + gdt_descr.size = GDT_SIZE - 1; + load_gdt(&gdt_descr); +-#ifdef CONFIG_X86_32 +- asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); +-#endif ++ /* Reload the per-cpu base */ ++ ++ load_percpu_segment(cpu); + } + +-static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; ++static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {}; + + static void __cpuinit default_init(struct cpuinfo_x86 *c) + { +@@ -275,7 +341,7 @@ static void __cpuinit default_init(struc + #endif + } + +-static struct cpu_dev __cpuinitdata default_cpu = { ++static const struct cpu_dev __cpuinitconst default_cpu = { + .c_init = default_init, + .c_vendor = "Unknown", + .c_x86_vendor = X86_VENDOR_UNKNOWN, +@@ -289,22 +355,24 @@ static void __cpuinit get_model_name(str + if (c->extended_cpuid_level < 0x80000004) + return; + +- v = (unsigned int *) c->x86_model_id; ++ v = (unsigned int *)c->x86_model_id; + cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); + cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]); + cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]); + c->x86_model_id[48] = 0; + +- /* Intel chips right-justify this string for some dumb reason; +- undo that brain damage */ ++ /* ++ * Intel chips right-justify this string for some dumb reason; ++ * undo that brain damage: ++ */ + p = q = &c->x86_model_id[0]; + while (*p == ' ') +- p++; ++ p++; + if (p != q) { +- while (*p) +- *q++ = *p++; +- while (q <= &c->x86_model_id[48]) +- *q++ = '\0'; /* Zero-pad the rest */ ++ while (*p) ++ *q++ = *p++; ++ while (q <= &c->x86_model_id[48]) ++ *q++ = '\0'; /* Zero-pad the rest */ + } + } + +@@ -373,36 +441,30 @@ void __cpuinit detect_ht(struct cpuinfo_ + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); +- } else if (smp_num_siblings > 1) { ++ goto out; ++ } + +- if (smp_num_siblings > nr_cpu_ids) { +- printk(KERN_WARNING "CPU: Unsupported number of siblings %d", +- smp_num_siblings); +- smp_num_siblings = 1; +- return; +- } ++ if (smp_num_siblings <= 1) ++ goto out; + +- index_msb = get_count_order(smp_num_siblings); +-#ifdef CONFIG_X86_64 +- c->phys_proc_id = phys_pkg_id(index_msb); +-#else +- c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); +-#endif ++ if (smp_num_siblings > nr_cpu_ids) { ++ pr_warning("CPU: Unsupported number of siblings %d", ++ smp_num_siblings); ++ smp_num_siblings = 1; ++ return; ++ } + +- smp_num_siblings = smp_num_siblings / c->x86_max_cores; ++ index_msb = 
get_count_order(smp_num_siblings); ++ c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb); + +- index_msb = get_count_order(smp_num_siblings); ++ smp_num_siblings = smp_num_siblings / c->x86_max_cores; + +- core_bits = get_count_order(c->x86_max_cores); ++ index_msb = get_count_order(smp_num_siblings); + +-#ifdef CONFIG_X86_64 +- c->cpu_core_id = phys_pkg_id(index_msb) & +- ((1 << core_bits) - 1); +-#else +- c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & +- ((1 << core_bits) - 1); +-#endif +- } ++ core_bits = get_count_order(c->x86_max_cores); ++ ++ c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) & ++ ((1 << core_bits) - 1); + + out: + if ((c->x86_max_cores * smp_num_siblings) > 1) { +@@ -417,8 +479,8 @@ out: + static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) + { + char *v = c->x86_vendor_id; +- int i; + static int printed; ++ int i; + + for (i = 0; i < X86_VENDOR_NUM; i++) { + if (!cpu_devs[i]) +@@ -427,6 +489,7 @@ static void __cpuinit get_cpu_vendor(str + if (!strcmp(v, cpu_devs[i]->c_ident[0]) || + (cpu_devs[i]->c_ident[1] && + !strcmp(v, cpu_devs[i]->c_ident[1]))) { ++ + this_cpu = cpu_devs[i]; + c->x86_vendor = this_cpu->c_x86_vendor; + return; +@@ -435,7 +498,9 @@ static void __cpuinit get_cpu_vendor(str + + if (!printed) { + printed++; +- printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v); ++ printk(KERN_ERR ++ "CPU: vendor_id '%s' unknown, using generic init.\n", v); ++ + printk(KERN_ERR "CPU: Your system may be unstable.\n"); + } + +@@ -455,14 +520,17 @@ void __cpuinit cpu_detect(struct cpuinfo + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 junk, tfms, cap0, misc; ++ + cpuid(0x00000001, &tfms, &misc, &junk, &cap0); + c->x86 = (tfms >> 8) & 0xf; + c->x86_model = (tfms >> 4) & 0xf; + c->x86_mask = tfms & 0xf; ++ + if (c->x86 == 0xf) + c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) + c->x86_model += ((tfms >> 16) & 0xf) << 4; ++ + if (cap0 & (1<<19)) { + c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; + c->x86_cache_alignment = c->x86_clflush_size; +@@ -478,6 +546,7 @@ static void __cpuinit get_cpu_cap(struct + /* Intel-defined flags: level 0x00000001 */ + if (c->cpuid_level >= 0x00000001) { + u32 capability, excap; ++ + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); + c->x86_capability[0] = capability; + c->x86_capability[4] = excap; +@@ -486,6 +555,7 @@ static void __cpuinit get_cpu_cap(struct + /* AMD-defined flags: level 0x80000001 */ + xlvl = cpuid_eax(0x80000000); + c->extended_cpuid_level = xlvl; ++ + if ((xlvl & 0xffff0000) == 0x80000000) { + if (xlvl >= 0x80000001) { + c->x86_capability[1] = cpuid_edx(0x80000001); +@@ -493,13 +563,15 @@ static void __cpuinit get_cpu_cap(struct + } + } + +-#ifdef CONFIG_X86_64 + if (c->extended_cpuid_level >= 0x80000008) { + u32 eax = cpuid_eax(0x80000008); + + c->x86_virt_bits = (eax >> 8) & 0xff; + c->x86_phys_bits = eax & 0xff; + } ++#ifdef CONFIG_X86_32 ++ else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) ++ c->x86_phys_bits = 36; + #endif + + if (c->extended_cpuid_level >= 0x80000007) +@@ -546,8 +618,12 @@ static void __init early_identify_cpu(st + { + #ifdef CONFIG_X86_64 + c->x86_clflush_size = 64; ++ c->x86_phys_bits = 36; ++ c->x86_virt_bits = 48; + #else + c->x86_clflush_size = 32; ++ c->x86_phys_bits = 32; ++ c->x86_virt_bits = 32; + #endif + c->x86_cache_alignment = c->x86_clflush_size; + +@@ -570,21 +646,20 @@ static void __init early_identify_cpu(st + if 
(this_cpu->c_early_init) + this_cpu->c_early_init(c); + +- validate_pat_support(c); +- + #ifdef CONFIG_SMP + c->cpu_index = boot_cpu_id; + #endif ++ filter_cpuid_features(c, false); + } + + void __init early_cpu_init(void) + { +- struct cpu_dev **cdev; ++ const struct cpu_dev *const *cdev; + int count = 0; + +- printk("KERNEL supported cpus:\n"); ++ printk(KERN_INFO "KERNEL supported cpus:\n"); + for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) { +- struct cpu_dev *cpudev = *cdev; ++ const struct cpu_dev *cpudev = *cdev; + unsigned int j; + + if (count >= X86_VENDOR_NUM) +@@ -595,7 +670,7 @@ void __init early_cpu_init(void) + for (j = 0; j < 2; j++) { + if (!cpudev->c_ident[j]) + continue; +- printk(" %s %s\n", cpudev->c_vendor, ++ printk(KERN_INFO " %s %s\n", cpudev->c_vendor, + cpudev->c_ident[j]); + } + } +@@ -637,7 +712,7 @@ static void __cpuinit generic_identify(s + c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; + #ifdef CONFIG_X86_32 + # ifdef CONFIG_X86_HT +- c->apicid = phys_pkg_id(c->initial_apicid, 0); ++ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); + # else + c->apicid = c->initial_apicid; + # endif +@@ -671,9 +746,13 @@ static void __cpuinit identify_cpu(struc + c->x86_coreid_bits = 0; + #ifdef CONFIG_X86_64 + c->x86_clflush_size = 64; ++ c->x86_phys_bits = 36; ++ c->x86_virt_bits = 48; + #else + c->cpuid_level = -1; /* CPUID not detected */ + c->x86_clflush_size = 32; ++ c->x86_phys_bits = 32; ++ c->x86_virt_bits = 32; + #endif + c->x86_cache_alignment = c->x86_clflush_size; + memset(&c->x86_capability, 0, sizeof c->x86_capability); +@@ -684,7 +763,7 @@ static void __cpuinit identify_cpu(struc + this_cpu->c_identify(c); + + #ifdef CONFIG_X86_64 +- c->apicid = phys_pkg_id(0); ++ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); + #endif + + /* +@@ -704,13 +783,16 @@ static void __cpuinit identify_cpu(struc + squash_the_stupid_serial_number(c); + + /* +- * The vendor-specific functions might have changed features. Now +- * we do "generic changes." ++ * The vendor-specific functions might have changed features. ++ * Now we do "generic changes." + */ + ++ /* Filter out anything that depends on CPUID levels we don't have */ ++ filter_cpuid_features(c, true); ++ + /* If the model name is still unset, do table lookup. 
*/ + if (!c->x86_model_id[0]) { +- char *p; ++ const char *p; + p = table_lookup_model(c); + if (p) + strcpy(c->x86_model_id, p); +@@ -766,12 +848,14 @@ static void vgetcpu_set_mode(void) + void __init identify_boot_cpu(void) + { + identify_cpu(&boot_cpu_data); ++ init_c1e_mask(); + #ifdef CONFIG_X86_32 + sysenter_setup(); + enable_sep_cpu(); + #else + vgetcpu_set_mode(); + #endif ++ init_hw_perf_counters(); + } + + void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) +@@ -785,11 +869,11 @@ void __cpuinit identify_secondary_cpu(st + } + + struct msr_range { +- unsigned min; +- unsigned max; ++ unsigned min; ++ unsigned max; + }; + +-static struct msr_range msr_range_array[] __cpuinitdata = { ++static const struct msr_range msr_range_array[] __cpuinitconst = { + { 0x00000000, 0x00000418}, + { 0xc0000000, 0xc000040b}, + { 0xc0010000, 0xc0010142}, +@@ -798,14 +882,15 @@ static struct msr_range msr_range_array[ + + static void __cpuinit print_cpu_msr(void) + { ++ unsigned index_min, index_max; + unsigned index; + u64 val; + int i; +- unsigned index_min, index_max; + + for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) { + index_min = msr_range_array[i].min; + index_max = msr_range_array[i].max; ++ + for (index = index_min; index < index_max; index++) { + if (rdmsrl_amd_safe(index, &val)) + continue; +@@ -815,6 +900,7 @@ static void __cpuinit print_cpu_msr(void + } + + static int show_msr __cpuinitdata; ++ + static __init int setup_show_msr(char *arg) + { + int num; +@@ -836,12 +922,14 @@ __setup("noclflush", setup_noclflush); + + void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) + { +- char *vendor = NULL; ++ const char *vendor = NULL; + +- if (c->x86_vendor < X86_VENDOR_NUM) ++ if (c->x86_vendor < X86_VENDOR_NUM) { + vendor = this_cpu->c_vendor; +- else if (c->cpuid_level >= 0) +- vendor = c->x86_vendor_id; ++ } else { ++ if (c->cpuid_level >= 0) ++ vendor = c->x86_vendor_id; ++ } + + if (vendor && !strstr(c->x86_model_id, vendor)) + printk(KERN_CONT "%s ", vendor); +@@ -868,65 +956,47 @@ void __cpuinit print_cpu_info(struct cpu + static __init int setup_disablecpuid(char *arg) + { + int bit; ++ + if (get_option(&arg, &bit) && bit < NCAPINTS*32) + setup_clear_cpu_cap(bit); + else + return 0; ++ + return 1; + } + __setup("clearcpuid=", setup_disablecpuid); + + #ifdef CONFIG_X86_64 +-struct x8664_pda **_cpu_pda __read_mostly; +-EXPORT_SYMBOL(_cpu_pda); +- + struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; + +-static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; ++DEFINE_PER_CPU_FIRST(union irq_stack_union, ++ irq_stack_union) __aligned(PAGE_SIZE); + +-void __cpuinit pda_init(int cpu) +-{ +- struct x8664_pda *pda = cpu_pda(cpu); ++DEFINE_PER_CPU(char *, irq_stack_ptr) = ++ init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; + +- /* Setup up data that may be needed in __get_free_pages early */ +- loadsegment(fs, 0); +- loadsegment(gs, 0); +- /* Memory clobbers used to order PDA accessed */ +- mb(); +- wrmsrl(MSR_GS_BASE, pda); +- mb(); +- +- pda->cpunumber = cpu; +- pda->irqcount = -1; +- pda->kernelstack = (unsigned long)stack_thread_info() - +- PDA_STACKOFFSET + THREAD_SIZE; +- pda->active_mm = &init_mm; +- pda->mmu_state = 0; +- +- if (cpu == 0) { +- /* others are initialized in smpboot.c */ +- pda->pcurrent = &init_task; +- pda->irqstackptr = boot_cpu_stack; +- pda->irqstackptr += IRQSTACKSIZE - 64; +- } else { +- if (!pda->irqstackptr) { +- pda->irqstackptr = (char *) +- __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); +- if 
(!pda->irqstackptr) +- panic("cannot allocate irqstack for cpu %d", +- cpu); +- pda->irqstackptr += IRQSTACKSIZE - 64; +- } ++DEFINE_PER_CPU(unsigned long, kernel_stack) = ++ (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; ++EXPORT_PER_CPU_SYMBOL(kernel_stack); + +- if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) +- pda->nodenumber = cpu_to_node(cpu); +- } +-} ++DEFINE_PER_CPU(unsigned int, irq_count) = -1; + +-static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + +- DEBUG_STKSZ] __page_aligned_bss; ++/* ++ * Special IST stacks which the CPU switches to when it calls ++ * an IST-marked descriptor entry. Up to 7 stacks (hardware ++ * limit), all of them are 4K, except the debug stack which ++ * is 8K. ++ */ ++static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { ++ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ, ++#if DEBUG_STACK > 0 ++ [DEBUG_STACK - 1] = DEBUG_STKSZ ++#endif ++}; + +-extern asmlinkage void ignore_sysret(void); ++static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks ++ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) ++ __aligned(PAGE_SIZE); + + /* May not be marked __init: used by software suspend */ + void syscall_init(void) +@@ -957,16 +1027,38 @@ unsigned long kernel_eflags; + */ + DEFINE_PER_CPU(struct orig_ist, orig_ist); + +-#else ++#else /* CONFIG_X86_64 */ ++ ++#ifdef CONFIG_CC_STACKPROTECTOR ++DEFINE_PER_CPU(unsigned long, stack_canary); ++#endif + +-/* Make sure %fs is initialized properly in idle threads */ ++/* Make sure %fs and %gs are initialized properly in idle threads */ + struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) + { + memset(regs, 0, sizeof(struct pt_regs)); + regs->fs = __KERNEL_PERCPU; ++ regs->gs = __KERNEL_STACK_CANARY; ++ + return regs; + } +-#endif ++#endif /* CONFIG_X86_64 */ ++ ++/* ++ * Clear all 6 debug registers: ++ */ ++static void clear_all_debug_regs(void) ++{ ++ int i; ++ ++ for (i = 0; i < 8; i++) { ++ /* Ignore db4, db5 */ ++ if ((i == 4) || (i == 5)) ++ continue; ++ ++ set_debugreg(0, i); ++ } ++} + + /* + * cpu_init() initializes state that is per-CPU. 
Some data is already +@@ -976,21 +1068,25 @@ struct pt_regs * __cpuinit idle_regs(str + * A lot of state is already set up in PDA init for 64 bit + */ + #ifdef CONFIG_X86_64 ++ + void __cpuinit cpu_init(void) + { +- int cpu = stack_smp_processor_id(); +- struct tss_struct *t = &per_cpu(init_tss, cpu); +- struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); +- unsigned long v; +- char *estacks = NULL; ++ struct orig_ist *orig_ist; + struct task_struct *me; ++ struct tss_struct *t; ++ unsigned long v; ++ int cpu; + int i; + +- /* CPU 0 is initialised in head64.c */ +- if (cpu != 0) +- pda_init(cpu); +- else +- estacks = boot_exception_stacks; ++ cpu = stack_smp_processor_id(); ++ t = &per_cpu(init_tss, cpu); ++ orig_ist = &per_cpu(orig_ist, cpu); ++ ++#ifdef CONFIG_NUMA ++ if (cpu != 0 && percpu_read(node_number) == 0 && ++ cpu_to_node(cpu) != NUMA_NO_NODE) ++ percpu_write(node_number, cpu_to_node(cpu)); ++#endif + + me = current; + +@@ -1006,7 +1102,9 @@ void __cpuinit cpu_init(void) + * and set up the GDT descriptor: + */ + +- switch_to_new_gdt(); ++ switch_to_new_gdt(cpu); ++ loadsegment(fs, 0); ++ + load_idt((const struct desc_ptr *)&idt_descr); + + memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); +@@ -1017,31 +1115,24 @@ void __cpuinit cpu_init(void) + barrier(); + + check_efer(); +- if (cpu != 0 && x2apic) ++ if (cpu != 0) + enable_x2apic(); + + /* + * set up and load the per-CPU TSS + */ + if (!orig_ist->ist[0]) { +- static const unsigned int order[N_EXCEPTION_STACKS] = { +- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, +- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER +- }; ++ char *estacks = per_cpu(exception_stacks, cpu); ++ + for (v = 0; v < N_EXCEPTION_STACKS; v++) { +- if (cpu) { +- estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); +- if (!estacks) +- panic("Cannot allocate exception " +- "stack %ld %d\n", v, cpu); +- } +- estacks += PAGE_SIZE << order[v]; ++ estacks += exception_stack_sizes[v]; + orig_ist->ist[v] = t->x86_tss.ist[v] = + (unsigned long)estacks; + } + } + + t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); ++ + /* + * <= is required because the CPU will access up to + * 8 bits beyond the end of the IO permission bitmap. +@@ -1051,8 +1142,7 @@ void __cpuinit cpu_init(void) + + atomic_inc(&init_mm.mm_count); + me->active_mm = &init_mm; +- if (me->mm) +- BUG(); ++ BUG_ON(me->mm); + enter_lazy_tlb(&init_mm, me); + + load_sp0(t, ¤t->thread); +@@ -1069,22 +1159,9 @@ void __cpuinit cpu_init(void) + */ + if (kgdb_connected && arch_kgdb_ops.correct_hw_break) + arch_kgdb_ops.correct_hw_break(); +- else { +-#endif +- /* +- * Clear all 6 debug registers: +- */ +- +- set_debugreg(0UL, 0); +- set_debugreg(0UL, 1); +- set_debugreg(0UL, 2); +- set_debugreg(0UL, 3); +- set_debugreg(0UL, 6); +- set_debugreg(0UL, 7); +-#ifdef CONFIG_KGDB +- /* If the kgdb is connected no debug regs should be altered. 
*/ +- } ++ else + #endif ++ clear_all_debug_regs(); + + fpu_init(); + +@@ -1105,7 +1182,8 @@ void __cpuinit cpu_init(void) + + if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) { + printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); +- for (;;) local_irq_enable(); ++ for (;;) ++ local_irq_enable(); + } + + printk(KERN_INFO "Initializing CPU#%d\n", cpu); +@@ -1114,15 +1192,14 @@ void __cpuinit cpu_init(void) + clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); + + load_idt(&idt_descr); +- switch_to_new_gdt(); ++ switch_to_new_gdt(cpu); + + /* + * Set up and load the per-CPU TSS and LDT + */ + atomic_inc(&init_mm.mm_count); + curr->active_mm = &init_mm; +- if (curr->mm) +- BUG(); ++ BUG_ON(curr->mm); + enter_lazy_tlb(&init_mm, curr); + + load_sp0(t, thread); +@@ -1130,21 +1207,14 @@ void __cpuinit cpu_init(void) + load_TR_desc(); + load_LDT(&init_mm.context); + ++ t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); ++ + #ifdef CONFIG_DOUBLEFAULT + /* Set up doublefault TSS pointer in the GDT */ + __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); + #endif + +- /* Clear %gs. */ +- asm volatile ("mov %0, %%gs" : : "r" (0)); +- +- /* Clear all 6 debug registers: */ +- set_debugreg(0, 0); +- set_debugreg(0, 1); +- set_debugreg(0, 2); +- set_debugreg(0, 3); +- set_debugreg(0, 6); +- set_debugreg(0, 7); ++ clear_all_debug_regs(); + + /* + * Force FPU initialization: +@@ -1164,6 +1234,4 @@ void __cpuinit cpu_init(void) + + xsave_init(); + } +- +- + #endif +Index: linux-2.6-tip/arch/x86/kernel/cpu/cpu.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/cpu.h ++++ linux-2.6-tip/arch/x86/kernel/cpu/cpu.h +@@ -3,33 +3,34 @@ + #define ARCH_X86_CPU_H + + struct cpu_model_info { +- int vendor; +- int family; +- char *model_names[16]; ++ int vendor; ++ int family; ++ const char *model_names[16]; + }; + + /* attempt to consolidate cpu attributes */ + struct cpu_dev { +- char * c_vendor; ++ const char *c_vendor; + + /* some have two possibilities for cpuid string */ +- char * c_ident[2]; ++ const char *c_ident[2]; + + struct cpu_model_info c_models[4]; + +- void (*c_early_init)(struct cpuinfo_x86 *c); +- void (*c_init)(struct cpuinfo_x86 * c); +- void (*c_identify)(struct cpuinfo_x86 * c); +- unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); +- int c_x86_vendor; ++ void (*c_early_init)(struct cpuinfo_x86 *); ++ void (*c_init)(struct cpuinfo_x86 *); ++ void (*c_identify)(struct cpuinfo_x86 *); ++ unsigned int (*c_size_cache)(struct cpuinfo_x86 *, unsigned int); ++ int c_x86_vendor; + }; + + #define cpu_dev_register(cpu_devX) \ +- static struct cpu_dev *__cpu_dev_##cpu_devX __used \ ++ static const struct cpu_dev *const __cpu_dev_##cpu_devX __used \ + __attribute__((__section__(".x86_cpu_dev.init"))) = \ + &cpu_devX; + +-extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[]; ++extern const struct cpu_dev *const __x86_cpu_dev_start[], ++ *const __x86_cpu_dev_end[]; + + extern void display_cacheinfo(struct cpuinfo_x86 *c); + +Index: linux-2.6-tip/arch/x86/kernel/cpu/cpu_debug.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/cpu/cpu_debug.c +@@ -0,0 +1,901 @@ ++/* ++ * CPU x86 architecture debug code ++ * ++ * Copyright(C) 2009 Jaswinder Singh Rajput ++ * ++ * For licencing details see kernel-base/COPYING ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include 
++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]); ++static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]); ++static DEFINE_PER_CPU(unsigned, cpu_modelflag); ++static DEFINE_PER_CPU(int, cpu_priv_count); ++static DEFINE_PER_CPU(unsigned, cpu_model); ++ ++static DEFINE_MUTEX(cpu_debug_lock); ++ ++static struct dentry *cpu_debugfs_dir; ++ ++static struct cpu_debug_base cpu_base[] = { ++ { "mc", CPU_MC, 0 }, ++ { "monitor", CPU_MONITOR, 0 }, ++ { "time", CPU_TIME, 0 }, ++ { "pmc", CPU_PMC, 1 }, ++ { "platform", CPU_PLATFORM, 0 }, ++ { "apic", CPU_APIC, 0 }, ++ { "poweron", CPU_POWERON, 0 }, ++ { "control", CPU_CONTROL, 0 }, ++ { "features", CPU_FEATURES, 0 }, ++ { "lastbranch", CPU_LBRANCH, 0 }, ++ { "bios", CPU_BIOS, 0 }, ++ { "freq", CPU_FREQ, 0 }, ++ { "mtrr", CPU_MTRR, 0 }, ++ { "perf", CPU_PERF, 0 }, ++ { "cache", CPU_CACHE, 0 }, ++ { "sysenter", CPU_SYSENTER, 0 }, ++ { "therm", CPU_THERM, 0 }, ++ { "misc", CPU_MISC, 0 }, ++ { "debug", CPU_DEBUG, 0 }, ++ { "pat", CPU_PAT, 0 }, ++ { "vmx", CPU_VMX, 0 }, ++ { "call", CPU_CALL, 0 }, ++ { "base", CPU_BASE, 0 }, ++ { "ver", CPU_VER, 0 }, ++ { "conf", CPU_CONF, 0 }, ++ { "smm", CPU_SMM, 0 }, ++ { "svm", CPU_SVM, 0 }, ++ { "osvm", CPU_OSVM, 0 }, ++ { "tss", CPU_TSS, 0 }, ++ { "cr", CPU_CR, 0 }, ++ { "dt", CPU_DT, 0 }, ++ { "registers", CPU_REG_ALL, 0 }, ++}; ++ ++static struct cpu_file_base cpu_file[] = { ++ { "index", CPU_REG_ALL, 0 }, ++ { "value", CPU_REG_ALL, 1 }, ++}; ++ ++/* Intel Registers Range */ ++static struct cpu_debug_range cpu_intel_range[] = { ++ { 0x00000000, 0x00000001, CPU_MC, CPU_INTEL_ALL }, ++ { 0x00000006, 0x00000007, CPU_MONITOR, CPU_CX_AT_XE }, ++ { 0x00000010, 0x00000010, CPU_TIME, CPU_INTEL_ALL }, ++ { 0x00000011, 0x00000013, CPU_PMC, CPU_INTEL_PENTIUM }, ++ { 0x00000017, 0x00000017, CPU_PLATFORM, CPU_PX_CX_AT_XE }, ++ { 0x0000001B, 0x0000001B, CPU_APIC, CPU_P6_CX_AT_XE }, ++ ++ { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_PX_CX_AT_XE }, ++ { 0x0000002B, 0x0000002B, CPU_POWERON, CPU_INTEL_XEON }, ++ { 0x0000002C, 0x0000002C, CPU_FREQ, CPU_INTEL_XEON }, ++ { 0x0000003A, 0x0000003A, CPU_CONTROL, CPU_CX_AT_XE }, ++ ++ { 0x00000040, 0x00000043, CPU_LBRANCH, CPU_PM_CX_AT_XE }, ++ { 0x00000044, 0x00000047, CPU_LBRANCH, CPU_PM_CO_AT }, ++ { 0x00000060, 0x00000063, CPU_LBRANCH, CPU_C2_AT }, ++ { 0x00000064, 0x00000067, CPU_LBRANCH, CPU_INTEL_ATOM }, ++ ++ { 0x00000079, 0x00000079, CPU_BIOS, CPU_P6_CX_AT_XE }, ++ { 0x00000088, 0x0000008A, CPU_CACHE, CPU_INTEL_P6 }, ++ { 0x0000008B, 0x0000008B, CPU_BIOS, CPU_P6_CX_AT_XE }, ++ { 0x0000009B, 0x0000009B, CPU_MONITOR, CPU_INTEL_XEON }, ++ ++ { 0x000000C1, 0x000000C2, CPU_PMC, CPU_P6_CX_AT }, ++ { 0x000000CD, 0x000000CD, CPU_FREQ, CPU_CX_AT }, ++ { 0x000000E7, 0x000000E8, CPU_PERF, CPU_CX_AT }, ++ { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_P6_CX_XE }, ++ ++ { 0x00000116, 0x00000116, CPU_CACHE, CPU_INTEL_P6 }, ++ { 0x00000118, 0x00000118, CPU_CACHE, CPU_INTEL_P6 }, ++ { 0x00000119, 0x00000119, CPU_CACHE, CPU_INTEL_PX }, ++ { 0x0000011A, 0x0000011B, CPU_CACHE, CPU_INTEL_P6 }, ++ { 0x0000011E, 0x0000011E, CPU_CACHE, CPU_PX_CX_AT }, ++ ++ { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_P6_CX_AT_XE }, ++ { 0x00000179, 0x0000017A, CPU_MC, CPU_PX_CX_AT_XE }, ++ { 0x0000017B, 0x0000017B, CPU_MC, CPU_P6_XE }, ++ { 0x00000186, 0x00000187, CPU_PMC, CPU_P6_CX_AT }, ++ { 0x00000198, 0x00000199, 
CPU_PERF, CPU_PM_CX_AT_XE }, ++ { 0x0000019A, 0x0000019A, CPU_TIME, CPU_PM_CX_AT_XE }, ++ { 0x0000019B, 0x0000019D, CPU_THERM, CPU_PM_CX_AT_XE }, ++ { 0x000001A0, 0x000001A0, CPU_MISC, CPU_PM_CX_AT_XE }, ++ ++ { 0x000001C9, 0x000001C9, CPU_LBRANCH, CPU_PM_CX_AT }, ++ { 0x000001D7, 0x000001D8, CPU_LBRANCH, CPU_INTEL_XEON }, ++ { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_CX_AT_XE }, ++ { 0x000001DA, 0x000001DA, CPU_LBRANCH, CPU_INTEL_XEON }, ++ { 0x000001DB, 0x000001DB, CPU_LBRANCH, CPU_P6_XE }, ++ { 0x000001DC, 0x000001DC, CPU_LBRANCH, CPU_INTEL_P6 }, ++ { 0x000001DD, 0x000001DE, CPU_LBRANCH, CPU_PX_CX_AT_XE }, ++ { 0x000001E0, 0x000001E0, CPU_LBRANCH, CPU_INTEL_P6 }, ++ ++ { 0x00000200, 0x0000020F, CPU_MTRR, CPU_P6_CX_XE }, ++ { 0x00000250, 0x00000250, CPU_MTRR, CPU_P6_CX_XE }, ++ { 0x00000258, 0x00000259, CPU_MTRR, CPU_P6_CX_XE }, ++ { 0x00000268, 0x0000026F, CPU_MTRR, CPU_P6_CX_XE }, ++ { 0x00000277, 0x00000277, CPU_PAT, CPU_C2_AT_XE }, ++ { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_P6_CX_XE }, ++ ++ { 0x00000300, 0x00000308, CPU_PMC, CPU_INTEL_XEON }, ++ { 0x00000309, 0x0000030B, CPU_PMC, CPU_C2_AT_XE }, ++ { 0x0000030C, 0x00000311, CPU_PMC, CPU_INTEL_XEON }, ++ { 0x00000345, 0x00000345, CPU_PMC, CPU_C2_AT }, ++ { 0x00000360, 0x00000371, CPU_PMC, CPU_INTEL_XEON }, ++ { 0x0000038D, 0x00000390, CPU_PMC, CPU_C2_AT }, ++ { 0x000003A0, 0x000003BE, CPU_PMC, CPU_INTEL_XEON }, ++ { 0x000003C0, 0x000003CD, CPU_PMC, CPU_INTEL_XEON }, ++ { 0x000003E0, 0x000003E1, CPU_PMC, CPU_INTEL_XEON }, ++ { 0x000003F0, 0x000003F0, CPU_PMC, CPU_INTEL_XEON }, ++ { 0x000003F1, 0x000003F1, CPU_PMC, CPU_C2_AT_XE }, ++ { 0x000003F2, 0x000003F2, CPU_PMC, CPU_INTEL_XEON }, ++ ++ { 0x00000400, 0x00000402, CPU_MC, CPU_PM_CX_AT_XE }, ++ { 0x00000403, 0x00000403, CPU_MC, CPU_INTEL_XEON }, ++ { 0x00000404, 0x00000406, CPU_MC, CPU_PM_CX_AT_XE }, ++ { 0x00000407, 0x00000407, CPU_MC, CPU_INTEL_XEON }, ++ { 0x00000408, 0x0000040A, CPU_MC, CPU_PM_CX_AT_XE }, ++ { 0x0000040B, 0x0000040B, CPU_MC, CPU_INTEL_XEON }, ++ { 0x0000040C, 0x0000040E, CPU_MC, CPU_PM_CX_XE }, ++ { 0x0000040F, 0x0000040F, CPU_MC, CPU_INTEL_XEON }, ++ { 0x00000410, 0x00000412, CPU_MC, CPU_PM_CX_AT_XE }, ++ { 0x00000413, 0x00000417, CPU_MC, CPU_CX_AT_XE }, ++ { 0x00000480, 0x0000048B, CPU_VMX, CPU_CX_AT_XE }, ++ ++ { 0x00000600, 0x00000600, CPU_DEBUG, CPU_PM_CX_AT_XE }, ++ { 0x00000680, 0x0000068F, CPU_LBRANCH, CPU_INTEL_XEON }, ++ { 0x000006C0, 0x000006CF, CPU_LBRANCH, CPU_INTEL_XEON }, ++ ++ { 0x000107CC, 0x000107D3, CPU_PMC, CPU_INTEL_XEON_MP }, ++ ++ { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_INTEL_XEON }, ++ { 0xC0000081, 0xC0000082, CPU_CALL, CPU_INTEL_XEON }, ++ { 0xC0000084, 0xC0000084, CPU_CALL, CPU_INTEL_XEON }, ++ { 0xC0000100, 0xC0000102, CPU_BASE, CPU_INTEL_XEON }, ++}; ++ ++/* AMD Registers Range */ ++static struct cpu_debug_range cpu_amd_range[] = { ++ { 0x00000000, 0x00000001, CPU_MC, CPU_K10_PLUS, }, ++ { 0x00000010, 0x00000010, CPU_TIME, CPU_K8_PLUS, }, ++ { 0x0000001B, 0x0000001B, CPU_APIC, CPU_K8_PLUS, }, ++ { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_K7_PLUS }, ++ { 0x0000008B, 0x0000008B, CPU_VER, CPU_K8_PLUS }, ++ { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_K8_PLUS, }, ++ ++ { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_K8_PLUS, }, ++ { 0x00000179, 0x0000017B, CPU_MC, CPU_K8_PLUS, }, ++ { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_K8_PLUS, }, ++ { 0x000001DB, 0x000001DE, CPU_LBRANCH, CPU_K8_PLUS, }, ++ ++ { 0x00000200, 0x0000020F, CPU_MTRR, CPU_K8_PLUS, }, ++ { 0x00000250, 0x00000250, CPU_MTRR, CPU_K8_PLUS, }, ++ { 0x00000258, 0x00000259, CPU_MTRR, 
CPU_K8_PLUS, }, ++ { 0x00000268, 0x0000026F, CPU_MTRR, CPU_K8_PLUS, }, ++ { 0x00000277, 0x00000277, CPU_PAT, CPU_K8_PLUS, }, ++ { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_K8_PLUS, }, ++ ++ { 0x00000400, 0x00000413, CPU_MC, CPU_K8_PLUS, }, ++ ++ { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_AMD_ALL, }, ++ { 0xC0000081, 0xC0000084, CPU_CALL, CPU_K8_PLUS, }, ++ { 0xC0000100, 0xC0000102, CPU_BASE, CPU_K8_PLUS, }, ++ { 0xC0000103, 0xC0000103, CPU_TIME, CPU_K10_PLUS, }, ++ ++ { 0xC0010000, 0xC0010007, CPU_PMC, CPU_K8_PLUS, }, ++ { 0xC0010010, 0xC0010010, CPU_CONF, CPU_K7_PLUS, }, ++ { 0xC0010015, 0xC0010015, CPU_CONF, CPU_K7_PLUS, }, ++ { 0xC0010016, 0xC001001A, CPU_MTRR, CPU_K8_PLUS, }, ++ { 0xC001001D, 0xC001001D, CPU_MTRR, CPU_K8_PLUS, }, ++ { 0xC001001F, 0xC001001F, CPU_CONF, CPU_K8_PLUS, }, ++ { 0xC0010030, 0xC0010035, CPU_BIOS, CPU_K8_PLUS, }, ++ { 0xC0010044, 0xC0010048, CPU_MC, CPU_K8_PLUS, }, ++ { 0xC0010050, 0xC0010056, CPU_SMM, CPU_K0F_PLUS, }, ++ { 0xC0010058, 0xC0010058, CPU_CONF, CPU_K10_PLUS, }, ++ { 0xC0010060, 0xC0010060, CPU_CACHE, CPU_AMD_11, }, ++ { 0xC0010061, 0xC0010068, CPU_SMM, CPU_K10_PLUS, }, ++ { 0xC0010069, 0xC001006B, CPU_SMM, CPU_AMD_11, }, ++ { 0xC0010070, 0xC0010071, CPU_SMM, CPU_K10_PLUS, }, ++ { 0xC0010111, 0xC0010113, CPU_SMM, CPU_K8_PLUS, }, ++ { 0xC0010114, 0xC0010118, CPU_SVM, CPU_K10_PLUS, }, ++ { 0xC0010140, 0xC0010141, CPU_OSVM, CPU_K10_PLUS, }, ++ { 0xC0011022, 0xC0011023, CPU_CONF, CPU_K10_PLUS, }, ++}; ++ ++ ++/* Intel */ ++static int get_intel_modelflag(unsigned model) ++{ ++ int flag; ++ ++ switch (model) { ++ case 0x0501: ++ case 0x0502: ++ case 0x0504: ++ flag = CPU_INTEL_PENTIUM; ++ break; ++ case 0x0601: ++ case 0x0603: ++ case 0x0605: ++ case 0x0607: ++ case 0x0608: ++ case 0x060A: ++ case 0x060B: ++ flag = CPU_INTEL_P6; ++ break; ++ case 0x0609: ++ case 0x060D: ++ flag = CPU_INTEL_PENTIUM_M; ++ break; ++ case 0x060E: ++ flag = CPU_INTEL_CORE; ++ break; ++ case 0x060F: ++ case 0x0617: ++ flag = CPU_INTEL_CORE2; ++ break; ++ case 0x061C: ++ flag = CPU_INTEL_ATOM; ++ break; ++ case 0x0F00: ++ case 0x0F01: ++ case 0x0F02: ++ case 0x0F03: ++ case 0x0F04: ++ flag = CPU_INTEL_XEON_P4; ++ break; ++ case 0x0F06: ++ flag = CPU_INTEL_XEON_MP; ++ break; ++ default: ++ flag = CPU_NONE; ++ break; ++ } ++ ++ return flag; ++} ++ ++/* AMD */ ++static int get_amd_modelflag(unsigned model) ++{ ++ int flag; ++ ++ switch (model >> 8) { ++ case 0x6: ++ flag = CPU_AMD_K6; ++ break; ++ case 0x7: ++ flag = CPU_AMD_K7; ++ break; ++ case 0x8: ++ flag = CPU_AMD_K8; ++ break; ++ case 0xf: ++ flag = CPU_AMD_0F; ++ break; ++ case 0x10: ++ flag = CPU_AMD_10; ++ break; ++ case 0x11: ++ flag = CPU_AMD_11; ++ break; ++ default: ++ flag = CPU_NONE; ++ break; ++ } ++ ++ return flag; ++} ++ ++static int get_cpu_modelflag(unsigned cpu) ++{ ++ int flag; ++ ++ flag = per_cpu(cpu_model, cpu); ++ ++ switch (flag >> 16) { ++ case X86_VENDOR_INTEL: ++ flag = get_intel_modelflag(flag); ++ break; ++ case X86_VENDOR_AMD: ++ flag = get_amd_modelflag(flag & 0xffff); ++ break; ++ default: ++ flag = CPU_NONE; ++ break; ++ } ++ ++ return flag; ++} ++ ++static int get_cpu_range_count(unsigned cpu) ++{ ++ int index; ++ ++ switch (per_cpu(cpu_model, cpu) >> 16) { ++ case X86_VENDOR_INTEL: ++ index = ARRAY_SIZE(cpu_intel_range); ++ break; ++ case X86_VENDOR_AMD: ++ index = ARRAY_SIZE(cpu_amd_range); ++ break; ++ default: ++ index = 0; ++ break; ++ } ++ ++ return index; ++} ++ ++static int is_typeflag_valid(unsigned cpu, unsigned flag) ++{ ++ unsigned vendor, modelflag; ++ int i, index; ++ ++ /* Standard 
Registers should be always valid */ ++ if (flag >= CPU_TSS) ++ return 1; ++ ++ modelflag = per_cpu(cpu_modelflag, cpu); ++ vendor = per_cpu(cpu_model, cpu) >> 16; ++ index = get_cpu_range_count(cpu); ++ ++ for (i = 0; i < index; i++) { ++ switch (vendor) { ++ case X86_VENDOR_INTEL: ++ if ((cpu_intel_range[i].model & modelflag) && ++ (cpu_intel_range[i].flag & flag)) ++ return 1; ++ break; ++ case X86_VENDOR_AMD: ++ if ((cpu_amd_range[i].model & modelflag) && ++ (cpu_amd_range[i].flag & flag)) ++ return 1; ++ break; ++ } ++ } ++ ++ /* Invalid */ ++ return 0; ++} ++ ++static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max, ++ int index, unsigned flag) ++{ ++ unsigned modelflag; ++ ++ modelflag = per_cpu(cpu_modelflag, cpu); ++ *max = 0; ++ switch (per_cpu(cpu_model, cpu) >> 16) { ++ case X86_VENDOR_INTEL: ++ if ((cpu_intel_range[index].model & modelflag) && ++ (cpu_intel_range[index].flag & flag)) { ++ *min = cpu_intel_range[index].min; ++ *max = cpu_intel_range[index].max; ++ } ++ break; ++ case X86_VENDOR_AMD: ++ if ((cpu_amd_range[index].model & modelflag) && ++ (cpu_amd_range[index].flag & flag)) { ++ *min = cpu_amd_range[index].min; ++ *max = cpu_amd_range[index].max; ++ } ++ break; ++ } ++ ++ return *max; ++} ++ ++/* This function can also be called with seq = NULL for printk */ ++static void print_cpu_data(struct seq_file *seq, unsigned type, ++ u32 low, u32 high) ++{ ++ struct cpu_private *priv; ++ u64 val = high; ++ ++ if (seq) { ++ priv = seq->private; ++ if (priv->file) { ++ val = (val << 32) | low; ++ seq_printf(seq, "0x%llx\n", val); ++ } else ++ seq_printf(seq, " %08x: %08x_%08x\n", ++ type, high, low); ++ } else ++ printk(KERN_INFO " %08x: %08x_%08x\n", type, high, low); ++} ++ ++/* This function can also be called with seq = NULL for printk */ ++static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag) ++{ ++ unsigned msr, msr_min, msr_max; ++ struct cpu_private *priv; ++ u32 low, high; ++ int i, range; ++ ++ if (seq) { ++ priv = seq->private; ++ if (priv->file) { ++ if (!rdmsr_safe_on_cpu(priv->cpu, priv->reg, ++ &low, &high)) ++ print_cpu_data(seq, priv->reg, low, high); ++ return; ++ } ++ } ++ ++ range = get_cpu_range_count(cpu); ++ ++ for (i = 0; i < range; i++) { ++ if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag)) ++ continue; ++ ++ for (msr = msr_min; msr <= msr_max; msr++) { ++ if (rdmsr_safe_on_cpu(cpu, msr, &low, &high)) ++ continue; ++ print_cpu_data(seq, msr, low, high); ++ } ++ } ++} ++ ++static void print_tss(void *arg) ++{ ++ struct pt_regs *regs = task_pt_regs(current); ++ struct seq_file *seq = arg; ++ unsigned int seg; ++ ++ seq_printf(seq, " RAX\t: %016lx\n", regs->ax); ++ seq_printf(seq, " RBX\t: %016lx\n", regs->bx); ++ seq_printf(seq, " RCX\t: %016lx\n", regs->cx); ++ seq_printf(seq, " RDX\t: %016lx\n", regs->dx); ++ ++ seq_printf(seq, " RSI\t: %016lx\n", regs->si); ++ seq_printf(seq, " RDI\t: %016lx\n", regs->di); ++ seq_printf(seq, " RBP\t: %016lx\n", regs->bp); ++ seq_printf(seq, " ESP\t: %016lx\n", regs->sp); ++ ++#ifdef CONFIG_X86_64 ++ seq_printf(seq, " R08\t: %016lx\n", regs->r8); ++ seq_printf(seq, " R09\t: %016lx\n", regs->r9); ++ seq_printf(seq, " R10\t: %016lx\n", regs->r10); ++ seq_printf(seq, " R11\t: %016lx\n", regs->r11); ++ seq_printf(seq, " R12\t: %016lx\n", regs->r12); ++ seq_printf(seq, " R13\t: %016lx\n", regs->r13); ++ seq_printf(seq, " R14\t: %016lx\n", regs->r14); ++ seq_printf(seq, " R15\t: %016lx\n", regs->r15); ++#endif ++ ++ asm("movl %%cs,%0" : "=r" (seg)); ++ seq_printf(seq, " CS\t: 
%04x\n", seg); ++ asm("movl %%ds,%0" : "=r" (seg)); ++ seq_printf(seq, " DS\t: %04x\n", seg); ++ seq_printf(seq, " SS\t: %04lx\n", regs->ss & 0xffff); ++ asm("movl %%es,%0" : "=r" (seg)); ++ seq_printf(seq, " ES\t: %04x\n", seg); ++ asm("movl %%fs,%0" : "=r" (seg)); ++ seq_printf(seq, " FS\t: %04x\n", seg); ++ asm("movl %%gs,%0" : "=r" (seg)); ++ seq_printf(seq, " GS\t: %04x\n", seg); ++ ++ seq_printf(seq, " EFLAGS\t: %016lx\n", regs->flags); ++ ++ seq_printf(seq, " EIP\t: %016lx\n", regs->ip); ++} ++ ++static void print_cr(void *arg) ++{ ++ struct seq_file *seq = arg; ++ ++ seq_printf(seq, " cr0\t: %016lx\n", read_cr0()); ++ seq_printf(seq, " cr2\t: %016lx\n", read_cr2()); ++ seq_printf(seq, " cr3\t: %016lx\n", read_cr3()); ++ seq_printf(seq, " cr4\t: %016lx\n", read_cr4_safe()); ++#ifdef CONFIG_X86_64 ++ seq_printf(seq, " cr8\t: %016lx\n", read_cr8()); ++#endif ++} ++ ++static void print_desc_ptr(char *str, struct seq_file *seq, struct desc_ptr dt) ++{ ++ seq_printf(seq, " %s\t: %016llx\n", str, (u64)(dt.address | dt.size)); ++} ++ ++static void print_dt(void *seq) ++{ ++ struct desc_ptr dt; ++ unsigned long ldt; ++ ++ /* IDT */ ++ store_idt((struct desc_ptr *)&dt); ++ print_desc_ptr("IDT", seq, dt); ++ ++ /* GDT */ ++ store_gdt((struct desc_ptr *)&dt); ++ print_desc_ptr("GDT", seq, dt); ++ ++ /* LDT */ ++ store_ldt(ldt); ++ seq_printf(seq, " LDT\t: %016lx\n", ldt); ++ ++ /* TR */ ++ store_tr(ldt); ++ seq_printf(seq, " TR\t: %016lx\n", ldt); ++} ++ ++static void print_dr(void *arg) ++{ ++ struct seq_file *seq = arg; ++ unsigned long dr; ++ int i; ++ ++ for (i = 0; i < 8; i++) { ++ /* Ignore db4, db5 */ ++ if ((i == 4) || (i == 5)) ++ continue; ++ get_debugreg(dr, i); ++ seq_printf(seq, " dr%d\t: %016lx\n", i, dr); ++ } ++ ++ seq_printf(seq, "\n MSR\t:\n"); ++} ++ ++static void print_apic(void *arg) ++{ ++ struct seq_file *seq = arg; ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++ seq_printf(seq, " LAPIC\t:\n"); ++ seq_printf(seq, " ID\t\t: %08x\n", apic_read(APIC_ID) >> 24); ++ seq_printf(seq, " LVR\t\t: %08x\n", apic_read(APIC_LVR)); ++ seq_printf(seq, " TASKPRI\t: %08x\n", apic_read(APIC_TASKPRI)); ++ seq_printf(seq, " ARBPRI\t\t: %08x\n", apic_read(APIC_ARBPRI)); ++ seq_printf(seq, " PROCPRI\t: %08x\n", apic_read(APIC_PROCPRI)); ++ seq_printf(seq, " LDR\t\t: %08x\n", apic_read(APIC_LDR)); ++ seq_printf(seq, " DFR\t\t: %08x\n", apic_read(APIC_DFR)); ++ seq_printf(seq, " SPIV\t\t: %08x\n", apic_read(APIC_SPIV)); ++ seq_printf(seq, " ISR\t\t: %08x\n", apic_read(APIC_ISR)); ++ seq_printf(seq, " ESR\t\t: %08x\n", apic_read(APIC_ESR)); ++ seq_printf(seq, " ICR\t\t: %08x\n", apic_read(APIC_ICR)); ++ seq_printf(seq, " ICR2\t\t: %08x\n", apic_read(APIC_ICR2)); ++ seq_printf(seq, " LVTT\t\t: %08x\n", apic_read(APIC_LVTT)); ++ seq_printf(seq, " LVTTHMR\t: %08x\n", apic_read(APIC_LVTTHMR)); ++ seq_printf(seq, " LVTPC\t\t: %08x\n", apic_read(APIC_LVTPC)); ++ seq_printf(seq, " LVT0\t\t: %08x\n", apic_read(APIC_LVT0)); ++ seq_printf(seq, " LVT1\t\t: %08x\n", apic_read(APIC_LVT1)); ++ seq_printf(seq, " LVTERR\t\t: %08x\n", apic_read(APIC_LVTERR)); ++ seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT)); ++ seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT)); ++ seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR)); ++#endif /* CONFIG_X86_LOCAL_APIC */ ++ ++ seq_printf(seq, "\n MSR\t:\n"); ++} ++ ++static int cpu_seq_show(struct seq_file *seq, void *v) ++{ ++ struct cpu_private *priv = seq->private; ++ ++ if (priv == NULL) ++ return -EINVAL; ++ ++ switch (cpu_base[priv->type].flag) { ++ 
case CPU_TSS: ++ smp_call_function_single(priv->cpu, print_tss, seq, 1); ++ break; ++ case CPU_CR: ++ smp_call_function_single(priv->cpu, print_cr, seq, 1); ++ break; ++ case CPU_DT: ++ smp_call_function_single(priv->cpu, print_dt, seq, 1); ++ break; ++ case CPU_DEBUG: ++ if (priv->file == CPU_INDEX_BIT) ++ smp_call_function_single(priv->cpu, print_dr, seq, 1); ++ print_msr(seq, priv->cpu, cpu_base[priv->type].flag); ++ break; ++ case CPU_APIC: ++ if (priv->file == CPU_INDEX_BIT) ++ smp_call_function_single(priv->cpu, print_apic, seq, 1); ++ print_msr(seq, priv->cpu, cpu_base[priv->type].flag); ++ break; ++ ++ default: ++ print_msr(seq, priv->cpu, cpu_base[priv->type].flag); ++ break; ++ } ++ seq_printf(seq, "\n"); ++ ++ return 0; ++} ++ ++static void *cpu_seq_start(struct seq_file *seq, loff_t *pos) ++{ ++ if (*pos == 0) /* One time is enough ;-) */ ++ return seq; ++ ++ return NULL; ++} ++ ++static void *cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++{ ++ (*pos)++; ++ ++ return cpu_seq_start(seq, pos); ++} ++ ++static void cpu_seq_stop(struct seq_file *seq, void *v) ++{ ++} ++ ++static const struct seq_operations cpu_seq_ops = { ++ .start = cpu_seq_start, ++ .next = cpu_seq_next, ++ .stop = cpu_seq_stop, ++ .show = cpu_seq_show, ++}; ++ ++static int cpu_seq_open(struct inode *inode, struct file *file) ++{ ++ struct cpu_private *priv = inode->i_private; ++ struct seq_file *seq; ++ int err; ++ ++ err = seq_open(file, &cpu_seq_ops); ++ if (!err) { ++ seq = file->private_data; ++ seq->private = priv; ++ } ++ ++ return err; ++} ++ ++static int write_msr(struct cpu_private *priv, u64 val) ++{ ++ u32 low, high; ++ ++ high = (val >> 32) & 0xffffffff; ++ low = val & 0xffffffff; ++ ++ if (!wrmsr_safe_on_cpu(priv->cpu, priv->reg, low, high)) ++ return 0; ++ ++ return -EPERM; ++} ++ ++static int write_cpu_register(struct cpu_private *priv, const char *buf) ++{ ++ int ret = -EPERM; ++ u64 val; ++ ++ ret = strict_strtoull(buf, 0, &val); ++ if (ret < 0) ++ return ret; ++ ++ /* Supporting only MSRs */ ++ if (priv->type < CPU_TSS_BIT) ++ return write_msr(priv, val); ++ ++ return ret; ++} ++ ++static ssize_t cpu_write(struct file *file, const char __user *ubuf, ++ size_t count, loff_t *off) ++{ ++ struct seq_file *seq = file->private_data; ++ struct cpu_private *priv = seq->private; ++ char buf[19]; ++ ++ if ((priv == NULL) || (count >= sizeof(buf))) ++ return -EINVAL; ++ ++ if (copy_from_user(&buf, ubuf, count)) ++ return -EFAULT; ++ ++ buf[count] = 0; ++ ++ if ((cpu_base[priv->type].write) && (cpu_file[priv->file].write)) ++ if (!write_cpu_register(priv, buf)) ++ return count; ++ ++ return -EACCES; ++} ++ ++static const struct file_operations cpu_fops = { ++ .owner = THIS_MODULE, ++ .open = cpu_seq_open, ++ .read = seq_read, ++ .write = cpu_write, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++static int cpu_create_file(unsigned cpu, unsigned type, unsigned reg, ++ unsigned file, struct dentry *dentry) ++{ ++ struct cpu_private *priv = NULL; ++ ++ /* Already intialized */ ++ if (file == CPU_INDEX_BIT) ++ if (per_cpu(cpu_arr[type].init, cpu)) ++ return 0; ++ ++ priv = kzalloc(sizeof(*priv), GFP_KERNEL); ++ if (priv == NULL) ++ return -ENOMEM; ++ ++ priv->cpu = cpu; ++ priv->type = type; ++ priv->reg = reg; ++ priv->file = file; ++ mutex_lock(&cpu_debug_lock); ++ per_cpu(priv_arr[type], cpu) = priv; ++ per_cpu(cpu_priv_count, cpu)++; ++ mutex_unlock(&cpu_debug_lock); ++ ++ if (file) ++ debugfs_create_file(cpu_file[file].name, S_IRUGO, ++ dentry, (void *)priv, &cpu_fops); ++ else { 
++ debugfs_create_file(cpu_base[type].name, S_IRUGO, ++ per_cpu(cpu_arr[type].dentry, cpu), ++ (void *)priv, &cpu_fops); ++ mutex_lock(&cpu_debug_lock); ++ per_cpu(cpu_arr[type].init, cpu) = 1; ++ mutex_unlock(&cpu_debug_lock); ++ } ++ ++ return 0; ++} ++ ++static int cpu_init_regfiles(unsigned cpu, unsigned int type, unsigned reg, ++ struct dentry *dentry) ++{ ++ unsigned file; ++ int err = 0; ++ ++ for (file = 0; file < ARRAY_SIZE(cpu_file); file++) { ++ err = cpu_create_file(cpu, type, reg, file, dentry); ++ if (err) ++ return err; ++ } ++ ++ return err; ++} ++ ++static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry) ++{ ++ struct dentry *cpu_dentry = NULL; ++ unsigned reg, reg_min, reg_max; ++ int i, range, err = 0; ++ char reg_dir[12]; ++ u32 low, high; ++ ++ range = get_cpu_range_count(cpu); ++ ++ for (i = 0; i < range; i++) { ++ if (!get_cpu_range(cpu, ®_min, ®_max, i, ++ cpu_base[type].flag)) ++ continue; ++ ++ for (reg = reg_min; reg <= reg_max; reg++) { ++ if (rdmsr_safe_on_cpu(cpu, reg, &low, &high)) ++ continue; ++ ++ sprintf(reg_dir, "0x%x", reg); ++ cpu_dentry = debugfs_create_dir(reg_dir, dentry); ++ err = cpu_init_regfiles(cpu, type, reg, cpu_dentry); ++ if (err) ++ return err; ++ } ++ } ++ ++ return err; ++} ++ ++static int cpu_init_allreg(unsigned cpu, struct dentry *dentry) ++{ ++ struct dentry *cpu_dentry = NULL; ++ unsigned type; ++ int err = 0; ++ ++ for (type = 0; type < ARRAY_SIZE(cpu_base) - 1; type++) { ++ if (!is_typeflag_valid(cpu, cpu_base[type].flag)) ++ continue; ++ cpu_dentry = debugfs_create_dir(cpu_base[type].name, dentry); ++ per_cpu(cpu_arr[type].dentry, cpu) = cpu_dentry; ++ ++ if (type < CPU_TSS_BIT) ++ err = cpu_init_msr(cpu, type, cpu_dentry); ++ else ++ err = cpu_create_file(cpu, type, 0, CPU_INDEX_BIT, ++ cpu_dentry); ++ if (err) ++ return err; ++ } ++ ++ return err; ++} ++ ++static int cpu_init_cpu(void) ++{ ++ struct dentry *cpu_dentry = NULL; ++ struct cpuinfo_x86 *cpui; ++ char cpu_dir[12]; ++ unsigned cpu; ++ int err = 0; ++ ++ for (cpu = 0; cpu < nr_cpu_ids; cpu++) { ++ cpui = &cpu_data(cpu); ++ if (!cpu_has(cpui, X86_FEATURE_MSR)) ++ continue; ++ per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) | ++ (cpui->x86 << 8) | ++ (cpui->x86_model)); ++ per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu); ++ ++ sprintf(cpu_dir, "cpu%d", cpu); ++ cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir); ++ err = cpu_init_allreg(cpu, cpu_dentry); ++ ++ pr_info("cpu%d(%d) debug files %d\n", ++ cpu, nr_cpu_ids, per_cpu(cpu_priv_count, cpu)); ++ if (per_cpu(cpu_priv_count, cpu) > MAX_CPU_FILES) { ++ pr_err("Register files count %d exceeds limit %d\n", ++ per_cpu(cpu_priv_count, cpu), MAX_CPU_FILES); ++ per_cpu(cpu_priv_count, cpu) = MAX_CPU_FILES; ++ err = -ENFILE; ++ } ++ if (err) ++ return err; ++ } ++ ++ return err; ++} ++ ++static int __init cpu_debug_init(void) ++{ ++ cpu_debugfs_dir = debugfs_create_dir("cpu", arch_debugfs_dir); ++ ++ return cpu_init_cpu(); ++} ++ ++static void __exit cpu_debug_exit(void) ++{ ++ int i, cpu; ++ ++ if (cpu_debugfs_dir) ++ debugfs_remove_recursive(cpu_debugfs_dir); ++ ++ for (cpu = 0; cpu < nr_cpu_ids; cpu++) ++ for (i = 0; i < per_cpu(cpu_priv_count, cpu); i++) ++ kfree(per_cpu(priv_arr[i], cpu)); ++} ++ ++module_init(cpu_debug_init); ++module_exit(cpu_debug_exit); ++ ++MODULE_AUTHOR("Jaswinder Singh Rajput"); ++MODULE_DESCRIPTION("CPU Debug module"); ++MODULE_LICENSE("GPL"); +Index: linux-2.6-tip/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c 
+=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +@@ -33,7 +33,7 @@ + #include + #include + #include +-#include ++#include + + #include + #include +@@ -70,6 +70,8 @@ struct acpi_cpufreq_data { + + static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); + ++DEFINE_TRACE(power_mark); ++ + /* acpi_perf_data is a pointer to percpu data. */ + static struct acpi_processor_performance *acpi_perf_data; + +@@ -601,7 +603,7 @@ static int acpi_cpufreq_cpu_init(struct + if (!data) + return -ENOMEM; + +- data->acpi_data = percpu_ptr(acpi_perf_data, cpu); ++ data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu); + per_cpu(drv_data, cpu) = data; + + if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) +Index: linux-2.6-tip/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/cpufreq/e_powersaver.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/cpufreq/e_powersaver.c +@@ -204,12 +204,12 @@ static int eps_cpu_init(struct cpufreq_p + } + /* Enable Enhanced PowerSaver */ + rdmsrl(MSR_IA32_MISC_ENABLE, val); +- if (!(val & 1 << 16)) { +- val |= 1 << 16; ++ if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { ++ val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; + wrmsrl(MSR_IA32_MISC_ENABLE, val); + /* Can be locked at 0 */ + rdmsrl(MSR_IA32_MISC_ENABLE, val); +- if (!(val & 1 << 16)) { ++ if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { + printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n"); + return -ENODEV; + } +Index: linux-2.6-tip/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +@@ -203,7 +203,7 @@ static int cpufreq_p4_cpu_init(struct cp + unsigned int i; + + #ifdef CONFIG_SMP +- cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu)); ++ cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); + #endif + + /* Errata workaround */ +Index: linux-2.6-tip/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/cpufreq/powernow-k8.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +@@ -56,7 +56,10 @@ static DEFINE_PER_CPU(struct powernow_k8 + static int cpu_family = CPU_OPTERON; + + #ifndef CONFIG_SMP +-DEFINE_PER_CPU(cpumask_t, cpu_core_map); ++static inline const struct cpumask *cpu_core_mask(int cpu) ++{ ++ return cpumask_of(0); ++} + #endif + + /* Return a frequency in MHz, given an input fid */ +@@ -654,7 +657,7 @@ static int fill_powernow_table(struct po + + dprintk("cfid 0x%x, cvid 0x%x\n", data->currfid, data->currvid); + data->powernow_table = powernow_table; +- if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) ++ if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) + print_basics(data); + + for (j = 0; j < data->numps; j++) +@@ -808,7 +811,7 @@ static int powernow_k8_cpu_init_acpi(str + + /* fill in data */ + data->numps = data->acpi_data.state_count; +- if (first_cpu(per_cpu(cpu_core_map, data->cpu)) == data->cpu) ++ if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) + print_basics(data); + powernow_k8_acpi_pst_values(data, 0); + +@@ -1224,7 +1227,7 @@ static int __cpuinit powernowk8_cpu_init + if (cpu_family == 
CPU_HW_PSTATE) + cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); + else +- cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu)); ++ cpumask_copy(pol->cpus, cpu_core_mask(pol->cpu)); + data->available_cores = pol->cpus; + + if (cpu_family == CPU_HW_PSTATE) +@@ -1286,7 +1289,7 @@ static unsigned int powernowk8_get (unsi + unsigned int khz = 0; + unsigned int first; + +- first = first_cpu(per_cpu(cpu_core_map, cpu)); ++ first = cpumask_first(cpu_core_mask(cpu)); + data = per_cpu(powernow_data, first); + + if (!data) +Index: linux-2.6-tip/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +@@ -390,14 +390,14 @@ static int centrino_cpu_init(struct cpuf + enable it if not. */ + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + +- if (!(l & (1<<16))) { +- l |= (1<<16); ++ if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { ++ l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP; + dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); + wrmsr(MSR_IA32_MISC_ENABLE, l, h); + + /* check to see if it stuck */ + rdmsr(MSR_IA32_MISC_ENABLE, l, h); +- if (!(l & (1<<16))) { ++ if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) { + printk(KERN_INFO PFX + "couldn't enable Enhanced SpeedStep\n"); + return -ENODEV; +Index: linux-2.6-tip/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +@@ -322,7 +322,7 @@ static int speedstep_cpu_init(struct cpu + + /* only run on CPU to be set, or on its sibling */ + #ifdef CONFIG_SMP +- cpumask_copy(policy->cpus, &per_cpu(cpu_sibling_map, policy->cpu)); ++ cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); + #endif + + cpus_allowed = current->cpus_allowed; +Index: linux-2.6-tip/arch/x86/kernel/cpu/cyrix.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/cyrix.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/cyrix.c +@@ -61,23 +61,23 @@ static void __cpuinit do_cyrix_devid(uns + */ + static unsigned char Cx86_dir0_msb __cpuinitdata = 0; + +-static char Cx86_model[][9] __cpuinitdata = { ++static const char __cpuinitconst Cx86_model[][9] = { + "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", + "M II ", "Unknown" + }; +-static char Cx486_name[][5] __cpuinitdata = { ++static const char __cpuinitconst Cx486_name[][5] = { + "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", + "SRx2", "DRx2" + }; +-static char Cx486S_name[][4] __cpuinitdata = { ++static const char __cpuinitconst Cx486S_name[][4] = { + "S", "S2", "Se", "S2e" + }; +-static char Cx486D_name[][4] __cpuinitdata = { ++static const char __cpuinitconst Cx486D_name[][4] = { + "DX", "DX2", "?", "?", "?", "DX4" + }; + static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock"; +-static char cyrix_model_mult1[] __cpuinitdata = "12??43"; +-static char cyrix_model_mult2[] __cpuinitdata = "12233445"; ++static const char __cpuinitconst cyrix_model_mult1[] = "12??43"; ++static const char __cpuinitconst cyrix_model_mult2[] = "12233445"; + + /* + * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old +@@ -435,7 +435,7 @@ static void __cpuinit cyrix_identify(str + } + } + +-static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { ++static const struct cpu_dev 
__cpuinitconst cyrix_cpu_dev = { + .c_vendor = "Cyrix", + .c_ident = { "CyrixInstead" }, + .c_early_init = early_init_cyrix, +@@ -446,7 +446,7 @@ static struct cpu_dev cyrix_cpu_dev __cp + + cpu_dev_register(cyrix_cpu_dev); + +-static struct cpu_dev nsc_cpu_dev __cpuinitdata = { ++static const struct cpu_dev __cpuinitconst nsc_cpu_dev = { + .c_vendor = "NSC", + .c_ident = { "Geode by NSC" }, + .c_init = init_nsc, +Index: linux-2.6-tip/arch/x86/kernel/cpu/intel.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/intel.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/intel.c +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -13,6 +14,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_X86_64 + #include +@@ -24,7 +26,6 @@ + #ifdef CONFIG_X86_LOCAL_APIC + #include + #include +-#include + #endif + + static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) +@@ -54,15 +55,60 @@ static void __cpuinit early_init_intel(s + c->x86_cache_alignment = 128; + #endif + ++ /* CPUID workaround for 0F33/0F34 CPU */ ++ if (c->x86 == 0xF && c->x86_model == 0x3 ++ && (c->x86_mask == 0x3 || c->x86_mask == 0x4)) ++ c->x86_phys_bits = 36; ++ + /* + * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate +- * with P/T states and does not stop in deep C-states ++ * with P/T states and does not stop in deep C-states. ++ * ++ * It is also reliable across cores and sockets. (but not across ++ * cabinets - we turn it off in that case explicitly.) + */ + if (c->x86_power & (1 << 8)) { + set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); + set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); ++ set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); ++ sched_clock_stable = 1; + } + ++ /* ++ * There is a known erratum on Pentium III and Core Solo ++ * and Core Duo CPUs. ++ * " Page with PAT set to WC while associated MTRR is UC ++ * may consolidate to UC " ++ * Because of this erratum, it is better to stick with ++ * setting WC in MTRR rather than using PAT on these CPUs. ++ * ++ * Enable PAT WC only on P4, Core 2 or later CPUs. ++ */ ++ if (c->x86 == 6 && c->x86_model < 15) ++ clear_cpu_cap(c, X86_FEATURE_PAT); ++ ++#ifdef CONFIG_KMEMCHECK ++ /* ++ * P4s have a "fast strings" feature which causes single- ++ * stepping REP instructions to only generate a #DB on ++ * cache-line boundaries. ++ * ++ * Ingo Molnar reported a Pentium D (model 6) and a Xeon ++ * (model 2) with the same problem. ++ */ ++ if (c->x86 == 15) { ++ u64 misc_enable; ++ ++ rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); ++ ++ if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { ++ printk(KERN_INFO "kmemcheck: Disabling fast string operations\n"); ++ ++ misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING; ++ wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); ++ } ++ } ++#endif + } + + #ifdef CONFIG_X86_32 +@@ -99,6 +145,28 @@ static void __cpuinit trap_init_f00f_bug + } + #endif + ++static void __cpuinit intel_smp_check(struct cpuinfo_x86 *c) ++{ ++#ifdef CONFIG_SMP ++ /* calling is from identify_secondary_cpu() ? 
*/ ++ if (c->cpu_index == boot_cpu_id) ++ return; ++ ++ /* ++ * Mask B, Pentium, but not Pentium MMX ++ */ ++ if (c->x86 == 5 && ++ c->x86_mask >= 1 && c->x86_mask <= 4 && ++ c->x86_model <= 3) { ++ /* ++ * Remember we have B step Pentia with bugs ++ */ ++ WARN_ONCE(1, "WARNING: SMP operation may be unreliable" ++ "with B stepping processors.\n"); ++ } ++#endif ++} ++ + static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) + { + unsigned long lo, hi; +@@ -135,10 +203,10 @@ static void __cpuinit intel_workarounds( + */ + if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { + rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); +- if ((lo & (1<<9)) == 0) { ++ if ((lo & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE) == 0) { + printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); + printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); +- lo |= (1<<9); /* Disable hw prefetching */ ++ lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE; + wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); + } + } +@@ -175,6 +243,8 @@ static void __cpuinit intel_workarounds( + #ifdef CONFIG_X86_NUMAQ + numaq_tsc_disable(); + #endif ++ ++ intel_smp_check(c); + } + #else + static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) +@@ -374,7 +444,7 @@ static unsigned int __cpuinit intel_size + } + #endif + +-static struct cpu_dev intel_cpu_dev __cpuinitdata = { ++static const struct cpu_dev __cpuinitconst intel_cpu_dev = { + .c_vendor = "Intel", + .c_ident = { "GenuineIntel" }, + #ifdef CONFIG_X86_32 +Index: linux-2.6-tip/arch/x86/kernel/cpu/intel_cacheinfo.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/intel_cacheinfo.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/intel_cacheinfo.c +@@ -32,7 +32,7 @@ struct _cache_table + }; + + /* all the cache descriptor types we care about (no TLB or trace cache entries) */ +-static struct _cache_table cache_table[] __cpuinitdata = ++static const struct _cache_table __cpuinitconst cache_table[] = + { + { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ + { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ +@@ -147,10 +147,19 @@ struct _cpuid4_info { + union _cpuid4_leaf_ecx ecx; + unsigned long size; + unsigned long can_disable; +- cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ ++ DECLARE_BITMAP(shared_cpu_map, NR_CPUS); + }; + +-#ifdef CONFIG_PCI ++/* subset of above _cpuid4_info w/o shared_cpu_map */ ++struct _cpuid4_info_regs { ++ union _cpuid4_leaf_eax eax; ++ union _cpuid4_leaf_ebx ebx; ++ union _cpuid4_leaf_ecx ecx; ++ unsigned long size; ++ unsigned long can_disable; ++}; ++ ++#if defined(CONFIG_PCI) && defined(CONFIG_SYSFS) + static struct pci_device_id k8_nb_id[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, +@@ -197,15 +206,15 @@ union l3_cache { + unsigned val; + }; + +-static unsigned short assocs[] __cpuinitdata = { ++static const unsigned short __cpuinitconst assocs[] = { + [1] = 1, [2] = 2, [4] = 4, [6] = 8, + [8] = 16, [0xa] = 32, [0xb] = 48, + [0xc] = 64, + [0xf] = 0xffff // ?? 
+ }; + +-static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; +-static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; ++static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 }; ++static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 }; + + static void __cpuinit + amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, +@@ -278,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_ + } + + static void __cpuinit +-amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) ++amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) + { + if (index < 3) + return; +@@ -286,7 +295,8 @@ amd_check_l3_disable(int index, struct _ + } + + static int +-__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) ++__cpuinit cpuid4_cache_lookup_regs(int index, ++ struct _cpuid4_info_regs *this_leaf) + { + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; +@@ -353,11 +363,10 @@ unsigned int __cpuinit init_intel_cachei + * parameters cpuid leaf to find the cache details + */ + for (i = 0; i < num_cache_leaves; i++) { +- struct _cpuid4_info this_leaf; +- ++ struct _cpuid4_info_regs this_leaf; + int retval; + +- retval = cpuid4_cache_lookup(i, &this_leaf); ++ retval = cpuid4_cache_lookup_regs(i, &this_leaf); + if (retval >= 0) { + switch(this_leaf.eax.split.level) { + case 1: +@@ -490,6 +499,8 @@ unsigned int __cpuinit init_intel_cachei + return l2; + } + ++#ifdef CONFIG_SYSFS ++ + /* pointer to _cpuid4_info array (for each cache leaf) */ + static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); + #define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) +@@ -506,17 +517,20 @@ static void __cpuinit cache_shared_cpu_m + num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; + + if (num_threads_sharing == 1) +- cpu_set(cpu, this_leaf->shared_cpu_map); ++ cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map)); + else { + index_msb = get_count_order(num_threads_sharing); + + for_each_online_cpu(i) { + if (cpu_data(i).apicid >> index_msb == + c->apicid >> index_msb) { +- cpu_set(i, this_leaf->shared_cpu_map); ++ cpumask_set_cpu(i, ++ to_cpumask(this_leaf->shared_cpu_map)); + if (i != cpu && per_cpu(cpuid4_info, i)) { +- sibling_leaf = CPUID4_INFO_IDX(i, index); +- cpu_set(cpu, sibling_leaf->shared_cpu_map); ++ sibling_leaf = ++ CPUID4_INFO_IDX(i, index); ++ cpumask_set_cpu(cpu, to_cpumask( ++ sibling_leaf->shared_cpu_map)); + } + } + } +@@ -528,9 +542,10 @@ static void __cpuinit cache_remove_share + int sibling; + + this_leaf = CPUID4_INFO_IDX(cpu, index); +- for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { ++ for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) { + sibling_leaf = CPUID4_INFO_IDX(sibling, index); +- cpu_clear(cpu, sibling_leaf->shared_cpu_map); ++ cpumask_clear_cpu(cpu, ++ to_cpumask(sibling_leaf->shared_cpu_map)); + } + } + #else +@@ -549,6 +564,15 @@ static void __cpuinit free_cache_attribu + per_cpu(cpuid4_info, cpu) = NULL; + } + ++static int ++__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) ++{ ++ struct _cpuid4_info_regs *leaf_regs = ++ (struct _cpuid4_info_regs *)this_leaf; ++ ++ return cpuid4_cache_lookup_regs(index, leaf_regs); ++} ++ + static void __cpuinit get_cpu_leaves(void *_retval) + { + int j, *retval = _retval, cpu = smp_processor_id(); +@@ -590,8 +614,6 @@ static int __cpuinit detect_cache_attrib + return retval; + } + +-#ifdef CONFIG_SYSFS +- + #include + #include + +@@ -635,8 +657,9 @@ static ssize_t show_shared_cpu_map_func( + int n = 0; + 
+ if (len > 1) { +- cpumask_t *mask = &this_leaf->shared_cpu_map; ++ const struct cpumask *mask; + ++ mask = to_cpumask(this_leaf->shared_cpu_map); + n = type? + cpulist_scnprintf(buf, len-2, mask) : + cpumask_scnprintf(buf, len-2, mask); +@@ -699,7 +722,8 @@ static struct pci_dev *get_k8_northbridg + + static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) + { +- int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); ++ const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); ++ int node = cpu_to_node(cpumask_first(mask)); + struct pci_dev *dev = NULL; + ssize_t ret = 0; + int i; +@@ -733,7 +757,8 @@ static ssize_t + store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, + size_t count) + { +- int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); ++ const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); ++ int node = cpu_to_node(cpumask_first(mask)); + struct pci_dev *dev = NULL; + unsigned int ret, index, val; + +@@ -878,7 +903,7 @@ err_out: + return -ENOMEM; + } + +-static cpumask_t cache_dev_map = CPU_MASK_NONE; ++static DECLARE_BITMAP(cache_dev_map, NR_CPUS); + + /* Add/Remove cache interface for CPU device */ + static int __cpuinit cache_add_dev(struct sys_device * sys_dev) +@@ -918,7 +943,7 @@ static int __cpuinit cache_add_dev(struc + } + kobject_uevent(&(this_object->kobj), KOBJ_ADD); + } +- cpu_set(cpu, cache_dev_map); ++ cpumask_set_cpu(cpu, to_cpumask(cache_dev_map)); + + kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); + return 0; +@@ -931,9 +956,9 @@ static void __cpuinit cache_remove_dev(s + + if (per_cpu(cpuid4_info, cpu) == NULL) + return; +- if (!cpu_isset(cpu, cache_dev_map)) ++ if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map))) + return; +- cpu_clear(cpu, cache_dev_map); ++ cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); + + for (i = 0; i < num_cache_leaves; i++) + kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); +Index: linux-2.6-tip/arch/x86/kernel/cpu/mcheck/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/mcheck/Makefile ++++ linux-2.6-tip/arch/x86/kernel/cpu/mcheck/Makefile +@@ -4,3 +4,4 @@ obj-$(CONFIG_X86_32) += k7.o p4.o p5.o + obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o + obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o + obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o ++obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o +Index: linux-2.6-tip/arch/x86/kernel/cpu/mcheck/mce_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/mcheck/mce_32.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/mcheck/mce_32.c +@@ -60,20 +60,6 @@ void mcheck_init(struct cpuinfo_x86 *c) + } + } + +-static unsigned long old_cr4 __initdata; +- +-void __init stop_mce(void) +-{ +- old_cr4 = read_cr4(); +- clear_in_cr4(X86_CR4_MCE); +-} +- +-void __init restart_mce(void) +-{ +- if (old_cr4 & X86_CR4_MCE) +- set_in_cr4(X86_CR4_MCE); +-} +- + static int __init mcheck_disable(char *str) + { + mce_disabled = 1; +Index: linux-2.6-tip/arch/x86/kernel/cpu/mcheck/mce_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/mcheck/mce_64.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/mcheck/mce_64.c +@@ -3,6 +3,8 @@ + * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. + * Rest from unknown author(s). + * 2004 Andi Kleen. Rewrote most of it. 
++ * Copyright 2008 Intel Corporation ++ * Author: Andi Kleen + */ + + #include +@@ -24,6 +26,9 @@ + #include + #include + #include ++#include ++#include ++#include + #include + #include + #include +@@ -32,7 +37,6 @@ + #include + + #define MISC_MCELOG_MINOR 227 +-#define NR_SYSFS_BANKS 6 + + atomic_t mce_entry; + +@@ -47,7 +51,7 @@ static int mce_dont_init; + */ + static int tolerant = 1; + static int banks; +-static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL }; ++static u64 *bank; + static unsigned long notify_user; + static int rip_msr; + static int mce_bootlog = -1; +@@ -58,6 +62,19 @@ static char *trigger_argv[2] = { trigger + + static DECLARE_WAIT_QUEUE_HEAD(mce_wait); + ++/* MCA banks polled by the period polling timer for corrected events */ ++DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { ++ [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL ++}; ++ ++/* Do initial initialization of a struct mce */ ++void mce_setup(struct mce *m) ++{ ++ memset(m, 0, sizeof(struct mce)); ++ m->cpu = smp_processor_id(); ++ rdtscll(m->tsc); ++} ++ + /* + * Lockless MCE logging infrastructure. + * This avoids deadlocks on printk locks without having to break locks. Also +@@ -119,11 +136,11 @@ static void print_mce(struct mce *m) + print_symbol("{%s}", m->ip); + printk("\n"); + } +- printk(KERN_EMERG "TSC %Lx ", m->tsc); ++ printk(KERN_EMERG "TSC %llx ", m->tsc); + if (m->addr) +- printk("ADDR %Lx ", m->addr); ++ printk("ADDR %llx ", m->addr); + if (m->misc) +- printk("MISC %Lx ", m->misc); ++ printk("MISC %llx ", m->misc); + printk("\n"); + printk(KERN_EMERG "This is not a software problem!\n"); + printk(KERN_EMERG "Run through mcelog --ascii to decode " +@@ -149,8 +166,10 @@ static void mce_panic(char *msg, struct + panic(msg); + } + +-static int mce_available(struct cpuinfo_x86 *c) ++int mce_available(struct cpuinfo_x86 *c) + { ++ if (mce_dont_init) ++ return 0; + return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); + } + +@@ -172,7 +191,77 @@ static inline void mce_get_rip(struct mc + } + + /* +- * The actual machine check handler ++ * Poll for corrected events or events that happened before reset. ++ * Those are just logged through /dev/mcelog. ++ * ++ * This is executed in standard interrupt context. ++ */ ++void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) ++{ ++ struct mce m; ++ int i; ++ ++ mce_setup(&m); ++ ++ rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); ++ for (i = 0; i < banks; i++) { ++ if (!bank[i] || !test_bit(i, *b)) ++ continue; ++ ++ m.misc = 0; ++ m.addr = 0; ++ m.bank = i; ++ m.tsc = 0; ++ ++ barrier(); ++ rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); ++ if (!(m.status & MCI_STATUS_VAL)) ++ continue; ++ ++ /* ++ * Uncorrected events are handled by the exception handler ++ * when it is enabled. But when the exception is disabled log ++ * everything. ++ * ++ * TBD do the same check for MCI_STATUS_EN here? ++ */ ++ if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) ++ continue; ++ ++ if (m.status & MCI_STATUS_MISCV) ++ rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); ++ if (m.status & MCI_STATUS_ADDRV) ++ rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); ++ ++ if (!(flags & MCP_TIMESTAMP)) ++ m.tsc = 0; ++ /* ++ * Don't get the IP here because it's unlikely to ++ * have anything to do with the actual error location. ++ */ ++ ++ mce_log(&m); ++ add_taint(TAINT_MACHINE_CHECK); ++ ++ /* ++ * Clear state for this bank. ++ */ ++ wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); ++ } ++ ++ /* ++ * Don't clear MCG_STATUS here because it's only defined for ++ * exceptions. 
++ */ ++} ++ ++/* ++ * The actual machine check handler. This only handles real ++ * exceptions when something got corrupted coming in through int 18. ++ * ++ * This is executed in NMI context not subject to normal locking rules. This ++ * implies that most kernel services cannot be safely used. Don't even ++ * think about putting a printk in there! + */ + void do_machine_check(struct pt_regs * regs, long error_code) + { +@@ -190,17 +279,18 @@ void do_machine_check(struct pt_regs * r + * error. + */ + int kill_it = 0; ++ DECLARE_BITMAP(toclear, MAX_NR_BANKS); + + atomic_inc(&mce_entry); + +- if ((regs +- && notify_die(DIE_NMI, "machine check", regs, error_code, ++ if (notify_die(DIE_NMI, "machine check", regs, error_code, + 18, SIGKILL) == NOTIFY_STOP) +- || !banks) + goto out2; ++ if (!banks) ++ goto out2; ++ ++ mce_setup(&m); + +- memset(&m, 0, sizeof(struct mce)); +- m.cpu = smp_processor_id(); + rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); + /* if the restart IP is not valid, we're done for */ + if (!(m.mcgstatus & MCG_STATUS_RIPV)) +@@ -210,18 +300,32 @@ void do_machine_check(struct pt_regs * r + barrier(); + + for (i = 0; i < banks; i++) { +- if (i < NR_SYSFS_BANKS && !bank[i]) ++ __clear_bit(i, toclear); ++ if (!bank[i]) + continue; + + m.misc = 0; + m.addr = 0; + m.bank = i; +- m.tsc = 0; + + rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); + if ((m.status & MCI_STATUS_VAL) == 0) + continue; + ++ /* ++ * Non uncorrected errors are handled by machine_check_poll ++ * Leave them alone. ++ */ ++ if ((m.status & MCI_STATUS_UC) == 0) ++ continue; ++ ++ /* ++ * Set taint even when machine check was not enabled. ++ */ ++ add_taint(TAINT_MACHINE_CHECK); ++ ++ __set_bit(i, toclear); ++ + if (m.status & MCI_STATUS_EN) { + /* if PCC was set, there's no way out */ + no_way_out |= !!(m.status & MCI_STATUS_PCC); +@@ -235,6 +339,12 @@ void do_machine_check(struct pt_regs * r + no_way_out = 1; + kill_it = 1; + } ++ } else { ++ /* ++ * Machine check event was not enabled. Clear, but ++ * ignore. ++ */ ++ continue; + } + + if (m.status & MCI_STATUS_MISCV) +@@ -243,10 +353,7 @@ void do_machine_check(struct pt_regs * r + rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); + + mce_get_rip(&m, regs); +- if (error_code >= 0) +- rdtscll(m.tsc); +- if (error_code != -2) +- mce_log(&m); ++ mce_log(&m); + + /* Did this bank cause the exception? */ + /* Assume that the bank with uncorrectable errors did it, +@@ -255,14 +362,8 @@ void do_machine_check(struct pt_regs * r + panicm = m; + panicm_found = 1; + } +- +- add_taint(TAINT_MACHINE_CHECK); + } + +- /* Never do anything final in the polling timer */ +- if (!regs) +- goto out; +- + /* If we didn't find an uncorrectable error, pick + the last one (shouldn't happen, just being safe). */ + if (!panicm_found) +@@ -309,10 +410,11 @@ void do_machine_check(struct pt_regs * r + /* notify userspace ASAP */ + set_thread_flag(TIF_MCE_NOTIFY); + +- out: + /* the last thing we do is clear state */ +- for (i = 0; i < banks; i++) +- wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); ++ for (i = 0; i < banks; i++) { ++ if (test_bit(i, toclear)) ++ wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); ++ } + wrmsrl(MSR_IA32_MCG_STATUS, 0); + out2: + atomic_dec(&mce_entry); +@@ -332,15 +434,13 @@ void do_machine_check(struct pt_regs * r + * and historically has been the register value of the + * MSR_IA32_THERMAL_STATUS (Intel) msr. 
+ */ +-void mce_log_therm_throt_event(unsigned int cpu, __u64 status) ++void mce_log_therm_throt_event(__u64 status) + { + struct mce m; + +- memset(&m, 0, sizeof(m)); +- m.cpu = cpu; ++ mce_setup(&m); + m.bank = MCE_THERMAL_BANK; + m.status = status; +- rdtscll(m.tsc); + mce_log(&m); + } + #endif /* CONFIG_X86_MCE_INTEL */ +@@ -353,18 +453,18 @@ void mce_log_therm_throt_event(unsigned + + static int check_interval = 5 * 60; /* 5 minutes */ + static int next_interval; /* in jiffies */ +-static void mcheck_timer(struct work_struct *work); +-static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); ++static void mcheck_timer(unsigned long); ++static DEFINE_PER_CPU(struct timer_list, mce_timer); + +-static void mcheck_check_cpu(void *info) ++static void mcheck_timer(unsigned long data) + { +- if (mce_available(¤t_cpu_data)) +- do_machine_check(NULL, 0); +-} ++ struct timer_list *t = &per_cpu(mce_timer, data); + +-static void mcheck_timer(struct work_struct *work) +-{ +- on_each_cpu(mcheck_check_cpu, NULL, 1); ++ WARN_ON(smp_processor_id() != data); ++ ++ if (mce_available(¤t_cpu_data)) ++ machine_check_poll(MCP_TIMESTAMP, ++ &__get_cpu_var(mce_poll_banks)); + + /* + * Alert userspace if needed. If we logged an MCE, reduce the +@@ -377,31 +477,41 @@ static void mcheck_timer(struct work_str + (int)round_jiffies_relative(check_interval*HZ)); + } + +- schedule_delayed_work(&mcheck_work, next_interval); ++ t->expires = jiffies + next_interval; ++ add_timer(t); ++} ++ ++static void mce_do_trigger(struct work_struct *work) ++{ ++ call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); + } + ++static DECLARE_WORK(mce_trigger_work, mce_do_trigger); ++ + /* +- * This is only called from process context. This is where we do +- * anything we need to alert userspace about new MCEs. This is called +- * directly from the poller and also from entry.S and idle, thanks to +- * TIF_MCE_NOTIFY. ++ * Notify the user(s) about new machine check events. ++ * Can be called from interrupt context, but not from machine check/NMI ++ * context. + */ + int mce_notify_user(void) + { ++ /* Not more than two messages every minute */ ++ static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); ++ + clear_thread_flag(TIF_MCE_NOTIFY); + if (test_and_clear_bit(0, ¬ify_user)) { +- static unsigned long last_print; +- unsigned long now = jiffies; +- + wake_up_interruptible(&mce_wait); +- if (trigger[0]) +- call_usermodehelper(trigger, trigger_argv, NULL, +- UMH_NO_WAIT); + +- if (time_after_eq(now, last_print + (check_interval*HZ))) { +- last_print = now; ++ /* ++ * There is no risk of missing notifications because ++ * work_pending is always cleared before the function is ++ * executed. ++ */ ++ if (trigger[0] && !work_pending(&mce_trigger_work)) ++ schedule_work(&mce_trigger_work); ++ ++ if (__ratelimit(&ratelimit)) + printk(KERN_INFO "Machine check events logged\n"); +- } + + return 1; + } +@@ -425,63 +535,78 @@ static struct notifier_block mce_idle_no + + static __init int periodic_mcheck_init(void) + { +- next_interval = check_interval * HZ; +- if (next_interval) +- schedule_delayed_work(&mcheck_work, +- round_jiffies_relative(next_interval)); +- idle_notifier_register(&mce_idle_notifier); +- return 0; ++ idle_notifier_register(&mce_idle_notifier); ++ return 0; + } + __initcall(periodic_mcheck_init); + +- + /* + * Initialize Machine Checks for a CPU. 
+ */ +-static void mce_init(void *dummy) ++static int mce_cap_init(void) + { + u64 cap; +- int i; ++ unsigned b; + + rdmsrl(MSR_IA32_MCG_CAP, cap); +- banks = cap & 0xff; +- if (banks > MCE_EXTENDED_BANK) { +- banks = MCE_EXTENDED_BANK; +- printk(KERN_INFO "MCE: warning: using only %d banks\n", +- MCE_EXTENDED_BANK); ++ b = cap & 0xff; ++ if (b > MAX_NR_BANKS) { ++ printk(KERN_WARNING ++ "MCE: Using only %u machine check banks out of %u\n", ++ MAX_NR_BANKS, b); ++ b = MAX_NR_BANKS; ++ } ++ ++ /* Don't support asymmetric configurations today */ ++ WARN_ON(banks != 0 && b != banks); ++ banks = b; ++ if (!bank) { ++ bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); ++ if (!bank) ++ return -ENOMEM; ++ memset(bank, 0xff, banks * sizeof(u64)); + } ++ + /* Use accurate RIP reporting if available. */ + if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) + rip_msr = MSR_IA32_MCG_EIP; + +- /* Log the machine checks left over from the previous reset. +- This also clears all registers */ +- do_machine_check(NULL, mce_bootlog ? -1 : -2); ++ return 0; ++} ++ ++static void mce_init(void *dummy) ++{ ++ u64 cap; ++ int i; ++ mce_banks_t all_banks; ++ ++ /* ++ * Log the machine checks left over from the previous reset. ++ */ ++ bitmap_fill(all_banks, MAX_NR_BANKS); ++ machine_check_poll(MCP_UC, &all_banks); + + set_in_cr4(X86_CR4_MCE); + ++ rdmsrl(MSR_IA32_MCG_CAP, cap); + if (cap & MCG_CTL_P) + wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); + + for (i = 0; i < banks; i++) { +- if (i < NR_SYSFS_BANKS) +- wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); +- else +- wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL); +- ++ wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); + wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); + } + } + + /* Add per CPU specific workarounds here */ +-static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) ++static void mce_cpu_quirks(struct cpuinfo_x86 *c) + { + /* This should be disabled by the BIOS, but isn't always */ + if (c->x86_vendor == X86_VENDOR_AMD) { +- if(c->x86 == 15) ++ if (c->x86 == 15 && banks > 4) + /* disable GART TBL walk error reporting, which trips off + incorrectly with the IOMMU & 3ware & Cerberus. */ +- clear_bit(10, &bank[4]); ++ clear_bit(10, (unsigned long *)&bank[4]); + if(c->x86 <= 17 && mce_bootlog < 0) + /* Lots of broken BIOS around that don't clear them + by default and leave crap in there. Don't log. */ +@@ -504,20 +629,38 @@ static void mce_cpu_features(struct cpui + } + } + ++static void mce_init_timer(void) ++{ ++ struct timer_list *t = &__get_cpu_var(mce_timer); ++ ++ /* data race harmless because everyone sets to the same value */ ++ if (!next_interval) ++ next_interval = check_interval * HZ; ++ if (!next_interval) ++ return; ++ setup_timer(t, mcheck_timer, smp_processor_id()); ++ t->expires = round_jiffies(jiffies + next_interval); ++ add_timer(t); ++} ++ + /* + * Called for each booted CPU to set up machine checks. + * Must be called with preempt off. 
+ */ + void __cpuinit mcheck_init(struct cpuinfo_x86 *c) + { +- mce_cpu_quirks(c); ++ if (!mce_available(c)) ++ return; + +- if (mce_dont_init || +- !mce_available(c)) ++ if (mce_cap_init() < 0) { ++ mce_dont_init = 1; + return; ++ } ++ mce_cpu_quirks(c); + + mce_init(NULL); + mce_cpu_features(c); ++ mce_init_timer(); + } + + /* +@@ -573,7 +716,7 @@ static ssize_t mce_read(struct file *fil + { + unsigned long *cpu_tsc; + static DEFINE_MUTEX(mce_read_mutex); +- unsigned next; ++ unsigned prev, next; + char __user *buf = ubuf; + int i, err; + +@@ -592,25 +735,32 @@ static ssize_t mce_read(struct file *fil + } + + err = 0; +- for (i = 0; i < next; i++) { +- unsigned long start = jiffies; +- +- while (!mcelog.entry[i].finished) { +- if (time_after_eq(jiffies, start + 2)) { +- memset(mcelog.entry + i,0, sizeof(struct mce)); +- goto timeout; ++ prev = 0; ++ do { ++ for (i = prev; i < next; i++) { ++ unsigned long start = jiffies; ++ ++ while (!mcelog.entry[i].finished) { ++ if (time_after_eq(jiffies, start + 2)) { ++ memset(mcelog.entry + i, 0, ++ sizeof(struct mce)); ++ goto timeout; ++ } ++ cpu_relax(); + } +- cpu_relax(); ++ smp_rmb(); ++ err |= copy_to_user(buf, mcelog.entry + i, ++ sizeof(struct mce)); ++ buf += sizeof(struct mce); ++timeout: ++ ; + } +- smp_rmb(); +- err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce)); +- buf += sizeof(struct mce); +- timeout: +- ; +- } + +- memset(mcelog.entry, 0, next * sizeof(struct mce)); +- mcelog.next = 0; ++ memset(mcelog.entry + prev, 0, ++ (next - prev) * sizeof(struct mce)); ++ prev = next; ++ next = cmpxchg(&mcelog.next, prev, 0); ++ } while (next != prev); + + synchronize_sched(); + +@@ -680,20 +830,6 @@ static struct miscdevice mce_log_device + &mce_chrdev_ops, + }; + +-static unsigned long old_cr4 __initdata; +- +-void __init stop_mce(void) +-{ +- old_cr4 = read_cr4(); +- clear_in_cr4(X86_CR4_MCE); +-} +- +-void __init restart_mce(void) +-{ +- if (old_cr4 & X86_CR4_MCE) +- set_in_cr4(X86_CR4_MCE); +-} +- + /* + * Old style boot options parsing. Only for compatibility. + */ +@@ -703,8 +839,7 @@ static int __init mcheck_disable(char *s + return 1; + } + +-/* mce=off disables machine check. Note you can re-enable it later +- using sysfs. ++/* mce=off disables machine check. + mce=TOLERANCELEVEL (number, see above) + mce=bootlog Log MCEs from before booting. Disabled by default on AMD. + mce=nobootlog Don't log MCEs from before booting. */ +@@ -728,6 +863,29 @@ __setup("mce=", mcheck_enable); + * Sysfs support + */ + ++/* ++ * Disable machine checks on suspend and shutdown. We can't really handle ++ * them later. ++ */ ++static int mce_disable(void) ++{ ++ int i; ++ ++ for (i = 0; i < banks; i++) ++ wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); ++ return 0; ++} ++ ++static int mce_suspend(struct sys_device *dev, pm_message_t state) ++{ ++ return mce_disable(); ++} ++ ++static int mce_shutdown(struct sys_device *dev) ++{ ++ return mce_disable(); ++} ++ + /* On resume clear all MCE state. Don't want to see leftovers from the BIOS. + Only one CPU is active at this time, the others get readded later using + CPU hotplug. 
*/ +@@ -738,20 +896,24 @@ static int mce_resume(struct sys_device + return 0; + } + ++static void mce_cpu_restart(void *data) ++{ ++ del_timer_sync(&__get_cpu_var(mce_timer)); ++ if (mce_available(¤t_cpu_data)) ++ mce_init(NULL); ++ mce_init_timer(); ++} ++ + /* Reinit MCEs after user configuration changes */ + static void mce_restart(void) + { +- if (next_interval) +- cancel_delayed_work(&mcheck_work); +- /* Timer race is harmless here */ +- on_each_cpu(mce_init, NULL, 1); + next_interval = check_interval * HZ; +- if (next_interval) +- schedule_delayed_work(&mcheck_work, +- round_jiffies_relative(next_interval)); ++ on_each_cpu(mce_cpu_restart, NULL, 1); + } + + static struct sysdev_class mce_sysclass = { ++ .suspend = mce_suspend, ++ .shutdown = mce_shutdown, + .resume = mce_resume, + .name = "machinecheck", + }; +@@ -778,16 +940,26 @@ void (*threshold_cpu_callback)(unsigned + } \ + static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); + +-/* +- * TBD should generate these dynamically based on number of available banks. +- * Have only 6 contol banks in /sysfs until then. +- */ +-ACCESSOR(bank0ctl,bank[0],mce_restart()) +-ACCESSOR(bank1ctl,bank[1],mce_restart()) +-ACCESSOR(bank2ctl,bank[2],mce_restart()) +-ACCESSOR(bank3ctl,bank[3],mce_restart()) +-ACCESSOR(bank4ctl,bank[4],mce_restart()) +-ACCESSOR(bank5ctl,bank[5],mce_restart()) ++static struct sysdev_attribute *bank_attrs; ++ ++static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, ++ char *buf) ++{ ++ u64 b = bank[attr - bank_attrs]; ++ return sprintf(buf, "%llx\n", b); ++} ++ ++static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, ++ const char *buf, size_t siz) ++{ ++ char *end; ++ u64 new = simple_strtoull(buf, &end, 0); ++ if (end == buf) ++ return -EINVAL; ++ bank[attr - bank_attrs] = new; ++ mce_restart(); ++ return end-buf; ++} + + static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, + char *buf) +@@ -814,13 +986,11 @@ static SYSDEV_ATTR(trigger, 0644, show_t + static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); + ACCESSOR(check_interval,check_interval,mce_restart()) + static struct sysdev_attribute *mce_attributes[] = { +- &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, +- &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl, + &attr_tolerant.attr, &attr_check_interval, &attr_trigger, + NULL + }; + +-static cpumask_t mce_device_initialized = CPU_MASK_NONE; ++static cpumask_var_t mce_device_initialized; + + /* Per cpu sysdev init. 
All of the cpus still share the same ctl bank */ + static __cpuinit int mce_create_device(unsigned int cpu) +@@ -845,11 +1015,22 @@ static __cpuinit int mce_create_device(u + if (err) + goto error; + } +- cpu_set(cpu, mce_device_initialized); ++ for (i = 0; i < banks; i++) { ++ err = sysdev_create_file(&per_cpu(device_mce, cpu), ++ &bank_attrs[i]); ++ if (err) ++ goto error2; ++ } ++ cpumask_set_cpu(cpu, mce_device_initialized); + + return 0; ++error2: ++ while (--i >= 0) { ++ sysdev_remove_file(&per_cpu(device_mce, cpu), ++ &bank_attrs[i]); ++ } + error: +- while (i--) { ++ while (--i >= 0) { + sysdev_remove_file(&per_cpu(device_mce,cpu), + mce_attributes[i]); + } +@@ -862,14 +1043,44 @@ static __cpuinit void mce_remove_device( + { + int i; + +- if (!cpu_isset(cpu, mce_device_initialized)) ++ if (!cpumask_test_cpu(cpu, mce_device_initialized)) + return; + + for (i = 0; mce_attributes[i]; i++) + sysdev_remove_file(&per_cpu(device_mce,cpu), + mce_attributes[i]); ++ for (i = 0; i < banks; i++) ++ sysdev_remove_file(&per_cpu(device_mce, cpu), ++ &bank_attrs[i]); + sysdev_unregister(&per_cpu(device_mce,cpu)); +- cpu_clear(cpu, mce_device_initialized); ++ cpumask_clear_cpu(cpu, mce_device_initialized); ++} ++ ++/* Make sure there are no machine checks on offlined CPUs. */ ++static void mce_disable_cpu(void *h) ++{ ++ int i; ++ unsigned long action = *(unsigned long *)h; ++ ++ if (!mce_available(¤t_cpu_data)) ++ return; ++ if (!(action & CPU_TASKS_FROZEN)) ++ cmci_clear(); ++ for (i = 0; i < banks; i++) ++ wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); ++} ++ ++static void mce_reenable_cpu(void *h) ++{ ++ int i; ++ unsigned long action = *(unsigned long *)h; ++ ++ if (!mce_available(¤t_cpu_data)) ++ return; ++ if (!(action & CPU_TASKS_FROZEN)) ++ cmci_reenable(); ++ for (i = 0; i < banks; i++) ++ wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); + } + + /* Get notified when a cpu comes on/off. Be hotplug friendly. 
*/ +@@ -877,6 +1088,7 @@ static int __cpuinit mce_cpu_callback(st + unsigned long action, void *hcpu) + { + unsigned int cpu = (unsigned long)hcpu; ++ struct timer_list *t = &per_cpu(mce_timer, cpu); + + switch (action) { + case CPU_ONLINE: +@@ -891,6 +1103,21 @@ static int __cpuinit mce_cpu_callback(st + threshold_cpu_callback(action, cpu); + mce_remove_device(cpu); + break; ++ case CPU_DOWN_PREPARE: ++ case CPU_DOWN_PREPARE_FROZEN: ++ del_timer_sync(t); ++ smp_call_function_single(cpu, mce_disable_cpu, &action, 1); ++ break; ++ case CPU_DOWN_FAILED: ++ case CPU_DOWN_FAILED_FROZEN: ++ t->expires = round_jiffies(jiffies + next_interval); ++ add_timer_on(t, cpu); ++ smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); ++ break; ++ case CPU_POST_DEAD: ++ /* intentionally ignoring frozen here */ ++ cmci_rediscover(cpu); ++ break; + } + return NOTIFY_OK; + } +@@ -899,6 +1126,34 @@ static struct notifier_block mce_cpu_not + .notifier_call = mce_cpu_callback, + }; + ++static __init int mce_init_banks(void) ++{ ++ int i; ++ ++ bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, ++ GFP_KERNEL); ++ if (!bank_attrs) ++ return -ENOMEM; ++ ++ for (i = 0; i < banks; i++) { ++ struct sysdev_attribute *a = &bank_attrs[i]; ++ a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); ++ if (!a->attr.name) ++ goto nomem; ++ a->attr.mode = 0644; ++ a->show = show_bank; ++ a->store = set_bank; ++ } ++ return 0; ++ ++nomem: ++ while (--i >= 0) ++ kfree(bank_attrs[i].attr.name); ++ kfree(bank_attrs); ++ bank_attrs = NULL; ++ return -ENOMEM; ++} ++ + static __init int mce_init_device(void) + { + int err; +@@ -906,6 +1161,13 @@ static __init int mce_init_device(void) + + if (!mce_available(&boot_cpu_data)) + return -EIO; ++ ++ alloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); ++ ++ err = mce_init_banks(); ++ if (err) ++ return err; ++ + err = sysdev_class_register(&mce_sysclass); + if (err) + return err; +Index: linux-2.6-tip/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/mcheck/mce_amd_64.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +@@ -67,7 +67,7 @@ static struct threshold_block threshold_ + struct threshold_bank { + struct kobject *kobj; + struct threshold_block *blocks; +- cpumask_t cpus; ++ cpumask_var_t cpus; + }; + static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); + +@@ -79,6 +79,8 @@ static unsigned char shared_bank[NR_BANK + + static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ + ++static void amd_threshold_interrupt(void); ++ + /* + * CPU Initialization + */ +@@ -90,7 +92,8 @@ struct thresh_restart { + }; + + /* must be called with correct cpu affinity */ +-static long threshold_restart_bank(void *_tr) ++/* Called via smp_call_function_single() */ ++static void threshold_restart_bank(void *_tr) + { + struct thresh_restart *tr = _tr; + u32 mci_misc_hi, mci_misc_lo; +@@ -117,7 +120,6 @@ static long threshold_restart_bank(void + + mci_misc_hi |= MASK_COUNT_EN_HI; + wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); +- return 0; + } + + /* cpu init entry point, called from mce.c with preempt off */ +@@ -174,6 +176,8 @@ void mce_amd_feature_init(struct cpuinfo + tr.reset = 0; + tr.old_limit = 0; + threshold_restart_bank(&tr); ++ ++ mce_threshold_vector = amd_threshold_interrupt; + } + } + } +@@ -187,19 +191,13 @@ void mce_amd_feature_init(struct cpuinfo + * the interrupt goes off when error_count reaches threshold_limit. 
+ * the handler will simply log mcelog w/ software defined bank number. + */ +-asmlinkage void mce_threshold_interrupt(void) ++static void amd_threshold_interrupt(void) + { + unsigned int bank, block; + struct mce m; + u32 low = 0, high = 0, address = 0; + +- ack_APIC_irq(); +- exit_idle(); +- irq_enter(); +- +- memset(&m, 0, sizeof(m)); +- rdtscll(m.tsc); +- m.cpu = smp_processor_id(); ++ mce_setup(&m); + + /* assume first bank caused it */ + for (bank = 0; bank < NR_BANKS; ++bank) { +@@ -233,7 +231,8 @@ asmlinkage void mce_threshold_interrupt( + + /* Log the machine check that caused the threshold + event. */ +- do_machine_check(NULL, 0); ++ machine_check_poll(MCP_TIMESTAMP, ++ &__get_cpu_var(mce_poll_banks)); + + if (high & MASK_OVERFLOW_HI) { + rdmsrl(address, m.misc); +@@ -243,13 +242,10 @@ asmlinkage void mce_threshold_interrupt( + + bank * NR_BLOCKS + + block; + mce_log(&m); +- goto out; ++ return; + } + } + } +-out: +- inc_irq_stat(irq_threshold_count); +- irq_exit(); + } + + /* +@@ -283,7 +279,7 @@ static ssize_t store_interrupt_enable(st + tr.b = b; + tr.reset = 0; + tr.old_limit = 0; +- work_on_cpu(b->cpu, threshold_restart_bank, &tr); ++ smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); + + return end - buf; + } +@@ -305,23 +301,32 @@ static ssize_t store_threshold_limit(str + tr.b = b; + tr.reset = 0; + +- work_on_cpu(b->cpu, threshold_restart_bank, &tr); ++ smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); + + return end - buf; + } + +-static long local_error_count(void *_b) ++struct threshold_block_cross_cpu { ++ struct threshold_block *tb; ++ long retval; ++}; ++ ++static void local_error_count_handler(void *_tbcc) + { +- struct threshold_block *b = _b; ++ struct threshold_block_cross_cpu *tbcc = _tbcc; ++ struct threshold_block *b = tbcc->tb; + u32 low, high; + + rdmsr(b->address, low, high); +- return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); ++ tbcc->retval = (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); + } + + static ssize_t show_error_count(struct threshold_block *b, char *buf) + { +- return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b)); ++ struct threshold_block_cross_cpu tbcc = { .tb = b, }; ++ ++ smp_call_function_single(b->cpu, local_error_count_handler, &tbcc, 1); ++ return sprintf(buf, "%lx\n", tbcc.retval); + } + + static ssize_t store_error_count(struct threshold_block *b, +@@ -329,7 +334,7 @@ static ssize_t store_error_count(struct + { + struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; + +- work_on_cpu(b->cpu, threshold_restart_bank, &tr); ++ smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); + return 1; + } + +@@ -398,7 +403,7 @@ static __cpuinit int allocate_threshold_ + if ((bank >= NR_BANKS) || (block >= NR_BLOCKS)) + return 0; + +- if (rdmsr_safe(address, &low, &high)) ++ if (rdmsr_safe_on_cpu(cpu, address, &low, &high)) + return 0; + + if (!(high & MASK_VALID_HI)) { +@@ -462,12 +467,11 @@ out_free: + return err; + } + +-static __cpuinit long local_allocate_threshold_blocks(void *_bank) ++static __cpuinit long ++local_allocate_threshold_blocks(int cpu, unsigned int bank) + { +- unsigned int *bank = _bank; +- +- return allocate_threshold_blocks(smp_processor_id(), *bank, 0, +- MSR_IA32_MC0_MISC + *bank * 4); ++ return allocate_threshold_blocks(cpu, bank, 0, ++ MSR_IA32_MC0_MISC + bank * 4); + } + + /* symlinks sibling shared banks to first core. first core owns dir/files. 
*/ +@@ -481,7 +485,7 @@ static __cpuinit int threshold_create_ba + + #ifdef CONFIG_SMP + if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ +- i = first_cpu(per_cpu(cpu_core_map, cpu)); ++ i = cpumask_first(cpu_core_mask(cpu)); + + /* first core not up yet */ + if (cpu_data(i).cpu_core_id) +@@ -501,7 +505,7 @@ static __cpuinit int threshold_create_ba + if (err) + goto out; + +- b->cpus = per_cpu(cpu_core_map, cpu); ++ cpumask_copy(b->cpus, cpu_core_mask(cpu)); + per_cpu(threshold_banks, cpu)[bank] = b; + goto out; + } +@@ -512,24 +516,29 @@ static __cpuinit int threshold_create_ba + err = -ENOMEM; + goto out; + } ++ if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) { ++ kfree(b); ++ err = -ENOMEM; ++ goto out; ++ } + + b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); + if (!b->kobj) + goto out_free; + + #ifndef CONFIG_SMP +- b->cpus = CPU_MASK_ALL; ++ cpumask_setall(b->cpus); + #else +- b->cpus = per_cpu(cpu_core_map, cpu); ++ cpumask_copy(b->cpus, cpu_core_mask(cpu)); + #endif + + per_cpu(threshold_banks, cpu)[bank] = b; + +- err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank); ++ err = local_allocate_threshold_blocks(cpu, bank); + if (err) + goto out_free; + +- for_each_cpu_mask_nr(i, b->cpus) { ++ for_each_cpu(i, b->cpus) { + if (i == cpu) + continue; + +@@ -545,6 +554,7 @@ static __cpuinit int threshold_create_ba + + out_free: + per_cpu(threshold_banks, cpu)[bank] = NULL; ++ free_cpumask_var(b->cpus); + kfree(b); + out: + return err; +@@ -619,7 +629,7 @@ static void threshold_remove_bank(unsign + #endif + + /* remove all sibling symlinks before unregistering */ +- for_each_cpu_mask_nr(i, b->cpus) { ++ for_each_cpu(i, b->cpus) { + if (i == cpu) + continue; + +@@ -632,6 +642,7 @@ static void threshold_remove_bank(unsign + free_out: + kobject_del(b->kobj); + kobject_put(b->kobj); ++ free_cpumask_var(b->cpus); + kfree(b); + per_cpu(threshold_banks, cpu)[bank] = NULL; + } +Index: linux-2.6-tip/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/mcheck/mce_intel_64.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +@@ -1,17 +1,21 @@ + /* + * Intel specific MCE features. 
+ * Copyright 2004 Zwane Mwaikambo ++ * Copyright (C) 2008, 2009 Intel Corporation ++ * Author: Andi Kleen + */ + + #include + #include + #include + #include ++#include + #include + #include + #include + #include + #include ++#include + + asmlinkage void smp_thermal_interrupt(void) + { +@@ -24,7 +28,7 @@ asmlinkage void smp_thermal_interrupt(vo + + rdmsrl(MSR_IA32_THERM_STATUS, msr_val); + if (therm_throt_process(msr_val & 1)) +- mce_log_therm_throt_event(smp_processor_id(), msr_val); ++ mce_log_therm_throt_event(msr_val); + + inc_irq_stat(irq_thermal_count); + irq_exit(); +@@ -48,13 +52,13 @@ static void intel_init_thermal(struct cp + */ + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + h = apic_read(APIC_LVTTHMR); +- if ((l & (1 << 3)) && (h & APIC_DM_SMI)) { ++ if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { + printk(KERN_DEBUG + "CPU%d: Thermal monitoring handled by SMI\n", cpu); + return; + } + +- if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13))) ++ if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) + tm2 = 1; + + if (h & APIC_VECTOR_MASK) { +@@ -72,7 +76,7 @@ static void intel_init_thermal(struct cp + wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); + + rdmsr(MSR_IA32_MISC_ENABLE, l, h); +- wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h); ++ wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); + + l = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); +@@ -84,7 +88,209 @@ static void intel_init_thermal(struct cp + return; + } + ++/* ++ * Support for Intel Correct Machine Check Interrupts. This allows ++ * the CPU to raise an interrupt when a corrected machine check happened. ++ * Normally we pick those up using a regular polling timer. ++ * Also supports reliable discovery of shared banks. ++ */ ++ ++static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); ++ ++/* ++ * cmci_discover_lock protects against parallel discovery attempts ++ * which could race against each other. ++ */ ++static DEFINE_SPINLOCK(cmci_discover_lock); ++ ++#define CMCI_THRESHOLD 1 ++ ++static int cmci_supported(int *banks) ++{ ++ u64 cap; ++ ++ /* ++ * Vendor check is not strictly needed, but the initial ++ * initialization is vendor keyed and this ++ * makes sure none of the backdoors are entered otherwise. ++ */ ++ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) ++ return 0; ++ if (!cpu_has_apic || lapic_get_maxlvt() < 6) ++ return 0; ++ rdmsrl(MSR_IA32_MCG_CAP, cap); ++ *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); ++ return !!(cap & MCG_CMCI_P); ++} ++ ++/* ++ * The interrupt handler. This is called on every event. ++ * Just call the poller directly to log any events. ++ * This could in theory increase the threshold under high load, ++ * but doesn't for now. ++ */ ++static void intel_threshold_interrupt(void) ++{ ++ machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); ++ mce_notify_user(); ++} ++ ++static void print_update(char *type, int *hdr, int num) ++{ ++ if (*hdr == 0) ++ printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); ++ *hdr = 1; ++ printk(KERN_CONT " %s:%d", type, num); ++} ++ ++/* ++ * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks ++ * on this CPU. Use the algorithm recommended in the SDM to discover shared ++ * banks. 
++ */ ++static void cmci_discover(int banks, int boot) ++{ ++ unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); ++ int hdr = 0; ++ int i; ++ ++ spin_lock(&cmci_discover_lock); ++ for (i = 0; i < banks; i++) { ++ u64 val; ++ ++ if (test_bit(i, owned)) ++ continue; ++ ++ rdmsrl(MSR_IA32_MC0_CTL2 + i, val); ++ ++ /* Already owned by someone else? */ ++ if (val & CMCI_EN) { ++ if (test_and_clear_bit(i, owned) || boot) ++ print_update("SHD", &hdr, i); ++ __clear_bit(i, __get_cpu_var(mce_poll_banks)); ++ continue; ++ } ++ ++ val |= CMCI_EN | CMCI_THRESHOLD; ++ wrmsrl(MSR_IA32_MC0_CTL2 + i, val); ++ rdmsrl(MSR_IA32_MC0_CTL2 + i, val); ++ ++ /* Did the enable bit stick? -- the bank supports CMCI */ ++ if (val & CMCI_EN) { ++ if (!test_and_set_bit(i, owned) || boot) ++ print_update("CMCI", &hdr, i); ++ __clear_bit(i, __get_cpu_var(mce_poll_banks)); ++ } else { ++ WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); ++ } ++ } ++ spin_unlock(&cmci_discover_lock); ++ if (hdr) ++ printk(KERN_CONT "\n"); ++} ++ ++/* ++ * Just in case we missed an event during initialization check ++ * all the CMCI owned banks. ++ */ ++void cmci_recheck(void) ++{ ++ unsigned long flags; ++ int banks; ++ ++ if (!mce_available(¤t_cpu_data) || !cmci_supported(&banks)) ++ return; ++ local_irq_save(flags); ++ machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); ++ local_irq_restore(flags); ++} ++ ++/* ++ * Disable CMCI on this CPU for all banks it owns when it goes down. ++ * This allows other CPUs to claim the banks on rediscovery. ++ */ ++void cmci_clear(void) ++{ ++ int i; ++ int banks; ++ u64 val; ++ ++ if (!cmci_supported(&banks)) ++ return; ++ spin_lock(&cmci_discover_lock); ++ for (i = 0; i < banks; i++) { ++ if (!test_bit(i, __get_cpu_var(mce_banks_owned))) ++ continue; ++ /* Disable CMCI */ ++ rdmsrl(MSR_IA32_MC0_CTL2 + i, val); ++ val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); ++ wrmsrl(MSR_IA32_MC0_CTL2 + i, val); ++ __clear_bit(i, __get_cpu_var(mce_banks_owned)); ++ } ++ spin_unlock(&cmci_discover_lock); ++} ++ ++/* ++ * After a CPU went down cycle through all the others and rediscover ++ * Must run in process context. ++ */ ++void cmci_rediscover(int dying) ++{ ++ int banks; ++ int cpu; ++ cpumask_var_t old; ++ ++ if (!cmci_supported(&banks)) ++ return; ++ if (!alloc_cpumask_var(&old, GFP_KERNEL)) ++ return; ++ cpumask_copy(old, ¤t->cpus_allowed); ++ ++ for_each_online_cpu (cpu) { ++ if (cpu == dying) ++ continue; ++ if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) ++ continue; ++ /* Recheck banks in case CPUs don't all have the same */ ++ if (cmci_supported(&banks)) ++ cmci_discover(banks, 0); ++ } ++ ++ set_cpus_allowed_ptr(current, old); ++ free_cpumask_var(old); ++} ++ ++/* ++ * Reenable CMCI on this CPU in case a CPU down failed. ++ */ ++void cmci_reenable(void) ++{ ++ int banks; ++ if (cmci_supported(&banks)) ++ cmci_discover(banks, 0); ++} ++ ++static void intel_init_cmci(void) ++{ ++ int banks; ++ ++ if (!cmci_supported(&banks)) ++ return; ++ ++ mce_threshold_vector = intel_threshold_interrupt; ++ cmci_discover(banks, 1); ++ /* ++ * For CPU #0 this runs with still disabled APIC, but that's ++ * ok because only the vector is set up. We still do another ++ * check for the banks later for CPU #0 just to make sure ++ * to not miss any events. 
++ */ ++ apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); ++ cmci_recheck(); ++} ++ + void mce_intel_feature_init(struct cpuinfo_x86 *c) + { + intel_init_thermal(c); ++ intel_init_cmci(); + } +Index: linux-2.6-tip/arch/x86/kernel/cpu/mcheck/p4.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/mcheck/p4.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/mcheck/p4.c +@@ -85,7 +85,7 @@ static void intel_init_thermal(struct cp + */ + rdmsr(MSR_IA32_MISC_ENABLE, l, h); + h = apic_read(APIC_LVTTHMR); +- if ((l & (1<<3)) && (h & APIC_DM_SMI)) { ++ if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { + printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", + cpu); + return; /* -EBUSY */ +@@ -111,7 +111,7 @@ static void intel_init_thermal(struct cp + vendor_thermal_interrupt = intel_thermal_interrupt; + + rdmsr(MSR_IA32_MISC_ENABLE, l, h); +- wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); ++ wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); + + l = apic_read(APIC_LVTTHMR); + apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); +Index: linux-2.6-tip/arch/x86/kernel/cpu/mcheck/threshold.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/cpu/mcheck/threshold.c +@@ -0,0 +1,29 @@ ++/* ++ * Common corrected MCE threshold handler code: ++ */ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++static void default_threshold_interrupt(void) ++{ ++ printk(KERN_ERR "Unexpected threshold interrupt at vector %x\n", ++ THRESHOLD_APIC_VECTOR); ++} ++ ++void (*mce_threshold_vector)(void) = default_threshold_interrupt; ++ ++asmlinkage void mce_threshold_interrupt(void) ++{ ++ exit_idle(); ++ irq_enter(); ++ inc_irq_stat(irq_threshold_count); ++ mce_threshold_vector(); ++ irq_exit(); ++ /* Ack only at the end to avoid potential reentry */ ++ ack_APIC_irq(); ++} +Index: linux-2.6-tip/arch/x86/kernel/cpu/mtrr/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/mtrr/Makefile ++++ linux-2.6-tip/arch/x86/kernel/cpu/mtrr/Makefile +@@ -1,3 +1,3 @@ +-obj-y := main.o if.o generic.o state.o ++obj-y := main.o if.o generic.o state.o cleanup.o + obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o + +Index: linux-2.6-tip/arch/x86/kernel/cpu/mtrr/cleanup.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/cpu/mtrr/cleanup.c +@@ -0,0 +1,1101 @@ ++/* MTRR (Memory Type Range Register) cleanup ++ ++ Copyright (C) 2009 Yinghai Lu ++ ++ This library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Library General Public ++ License as published by the Free Software Foundation; either ++ version 2 of the License, or (at your option) any later version. ++ ++ This library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Library General Public License for more details. ++ ++ You should have received a copy of the GNU Library General Public ++ License along with this library; if not, write to the Free ++ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
++*/ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "mtrr.h" ++ ++/* should be related to MTRR_VAR_RANGES nums */ ++#define RANGE_NUM 256 ++ ++struct res_range { ++ unsigned long start; ++ unsigned long end; ++}; ++ ++static int __init ++add_range(struct res_range *range, int nr_range, unsigned long start, ++ unsigned long end) ++{ ++ /* out of slots */ ++ if (nr_range >= RANGE_NUM) ++ return nr_range; ++ ++ range[nr_range].start = start; ++ range[nr_range].end = end; ++ ++ nr_range++; ++ ++ return nr_range; ++} ++ ++static int __init ++add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, ++ unsigned long end) ++{ ++ int i; ++ ++ /* try to merge it with old one */ ++ for (i = 0; i < nr_range; i++) { ++ unsigned long final_start, final_end; ++ unsigned long common_start, common_end; ++ ++ if (!range[i].end) ++ continue; ++ ++ common_start = max(range[i].start, start); ++ common_end = min(range[i].end, end); ++ if (common_start > common_end + 1) ++ continue; ++ ++ final_start = min(range[i].start, start); ++ final_end = max(range[i].end, end); ++ ++ range[i].start = final_start; ++ range[i].end = final_end; ++ return nr_range; ++ } ++ ++ /* need to add that */ ++ return add_range(range, nr_range, start, end); ++} ++ ++static void __init ++subtract_range(struct res_range *range, unsigned long start, unsigned long end) ++{ ++ int i, j; ++ ++ for (j = 0; j < RANGE_NUM; j++) { ++ if (!range[j].end) ++ continue; ++ ++ if (start <= range[j].start && end >= range[j].end) { ++ range[j].start = 0; ++ range[j].end = 0; ++ continue; ++ } ++ ++ if (start <= range[j].start && end < range[j].end && ++ range[j].start < end + 1) { ++ range[j].start = end + 1; ++ continue; ++ } ++ ++ ++ if (start > range[j].start && end >= range[j].end && ++ range[j].end > start - 1) { ++ range[j].end = start - 1; ++ continue; ++ } ++ ++ if (start > range[j].start && end < range[j].end) { ++ /* find the new spare */ ++ for (i = 0; i < RANGE_NUM; i++) { ++ if (range[i].end == 0) ++ break; ++ } ++ if (i < RANGE_NUM) { ++ range[i].end = range[j].end; ++ range[i].start = end + 1; ++ } else { ++ printk(KERN_ERR "run of slot in ranges\n"); ++ } ++ range[j].end = start - 1; ++ continue; ++ } ++ } ++} ++ ++static int __init cmp_range(const void *x1, const void *x2) ++{ ++ const struct res_range *r1 = x1; ++ const struct res_range *r2 = x2; ++ long start1, start2; ++ ++ start1 = r1->start; ++ start2 = r2->start; ++ ++ return start1 - start2; ++} ++ ++struct var_mtrr_range_state { ++ unsigned long base_pfn; ++ unsigned long size_pfn; ++ mtrr_type type; ++}; ++ ++static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; ++static int __initdata debug_print; ++ ++static int __init ++x86_get_mtrr_mem_range(struct res_range *range, int nr_range, ++ unsigned long extra_remove_base, ++ unsigned long extra_remove_size) ++{ ++ unsigned long base, size; ++ mtrr_type type; ++ int i; ++ ++ for (i = 0; i < num_var_ranges; i++) { ++ type = range_state[i].type; ++ if (type != MTRR_TYPE_WRBACK) ++ continue; ++ base = range_state[i].base_pfn; ++ size = range_state[i].size_pfn; ++ nr_range = add_range_with_merge(range, nr_range, base, ++ base + size - 1); ++ } ++ if (debug_print) { ++ printk(KERN_DEBUG "After WB checking\n"); ++ for (i = 0; i < nr_range; i++) ++ printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", ++ range[i].start, range[i].end + 1); ++ } ++ ++ /* take out UC ranges */ ++ for (i = 0; 
i < num_var_ranges; i++) { ++ type = range_state[i].type; ++ if (type != MTRR_TYPE_UNCACHABLE && ++ type != MTRR_TYPE_WRPROT) ++ continue; ++ size = range_state[i].size_pfn; ++ if (!size) ++ continue; ++ base = range_state[i].base_pfn; ++ if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed && ++ (mtrr_state.enabled & 1)) { ++ /* Var MTRR contains UC entry below 1M? Skip it: */ ++ printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d " ++ "contains strange UC entry under 1M, check " ++ "with your system vendor!\n", i); ++ if (base + size <= (1<<(20-PAGE_SHIFT))) ++ continue; ++ size -= (1<<(20-PAGE_SHIFT)) - base; ++ base = 1<<(20-PAGE_SHIFT); ++ } ++ subtract_range(range, base, base + size - 1); ++ } ++ if (extra_remove_size) ++ subtract_range(range, extra_remove_base, ++ extra_remove_base + extra_remove_size - 1); ++ ++ /* get new range num */ ++ nr_range = 0; ++ for (i = 0; i < RANGE_NUM; i++) { ++ if (!range[i].end) ++ continue; ++ nr_range++; ++ } ++ if (debug_print) { ++ printk(KERN_DEBUG "After UC checking\n"); ++ for (i = 0; i < nr_range; i++) ++ printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", ++ range[i].start, range[i].end + 1); ++ } ++ ++ /* sort the ranges */ ++ sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); ++ if (debug_print) { ++ printk(KERN_DEBUG "After sorting\n"); ++ for (i = 0; i < nr_range; i++) ++ printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", ++ range[i].start, range[i].end + 1); ++ } ++ ++ /* clear those is not used */ ++ for (i = nr_range; i < RANGE_NUM; i++) ++ memset(&range[i], 0, sizeof(range[i])); ++ ++ return nr_range; ++} ++ ++static struct res_range __initdata range[RANGE_NUM]; ++static int __initdata nr_range; ++ ++#ifdef CONFIG_MTRR_SANITIZER ++ ++static unsigned long __init sum_ranges(struct res_range *range, int nr_range) ++{ ++ unsigned long sum; ++ int i; ++ ++ sum = 0; ++ for (i = 0; i < nr_range; i++) ++ sum += range[i].end + 1 - range[i].start; ++ ++ return sum; ++} ++ ++static int enable_mtrr_cleanup __initdata = ++ CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; ++ ++static int __init disable_mtrr_cleanup_setup(char *str) ++{ ++ enable_mtrr_cleanup = 0; ++ return 0; ++} ++early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); ++ ++static int __init enable_mtrr_cleanup_setup(char *str) ++{ ++ enable_mtrr_cleanup = 1; ++ return 0; ++} ++early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); ++ ++static int __init mtrr_cleanup_debug_setup(char *str) ++{ ++ debug_print = 1; ++ return 0; ++} ++early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); ++ ++struct var_mtrr_state { ++ unsigned long range_startk; ++ unsigned long range_sizek; ++ unsigned long chunk_sizek; ++ unsigned long gran_sizek; ++ unsigned int reg; ++}; ++ ++static void __init ++set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, ++ unsigned char type, unsigned int address_bits) ++{ ++ u32 base_lo, base_hi, mask_lo, mask_hi; ++ u64 base, mask; ++ ++ if (!sizek) { ++ fill_mtrr_var_range(reg, 0, 0, 0, 0); ++ return; ++ } ++ ++ mask = (1ULL << address_bits) - 1; ++ mask &= ~((((u64)sizek) << 10) - 1); ++ ++ base = ((u64)basek) << 10; ++ ++ base |= type; ++ mask |= 0x800; ++ ++ base_lo = base & ((1ULL<<32) - 1); ++ base_hi = base >> 32; ++ ++ mask_lo = mask & ((1ULL<<32) - 1); ++ mask_hi = mask >> 32; ++ ++ fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); ++} ++ ++static void __init ++save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, ++ unsigned char type) ++{ ++ range_state[reg].base_pfn 
= basek >> (PAGE_SHIFT - 10); ++ range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); ++ range_state[reg].type = type; ++} ++ ++static void __init ++set_var_mtrr_all(unsigned int address_bits) ++{ ++ unsigned long basek, sizek; ++ unsigned char type; ++ unsigned int reg; ++ ++ for (reg = 0; reg < num_var_ranges; reg++) { ++ basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10); ++ sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10); ++ type = range_state[reg].type; ++ ++ set_var_mtrr(reg, basek, sizek, type, address_bits); ++ } ++} ++ ++static unsigned long to_size_factor(unsigned long sizek, char *factorp) ++{ ++ char factor; ++ unsigned long base = sizek; ++ ++ if (base & ((1<<10) - 1)) { ++ /* not MB alignment */ ++ factor = 'K'; ++ } else if (base & ((1<<20) - 1)) { ++ factor = 'M'; ++ base >>= 10; ++ } else { ++ factor = 'G'; ++ base >>= 20; ++ } ++ ++ *factorp = factor; ++ ++ return base; ++} ++ ++static unsigned int __init ++range_to_mtrr(unsigned int reg, unsigned long range_startk, ++ unsigned long range_sizek, unsigned char type) ++{ ++ if (!range_sizek || (reg >= num_var_ranges)) ++ return reg; ++ ++ while (range_sizek) { ++ unsigned long max_align, align; ++ unsigned long sizek; ++ ++ /* Compute the maximum size I can make a range */ ++ if (range_startk) ++ max_align = ffs(range_startk) - 1; ++ else ++ max_align = 32; ++ align = fls(range_sizek) - 1; ++ if (align > max_align) ++ align = max_align; ++ ++ sizek = 1 << align; ++ if (debug_print) { ++ char start_factor = 'K', size_factor = 'K'; ++ unsigned long start_base, size_base; ++ ++ start_base = to_size_factor(range_startk, ++ &start_factor), ++ size_base = to_size_factor(sizek, &size_factor), ++ ++ printk(KERN_DEBUG "Setting variable MTRR %d, " ++ "base: %ld%cB, range: %ld%cB, type %s\n", ++ reg, start_base, start_factor, ++ size_base, size_factor, ++ (type == MTRR_TYPE_UNCACHABLE) ? "UC" : ++ ((type == MTRR_TYPE_WRBACK) ? 
"WB" : "Other") ++ ); ++ } ++ save_var_mtrr(reg++, range_startk, sizek, type); ++ range_startk += sizek; ++ range_sizek -= sizek; ++ if (reg >= num_var_ranges) ++ break; ++ } ++ return reg; ++} ++ ++static unsigned __init ++range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, ++ unsigned long sizek) ++{ ++ unsigned long hole_basek, hole_sizek; ++ unsigned long second_basek, second_sizek; ++ unsigned long range0_basek, range0_sizek; ++ unsigned long range_basek, range_sizek; ++ unsigned long chunk_sizek; ++ unsigned long gran_sizek; ++ ++ hole_basek = 0; ++ hole_sizek = 0; ++ second_basek = 0; ++ second_sizek = 0; ++ chunk_sizek = state->chunk_sizek; ++ gran_sizek = state->gran_sizek; ++ ++ /* align with gran size, prevent small block used up MTRRs */ ++ range_basek = ALIGN(state->range_startk, gran_sizek); ++ if ((range_basek > basek) && basek) ++ return second_sizek; ++ state->range_sizek -= (range_basek - state->range_startk); ++ range_sizek = ALIGN(state->range_sizek, gran_sizek); ++ ++ while (range_sizek > state->range_sizek) { ++ range_sizek -= gran_sizek; ++ if (!range_sizek) ++ return 0; ++ } ++ state->range_sizek = range_sizek; ++ ++ /* try to append some small hole */ ++ range0_basek = state->range_startk; ++ range0_sizek = ALIGN(state->range_sizek, chunk_sizek); ++ ++ /* no increase */ ++ if (range0_sizek == state->range_sizek) { ++ if (debug_print) ++ printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", ++ range0_basek<<10, ++ (range0_basek + state->range_sizek)<<10); ++ state->reg = range_to_mtrr(state->reg, range0_basek, ++ state->range_sizek, MTRR_TYPE_WRBACK); ++ return 0; ++ } ++ ++ /* only cut back, when it is not the last */ ++ if (sizek) { ++ while (range0_basek + range0_sizek > (basek + sizek)) { ++ if (range0_sizek >= chunk_sizek) ++ range0_sizek -= chunk_sizek; ++ else ++ range0_sizek = 0; ++ ++ if (!range0_sizek) ++ break; ++ } ++ } ++ ++second_try: ++ range_basek = range0_basek + range0_sizek; ++ ++ /* one hole in the middle */ ++ if (range_basek > basek && range_basek <= (basek + sizek)) ++ second_sizek = range_basek - basek; ++ ++ if (range0_sizek > state->range_sizek) { ++ ++ /* one hole in middle or at end */ ++ hole_sizek = range0_sizek - state->range_sizek - second_sizek; ++ ++ /* hole size should be less than half of range0 size */ ++ if (hole_sizek >= (range0_sizek >> 1) && ++ range0_sizek >= chunk_sizek) { ++ range0_sizek -= chunk_sizek; ++ second_sizek = 0; ++ hole_sizek = 0; ++ ++ goto second_try; ++ } ++ } ++ ++ if (range0_sizek) { ++ if (debug_print) ++ printk(KERN_DEBUG "range0: %016lx - %016lx\n", ++ range0_basek<<10, ++ (range0_basek + range0_sizek)<<10); ++ state->reg = range_to_mtrr(state->reg, range0_basek, ++ range0_sizek, MTRR_TYPE_WRBACK); ++ } ++ ++ if (range0_sizek < state->range_sizek) { ++ /* need to handle left over */ ++ range_sizek = state->range_sizek - range0_sizek; ++ ++ if (debug_print) ++ printk(KERN_DEBUG "range: %016lx - %016lx\n", ++ range_basek<<10, ++ (range_basek + range_sizek)<<10); ++ state->reg = range_to_mtrr(state->reg, range_basek, ++ range_sizek, MTRR_TYPE_WRBACK); ++ } ++ ++ if (hole_sizek) { ++ hole_basek = range_basek - hole_sizek - second_sizek; ++ if (debug_print) ++ printk(KERN_DEBUG "hole: %016lx - %016lx\n", ++ hole_basek<<10, ++ (hole_basek + hole_sizek)<<10); ++ state->reg = range_to_mtrr(state->reg, hole_basek, ++ hole_sizek, MTRR_TYPE_UNCACHABLE); ++ } ++ ++ return second_sizek; ++} ++ ++static void __init ++set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, ++ unsigned 
long size_pfn) ++{ ++ unsigned long basek, sizek; ++ unsigned long second_sizek = 0; ++ ++ if (state->reg >= num_var_ranges) ++ return; ++ ++ basek = base_pfn << (PAGE_SHIFT - 10); ++ sizek = size_pfn << (PAGE_SHIFT - 10); ++ ++ /* See if I can merge with the last range */ ++ if ((basek <= 1024) || ++ (state->range_startk + state->range_sizek == basek)) { ++ unsigned long endk = basek + sizek; ++ state->range_sizek = endk - state->range_startk; ++ return; ++ } ++ /* Write the range mtrrs */ ++ if (state->range_sizek != 0) ++ second_sizek = range_to_mtrr_with_hole(state, basek, sizek); ++ ++ /* Allocate an msr */ ++ state->range_startk = basek + second_sizek; ++ state->range_sizek = sizek - second_sizek; ++} ++ ++/* mininum size of mtrr block that can take hole */ ++static u64 mtrr_chunk_size __initdata = (256ULL<<20); ++ ++static int __init parse_mtrr_chunk_size_opt(char *p) ++{ ++ if (!p) ++ return -EINVAL; ++ mtrr_chunk_size = memparse(p, &p); ++ return 0; ++} ++early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); ++ ++/* granity of mtrr of block */ ++static u64 mtrr_gran_size __initdata; ++ ++static int __init parse_mtrr_gran_size_opt(char *p) ++{ ++ if (!p) ++ return -EINVAL; ++ mtrr_gran_size = memparse(p, &p); ++ return 0; ++} ++early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); ++ ++static int nr_mtrr_spare_reg __initdata = ++ CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; ++ ++static int __init parse_mtrr_spare_reg(char *arg) ++{ ++ if (arg) ++ nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); ++ return 0; ++} ++ ++early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); ++ ++static int __init ++x86_setup_var_mtrrs(struct res_range *range, int nr_range, ++ u64 chunk_size, u64 gran_size) ++{ ++ struct var_mtrr_state var_state; ++ int i; ++ int num_reg; ++ ++ var_state.range_startk = 0; ++ var_state.range_sizek = 0; ++ var_state.reg = 0; ++ var_state.chunk_sizek = chunk_size >> 10; ++ var_state.gran_sizek = gran_size >> 10; ++ ++ memset(range_state, 0, sizeof(range_state)); ++ ++ /* Write the range etc */ ++ for (i = 0; i < nr_range; i++) ++ set_var_mtrr_range(&var_state, range[i].start, ++ range[i].end - range[i].start + 1); ++ ++ /* Write the last range */ ++ if (var_state.range_sizek != 0) ++ range_to_mtrr_with_hole(&var_state, 0, 0); ++ ++ num_reg = var_state.reg; ++ /* Clear out the extra MTRR's */ ++ while (var_state.reg < num_var_ranges) { ++ save_var_mtrr(var_state.reg, 0, 0, 0); ++ var_state.reg++; ++ } ++ ++ return num_reg; ++} ++ ++struct mtrr_cleanup_result { ++ unsigned long gran_sizek; ++ unsigned long chunk_sizek; ++ unsigned long lose_cover_sizek; ++ unsigned int num_reg; ++ int bad; ++}; ++ ++/* ++ * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G ++ * chunk size: gran_size, ..., 2G ++ * so we need (1+16)*8 ++ */ ++#define NUM_RESULT 136 ++#define PSHIFT (PAGE_SHIFT - 10) ++ ++static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; ++static unsigned long __initdata min_loss_pfn[RANGE_NUM]; ++ ++static void __init print_out_mtrr_range_state(void) ++{ ++ int i; ++ char start_factor = 'K', size_factor = 'K'; ++ unsigned long start_base, size_base; ++ mtrr_type type; ++ ++ for (i = 0; i < num_var_ranges; i++) { ++ ++ size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); ++ if (!size_base) ++ continue; ++ ++ size_base = to_size_factor(size_base, &size_factor), ++ start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); ++ start_base = to_size_factor(start_base, &start_factor), ++ type = range_state[i].type; ++ ++ printk(KERN_DEBUG "reg %d, base: 
%ld%cB, range: %ld%cB, type %s\n", ++ i, start_base, start_factor, ++ size_base, size_factor, ++ (type == MTRR_TYPE_UNCACHABLE) ? "UC" : ++ ((type == MTRR_TYPE_WRPROT) ? "WP" : ++ ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) ++ ); ++ } ++} ++ ++static int __init mtrr_need_cleanup(void) ++{ ++ int i; ++ mtrr_type type; ++ unsigned long size; ++ /* extra one for all 0 */ ++ int num[MTRR_NUM_TYPES + 1]; ++ ++ /* check entries number */ ++ memset(num, 0, sizeof(num)); ++ for (i = 0; i < num_var_ranges; i++) { ++ type = range_state[i].type; ++ size = range_state[i].size_pfn; ++ if (type >= MTRR_NUM_TYPES) ++ continue; ++ if (!size) ++ type = MTRR_NUM_TYPES; ++ if (type == MTRR_TYPE_WRPROT) ++ type = MTRR_TYPE_UNCACHABLE; ++ num[type]++; ++ } ++ ++ /* check if we got UC entries */ ++ if (!num[MTRR_TYPE_UNCACHABLE]) ++ return 0; ++ ++ /* check if we only had WB and UC */ ++ if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != ++ num_var_ranges - num[MTRR_NUM_TYPES]) ++ return 0; ++ ++ return 1; ++} ++ ++static unsigned long __initdata range_sums; ++static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size, ++ unsigned long extra_remove_base, ++ unsigned long extra_remove_size, ++ int i) ++{ ++ int num_reg; ++ static struct res_range range_new[RANGE_NUM]; ++ static int nr_range_new; ++ unsigned long range_sums_new; ++ ++ /* convert ranges to var ranges state */ ++ num_reg = x86_setup_var_mtrrs(range, nr_range, ++ chunk_size, gran_size); ++ ++ /* we got new setting in range_state, check it */ ++ memset(range_new, 0, sizeof(range_new)); ++ nr_range_new = x86_get_mtrr_mem_range(range_new, 0, ++ extra_remove_base, extra_remove_size); ++ range_sums_new = sum_ranges(range_new, nr_range_new); ++ ++ result[i].chunk_sizek = chunk_size >> 10; ++ result[i].gran_sizek = gran_size >> 10; ++ result[i].num_reg = num_reg; ++ if (range_sums < range_sums_new) { ++ result[i].lose_cover_sizek = ++ (range_sums_new - range_sums) << PSHIFT; ++ result[i].bad = 1; ++ } else ++ result[i].lose_cover_sizek = ++ (range_sums - range_sums_new) << PSHIFT; ++ ++ /* double check it */ ++ if (!result[i].bad && !result[i].lose_cover_sizek) { ++ if (nr_range_new != nr_range || ++ memcmp(range, range_new, sizeof(range))) ++ result[i].bad = 1; ++ } ++ ++ if (!result[i].bad && (range_sums - range_sums_new < ++ min_loss_pfn[num_reg])) { ++ min_loss_pfn[num_reg] = ++ range_sums - range_sums_new; ++ } ++} ++ ++static void __init mtrr_print_out_one_result(int i) ++{ ++ char gran_factor, chunk_factor, lose_factor; ++ unsigned long gran_base, chunk_base, lose_base; ++ ++ gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), ++ chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), ++ lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), ++ printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", ++ result[i].bad ? "*BAD*" : " ", ++ gran_base, gran_factor, chunk_base, chunk_factor); ++ printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", ++ result[i].num_reg, result[i].bad ? 
"-" : "", ++ lose_base, lose_factor); ++} ++ ++static int __init mtrr_search_optimal_index(void) ++{ ++ int i; ++ int num_reg_good; ++ int index_good; ++ ++ if (nr_mtrr_spare_reg >= num_var_ranges) ++ nr_mtrr_spare_reg = num_var_ranges - 1; ++ num_reg_good = -1; ++ for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { ++ if (!min_loss_pfn[i]) ++ num_reg_good = i; ++ } ++ ++ index_good = -1; ++ if (num_reg_good != -1) { ++ for (i = 0; i < NUM_RESULT; i++) { ++ if (!result[i].bad && ++ result[i].num_reg == num_reg_good && ++ !result[i].lose_cover_sizek) { ++ index_good = i; ++ break; ++ } ++ } ++ } ++ ++ return index_good; ++} ++ ++ ++int __init mtrr_cleanup(unsigned address_bits) ++{ ++ unsigned long extra_remove_base, extra_remove_size; ++ unsigned long base, size, def, dummy; ++ mtrr_type type; ++ u64 chunk_size, gran_size; ++ int index_good; ++ int i; ++ ++ if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) ++ return 0; ++ rdmsr(MTRRdefType_MSR, def, dummy); ++ def &= 0xff; ++ if (def != MTRR_TYPE_UNCACHABLE) ++ return 0; ++ ++ /* get it and store it aside */ ++ memset(range_state, 0, sizeof(range_state)); ++ for (i = 0; i < num_var_ranges; i++) { ++ mtrr_if->get(i, &base, &size, &type); ++ range_state[i].base_pfn = base; ++ range_state[i].size_pfn = size; ++ range_state[i].type = type; ++ } ++ ++ /* check if we need handle it and can handle it */ ++ if (!mtrr_need_cleanup()) ++ return 0; ++ ++ /* print original var MTRRs at first, for debugging: */ ++ printk(KERN_DEBUG "original variable MTRRs\n"); ++ print_out_mtrr_range_state(); ++ ++ memset(range, 0, sizeof(range)); ++ extra_remove_size = 0; ++ extra_remove_base = 1 << (32 - PAGE_SHIFT); ++ if (mtrr_tom2) ++ extra_remove_size = ++ (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; ++ nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, ++ extra_remove_size); ++ /* ++ * [0, 1M) should always be coverred by var mtrr with WB ++ * and fixed mtrrs should take effective before var mtrr for it ++ */ ++ nr_range = add_range_with_merge(range, nr_range, 0, ++ (1ULL<<(20 - PAGE_SHIFT)) - 1); ++ /* sort the ranges */ ++ sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); ++ ++ range_sums = sum_ranges(range, nr_range); ++ printk(KERN_INFO "total RAM coverred: %ldM\n", ++ range_sums >> (20 - PAGE_SHIFT)); ++ ++ if (mtrr_chunk_size && mtrr_gran_size) { ++ i = 0; ++ mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, ++ extra_remove_base, extra_remove_size, i); ++ ++ mtrr_print_out_one_result(i); ++ ++ if (!result[i].bad) { ++ set_var_mtrr_all(address_bits); ++ printk(KERN_DEBUG "New variable MTRRs\n"); ++ print_out_mtrr_range_state(); ++ return 1; ++ } ++ printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " ++ "will find optimal one\n"); ++ } ++ ++ i = 0; ++ memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); ++ memset(result, 0, sizeof(result)); ++ for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { ++ ++ for (chunk_size = gran_size; chunk_size < (1ULL<<32); ++ chunk_size <<= 1) { ++ ++ if (i >= NUM_RESULT) ++ continue; ++ ++ mtrr_calc_range_state(chunk_size, gran_size, ++ extra_remove_base, extra_remove_size, i); ++ if (debug_print) { ++ mtrr_print_out_one_result(i); ++ printk(KERN_INFO "\n"); ++ } ++ ++ i++; ++ } ++ } ++ ++ /* try to find the optimal index */ ++ index_good = mtrr_search_optimal_index(); ++ ++ if (index_good != -1) { ++ printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); ++ i = index_good; ++ mtrr_print_out_one_result(i); ++ ++ /* convert ranges to var ranges state */ 
++ chunk_size = result[i].chunk_sizek; ++ chunk_size <<= 10; ++ gran_size = result[i].gran_sizek; ++ gran_size <<= 10; ++ x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); ++ set_var_mtrr_all(address_bits); ++ printk(KERN_DEBUG "New variable MTRRs\n"); ++ print_out_mtrr_range_state(); ++ return 1; ++ } else { ++ /* print out all */ ++ for (i = 0; i < NUM_RESULT; i++) ++ mtrr_print_out_one_result(i); ++ } ++ ++ printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); ++ printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); ++ ++ return 0; ++} ++#else ++int __init mtrr_cleanup(unsigned address_bits) ++{ ++ return 0; ++} ++#endif ++ ++static int disable_mtrr_trim; ++ ++static int __init disable_mtrr_trim_setup(char *str) ++{ ++ disable_mtrr_trim = 1; ++ return 0; ++} ++early_param("disable_mtrr_trim", disable_mtrr_trim_setup); ++ ++/* ++ * Newer AMD K8s and later CPUs have a special magic MSR way to force WB ++ * for memory >4GB. Check for that here. ++ * Note this won't check if the MTRRs < 4GB where the magic bit doesn't ++ * apply to are wrong, but so far we don't know of any such case in the wild. ++ */ ++#define Tom2Enabled (1U << 21) ++#define Tom2ForceMemTypeWB (1U << 22) ++ ++int __init amd_special_default_mtrr(void) ++{ ++ u32 l, h; ++ ++ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) ++ return 0; ++ if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) ++ return 0; ++ /* In case some hypervisor doesn't pass SYSCFG through */ ++ if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) ++ return 0; ++ /* ++ * Memory between 4GB and top of mem is forced WB by this magic bit. ++ * Reserved before K8RevF, but should be zero there. ++ */ ++ if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) == ++ (Tom2Enabled | Tom2ForceMemTypeWB)) ++ return 1; ++ return 0; ++} ++ ++static u64 __init real_trim_memory(unsigned long start_pfn, ++ unsigned long limit_pfn) ++{ ++ u64 trim_start, trim_size; ++ trim_start = start_pfn; ++ trim_start <<= PAGE_SHIFT; ++ trim_size = limit_pfn; ++ trim_size <<= PAGE_SHIFT; ++ trim_size -= trim_start; ++ ++ return e820_update_range(trim_start, trim_size, E820_RAM, ++ E820_RESERVED); ++} ++/** ++ * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs ++ * @end_pfn: ending page frame number ++ * ++ * Some buggy BIOSes don't setup the MTRRs properly for systems with certain ++ * memory configurations. This routine checks that the highest MTRR matches ++ * the end of memory, to make sure the MTRRs having a write back type cover ++ * all of the memory the kernel is intending to use. If not, it'll trim any ++ * memory off the end by adjusting end_pfn, removing it from the kernel's ++ * allocation pools, warning the user with an obnoxious message. 
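++ * Returns 1 if memory was trimmed and the e820 map updated, 0 otherwise.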
++ */ ++int __init mtrr_trim_uncached_memory(unsigned long end_pfn) ++{ ++ unsigned long i, base, size, highest_pfn = 0, def, dummy; ++ mtrr_type type; ++ u64 total_trim_size; ++ ++ /* extra one for all 0 */ ++ int num[MTRR_NUM_TYPES + 1]; ++ /* ++ * Make sure we only trim uncachable memory on machines that ++ * support the Intel MTRR architecture: ++ */ ++ if (!is_cpu(INTEL) || disable_mtrr_trim) ++ return 0; ++ rdmsr(MTRRdefType_MSR, def, dummy); ++ def &= 0xff; ++ if (def != MTRR_TYPE_UNCACHABLE) ++ return 0; ++ ++ /* get it and store it aside */ ++ memset(range_state, 0, sizeof(range_state)); ++ for (i = 0; i < num_var_ranges; i++) { ++ mtrr_if->get(i, &base, &size, &type); ++ range_state[i].base_pfn = base; ++ range_state[i].size_pfn = size; ++ range_state[i].type = type; ++ } ++ ++ /* Find highest cached pfn */ ++ for (i = 0; i < num_var_ranges; i++) { ++ type = range_state[i].type; ++ if (type != MTRR_TYPE_WRBACK) ++ continue; ++ base = range_state[i].base_pfn; ++ size = range_state[i].size_pfn; ++ if (highest_pfn < base + size) ++ highest_pfn = base + size; ++ } ++ ++ /* kvm/qemu doesn't have mtrr set right, don't trim them all */ ++ if (!highest_pfn) { ++ printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); ++ return 0; ++ } ++ ++ /* check entries number */ ++ memset(num, 0, sizeof(num)); ++ for (i = 0; i < num_var_ranges; i++) { ++ type = range_state[i].type; ++ if (type >= MTRR_NUM_TYPES) ++ continue; ++ size = range_state[i].size_pfn; ++ if (!size) ++ type = MTRR_NUM_TYPES; ++ num[type]++; ++ } ++ ++ /* no entry for WB? */ ++ if (!num[MTRR_TYPE_WRBACK]) ++ return 0; ++ ++ /* check if we only had WB and UC */ ++ if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != ++ num_var_ranges - num[MTRR_NUM_TYPES]) ++ return 0; ++ ++ memset(range, 0, sizeof(range)); ++ nr_range = 0; ++ if (mtrr_tom2) { ++ range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); ++ range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; ++ if (highest_pfn < range[nr_range].end + 1) ++ highest_pfn = range[nr_range].end + 1; ++ nr_range++; ++ } ++ nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); ++ ++ total_trim_size = 0; ++ /* check the head */ ++ if (range[0].start) ++ total_trim_size += real_trim_memory(0, range[0].start); ++ /* check the holes */ ++ for (i = 0; i < nr_range - 1; i++) { ++ if (range[i].end + 1 < range[i+1].start) ++ total_trim_size += real_trim_memory(range[i].end + 1, ++ range[i+1].start); ++ } ++ /* check the top */ ++ i = nr_range - 1; ++ if (range[i].end + 1 < end_pfn) ++ total_trim_size += real_trim_memory(range[i].end + 1, ++ end_pfn); ++ ++ if (total_trim_size) { ++ printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" ++ " all of memory, losing %lluMB of RAM.\n", ++ total_trim_size >> 20); ++ ++ if (!changed_by_mtrr_cleanup) ++ WARN_ON(1); ++ ++ printk(KERN_INFO "update e820 for mtrr\n"); ++ update_e820(); ++ ++ return 1; ++ } ++ ++ return 0; ++} ++ +Index: linux-2.6-tip/arch/x86/kernel/cpu/mtrr/generic.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/mtrr/generic.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/mtrr/generic.c +@@ -33,14 +33,6 @@ u64 mtrr_tom2; + struct mtrr_state_type mtrr_state = {}; + EXPORT_SYMBOL_GPL(mtrr_state); + +-static int __initdata mtrr_show; +-static int __init mtrr_debug(char *opt) +-{ +- mtrr_show = 1; +- return 0; +-} +-early_param("mtrr.show", mtrr_debug); +- + /** + * BIOS is expected to clear MtrrFixDramModEn bit, see for example + * "BIOS and Kernel Developer's 
Guide for the AMD Athlon 64 and AMD +@@ -216,18 +208,94 @@ void mtrr_save_fixed_ranges(void *info) + get_fixed_ranges(mtrr_state.fixed_ranges); + } + +-static void print_fixed(unsigned base, unsigned step, const mtrr_type*types) ++static unsigned __initdata last_fixed_start; ++static unsigned __initdata last_fixed_end; ++static mtrr_type __initdata last_fixed_type; ++ ++static void __init print_fixed_last(void) ++{ ++ if (!last_fixed_end) ++ return; ++ ++ printk(KERN_DEBUG " %05X-%05X %s\n", last_fixed_start, ++ last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); ++ ++ last_fixed_end = 0; ++} ++ ++static void __init update_fixed_last(unsigned base, unsigned end, ++ mtrr_type type) ++{ ++ last_fixed_start = base; ++ last_fixed_end = end; ++ last_fixed_type = type; ++} ++ ++static void __init print_fixed(unsigned base, unsigned step, ++ const mtrr_type *types) + { + unsigned i; + +- for (i = 0; i < 8; ++i, ++types, base += step) +- printk(KERN_INFO "MTRR %05X-%05X %s\n", +- base, base + step - 1, mtrr_attrib_to_str(*types)); ++ for (i = 0; i < 8; ++i, ++types, base += step) { ++ if (last_fixed_end == 0) { ++ update_fixed_last(base, base + step, *types); ++ continue; ++ } ++ if (last_fixed_end == base && last_fixed_type == *types) { ++ last_fixed_end = base + step; ++ continue; ++ } ++ /* new segments: gap or different type */ ++ print_fixed_last(); ++ update_fixed_last(base, base + step, *types); ++ } + } + + static void prepare_set(void); + static void post_set(void); + ++static void __init print_mtrr_state(void) ++{ ++ unsigned int i; ++ int high_width; ++ ++ printk(KERN_DEBUG "MTRR default type: %s\n", ++ mtrr_attrib_to_str(mtrr_state.def_type)); ++ if (mtrr_state.have_fixed) { ++ printk(KERN_DEBUG "MTRR fixed ranges %sabled:\n", ++ mtrr_state.enabled & 1 ? "en" : "dis"); ++ print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); ++ for (i = 0; i < 2; ++i) ++ print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); ++ for (i = 0; i < 8; ++i) ++ print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); ++ ++ /* tail */ ++ print_fixed_last(); ++ } ++ printk(KERN_DEBUG "MTRR variable ranges %sabled:\n", ++ mtrr_state.enabled & 2 ? "en" : "dis"); ++ high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4; ++ for (i = 0; i < num_var_ranges; ++i) { ++ if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) ++ printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n", ++ i, ++ high_width, ++ mtrr_state.var_ranges[i].base_hi, ++ mtrr_state.var_ranges[i].base_lo >> 12, ++ high_width, ++ mtrr_state.var_ranges[i].mask_hi, ++ mtrr_state.var_ranges[i].mask_lo >> 12, ++ mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); ++ else ++ printk(KERN_DEBUG " %u disabled\n", i); ++ } ++ if (mtrr_tom2) { ++ printk(KERN_DEBUG "TOM2: %016llx aka %lldM\n", ++ mtrr_tom2, mtrr_tom2>>20); ++ } ++} ++ + /* Grab all of the MTRR state for this CPU into *state */ + void __init get_mtrr_state(void) + { +@@ -259,41 +327,9 @@ void __init get_mtrr_state(void) + mtrr_tom2 |= low; + mtrr_tom2 &= 0xffffff800000ULL; + } +- if (mtrr_show) { +- int high_width; + +- printk(KERN_INFO "MTRR default type: %s\n", mtrr_attrib_to_str(mtrr_state.def_type)); +- if (mtrr_state.have_fixed) { +- printk(KERN_INFO "MTRR fixed ranges %sabled:\n", +- mtrr_state.enabled & 1 ? 
"en" : "dis"); +- print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); +- for (i = 0; i < 2; ++i) +- print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); +- for (i = 0; i < 8; ++i) +- print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); +- } +- printk(KERN_INFO "MTRR variable ranges %sabled:\n", +- mtrr_state.enabled & 2 ? "en" : "dis"); +- high_width = ((size_or_mask ? ffs(size_or_mask) - 1 : 32) - (32 - PAGE_SHIFT) + 3) / 4; +- for (i = 0; i < num_var_ranges; ++i) { +- if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) +- printk(KERN_INFO "MTRR %u base %0*X%05X000 mask %0*X%05X000 %s\n", +- i, +- high_width, +- mtrr_state.var_ranges[i].base_hi, +- mtrr_state.var_ranges[i].base_lo >> 12, +- high_width, +- mtrr_state.var_ranges[i].mask_hi, +- mtrr_state.var_ranges[i].mask_lo >> 12, +- mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); +- else +- printk(KERN_INFO "MTRR %u disabled\n", i); +- } +- if (mtrr_tom2) { +- printk(KERN_INFO "TOM2: %016llx aka %lldM\n", +- mtrr_tom2, mtrr_tom2>>20); +- } +- } ++ print_mtrr_state(); ++ + mtrr_state_set = 1; + + /* PAT setup for BP. We need to go through sync steps here */ +@@ -383,22 +419,31 @@ static void generic_get_mtrr(unsigned in + { + unsigned int mask_lo, mask_hi, base_lo, base_hi; + unsigned int tmp, hi; ++ int cpu; ++ ++ /* ++ * get_mtrr doesn't need to update mtrr_state, also it could be called ++ * from any cpu, so try to print it out directly. ++ */ ++ cpu = get_cpu(); + + rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); ++ + if ((mask_lo & 0x800) == 0) { + /* Invalid (i.e. free) range */ + *base = 0; + *size = 0; + *type = 0; +- return; ++ goto out_put_cpu; + } + + rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); + +- /* Work out the shifted address mask. */ ++ /* Work out the shifted address mask: */ + tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; + mask_lo = size_or_mask | tmp; +- /* Expand tmp with high bits to all 1s*/ ++ ++ /* Expand tmp with high bits to all 1s: */ + hi = fls(tmp); + if (hi > 0) { + tmp |= ~((1<<(hi - 1)) - 1); +@@ -409,11 +454,19 @@ static void generic_get_mtrr(unsigned in + } + } + +- /* This works correctly if size is a power of two, i.e. a +- contiguous range. */ ++ /* ++ * This works correctly if size is a power of two, i.e. 
a ++ * contiguous range: ++ */ + *size = -mask_lo; + *base = base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT; + *type = base_lo & 0xff; ++ ++ printk(KERN_DEBUG " get_mtrr: cpu%d reg%02d base=%010lx size=%010lx %s\n", ++ cpu, reg, *base, *size, ++ mtrr_attrib_to_str(*type & 0xff)); ++out_put_cpu: ++ put_cpu(); + } + + /** +@@ -495,7 +548,7 @@ static unsigned long set_mtrr_state(void + + + static unsigned long cr4 = 0; +-static DEFINE_SPINLOCK(set_atomicity_lock); ++static DEFINE_RAW_SPINLOCK(set_atomicity_lock); + + /* + * Since we are disabling the cache don't allow any interrupts - they +Index: linux-2.6-tip/arch/x86/kernel/cpu/mtrr/main.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/mtrr/main.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/mtrr/main.c +@@ -574,7 +574,7 @@ struct mtrr_value { + unsigned long lsize; + }; + +-static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES]; ++static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES]; + + static int mtrr_save(struct sys_device * sysdev, pm_message_t state) + { +@@ -582,9 +582,9 @@ static int mtrr_save(struct sys_device * + + for (i = 0; i < num_var_ranges; i++) { + mtrr_if->get(i, +- &mtrr_state[i].lbase, +- &mtrr_state[i].lsize, +- &mtrr_state[i].ltype); ++ &mtrr_value[i].lbase, ++ &mtrr_value[i].lsize, ++ &mtrr_value[i].ltype); + } + return 0; + } +@@ -594,11 +594,11 @@ static int mtrr_restore(struct sys_devic + int i; + + for (i = 0; i < num_var_ranges; i++) { +- if (mtrr_state[i].lsize) ++ if (mtrr_value[i].lsize) + set_mtrr(i, +- mtrr_state[i].lbase, +- mtrr_state[i].lsize, +- mtrr_state[i].ltype); ++ mtrr_value[i].lbase, ++ mtrr_value[i].lsize, ++ mtrr_value[i].ltype); + } + return 0; + } +@@ -610,1058 +610,7 @@ static struct sysdev_driver mtrr_sysdev_ + .resume = mtrr_restore, + }; + +-/* should be related to MTRR_VAR_RANGES nums */ +-#define RANGE_NUM 256 +- +-struct res_range { +- unsigned long start; +- unsigned long end; +-}; +- +-static int __init +-add_range(struct res_range *range, int nr_range, unsigned long start, +- unsigned long end) +-{ +- /* out of slots */ +- if (nr_range >= RANGE_NUM) +- return nr_range; +- +- range[nr_range].start = start; +- range[nr_range].end = end; +- +- nr_range++; +- +- return nr_range; +-} +- +-static int __init +-add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, +- unsigned long end) +-{ +- int i; +- +- /* try to merge it with old one */ +- for (i = 0; i < nr_range; i++) { +- unsigned long final_start, final_end; +- unsigned long common_start, common_end; +- +- if (!range[i].end) +- continue; +- +- common_start = max(range[i].start, start); +- common_end = min(range[i].end, end); +- if (common_start > common_end + 1) +- continue; +- +- final_start = min(range[i].start, start); +- final_end = max(range[i].end, end); +- +- range[i].start = final_start; +- range[i].end = final_end; +- return nr_range; +- } +- +- /* need to add that */ +- return add_range(range, nr_range, start, end); +-} +- +-static void __init +-subtract_range(struct res_range *range, unsigned long start, unsigned long end) +-{ +- int i, j; +- +- for (j = 0; j < RANGE_NUM; j++) { +- if (!range[j].end) +- continue; +- +- if (start <= range[j].start && end >= range[j].end) { +- range[j].start = 0; +- range[j].end = 0; +- continue; +- } +- +- if (start <= range[j].start && end < range[j].end && +- range[j].start < end + 1) { +- range[j].start = end + 1; +- continue; +- } +- +- +- if (start > range[j].start && end >= range[j].end 
&& +- range[j].end > start - 1) { +- range[j].end = start - 1; +- continue; +- } +- +- if (start > range[j].start && end < range[j].end) { +- /* find the new spare */ +- for (i = 0; i < RANGE_NUM; i++) { +- if (range[i].end == 0) +- break; +- } +- if (i < RANGE_NUM) { +- range[i].end = range[j].end; +- range[i].start = end + 1; +- } else { +- printk(KERN_ERR "run of slot in ranges\n"); +- } +- range[j].end = start - 1; +- continue; +- } +- } +-} +- +-static int __init cmp_range(const void *x1, const void *x2) +-{ +- const struct res_range *r1 = x1; +- const struct res_range *r2 = x2; +- long start1, start2; +- +- start1 = r1->start; +- start2 = r2->start; +- +- return start1 - start2; +-} +- +-struct var_mtrr_range_state { +- unsigned long base_pfn; +- unsigned long size_pfn; +- mtrr_type type; +-}; +- +-static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; +-static int __initdata debug_print; +- +-static int __init +-x86_get_mtrr_mem_range(struct res_range *range, int nr_range, +- unsigned long extra_remove_base, +- unsigned long extra_remove_size) +-{ +- unsigned long i, base, size; +- mtrr_type type; +- +- for (i = 0; i < num_var_ranges; i++) { +- type = range_state[i].type; +- if (type != MTRR_TYPE_WRBACK) +- continue; +- base = range_state[i].base_pfn; +- size = range_state[i].size_pfn; +- nr_range = add_range_with_merge(range, nr_range, base, +- base + size - 1); +- } +- if (debug_print) { +- printk(KERN_DEBUG "After WB checking\n"); +- for (i = 0; i < nr_range; i++) +- printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", +- range[i].start, range[i].end + 1); +- } +- +- /* take out UC ranges */ +- for (i = 0; i < num_var_ranges; i++) { +- type = range_state[i].type; +- if (type != MTRR_TYPE_UNCACHABLE && +- type != MTRR_TYPE_WRPROT) +- continue; +- size = range_state[i].size_pfn; +- if (!size) +- continue; +- base = range_state[i].base_pfn; +- subtract_range(range, base, base + size - 1); +- } +- if (extra_remove_size) +- subtract_range(range, extra_remove_base, +- extra_remove_base + extra_remove_size - 1); +- +- /* get new range num */ +- nr_range = 0; +- for (i = 0; i < RANGE_NUM; i++) { +- if (!range[i].end) +- continue; +- nr_range++; +- } +- if (debug_print) { +- printk(KERN_DEBUG "After UC checking\n"); +- for (i = 0; i < nr_range; i++) +- printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", +- range[i].start, range[i].end + 1); +- } +- +- /* sort the ranges */ +- sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); +- if (debug_print) { +- printk(KERN_DEBUG "After sorting\n"); +- for (i = 0; i < nr_range; i++) +- printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", +- range[i].start, range[i].end + 1); +- } +- +- /* clear those is not used */ +- for (i = nr_range; i < RANGE_NUM; i++) +- memset(&range[i], 0, sizeof(range[i])); +- +- return nr_range; +-} +- +-static struct res_range __initdata range[RANGE_NUM]; +-static int __initdata nr_range; +- +-#ifdef CONFIG_MTRR_SANITIZER +- +-static unsigned long __init sum_ranges(struct res_range *range, int nr_range) +-{ +- unsigned long sum; +- int i; +- +- sum = 0; +- for (i = 0; i < nr_range; i++) +- sum += range[i].end + 1 - range[i].start; +- +- return sum; +-} +- +-static int enable_mtrr_cleanup __initdata = +- CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT; +- +-static int __init disable_mtrr_cleanup_setup(char *str) +-{ +- enable_mtrr_cleanup = 0; +- return 0; +-} +-early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); +- +-static int __init enable_mtrr_cleanup_setup(char *str) +-{ +- 
enable_mtrr_cleanup = 1; +- return 0; +-} +-early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); +- +-static int __init mtrr_cleanup_debug_setup(char *str) +-{ +- debug_print = 1; +- return 0; +-} +-early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); +- +-struct var_mtrr_state { +- unsigned long range_startk; +- unsigned long range_sizek; +- unsigned long chunk_sizek; +- unsigned long gran_sizek; +- unsigned int reg; +-}; +- +-static void __init +-set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, +- unsigned char type, unsigned int address_bits) +-{ +- u32 base_lo, base_hi, mask_lo, mask_hi; +- u64 base, mask; +- +- if (!sizek) { +- fill_mtrr_var_range(reg, 0, 0, 0, 0); +- return; +- } +- +- mask = (1ULL << address_bits) - 1; +- mask &= ~((((u64)sizek) << 10) - 1); +- +- base = ((u64)basek) << 10; +- +- base |= type; +- mask |= 0x800; +- +- base_lo = base & ((1ULL<<32) - 1); +- base_hi = base >> 32; +- +- mask_lo = mask & ((1ULL<<32) - 1); +- mask_hi = mask >> 32; +- +- fill_mtrr_var_range(reg, base_lo, base_hi, mask_lo, mask_hi); +-} +- +-static void __init +-save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, +- unsigned char type) +-{ +- range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); +- range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); +- range_state[reg].type = type; +-} +- +-static void __init +-set_var_mtrr_all(unsigned int address_bits) +-{ +- unsigned long basek, sizek; +- unsigned char type; +- unsigned int reg; +- +- for (reg = 0; reg < num_var_ranges; reg++) { +- basek = range_state[reg].base_pfn << (PAGE_SHIFT - 10); +- sizek = range_state[reg].size_pfn << (PAGE_SHIFT - 10); +- type = range_state[reg].type; +- +- set_var_mtrr(reg, basek, sizek, type, address_bits); +- } +-} +- +-static unsigned long to_size_factor(unsigned long sizek, char *factorp) +-{ +- char factor; +- unsigned long base = sizek; +- +- if (base & ((1<<10) - 1)) { +- /* not MB alignment */ +- factor = 'K'; +- } else if (base & ((1<<20) - 1)){ +- factor = 'M'; +- base >>= 10; +- } else { +- factor = 'G'; +- base >>= 20; +- } +- +- *factorp = factor; +- +- return base; +-} +- +-static unsigned int __init +-range_to_mtrr(unsigned int reg, unsigned long range_startk, +- unsigned long range_sizek, unsigned char type) +-{ +- if (!range_sizek || (reg >= num_var_ranges)) +- return reg; +- +- while (range_sizek) { +- unsigned long max_align, align; +- unsigned long sizek; +- +- /* Compute the maximum size I can make a range */ +- if (range_startk) +- max_align = ffs(range_startk) - 1; +- else +- max_align = 32; +- align = fls(range_sizek) - 1; +- if (align > max_align) +- align = max_align; +- +- sizek = 1 << align; +- if (debug_print) { +- char start_factor = 'K', size_factor = 'K'; +- unsigned long start_base, size_base; +- +- start_base = to_size_factor(range_startk, &start_factor), +- size_base = to_size_factor(sizek, &size_factor), +- +- printk(KERN_DEBUG "Setting variable MTRR %d, " +- "base: %ld%cB, range: %ld%cB, type %s\n", +- reg, start_base, start_factor, +- size_base, size_factor, +- (type == MTRR_TYPE_UNCACHABLE)?"UC": +- ((type == MTRR_TYPE_WRBACK)?"WB":"Other") +- ); +- } +- save_var_mtrr(reg++, range_startk, sizek, type); +- range_startk += sizek; +- range_sizek -= sizek; +- if (reg >= num_var_ranges) +- break; +- } +- return reg; +-} +- +-static unsigned __init +-range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, +- unsigned long sizek) +-{ +- unsigned long hole_basek, hole_sizek; +- unsigned long 
second_basek, second_sizek; +- unsigned long range0_basek, range0_sizek; +- unsigned long range_basek, range_sizek; +- unsigned long chunk_sizek; +- unsigned long gran_sizek; +- +- hole_basek = 0; +- hole_sizek = 0; +- second_basek = 0; +- second_sizek = 0; +- chunk_sizek = state->chunk_sizek; +- gran_sizek = state->gran_sizek; +- +- /* align with gran size, prevent small block used up MTRRs */ +- range_basek = ALIGN(state->range_startk, gran_sizek); +- if ((range_basek > basek) && basek) +- return second_sizek; +- state->range_sizek -= (range_basek - state->range_startk); +- range_sizek = ALIGN(state->range_sizek, gran_sizek); +- +- while (range_sizek > state->range_sizek) { +- range_sizek -= gran_sizek; +- if (!range_sizek) +- return 0; +- } +- state->range_sizek = range_sizek; +- +- /* try to append some small hole */ +- range0_basek = state->range_startk; +- range0_sizek = ALIGN(state->range_sizek, chunk_sizek); +- +- /* no increase */ +- if (range0_sizek == state->range_sizek) { +- if (debug_print) +- printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", +- range0_basek<<10, +- (range0_basek + state->range_sizek)<<10); +- state->reg = range_to_mtrr(state->reg, range0_basek, +- state->range_sizek, MTRR_TYPE_WRBACK); +- return 0; +- } +- +- /* only cut back, when it is not the last */ +- if (sizek) { +- while (range0_basek + range0_sizek > (basek + sizek)) { +- if (range0_sizek >= chunk_sizek) +- range0_sizek -= chunk_sizek; +- else +- range0_sizek = 0; +- +- if (!range0_sizek) +- break; +- } +- } +- +-second_try: +- range_basek = range0_basek + range0_sizek; +- +- /* one hole in the middle */ +- if (range_basek > basek && range_basek <= (basek + sizek)) +- second_sizek = range_basek - basek; +- +- if (range0_sizek > state->range_sizek) { +- +- /* one hole in middle or at end */ +- hole_sizek = range0_sizek - state->range_sizek - second_sizek; +- +- /* hole size should be less than half of range0 size */ +- if (hole_sizek >= (range0_sizek >> 1) && +- range0_sizek >= chunk_sizek) { +- range0_sizek -= chunk_sizek; +- second_sizek = 0; +- hole_sizek = 0; +- +- goto second_try; +- } +- } +- +- if (range0_sizek) { +- if (debug_print) +- printk(KERN_DEBUG "range0: %016lx - %016lx\n", +- range0_basek<<10, +- (range0_basek + range0_sizek)<<10); +- state->reg = range_to_mtrr(state->reg, range0_basek, +- range0_sizek, MTRR_TYPE_WRBACK); +- } +- +- if (range0_sizek < state->range_sizek) { +- /* need to handle left over */ +- range_sizek = state->range_sizek - range0_sizek; +- +- if (debug_print) +- printk(KERN_DEBUG "range: %016lx - %016lx\n", +- range_basek<<10, +- (range_basek + range_sizek)<<10); +- state->reg = range_to_mtrr(state->reg, range_basek, +- range_sizek, MTRR_TYPE_WRBACK); +- } +- +- if (hole_sizek) { +- hole_basek = range_basek - hole_sizek - second_sizek; +- if (debug_print) +- printk(KERN_DEBUG "hole: %016lx - %016lx\n", +- hole_basek<<10, +- (hole_basek + hole_sizek)<<10); +- state->reg = range_to_mtrr(state->reg, hole_basek, +- hole_sizek, MTRR_TYPE_UNCACHABLE); +- } +- +- return second_sizek; +-} +- +-static void __init +-set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, +- unsigned long size_pfn) +-{ +- unsigned long basek, sizek; +- unsigned long second_sizek = 0; +- +- if (state->reg >= num_var_ranges) +- return; +- +- basek = base_pfn << (PAGE_SHIFT - 10); +- sizek = size_pfn << (PAGE_SHIFT - 10); +- +- /* See if I can merge with the last range */ +- if ((basek <= 1024) || +- (state->range_startk + state->range_sizek == basek)) { +- unsigned long endk = 
basek + sizek; +- state->range_sizek = endk - state->range_startk; +- return; +- } +- /* Write the range mtrrs */ +- if (state->range_sizek != 0) +- second_sizek = range_to_mtrr_with_hole(state, basek, sizek); +- +- /* Allocate an msr */ +- state->range_startk = basek + second_sizek; +- state->range_sizek = sizek - second_sizek; +-} +- +-/* mininum size of mtrr block that can take hole */ +-static u64 mtrr_chunk_size __initdata = (256ULL<<20); +- +-static int __init parse_mtrr_chunk_size_opt(char *p) +-{ +- if (!p) +- return -EINVAL; +- mtrr_chunk_size = memparse(p, &p); +- return 0; +-} +-early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); +- +-/* granity of mtrr of block */ +-static u64 mtrr_gran_size __initdata; +- +-static int __init parse_mtrr_gran_size_opt(char *p) +-{ +- if (!p) +- return -EINVAL; +- mtrr_gran_size = memparse(p, &p); +- return 0; +-} +-early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); +- +-static int nr_mtrr_spare_reg __initdata = +- CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; +- +-static int __init parse_mtrr_spare_reg(char *arg) +-{ +- if (arg) +- nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); +- return 0; +-} +- +-early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); +- +-static int __init +-x86_setup_var_mtrrs(struct res_range *range, int nr_range, +- u64 chunk_size, u64 gran_size) +-{ +- struct var_mtrr_state var_state; +- int i; +- int num_reg; +- +- var_state.range_startk = 0; +- var_state.range_sizek = 0; +- var_state.reg = 0; +- var_state.chunk_sizek = chunk_size >> 10; +- var_state.gran_sizek = gran_size >> 10; +- +- memset(range_state, 0, sizeof(range_state)); +- +- /* Write the range etc */ +- for (i = 0; i < nr_range; i++) +- set_var_mtrr_range(&var_state, range[i].start, +- range[i].end - range[i].start + 1); +- +- /* Write the last range */ +- if (var_state.range_sizek != 0) +- range_to_mtrr_with_hole(&var_state, 0, 0); +- +- num_reg = var_state.reg; +- /* Clear out the extra MTRR's */ +- while (var_state.reg < num_var_ranges) { +- save_var_mtrr(var_state.reg, 0, 0, 0); +- var_state.reg++; +- } +- +- return num_reg; +-} +- +-struct mtrr_cleanup_result { +- unsigned long gran_sizek; +- unsigned long chunk_sizek; +- unsigned long lose_cover_sizek; +- unsigned int num_reg; +- int bad; +-}; +- +-/* +- * gran_size: 64K, 128K, 256K, 512K, 1M, 2M, ..., 2G +- * chunk size: gran_size, ..., 2G +- * so we need (1+16)*8 +- */ +-#define NUM_RESULT 136 +-#define PSHIFT (PAGE_SHIFT - 10) +- +-static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; +-static unsigned long __initdata min_loss_pfn[RANGE_NUM]; +- +-static void __init print_out_mtrr_range_state(void) +-{ +- int i; +- char start_factor = 'K', size_factor = 'K'; +- unsigned long start_base, size_base; +- mtrr_type type; +- +- for (i = 0; i < num_var_ranges; i++) { +- +- size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); +- if (!size_base) +- continue; +- +- size_base = to_size_factor(size_base, &size_factor), +- start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); +- start_base = to_size_factor(start_base, &start_factor), +- type = range_state[i].type; +- +- printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", +- i, start_base, start_factor, +- size_base, size_factor, +- (type == MTRR_TYPE_UNCACHABLE) ? "UC" : +- ((type == MTRR_TYPE_WRPROT) ? "WP" : +- ((type == MTRR_TYPE_WRBACK) ? 
"WB" : "Other")) +- ); +- } +-} +- +-static int __init mtrr_need_cleanup(void) +-{ +- int i; +- mtrr_type type; +- unsigned long size; +- /* extra one for all 0 */ +- int num[MTRR_NUM_TYPES + 1]; +- +- /* check entries number */ +- memset(num, 0, sizeof(num)); +- for (i = 0; i < num_var_ranges; i++) { +- type = range_state[i].type; +- size = range_state[i].size_pfn; +- if (type >= MTRR_NUM_TYPES) +- continue; +- if (!size) +- type = MTRR_NUM_TYPES; +- if (type == MTRR_TYPE_WRPROT) +- type = MTRR_TYPE_UNCACHABLE; +- num[type]++; +- } +- +- /* check if we got UC entries */ +- if (!num[MTRR_TYPE_UNCACHABLE]) +- return 0; +- +- /* check if we only had WB and UC */ +- if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != +- num_var_ranges - num[MTRR_NUM_TYPES]) +- return 0; +- +- return 1; +-} +- +-static unsigned long __initdata range_sums; +-static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size, +- unsigned long extra_remove_base, +- unsigned long extra_remove_size, +- int i) +-{ +- int num_reg; +- static struct res_range range_new[RANGE_NUM]; +- static int nr_range_new; +- unsigned long range_sums_new; +- +- /* convert ranges to var ranges state */ +- num_reg = x86_setup_var_mtrrs(range, nr_range, +- chunk_size, gran_size); +- +- /* we got new setting in range_state, check it */ +- memset(range_new, 0, sizeof(range_new)); +- nr_range_new = x86_get_mtrr_mem_range(range_new, 0, +- extra_remove_base, extra_remove_size); +- range_sums_new = sum_ranges(range_new, nr_range_new); +- +- result[i].chunk_sizek = chunk_size >> 10; +- result[i].gran_sizek = gran_size >> 10; +- result[i].num_reg = num_reg; +- if (range_sums < range_sums_new) { +- result[i].lose_cover_sizek = +- (range_sums_new - range_sums) << PSHIFT; +- result[i].bad = 1; +- } else +- result[i].lose_cover_sizek = +- (range_sums - range_sums_new) << PSHIFT; +- +- /* double check it */ +- if (!result[i].bad && !result[i].lose_cover_sizek) { +- if (nr_range_new != nr_range || +- memcmp(range, range_new, sizeof(range))) +- result[i].bad = 1; +- } +- +- if (!result[i].bad && (range_sums - range_sums_new < +- min_loss_pfn[num_reg])) { +- min_loss_pfn[num_reg] = +- range_sums - range_sums_new; +- } +-} +- +-static void __init mtrr_print_out_one_result(int i) +-{ +- char gran_factor, chunk_factor, lose_factor; +- unsigned long gran_base, chunk_base, lose_base; +- +- gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), +- chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), +- lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), +- printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", +- result[i].bad ? "*BAD*" : " ", +- gran_base, gran_factor, chunk_base, chunk_factor); +- printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", +- result[i].num_reg, result[i].bad ? 
"-" : "", +- lose_base, lose_factor); +-} +- +-static int __init mtrr_search_optimal_index(void) +-{ +- int i; +- int num_reg_good; +- int index_good; +- +- if (nr_mtrr_spare_reg >= num_var_ranges) +- nr_mtrr_spare_reg = num_var_ranges - 1; +- num_reg_good = -1; +- for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { +- if (!min_loss_pfn[i]) +- num_reg_good = i; +- } +- +- index_good = -1; +- if (num_reg_good != -1) { +- for (i = 0; i < NUM_RESULT; i++) { +- if (!result[i].bad && +- result[i].num_reg == num_reg_good && +- !result[i].lose_cover_sizek) { +- index_good = i; +- break; +- } +- } +- } +- +- return index_good; +-} +- +- +-static int __init mtrr_cleanup(unsigned address_bits) +-{ +- unsigned long extra_remove_base, extra_remove_size; +- unsigned long base, size, def, dummy; +- mtrr_type type; +- u64 chunk_size, gran_size; +- int index_good; +- int i; +- +- if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) +- return 0; +- rdmsr(MTRRdefType_MSR, def, dummy); +- def &= 0xff; +- if (def != MTRR_TYPE_UNCACHABLE) +- return 0; +- +- /* get it and store it aside */ +- memset(range_state, 0, sizeof(range_state)); +- for (i = 0; i < num_var_ranges; i++) { +- mtrr_if->get(i, &base, &size, &type); +- range_state[i].base_pfn = base; +- range_state[i].size_pfn = size; +- range_state[i].type = type; +- } +- +- /* check if we need handle it and can handle it */ +- if (!mtrr_need_cleanup()) +- return 0; +- +- /* print original var MTRRs at first, for debugging: */ +- printk(KERN_DEBUG "original variable MTRRs\n"); +- print_out_mtrr_range_state(); +- +- memset(range, 0, sizeof(range)); +- extra_remove_size = 0; +- extra_remove_base = 1 << (32 - PAGE_SHIFT); +- if (mtrr_tom2) +- extra_remove_size = +- (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; +- nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, +- extra_remove_size); +- /* +- * [0, 1M) should always be coverred by var mtrr with WB +- * and fixed mtrrs should take effective before var mtrr for it +- */ +- nr_range = add_range_with_merge(range, nr_range, 0, +- (1ULL<<(20 - PAGE_SHIFT)) - 1); +- /* sort the ranges */ +- sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); +- +- range_sums = sum_ranges(range, nr_range); +- printk(KERN_INFO "total RAM coverred: %ldM\n", +- range_sums >> (20 - PAGE_SHIFT)); +- +- if (mtrr_chunk_size && mtrr_gran_size) { +- i = 0; +- mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, +- extra_remove_base, extra_remove_size, i); +- +- mtrr_print_out_one_result(i); +- +- if (!result[i].bad) { +- set_var_mtrr_all(address_bits); +- return 1; +- } +- printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " +- "will find optimal one\n"); +- } +- +- i = 0; +- memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); +- memset(result, 0, sizeof(result)); +- for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { +- +- for (chunk_size = gran_size; chunk_size < (1ULL<<32); +- chunk_size <<= 1) { +- +- if (i >= NUM_RESULT) +- continue; +- +- mtrr_calc_range_state(chunk_size, gran_size, +- extra_remove_base, extra_remove_size, i); +- if (debug_print) { +- mtrr_print_out_one_result(i); +- printk(KERN_INFO "\n"); +- } +- +- i++; +- } +- } +- +- /* try to find the optimal index */ +- index_good = mtrr_search_optimal_index(); +- +- if (index_good != -1) { +- printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); +- i = index_good; +- mtrr_print_out_one_result(i); +- +- /* convert ranges to var ranges state */ +- chunk_size = result[i].chunk_sizek; +- chunk_size <<= 10; +- 
gran_size = result[i].gran_sizek; +- gran_size <<= 10; +- x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); +- set_var_mtrr_all(address_bits); +- printk(KERN_DEBUG "New variable MTRRs\n"); +- print_out_mtrr_range_state(); +- return 1; +- } else { +- /* print out all */ +- for (i = 0; i < NUM_RESULT; i++) +- mtrr_print_out_one_result(i); +- } +- +- printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); +- printk(KERN_INFO "please specify mtrr_gran_size/mtrr_chunk_size\n"); +- +- return 0; +-} +-#else +-static int __init mtrr_cleanup(unsigned address_bits) +-{ +- return 0; +-} +-#endif +- +-static int __initdata changed_by_mtrr_cleanup; +- +-static int disable_mtrr_trim; +- +-static int __init disable_mtrr_trim_setup(char *str) +-{ +- disable_mtrr_trim = 1; +- return 0; +-} +-early_param("disable_mtrr_trim", disable_mtrr_trim_setup); +- +-/* +- * Newer AMD K8s and later CPUs have a special magic MSR way to force WB +- * for memory >4GB. Check for that here. +- * Note this won't check if the MTRRs < 4GB where the magic bit doesn't +- * apply to are wrong, but so far we don't know of any such case in the wild. +- */ +-#define Tom2Enabled (1U << 21) +-#define Tom2ForceMemTypeWB (1U << 22) +- +-int __init amd_special_default_mtrr(void) +-{ +- u32 l, h; +- +- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) +- return 0; +- if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) +- return 0; +- /* In case some hypervisor doesn't pass SYSCFG through */ +- if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) +- return 0; +- /* +- * Memory between 4GB and top of mem is forced WB by this magic bit. +- * Reserved before K8RevF, but should be zero there. +- */ +- if ((l & (Tom2Enabled | Tom2ForceMemTypeWB)) == +- (Tom2Enabled | Tom2ForceMemTypeWB)) +- return 1; +- return 0; +-} +- +-static u64 __init real_trim_memory(unsigned long start_pfn, +- unsigned long limit_pfn) +-{ +- u64 trim_start, trim_size; +- trim_start = start_pfn; +- trim_start <<= PAGE_SHIFT; +- trim_size = limit_pfn; +- trim_size <<= PAGE_SHIFT; +- trim_size -= trim_start; +- +- return e820_update_range(trim_start, trim_size, E820_RAM, +- E820_RESERVED); +-} +-/** +- * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs +- * @end_pfn: ending page frame number +- * +- * Some buggy BIOSes don't setup the MTRRs properly for systems with certain +- * memory configurations. This routine checks that the highest MTRR matches +- * the end of memory, to make sure the MTRRs having a write back type cover +- * all of the memory the kernel is intending to use. If not, it'll trim any +- * memory off the end by adjusting end_pfn, removing it from the kernel's +- * allocation pools, warning the user with an obnoxious message. 
+- */ +-int __init mtrr_trim_uncached_memory(unsigned long end_pfn) +-{ +- unsigned long i, base, size, highest_pfn = 0, def, dummy; +- mtrr_type type; +- u64 total_trim_size; +- +- /* extra one for all 0 */ +- int num[MTRR_NUM_TYPES + 1]; +- /* +- * Make sure we only trim uncachable memory on machines that +- * support the Intel MTRR architecture: +- */ +- if (!is_cpu(INTEL) || disable_mtrr_trim) +- return 0; +- rdmsr(MTRRdefType_MSR, def, dummy); +- def &= 0xff; +- if (def != MTRR_TYPE_UNCACHABLE) +- return 0; +- +- /* get it and store it aside */ +- memset(range_state, 0, sizeof(range_state)); +- for (i = 0; i < num_var_ranges; i++) { +- mtrr_if->get(i, &base, &size, &type); +- range_state[i].base_pfn = base; +- range_state[i].size_pfn = size; +- range_state[i].type = type; +- } +- +- /* Find highest cached pfn */ +- for (i = 0; i < num_var_ranges; i++) { +- type = range_state[i].type; +- if (type != MTRR_TYPE_WRBACK) +- continue; +- base = range_state[i].base_pfn; +- size = range_state[i].size_pfn; +- if (highest_pfn < base + size) +- highest_pfn = base + size; +- } +- +- /* kvm/qemu doesn't have mtrr set right, don't trim them all */ +- if (!highest_pfn) { +- printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); +- return 0; +- } +- +- /* check entries number */ +- memset(num, 0, sizeof(num)); +- for (i = 0; i < num_var_ranges; i++) { +- type = range_state[i].type; +- if (type >= MTRR_NUM_TYPES) +- continue; +- size = range_state[i].size_pfn; +- if (!size) +- type = MTRR_NUM_TYPES; +- num[type]++; +- } +- +- /* no entry for WB? */ +- if (!num[MTRR_TYPE_WRBACK]) +- return 0; +- +- /* check if we only had WB and UC */ +- if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != +- num_var_ranges - num[MTRR_NUM_TYPES]) +- return 0; +- +- memset(range, 0, sizeof(range)); +- nr_range = 0; +- if (mtrr_tom2) { +- range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); +- range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; +- if (highest_pfn < range[nr_range].end + 1) +- highest_pfn = range[nr_range].end + 1; +- nr_range++; +- } +- nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); +- +- total_trim_size = 0; +- /* check the head */ +- if (range[0].start) +- total_trim_size += real_trim_memory(0, range[0].start); +- /* check the holes */ +- for (i = 0; i < nr_range - 1; i++) { +- if (range[i].end + 1 < range[i+1].start) +- total_trim_size += real_trim_memory(range[i].end + 1, +- range[i+1].start); +- } +- /* check the top */ +- i = nr_range - 1; +- if (range[i].end + 1 < end_pfn) +- total_trim_size += real_trim_memory(range[i].end + 1, +- end_pfn); +- +- if (total_trim_size) { +- printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" +- " all of memory, losing %lluMB of RAM.\n", +- total_trim_size >> 20); +- +- if (!changed_by_mtrr_cleanup) +- WARN_ON(1); +- +- printk(KERN_INFO "update e820 for mtrr\n"); +- update_e820(); +- +- return 1; +- } +- +- return 0; +-} ++int __initdata changed_by_mtrr_cleanup; + + /** + * mtrr_bp_init - initialize mtrrs on the boot CPU +Index: linux-2.6-tip/arch/x86/kernel/cpu/mtrr/mtrr.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/mtrr/mtrr.h ++++ linux-2.6-tip/arch/x86/kernel/cpu/mtrr/mtrr.h +@@ -79,6 +79,7 @@ extern struct mtrr_ops * mtrr_if; + + extern unsigned int num_var_ranges; + extern u64 mtrr_tom2; ++extern struct mtrr_state_type mtrr_state; + + void mtrr_state_warn(void); + const char *mtrr_attrib_to_str(int x); +@@ -88,3 +89,6 @@ void mtrr_wrmsr(unsigned, unsigned, 
unsi + int amd_init_mtrr(void); + int cyrix_init_mtrr(void); + int centaur_init_mtrr(void); ++ ++extern int changed_by_mtrr_cleanup; ++extern int mtrr_cleanup(unsigned address_bits); +Index: linux-2.6-tip/arch/x86/kernel/cpu/perf_counter.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/cpu/perf_counter.c +@@ -0,0 +1,989 @@ ++/* ++ * Performance counter x86 architecture code ++ * ++ * Copyright(C) 2008 Thomas Gleixner ++ * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar ++ * Copyright(C) 2009 Jaswinder Singh Rajput ++ * ++ * For licencing details see kernel-base/COPYING ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++static bool perf_counters_initialized __read_mostly; ++ ++/* ++ * Number of (generic) HW counters: ++ */ ++static int nr_counters_generic __read_mostly; ++static u64 perf_counter_mask __read_mostly; ++static u64 counter_value_mask __read_mostly; ++static int counter_value_bits __read_mostly; ++ ++static int nr_counters_fixed __read_mostly; ++ ++struct cpu_hw_counters { ++ struct perf_counter *counters[X86_PMC_IDX_MAX]; ++ unsigned long used[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; ++ unsigned long interrupts; ++ u64 throttle_ctrl; ++ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; ++ int enabled; ++}; ++ ++/* ++ * struct pmc_x86_ops - performance counter x86 ops ++ */ ++struct pmc_x86_ops { ++ u64 (*save_disable_all)(void); ++ void (*restore_all)(u64); ++ u64 (*get_status)(u64); ++ void (*ack_status)(u64); ++ void (*enable)(int, u64); ++ void (*disable)(int, u64); ++ unsigned eventsel; ++ unsigned perfctr; ++ u64 (*event_map)(int); ++ u64 (*raw_event)(u64); ++ int max_events; ++}; ++ ++static struct pmc_x86_ops *pmc_ops __read_mostly; ++ ++static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { ++ .enabled = 1, ++}; ++ ++static __read_mostly int intel_perfmon_version; ++ ++/* ++ * Intel PerfMon v3. Used on Core2 and later. ++ */ ++static const u64 intel_perfmon_event_map[] = ++{ ++ [PERF_COUNT_CPU_CYCLES] = 0x003c, ++ [PERF_COUNT_INSTRUCTIONS] = 0x00c0, ++ [PERF_COUNT_CACHE_REFERENCES] = 0x4f2e, ++ [PERF_COUNT_CACHE_MISSES] = 0x412e, ++ [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4, ++ [PERF_COUNT_BRANCH_MISSES] = 0x00c5, ++ [PERF_COUNT_BUS_CYCLES] = 0x013c, ++}; ++ ++static u64 pmc_intel_event_map(int event) ++{ ++ return intel_perfmon_event_map[event]; ++} ++ ++static u64 pmc_intel_raw_event(u64 event) ++{ ++#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL ++#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL ++#define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL ++ ++#define CORE_EVNTSEL_MASK \ ++ (CORE_EVNTSEL_EVENT_MASK | \ ++ CORE_EVNTSEL_UNIT_MASK | \ ++ CORE_EVNTSEL_COUNTER_MASK) ++ ++ return event & CORE_EVNTSEL_MASK; ++} ++ ++/* ++ * AMD Performance Monitor K7 and later. 
++ */ ++static const u64 amd_perfmon_event_map[] = ++{ ++ [PERF_COUNT_CPU_CYCLES] = 0x0076, ++ [PERF_COUNT_INSTRUCTIONS] = 0x00c0, ++ [PERF_COUNT_CACHE_REFERENCES] = 0x0080, ++ [PERF_COUNT_CACHE_MISSES] = 0x0081, ++ [PERF_COUNT_BRANCH_INSTRUCTIONS] = 0x00c4, ++ [PERF_COUNT_BRANCH_MISSES] = 0x00c5, ++}; ++ ++static u64 pmc_amd_event_map(int event) ++{ ++ return amd_perfmon_event_map[event]; ++} ++ ++static u64 pmc_amd_raw_event(u64 event) ++{ ++#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL ++#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL ++#define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL ++ ++#define K7_EVNTSEL_MASK \ ++ (K7_EVNTSEL_EVENT_MASK | \ ++ K7_EVNTSEL_UNIT_MASK | \ ++ K7_EVNTSEL_COUNTER_MASK) ++ ++ return event & K7_EVNTSEL_MASK; ++} ++ ++/* ++ * Propagate counter elapsed time into the generic counter. ++ * Can only be executed on the CPU where the counter is active. ++ * Returns the delta events processed. ++ */ ++static void ++x86_perf_counter_update(struct perf_counter *counter, ++ struct hw_perf_counter *hwc, int idx) ++{ ++ u64 prev_raw_count, new_raw_count, delta; ++ ++ /* ++ * Careful: an NMI might modify the previous counter value. ++ * ++ * Our tactic to handle this is to first atomically read and ++ * exchange a new raw count - then add that new-prev delta ++ * count to the generic counter atomically: ++ */ ++again: ++ prev_raw_count = atomic64_read(&hwc->prev_count); ++ rdmsrl(hwc->counter_base + idx, new_raw_count); ++ ++ if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, ++ new_raw_count) != prev_raw_count) ++ goto again; ++ ++ /* ++ * Now we have the new raw value and have updated the prev ++ * timestamp already. We can now calculate the elapsed delta ++ * (counter-)time and add that to the generic counter. ++ * ++ * Careful, not all hw sign-extends above the physical width ++ * of the count, so we do that by clipping the delta to 32 bits: ++ */ ++ delta = (u64)(u32)((s32)new_raw_count - (s32)prev_raw_count); ++ ++ atomic64_add(delta, &counter->count); ++ atomic64_sub(delta, &hwc->period_left); ++} ++ ++/* ++ * Setup the hardware configuration for a given hw_event_type ++ */ ++static int __hw_perf_counter_init(struct perf_counter *counter) ++{ ++ struct perf_counter_hw_event *hw_event = &counter->hw_event; ++ struct hw_perf_counter *hwc = &counter->hw; ++ ++ if (unlikely(!perf_counters_initialized)) ++ return -EINVAL; ++ ++ /* ++ * Generate PMC IRQs: ++ * (keep 'enabled' bit clear for now) ++ */ ++ hwc->config = ARCH_PERFMON_EVENTSEL_INT; ++ ++ /* ++ * Count user and OS events unless requested not to. 
++ */ ++ if (!hw_event->exclude_user) ++ hwc->config |= ARCH_PERFMON_EVENTSEL_USR; ++ if (!hw_event->exclude_kernel) ++ hwc->config |= ARCH_PERFMON_EVENTSEL_OS; ++ ++ /* ++ * If privileged enough, allow NMI events: ++ */ ++ hwc->nmi = 0; ++ if (capable(CAP_SYS_ADMIN) && hw_event->nmi) ++ hwc->nmi = 1; ++ ++ hwc->irq_period = hw_event->irq_period; ++ /* ++ * Intel PMCs cannot be accessed sanely above 32 bit width, ++ * so we install an artificial 1<<31 period regardless of ++ * the generic counter period: ++ */ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) ++ if ((s64)hwc->irq_period <= 0 || hwc->irq_period > 0x7FFFFFFF) ++ hwc->irq_period = 0x7FFFFFFF; ++ ++ atomic64_set(&hwc->period_left, hwc->irq_period); ++ ++ /* ++ * Raw event type provide the config in the event structure ++ */ ++ if (perf_event_raw(hw_event)) { ++ hwc->config |= pmc_ops->raw_event(perf_event_config(hw_event)); ++ } else { ++ if (perf_event_id(hw_event) >= pmc_ops->max_events) ++ return -EINVAL; ++ /* ++ * The generic map: ++ */ ++ hwc->config |= pmc_ops->event_map(perf_event_id(hw_event)); ++ } ++ counter->wakeup_pending = 0; ++ ++ return 0; ++} ++ ++static u64 pmc_intel_save_disable_all(void) ++{ ++ u64 ctrl; ++ ++ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); ++ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); ++ ++ return ctrl; ++} ++ ++static u64 pmc_amd_save_disable_all(void) ++{ ++ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); ++ int enabled, idx; ++ ++ enabled = cpuc->enabled; ++ cpuc->enabled = 0; ++ /* ++ * ensure we write the disable before we start disabling the ++ * counters proper, so that pcm_amd_enable() does the right thing. ++ */ ++ barrier(); ++ ++ for (idx = 0; idx < nr_counters_generic; idx++) { ++ u64 val; ++ ++ rdmsrl(MSR_K7_EVNTSEL0 + idx, val); ++ if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) { ++ val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; ++ wrmsrl(MSR_K7_EVNTSEL0 + idx, val); ++ } ++ } ++ ++ return enabled; ++} ++ ++u64 hw_perf_save_disable(void) ++{ ++ if (unlikely(!perf_counters_initialized)) ++ return 0; ++ ++ return pmc_ops->save_disable_all(); ++} ++/* ++ * Exported because of ACPI idle ++ */ ++EXPORT_SYMBOL_GPL(hw_perf_save_disable); ++ ++static void pmc_intel_restore_all(u64 ctrl) ++{ ++ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); ++} ++ ++static void pmc_amd_restore_all(u64 ctrl) ++{ ++ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); ++ int idx; ++ ++ cpuc->enabled = ctrl; ++ barrier(); ++ if (!ctrl) ++ return; ++ ++ for (idx = 0; idx < nr_counters_generic; idx++) { ++ if (test_bit(idx, cpuc->active_mask)) { ++ u64 val; ++ ++ rdmsrl(MSR_K7_EVNTSEL0 + idx, val); ++ val |= ARCH_PERFMON_EVENTSEL0_ENABLE; ++ wrmsrl(MSR_K7_EVNTSEL0 + idx, val); ++ } ++ } ++} ++ ++void hw_perf_restore(u64 ctrl) ++{ ++ if (unlikely(!perf_counters_initialized)) ++ return; ++ ++ pmc_ops->restore_all(ctrl); ++} ++/* ++ * Exported because of ACPI idle ++ */ ++EXPORT_SYMBOL_GPL(hw_perf_restore); ++ ++static u64 pmc_intel_get_status(u64 mask) ++{ ++ u64 status; ++ ++ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); ++ ++ return status; ++} ++ ++static u64 pmc_amd_get_status(u64 mask) ++{ ++ u64 status = 0; ++ int idx; ++ ++ for (idx = 0; idx < nr_counters_generic; idx++) { ++ s64 val; ++ ++ if (!(mask & (1 << idx))) ++ continue; ++ ++ rdmsrl(MSR_K7_PERFCTR0 + idx, val); ++ val <<= (64 - counter_value_bits); ++ if (val >= 0) ++ status |= (1 << idx); ++ } ++ ++ return status; ++} ++ ++static u64 hw_perf_get_status(u64 mask) ++{ ++ if (unlikely(!perf_counters_initialized)) ++ return 0; ++ ++ return 
pmc_ops->get_status(mask); ++} ++ ++static void pmc_intel_ack_status(u64 ack) ++{ ++ wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); ++} ++ ++static void pmc_amd_ack_status(u64 ack) ++{ ++} ++ ++static void hw_perf_ack_status(u64 ack) ++{ ++ if (unlikely(!perf_counters_initialized)) ++ return; ++ ++ pmc_ops->ack_status(ack); ++} ++ ++static void pmc_intel_enable(int idx, u64 config) ++{ ++ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, ++ config | ARCH_PERFMON_EVENTSEL0_ENABLE); ++} ++ ++static void pmc_amd_enable(int idx, u64 config) ++{ ++ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); ++ ++ set_bit(idx, cpuc->active_mask); ++ if (cpuc->enabled) ++ config |= ARCH_PERFMON_EVENTSEL0_ENABLE; ++ ++ wrmsrl(MSR_K7_EVNTSEL0 + idx, config); ++} ++ ++static void hw_perf_enable(int idx, u64 config) ++{ ++ if (unlikely(!perf_counters_initialized)) ++ return; ++ ++ pmc_ops->enable(idx, config); ++} ++ ++static void pmc_intel_disable(int idx, u64 config) ++{ ++ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + idx, config); ++} ++ ++static void pmc_amd_disable(int idx, u64 config) ++{ ++ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); ++ ++ clear_bit(idx, cpuc->active_mask); ++ wrmsrl(MSR_K7_EVNTSEL0 + idx, config); ++ ++} ++ ++static void hw_perf_disable(int idx, u64 config) ++{ ++ if (unlikely(!perf_counters_initialized)) ++ return; ++ ++ pmc_ops->disable(idx, config); ++} ++ ++static inline void ++__pmc_fixed_disable(struct perf_counter *counter, ++ struct hw_perf_counter *hwc, unsigned int __idx) ++{ ++ int idx = __idx - X86_PMC_IDX_FIXED; ++ u64 ctrl_val, mask; ++ int err; ++ ++ mask = 0xfULL << (idx * 4); ++ ++ rdmsrl(hwc->config_base, ctrl_val); ++ ctrl_val &= ~mask; ++ err = checking_wrmsrl(hwc->config_base, ctrl_val); ++} ++ ++static inline void ++__pmc_generic_disable(struct perf_counter *counter, ++ struct hw_perf_counter *hwc, unsigned int idx) ++{ ++ if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) ++ __pmc_fixed_disable(counter, hwc, idx); ++ else ++ hw_perf_disable(idx, hwc->config); ++} ++ ++static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); ++ ++/* ++ * Set the next IRQ period, based on the hwc->period_left value. 
++ * To be called with the counter disabled in hw: ++ */ ++static void ++__hw_perf_counter_set_period(struct perf_counter *counter, ++ struct hw_perf_counter *hwc, int idx) ++{ ++ s64 left = atomic64_read(&hwc->period_left); ++ s64 period = hwc->irq_period; ++ int err; ++ ++ /* ++ * If we are way outside a reasoable range then just skip forward: ++ */ ++ if (unlikely(left <= -period)) { ++ left = period; ++ atomic64_set(&hwc->period_left, left); ++ } ++ ++ if (unlikely(left <= 0)) { ++ left += period; ++ atomic64_set(&hwc->period_left, left); ++ } ++ ++ per_cpu(prev_left[idx], smp_processor_id()) = left; ++ ++ /* ++ * The hw counter starts counting from this counter offset, ++ * mark it to be able to extra future deltas: ++ */ ++ atomic64_set(&hwc->prev_count, (u64)-left); ++ ++ err = checking_wrmsrl(hwc->counter_base + idx, ++ (u64)(-left) & counter_value_mask); ++} ++ ++static inline void ++__pmc_fixed_enable(struct perf_counter *counter, ++ struct hw_perf_counter *hwc, unsigned int __idx) ++{ ++ int idx = __idx - X86_PMC_IDX_FIXED; ++ u64 ctrl_val, bits, mask; ++ int err; ++ ++ /* ++ * Enable IRQ generation (0x8), ++ * and enable ring-3 counting (0x2) and ring-0 counting (0x1) ++ * if requested: ++ */ ++ bits = 0x8ULL; ++ if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) ++ bits |= 0x2; ++ if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) ++ bits |= 0x1; ++ bits <<= (idx * 4); ++ mask = 0xfULL << (idx * 4); ++ ++ rdmsrl(hwc->config_base, ctrl_val); ++ ctrl_val &= ~mask; ++ ctrl_val |= bits; ++ err = checking_wrmsrl(hwc->config_base, ctrl_val); ++} ++ ++static void ++__pmc_generic_enable(struct perf_counter *counter, ++ struct hw_perf_counter *hwc, int idx) ++{ ++ if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) ++ __pmc_fixed_enable(counter, hwc, idx); ++ else ++ hw_perf_enable(idx, hwc->config); ++} ++ ++static int ++fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) ++{ ++ unsigned int event; ++ ++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) ++ return -1; ++ ++ if (unlikely(hwc->nmi)) ++ return -1; ++ ++ event = hwc->config & ARCH_PERFMON_EVENT_MASK; ++ ++ if (unlikely(event == pmc_ops->event_map(PERF_COUNT_INSTRUCTIONS))) ++ return X86_PMC_IDX_FIXED_INSTRUCTIONS; ++ if (unlikely(event == pmc_ops->event_map(PERF_COUNT_CPU_CYCLES))) ++ return X86_PMC_IDX_FIXED_CPU_CYCLES; ++ if (unlikely(event == pmc_ops->event_map(PERF_COUNT_BUS_CYCLES))) ++ return X86_PMC_IDX_FIXED_BUS_CYCLES; ++ ++ return -1; ++} ++ ++/* ++ * Find a PMC slot for the freshly enabled / scheduled in counter: ++ */ ++static int pmc_generic_enable(struct perf_counter *counter) ++{ ++ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); ++ struct hw_perf_counter *hwc = &counter->hw; ++ int idx; ++ ++ idx = fixed_mode_idx(counter, hwc); ++ if (idx >= 0) { ++ /* ++ * Try to get the fixed counter, if that is already taken ++ * then try to get a generic counter: ++ */ ++ if (test_and_set_bit(idx, cpuc->used)) ++ goto try_generic; ++ ++ hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; ++ /* ++ * We set it so that counter_base + idx in wrmsr/rdmsr maps to ++ * MSR_ARCH_PERFMON_FIXED_CTR0 ... 
CTR2: ++ */ ++ hwc->counter_base = ++ MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; ++ hwc->idx = idx; ++ } else { ++ idx = hwc->idx; ++ /* Try to get the previous generic counter again */ ++ if (test_and_set_bit(idx, cpuc->used)) { ++try_generic: ++ idx = find_first_zero_bit(cpuc->used, nr_counters_generic); ++ if (idx == nr_counters_generic) ++ return -EAGAIN; ++ ++ set_bit(idx, cpuc->used); ++ hwc->idx = idx; ++ } ++ hwc->config_base = pmc_ops->eventsel; ++ hwc->counter_base = pmc_ops->perfctr; ++ } ++ ++ perf_counters_lapic_init(hwc->nmi); ++ ++ __pmc_generic_disable(counter, hwc, idx); ++ ++ cpuc->counters[idx] = counter; ++ /* ++ * Make it visible before enabling the hw: ++ */ ++ smp_wmb(); ++ ++ __hw_perf_counter_set_period(counter, hwc, idx); ++ __pmc_generic_enable(counter, hwc, idx); ++ ++ return 0; ++} ++ ++void perf_counter_print_debug(void) ++{ ++ u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; ++ struct cpu_hw_counters *cpuc; ++ int cpu, idx; ++ ++ if (!nr_counters_generic) ++ return; ++ ++ local_irq_disable(); ++ ++ cpu = smp_processor_id(); ++ cpuc = &per_cpu(cpu_hw_counters, cpu); ++ ++ if (intel_perfmon_version >= 2) { ++ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); ++ rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); ++ rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); ++ rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); ++ ++ pr_info("\n"); ++ pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); ++ pr_info("CPU#%d: status: %016llx\n", cpu, status); ++ pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); ++ pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); ++ } ++ pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used); ++ ++ for (idx = 0; idx < nr_counters_generic; idx++) { ++ rdmsrl(pmc_ops->eventsel + idx, pmc_ctrl); ++ rdmsrl(pmc_ops->perfctr + idx, pmc_count); ++ ++ prev_left = per_cpu(prev_left[idx], cpu); ++ ++ pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", ++ cpu, idx, pmc_ctrl); ++ pr_info("CPU#%d: gen-PMC%d count: %016llx\n", ++ cpu, idx, pmc_count); ++ pr_info("CPU#%d: gen-PMC%d left: %016llx\n", ++ cpu, idx, prev_left); ++ } ++ for (idx = 0; idx < nr_counters_fixed; idx++) { ++ rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); ++ ++ pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", ++ cpu, idx, pmc_count); ++ } ++ local_irq_enable(); ++} ++ ++static void pmc_generic_disable(struct perf_counter *counter) ++{ ++ struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); ++ struct hw_perf_counter *hwc = &counter->hw; ++ unsigned int idx = hwc->idx; ++ ++ __pmc_generic_disable(counter, hwc, idx); ++ ++ clear_bit(idx, cpuc->used); ++ cpuc->counters[idx] = NULL; ++ /* ++ * Make sure the cleared pointer becomes visible before we ++ * (potentially) free the counter: ++ */ ++ smp_wmb(); ++ ++ /* ++ * Drain the remaining delta count out of a counter ++ * that we are disabling: ++ */ ++ x86_perf_counter_update(counter, hwc, idx); ++} ++ ++/* ++ * Save and restart an expired counter. 
Called by NMI contexts, ++ * so it has to be careful about preempting normal counter ops: ++ */ ++static void perf_save_and_restart(struct perf_counter *counter) ++{ ++ struct hw_perf_counter *hwc = &counter->hw; ++ int idx = hwc->idx; ++ ++ x86_perf_counter_update(counter, hwc, idx); ++ __hw_perf_counter_set_period(counter, hwc, idx); ++ ++ if (counter->state == PERF_COUNTER_STATE_ACTIVE) ++ __pmc_generic_enable(counter, hwc, idx); ++} ++ ++/* ++ * Maximum interrupt frequency of 100KHz per CPU ++ */ ++#define PERFMON_MAX_INTERRUPTS (100000/HZ) ++ ++/* ++ * This handler is triggered by the local APIC, so the APIC IRQ handling ++ * rules apply: ++ */ ++static int __smp_perf_counter_interrupt(struct pt_regs *regs, int nmi) ++{ ++ int bit, cpu = smp_processor_id(); ++ u64 ack, status; ++ struct cpu_hw_counters *cpuc = &per_cpu(cpu_hw_counters, cpu); ++ int ret = 0; ++ ++ cpuc->throttle_ctrl = hw_perf_save_disable(); ++ ++ status = hw_perf_get_status(cpuc->throttle_ctrl); ++ if (!status) ++ goto out; ++ ++ ret = 1; ++again: ++ inc_irq_stat(apic_perf_irqs); ++ ack = status; ++ for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { ++ struct perf_counter *counter = cpuc->counters[bit]; ++ ++ clear_bit(bit, (unsigned long *) &status); ++ if (!counter) ++ continue; ++ ++ perf_save_and_restart(counter); ++ perf_counter_output(counter, nmi, regs); ++ } ++ ++ hw_perf_ack_status(ack); ++ ++ /* ++ * Repeat if there is more work to be done: ++ */ ++ status = hw_perf_get_status(cpuc->throttle_ctrl); ++ if (status) ++ goto again; ++out: ++ /* ++ * Restore - do not reenable when global enable is off or throttled: ++ */ ++ if (++cpuc->interrupts < PERFMON_MAX_INTERRUPTS) ++ hw_perf_restore(cpuc->throttle_ctrl); ++ ++ return ret; ++} ++ ++void perf_counter_unthrottle(void) ++{ ++ struct cpu_hw_counters *cpuc; ++ ++ if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) ++ return; ++ ++ if (unlikely(!perf_counters_initialized)) ++ return; ++ ++ cpuc = &__get_cpu_var(cpu_hw_counters); ++ if (cpuc->interrupts >= PERFMON_MAX_INTERRUPTS) { ++ if (printk_ratelimit()) ++ printk(KERN_WARNING "PERFMON: max interrupts exceeded!\n"); ++ hw_perf_restore(cpuc->throttle_ctrl); ++ } ++ cpuc->interrupts = 0; ++} ++ ++void smp_perf_counter_interrupt(struct pt_regs *regs) ++{ ++ irq_enter(); ++ apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR); ++ ack_APIC_irq(); ++ __smp_perf_counter_interrupt(regs, 0); ++ irq_exit(); ++} ++ ++/* ++ * This handler is triggered by NMI contexts: ++ */ ++void perf_counter_notify(struct pt_regs *regs) ++{ ++ struct cpu_hw_counters *cpuc; ++ unsigned long flags; ++ int bit, cpu; ++ ++ local_irq_save(flags); ++ cpu = smp_processor_id(); ++ cpuc = &per_cpu(cpu_hw_counters, cpu); ++ ++ for_each_bit(bit, cpuc->used, X86_PMC_IDX_MAX) { ++ struct perf_counter *counter = cpuc->counters[bit]; ++ ++ if (!counter) ++ continue; ++ ++ if (counter->wakeup_pending) { ++ counter->wakeup_pending = 0; ++ wake_up(&counter->waitq); ++ } ++ } ++ ++ local_irq_restore(flags); ++} ++ ++void perf_counters_lapic_init(int nmi) ++{ ++ u32 apic_val; ++ ++ if (!perf_counters_initialized) ++ return; ++ /* ++ * Enable the performance counter vector in the APIC LVT: ++ */ ++ apic_val = apic_read(APIC_LVTERR); ++ ++ apic_write(APIC_LVTERR, apic_val | APIC_LVT_MASKED); ++ if (nmi) ++ apic_write(APIC_LVTPC, APIC_DM_NMI); ++ else ++ apic_write(APIC_LVTPC, LOCAL_PERF_VECTOR); ++ apic_write(APIC_LVTERR, apic_val); ++} ++ ++static int __kprobes ++perf_counter_nmi_handler(struct notifier_block *self, ++ unsigned long cmd, void 
*__args) ++{ ++ struct die_args *args = __args; ++ struct pt_regs *regs; ++ int ret; ++ ++ switch (cmd) { ++ case DIE_NMI: ++ case DIE_NMI_IPI: ++ break; ++ ++ default: ++ return NOTIFY_DONE; ++ } ++ ++ regs = args->regs; ++ ++ apic_write(APIC_LVTPC, APIC_DM_NMI); ++ ret = __smp_perf_counter_interrupt(regs, 1); ++ ++ return ret ? NOTIFY_STOP : NOTIFY_OK; ++} ++ ++static __read_mostly struct notifier_block perf_counter_nmi_notifier = { ++ .notifier_call = perf_counter_nmi_handler, ++ .next = NULL, ++ .priority = 1 ++}; ++ ++static struct pmc_x86_ops pmc_intel_ops = { ++ .save_disable_all = pmc_intel_save_disable_all, ++ .restore_all = pmc_intel_restore_all, ++ .get_status = pmc_intel_get_status, ++ .ack_status = pmc_intel_ack_status, ++ .enable = pmc_intel_enable, ++ .disable = pmc_intel_disable, ++ .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, ++ .perfctr = MSR_ARCH_PERFMON_PERFCTR0, ++ .event_map = pmc_intel_event_map, ++ .raw_event = pmc_intel_raw_event, ++ .max_events = ARRAY_SIZE(intel_perfmon_event_map), ++}; ++ ++static struct pmc_x86_ops pmc_amd_ops = { ++ .save_disable_all = pmc_amd_save_disable_all, ++ .restore_all = pmc_amd_restore_all, ++ .get_status = pmc_amd_get_status, ++ .ack_status = pmc_amd_ack_status, ++ .enable = pmc_amd_enable, ++ .disable = pmc_amd_disable, ++ .eventsel = MSR_K7_EVNTSEL0, ++ .perfctr = MSR_K7_PERFCTR0, ++ .event_map = pmc_amd_event_map, ++ .raw_event = pmc_amd_raw_event, ++ .max_events = ARRAY_SIZE(amd_perfmon_event_map), ++}; ++ ++static struct pmc_x86_ops *pmc_intel_init(void) ++{ ++ union cpuid10_edx edx; ++ union cpuid10_eax eax; ++ unsigned int unused; ++ unsigned int ebx; ++ ++ /* ++ * Check whether the Architectural PerfMon supports ++ * Branch Misses Retired Event or not. ++ */ ++ cpuid(10, &eax.full, &ebx, &unused, &edx.full); ++ if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) ++ return NULL; ++ ++ intel_perfmon_version = eax.split.version_id; ++ if (intel_perfmon_version < 2) ++ return NULL; ++ ++ pr_info("Intel Performance Monitoring support detected.\n"); ++ pr_info("... version: %d\n", intel_perfmon_version); ++ pr_info("... bit width: %d\n", eax.split.bit_width); ++ pr_info("... mask length: %d\n", eax.split.mask_length); ++ ++ nr_counters_generic = eax.split.num_counters; ++ nr_counters_fixed = edx.split.num_counters_fixed; ++ counter_value_mask = (1ULL << eax.split.bit_width) - 1; ++ ++ return &pmc_intel_ops; ++} ++ ++static struct pmc_x86_ops *pmc_amd_init(void) ++{ ++ nr_counters_generic = 4; ++ nr_counters_fixed = 0; ++ counter_value_mask = 0x0000FFFFFFFFFFFFULL; ++ counter_value_bits = 48; ++ ++ pr_info("AMD Performance Monitoring support detected.\n"); ++ ++ return &pmc_amd_ops; ++} ++ ++void __init init_hw_perf_counters(void) ++{ ++ if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) ++ return; ++ ++ switch (boot_cpu_data.x86_vendor) { ++ case X86_VENDOR_INTEL: ++ pmc_ops = pmc_intel_init(); ++ break; ++ case X86_VENDOR_AMD: ++ pmc_ops = pmc_amd_init(); ++ break; ++ } ++ if (!pmc_ops) ++ return; ++ ++ pr_info("... num counters: %d\n", nr_counters_generic); ++ if (nr_counters_generic > X86_PMC_MAX_GENERIC) { ++ nr_counters_generic = X86_PMC_MAX_GENERIC; ++ WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", ++ nr_counters_generic, X86_PMC_MAX_GENERIC); ++ } ++ perf_counter_mask = (1 << nr_counters_generic) - 1; ++ perf_max_counters = nr_counters_generic; ++ ++ pr_info("... 
value mask: %016Lx\n", counter_value_mask); ++ ++ if (nr_counters_fixed > X86_PMC_MAX_FIXED) { ++ nr_counters_fixed = X86_PMC_MAX_FIXED; ++ WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", ++ nr_counters_fixed, X86_PMC_MAX_FIXED); ++ } ++ pr_info("... fixed counters: %d\n", nr_counters_fixed); ++ ++ perf_counter_mask |= ((1LL << nr_counters_fixed)-1) << X86_PMC_IDX_FIXED; ++ ++ pr_info("... counter mask: %016Lx\n", perf_counter_mask); ++ perf_counters_initialized = true; ++ ++ perf_counters_lapic_init(0); ++ register_die_notifier(&perf_counter_nmi_notifier); ++} ++ ++static void pmc_generic_read(struct perf_counter *counter) ++{ ++ x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); ++} ++ ++static const struct hw_perf_counter_ops x86_perf_counter_ops = { ++ .enable = pmc_generic_enable, ++ .disable = pmc_generic_disable, ++ .read = pmc_generic_read, ++}; ++ ++const struct hw_perf_counter_ops * ++hw_perf_counter_init(struct perf_counter *counter) ++{ ++ int err; ++ ++ err = __hw_perf_counter_init(counter); ++ if (err) ++ return NULL; ++ ++ return &x86_perf_counter_ops; ++} +Index: linux-2.6-tip/arch/x86/kernel/cpu/perfctr-watchdog.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/perfctr-watchdog.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/perfctr-watchdog.c +@@ -20,7 +20,7 @@ + #include + + #include +-#include ++#include + + struct nmi_watchdog_ctlblk { + unsigned int cccr_msr; +Index: linux-2.6-tip/arch/x86/kernel/cpu/proc.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/proc.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/proc.c +@@ -7,15 +7,14 @@ + /* + * Get CPU information for use by the procfs. + */ +-#ifdef CONFIG_X86_32 + static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, + unsigned int cpu) + { +-#ifdef CONFIG_X86_HT ++#ifdef CONFIG_SMP + if (c->x86_max_cores * smp_num_siblings > 1) { + seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); + seq_printf(m, "siblings\t: %d\n", +- cpus_weight(per_cpu(cpu_core_map, cpu))); ++ cpumask_weight(cpu_sibling_mask(cpu))); + seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); + seq_printf(m, "apicid\t\t: %d\n", c->apicid); +@@ -24,6 +23,7 @@ static void show_cpuinfo_core(struct seq + #endif + } + ++#ifdef CONFIG_X86_32 + static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) + { + /* +@@ -50,22 +50,6 @@ static void show_cpuinfo_misc(struct seq + c->wp_works_ok ? 
"yes" : "no"); + } + #else +-static void show_cpuinfo_core(struct seq_file *m, struct cpuinfo_x86 *c, +- unsigned int cpu) +-{ +-#ifdef CONFIG_SMP +- if (c->x86_max_cores * smp_num_siblings > 1) { +- seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); +- seq_printf(m, "siblings\t: %d\n", +- cpus_weight(per_cpu(cpu_core_map, cpu))); +- seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); +- seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); +- seq_printf(m, "apicid\t\t: %d\n", c->apicid); +- seq_printf(m, "initial apicid\t: %d\n", c->initial_apicid); +- } +-#endif +-} +- + static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c) + { + seq_printf(m, +@@ -159,9 +143,9 @@ static int show_cpuinfo(struct seq_file + static void *c_start(struct seq_file *m, loff_t *pos) + { + if (*pos == 0) /* just in case, cpu 0 is not the first */ +- *pos = first_cpu(cpu_online_map); ++ *pos = cpumask_first(cpu_online_mask); + else +- *pos = next_cpu_nr(*pos - 1, cpu_online_map); ++ *pos = cpumask_next(*pos - 1, cpu_online_mask); + if ((*pos) < nr_cpu_ids) + return &cpu_data(*pos); + return NULL; +Index: linux-2.6-tip/arch/x86/kernel/cpu/transmeta.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/transmeta.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/transmeta.c +@@ -98,7 +98,7 @@ static void __cpuinit init_transmeta(str + #endif + } + +-static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { ++static const struct cpu_dev __cpuinitconst transmeta_cpu_dev = { + .c_vendor = "Transmeta", + .c_ident = { "GenuineTMx86", "TransmetaCPU" }, + .c_early_init = early_init_transmeta, +Index: linux-2.6-tip/arch/x86/kernel/cpu/umc.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/cpu/umc.c ++++ linux-2.6-tip/arch/x86/kernel/cpu/umc.c +@@ -8,7 +8,7 @@ + * so no special init takes place. 
+ */ + +-static struct cpu_dev umc_cpu_dev __cpuinitdata = { ++static const struct cpu_dev __cpuinitconst umc_cpu_dev = { + .c_vendor = "UMC", + .c_ident = { "UMC UMC UMC" }, + .c_models = { +Index: linux-2.6-tip/arch/x86/kernel/crash.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/crash.c ++++ linux-2.6-tip/arch/x86/kernel/crash.c +@@ -24,12 +24,10 @@ + #include + #include + #include +-#include ++#include + #include + #include + +-#include +- + + #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) + +Index: linux-2.6-tip/arch/x86/kernel/dumpstack.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/dumpstack.c ++++ linux-2.6-tip/arch/x86/kernel/dumpstack.c +@@ -10,10 +10,12 @@ + #include + #include + #include ++#include + #include + #include + #include + #include ++#include + + #include + +@@ -99,7 +101,7 @@ print_context_stack(struct thread_info * + frame = frame->next_frame; + bp = (unsigned long) frame; + } else { +- ops->address(data, addr, bp == 0); ++ ops->address(data, addr, 0); + } + print_ftrace_graph_addr(addr, data, ops, tinfo, graph); + } +@@ -186,7 +188,7 @@ void dump_stack(void) + } + EXPORT_SYMBOL(dump_stack); + +-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; ++static raw_spinlock_t die_lock = RAW_SPIN_LOCK_UNLOCKED(die_lock); + static int die_owner = -1; + static unsigned int die_nest_count; + +@@ -195,16 +197,21 @@ unsigned __kprobes long oops_begin(void) + int cpu; + unsigned long flags; + ++ /* notify the hw-branch tracer so it may disable tracing and ++ add the last trace to the trace buffer - ++ the earlier this happens, the more useful the trace. */ ++ trace_hw_branch_oops(); ++ + oops_enter(); + + /* racy, but better than risking deadlock. */ + raw_local_irq_save(flags); + cpu = smp_processor_id(); +- if (!__raw_spin_trylock(&die_lock)) { ++ if (!spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. should stop eventually */; + else +- __raw_spin_lock(&die_lock); ++ spin_lock(&die_lock); + } + die_nest_count++; + die_owner = cpu; +@@ -224,7 +231,7 @@ void __kprobes oops_end(unsigned long fl + die_nest_count--; + if (!die_nest_count) + /* Nest count reaches zero, release the lock. */ +- __raw_spin_unlock(&die_lock); ++ spin_unlock(&die_lock); + raw_local_irq_restore(flags); + oops_exit(); + +Index: linux-2.6-tip/arch/x86/kernel/dumpstack_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/dumpstack_64.c ++++ linux-2.6-tip/arch/x86/kernel/dumpstack_64.c +@@ -23,10 +23,14 @@ static unsigned long *in_exception_stack + unsigned *usedp, char **idp) + { + static char ids[][8] = { ++#if DEBUG_STACK > 0 + [DEBUG_STACK - 1] = "#DB", ++#endif + [NMI_STACK - 1] = "NMI", + [DOUBLEFAULT_STACK - 1] = "#DF", ++#if STACKFAULT_STACK > 0 + [STACKFAULT_STACK - 1] = "#SS", ++#endif + [MCE_STACK - 1] = "#MC", + #if DEBUG_STKSZ > EXCEPTION_STKSZ + [N_EXCEPTION_STACKS ... 
+@@ -106,7 +110,8 @@ void dump_trace(struct task_struct *task + const struct stacktrace_ops *ops, void *data) + { + const unsigned cpu = get_cpu(); +- unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; ++ unsigned long *irq_stack_end = ++ (unsigned long *)per_cpu(irq_stack_ptr, cpu); + unsigned used = 0; + struct thread_info *tinfo; + int graph = 0; +@@ -160,23 +165,23 @@ void dump_trace(struct task_struct *task + stack = (unsigned long *) estack_end[-2]; + continue; + } +- if (irqstack_end) { +- unsigned long *irqstack; +- irqstack = irqstack_end - +- (IRQSTACKSIZE - 64) / sizeof(*irqstack); ++ if (irq_stack_end) { ++ unsigned long *irq_stack; ++ irq_stack = irq_stack_end - ++ (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); + +- if (stack >= irqstack && stack < irqstack_end) { ++ if (stack >= irq_stack && stack < irq_stack_end) { + if (ops->stack(data, "IRQ") < 0) + break; + bp = print_context_stack(tinfo, stack, bp, +- ops, data, irqstack_end, &graph); ++ ops, data, irq_stack_end, &graph); + /* + * We link to the next stack (which would be + * the process stack normally) the last + * pointer (index -1 to end) in the IRQ stack: + */ +- stack = (unsigned long *) (irqstack_end[-1]); +- irqstack_end = NULL; ++ stack = (unsigned long *) (irq_stack_end[-1]); ++ irq_stack_end = NULL; + ops->stack(data, "EOI"); + continue; + } +@@ -199,10 +204,10 @@ show_stack_log_lvl(struct task_struct *t + unsigned long *stack; + int i; + const int cpu = smp_processor_id(); +- unsigned long *irqstack_end = +- (unsigned long *) (cpu_pda(cpu)->irqstackptr); +- unsigned long *irqstack = +- (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); ++ unsigned long *irq_stack_end = ++ (unsigned long *)(per_cpu(irq_stack_ptr, cpu)); ++ unsigned long *irq_stack = ++ (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE); + + /* + * debugging aid: "show_stack(NULL, NULL);" prints the +@@ -218,9 +223,9 @@ show_stack_log_lvl(struct task_struct *t + + stack = sp; + for (i = 0; i < kstack_depth_to_print; i++) { +- if (stack >= irqstack && stack <= irqstack_end) { +- if (stack == irqstack_end) { +- stack = (unsigned long *) (irqstack_end[-1]); ++ if (stack >= irq_stack && stack <= irq_stack_end) { ++ if (stack == irq_stack_end) { ++ stack = (unsigned long *) (irq_stack_end[-1]); + printk(" "); + } + } else { +@@ -241,7 +246,7 @@ void show_registers(struct pt_regs *regs + int i; + unsigned long sp; + const int cpu = smp_processor_id(); +- struct task_struct *cur = cpu_pda(cpu)->pcurrent; ++ struct task_struct *cur = current; + + sp = regs->sp; + printk("CPU %d ", cpu); +Index: linux-2.6-tip/arch/x86/kernel/e820.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/e820.c ++++ linux-2.6-tip/arch/x86/kernel/e820.c +@@ -110,19 +110,50 @@ int __init e820_all_mapped(u64 start, u6 + /* + * Add a memory region to the kernel e820 map. + */ +-void __init e820_add_region(u64 start, u64 size, int type) ++static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, ++ int type) + { +- int x = e820.nr_map; ++ int x = e820x->nr_map; + +- if (x == ARRAY_SIZE(e820.map)) { ++ if (x == ARRAY_SIZE(e820x->map)) { + printk(KERN_ERR "Ooops! 
Too many entries in the memory map!\n"); + return; + } + +- e820.map[x].addr = start; +- e820.map[x].size = size; +- e820.map[x].type = type; +- e820.nr_map++; ++ e820x->map[x].addr = start; ++ e820x->map[x].size = size; ++ e820x->map[x].type = type; ++ e820x->nr_map++; ++} ++ ++void __init e820_add_region(u64 start, u64 size, int type) ++{ ++ __e820_add_region(&e820, start, size, type); ++} ++ ++static void __init e820_print_type(u32 type) ++{ ++ switch (type) { ++ case E820_RAM: ++ case E820_RESERVED_KERN: ++ printk(KERN_CONT "(usable)"); ++ break; ++ case E820_RESERVED: ++ printk(KERN_CONT "(reserved)"); ++ break; ++ case E820_ACPI: ++ printk(KERN_CONT "(ACPI data)"); ++ break; ++ case E820_NVS: ++ printk(KERN_CONT "(ACPI NVS)"); ++ break; ++ case E820_UNUSABLE: ++ printk(KERN_CONT "(unusable)"); ++ break; ++ default: ++ printk(KERN_CONT "type %u", type); ++ break; ++ } + } + + void __init e820_print_map(char *who) +@@ -134,27 +165,8 @@ void __init e820_print_map(char *who) + (unsigned long long) e820.map[i].addr, + (unsigned long long) + (e820.map[i].addr + e820.map[i].size)); +- switch (e820.map[i].type) { +- case E820_RAM: +- case E820_RESERVED_KERN: +- printk(KERN_CONT "(usable)\n"); +- break; +- case E820_RESERVED: +- printk(KERN_CONT "(reserved)\n"); +- break; +- case E820_ACPI: +- printk(KERN_CONT "(ACPI data)\n"); +- break; +- case E820_NVS: +- printk(KERN_CONT "(ACPI NVS)\n"); +- break; +- case E820_UNUSABLE: +- printk("(unusable)\n"); +- break; +- default: +- printk(KERN_CONT "type %u\n", e820.map[i].type); +- break; +- } ++ e820_print_type(e820.map[i].type); ++ printk(KERN_CONT "\n"); + } + } + +@@ -221,7 +233,7 @@ void __init e820_print_map(char *who) + */ + + int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, +- int *pnr_map) ++ u32 *pnr_map) + { + struct change_member { + struct e820entry *pbios; /* pointer to original bios entry */ +@@ -417,11 +429,12 @@ static int __init append_e820_map(struct + return __append_e820_map(biosmap, nr_map); + } + +-static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, ++static u64 __init __e820_update_range(struct e820map *e820x, u64 start, + u64 size, unsigned old_type, + unsigned new_type) + { +- int i; ++ u64 end; ++ unsigned int i; + u64 real_updated_size = 0; + + BUG_ON(old_type == new_type); +@@ -429,27 +442,55 @@ static u64 __init e820_update_range_map( + if (size > (ULLONG_MAX - start)) + size = ULLONG_MAX - start; + +- for (i = 0; i < e820.nr_map; i++) { ++ end = start + size; ++ printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ", ++ (unsigned long long) start, ++ (unsigned long long) end); ++ e820_print_type(old_type); ++ printk(KERN_CONT " ==> "); ++ e820_print_type(new_type); ++ printk(KERN_CONT "\n"); ++ ++ for (i = 0; i < e820x->nr_map; i++) { + struct e820entry *ei = &e820x->map[i]; + u64 final_start, final_end; ++ u64 ei_end; ++ + if (ei->type != old_type) + continue; +- /* totally covered? */ +- if (ei->addr >= start && +- (ei->addr + ei->size) <= (start + size)) { ++ ++ ei_end = ei->addr + ei->size; ++ /* totally covered by new range? */ ++ if (ei->addr >= start && ei_end <= end) { + ei->type = new_type; + real_updated_size += ei->size; + continue; + } ++ ++ /* new range is totally covered? 
*/ ++ if (ei->addr < start && ei_end > end) { ++ __e820_add_region(e820x, start, size, new_type); ++ __e820_add_region(e820x, end, ei_end - end, ei->type); ++ ei->size = start - ei->addr; ++ real_updated_size += size; ++ continue; ++ } ++ + /* partially covered */ + final_start = max(start, ei->addr); +- final_end = min(start + size, ei->addr + ei->size); ++ final_end = min(end, ei_end); + if (final_start >= final_end) + continue; +- e820_add_region(final_start, final_end - final_start, +- new_type); ++ ++ __e820_add_region(e820x, final_start, final_end - final_start, ++ new_type); ++ + real_updated_size += final_end - final_start; + ++ /* ++ * left range could be head or tail, so need to update ++ * size at first. ++ */ + ei->size -= final_end - final_start; + if (ei->addr < final_start) + continue; +@@ -461,13 +502,13 @@ static u64 __init e820_update_range_map( + u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, + unsigned new_type) + { +- return e820_update_range_map(&e820, start, size, old_type, new_type); ++ return __e820_update_range(&e820, start, size, old_type, new_type); + } + + static u64 __init e820_update_range_saved(u64 start, u64 size, + unsigned old_type, unsigned new_type) + { +- return e820_update_range_map(&e820_saved, start, size, old_type, ++ return __e820_update_range(&e820_saved, start, size, old_type, + new_type); + } + +@@ -511,7 +552,7 @@ u64 __init e820_remove_range(u64 start, + + void __init update_e820(void) + { +- int nr_map; ++ u32 nr_map; + + nr_map = e820.nr_map; + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map)) +@@ -522,7 +563,7 @@ void __init update_e820(void) + } + static void __init update_e820_saved(void) + { +- int nr_map; ++ u32 nr_map; + + nr_map = e820_saved.nr_map; + if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map)) +@@ -858,6 +899,9 @@ void __init reserve_early_overlap_ok(u64 + */ + void __init reserve_early(u64 start, u64 end, char *name) + { ++ if (start >= end) ++ return; ++ + drop_overlaps_that_are_ok(start, end); + __reserve_early(start, end, name, 0); + } +@@ -1017,8 +1061,8 @@ u64 __init find_e820_area_size(u64 start + continue; + return addr; + } +- return -1UL; + ++ return -1ULL; + } + + /* +@@ -1031,13 +1075,22 @@ u64 __init early_reserve_e820(u64 startt + u64 start; + + start = startt; +- while (size < sizet) ++ while (size < sizet && (start + 1)) + start = find_e820_area_size(start, &size, align); + + if (size < sizet) + return 0; + ++#ifdef CONFIG_X86_32 ++ if (start >= MAXMEM) ++ return 0; ++ if (start + size > MAXMEM) ++ size = MAXMEM - start; ++#endif ++ + addr = round_down(start + size - sizet, align); ++ if (addr < start) ++ return 0; + e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); + e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); + printk(KERN_INFO "update e820 for early_reserve_e820\n"); +@@ -1250,7 +1303,7 @@ early_param("memmap", parse_memmap_opt); + void __init finish_e820_parsing(void) + { + if (userdef) { +- int nr = e820.nr_map; ++ u32 nr = e820.nr_map; + + if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0) + early_panic("Invalid user supplied memory map"); +@@ -1333,7 +1386,7 @@ void __init e820_reserve_resources_late( + char *__init default_machine_specific_memory_setup(void) + { + char *who = "BIOS-e820"; +- int new_nr; ++ u32 new_nr; + /* + * Try to copy the BIOS-supplied E820-map. 
+ * +Index: linux-2.6-tip/arch/x86/kernel/early-quirks.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/early-quirks.c ++++ linux-2.6-tip/arch/x86/kernel/early-quirks.c +@@ -97,6 +97,7 @@ static void __init nvidia_bugs(int num, + } + + #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) ++#if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) + static u32 __init ati_ixp4x0_rev(int num, int slot, int func) + { + u32 d; +@@ -114,6 +115,7 @@ static u32 __init ati_ixp4x0_rev(int num + d &= 0xff; + return d; + } ++#endif + + static void __init ati_bugs(int num, int slot, int func) + { +Index: linux-2.6-tip/arch/x86/kernel/early_printk.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/early_printk.c ++++ linux-2.6-tip/arch/x86/kernel/early_printk.c +@@ -13,8 +13,8 @@ + #include + #include + #include +-#include + #include ++#include + #include + + /* Simple VGA output */ +@@ -59,7 +59,7 @@ static void early_vga_write(struct conso + static struct console early_vga_console = { + .name = "earlyvga", + .write = early_vga_write, +- .flags = CON_PRINTBUFFER, ++ .flags = CON_PRINTBUFFER | CON_ATOMIC, + .index = -1, + }; + +@@ -156,7 +156,7 @@ static __init void early_serial_init(cha + static struct console early_serial_console = { + .name = "earlyser", + .write = early_serial_write, +- .flags = CON_PRINTBUFFER, ++ .flags = CON_PRINTBUFFER | CON_ATOMIC, + .index = -1, + }; + +@@ -250,7 +250,7 @@ static int dbgp_wait_until_complete(void + return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl); + } + +-static void dbgp_mdelay(int ms) ++static void __init dbgp_mdelay(int ms) + { + int i; + +@@ -311,7 +311,7 @@ static void dbgp_set_data(const void *bu + writel(hi, &ehci_debug->data47); + } + +-static void dbgp_get_data(void *buf, int size) ++static void __init dbgp_get_data(void *buf, int size) + { + unsigned char *bytes = buf; + u32 lo, hi; +@@ -355,7 +355,7 @@ static int dbgp_bulk_write(unsigned devn + return ret; + } + +-static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, ++static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data, + int size) + { + u32 pids, addr, ctrl; +@@ -386,8 +386,8 @@ static int dbgp_bulk_read(unsigned devnu + return ret; + } + +-static int dbgp_control_msg(unsigned devnum, int requesttype, int request, +- int value, int index, void *data, int size) ++static int __init dbgp_control_msg(unsigned devnum, int requesttype, ++ int request, int value, int index, void *data, int size) + { + u32 pids, addr, ctrl; + struct usb_ctrlrequest req; +@@ -489,7 +489,7 @@ static u32 __init find_dbgp(int ehci_num + return 0; + } + +-static int ehci_reset_port(int port) ++static int __init ehci_reset_port(int port) + { + u32 portsc; + u32 delay_time, delay; +@@ -532,7 +532,7 @@ static int ehci_reset_port(int port) + return -EBUSY; + } + +-static int ehci_wait_for_port(int port) ++static int __init ehci_wait_for_port(int port) + { + u32 status; + int ret, reps; +@@ -557,13 +557,13 @@ static inline void dbgp_printk(const cha + + typedef void (*set_debug_port_t)(int port); + +-static void default_set_debug_port(int port) ++static void __init default_set_debug_port(int port) + { + } + +-static set_debug_port_t set_debug_port = default_set_debug_port; ++static set_debug_port_t __initdata set_debug_port = default_set_debug_port; + +-static void nvidia_set_debug_port(int port) ++static void __init nvidia_set_debug_port(int 
port) + { + u32 dword; + dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, +@@ -881,7 +881,7 @@ static int __initdata early_console_init + + asmlinkage void early_printk(const char *fmt, ...) + { +- char buf[512]; ++ static char buf[512]; + int n; + va_list ap; + +Index: linux-2.6-tip/arch/x86/kernel/efi.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/efi.c ++++ linux-2.6-tip/arch/x86/kernel/efi.c +@@ -366,10 +366,12 @@ void __init efi_init(void) + SMBIOS_TABLE_GUID)) { + efi.smbios = config_tables[i].table; + printk(" SMBIOS=0x%lx ", config_tables[i].table); ++#ifdef CONFIG_X86_UV + } else if (!efi_guidcmp(config_tables[i].guid, + UV_SYSTEM_TABLE_GUID)) { + efi.uv_systab = config_tables[i].table; + printk(" UVsystab=0x%lx ", config_tables[i].table); ++#endif + } else if (!efi_guidcmp(config_tables[i].guid, + HCDP_TABLE_GUID)) { + efi.hcdp = config_tables[i].table; +Index: linux-2.6-tip/arch/x86/kernel/efi_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/efi_64.c ++++ linux-2.6-tip/arch/x86/kernel/efi_64.c +@@ -36,6 +36,7 @@ + #include + #include + #include ++#include + + static pgd_t save_pgd __initdata; + static unsigned long efi_flags __initdata; +Index: linux-2.6-tip/arch/x86/kernel/efi_stub_32.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/efi_stub_32.S ++++ linux-2.6-tip/arch/x86/kernel/efi_stub_32.S +@@ -6,7 +6,7 @@ + */ + + #include +-#include ++#include + + /* + * efi_call_phys(void *, ...) is a function with variable parameters. +@@ -113,6 +113,7 @@ ENTRY(efi_call_phys) + movl (%edx), %ecx + pushl %ecx + ret ++ENDPROC(efi_call_phys) + .previous + + .data +Index: linux-2.6-tip/arch/x86/kernel/efi_stub_64.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/efi_stub_64.S ++++ linux-2.6-tip/arch/x86/kernel/efi_stub_64.S +@@ -41,6 +41,7 @@ ENTRY(efi_call0) + addq $32, %rsp + RESTORE_XMM + ret ++ENDPROC(efi_call0) + + ENTRY(efi_call1) + SAVE_XMM +@@ -50,6 +51,7 @@ ENTRY(efi_call1) + addq $32, %rsp + RESTORE_XMM + ret ++ENDPROC(efi_call1) + + ENTRY(efi_call2) + SAVE_XMM +@@ -59,6 +61,7 @@ ENTRY(efi_call2) + addq $32, %rsp + RESTORE_XMM + ret ++ENDPROC(efi_call2) + + ENTRY(efi_call3) + SAVE_XMM +@@ -69,6 +72,7 @@ ENTRY(efi_call3) + addq $32, %rsp + RESTORE_XMM + ret ++ENDPROC(efi_call3) + + ENTRY(efi_call4) + SAVE_XMM +@@ -80,6 +84,7 @@ ENTRY(efi_call4) + addq $32, %rsp + RESTORE_XMM + ret ++ENDPROC(efi_call4) + + ENTRY(efi_call5) + SAVE_XMM +@@ -92,6 +97,7 @@ ENTRY(efi_call5) + addq $48, %rsp + RESTORE_XMM + ret ++ENDPROC(efi_call5) + + ENTRY(efi_call6) + SAVE_XMM +@@ -107,3 +113,4 @@ ENTRY(efi_call6) + addq $48, %rsp + RESTORE_XMM + ret ++ENDPROC(efi_call6) +Index: linux-2.6-tip/arch/x86/kernel/entry_32.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/entry_32.S ++++ linux-2.6-tip/arch/x86/kernel/entry_32.S +@@ -30,12 +30,13 @@ + * 1C(%esp) - %ds + * 20(%esp) - %es + * 24(%esp) - %fs +- * 28(%esp) - orig_eax +- * 2C(%esp) - %eip +- * 30(%esp) - %cs +- * 34(%esp) - %eflags +- * 38(%esp) - %oldesp +- * 3C(%esp) - %oldss ++ * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS ++ * 2C(%esp) - orig_eax ++ * 30(%esp) - %eip ++ * 34(%esp) - %cs ++ * 38(%esp) - %eflags ++ * 3C(%esp) - %oldesp ++ * 40(%esp) - %oldss + * + * "current" is in register %ebx during any slow 
entries. + */ +@@ -46,7 +47,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include +@@ -101,121 +102,221 @@ + #define resume_userspace_sig resume_userspace + #endif + +-#define SAVE_ALL \ +- cld; \ +- pushl %fs; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- /*CFI_REL_OFFSET fs, 0;*/\ +- pushl %es; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- /*CFI_REL_OFFSET es, 0;*/\ +- pushl %ds; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- /*CFI_REL_OFFSET ds, 0;*/\ +- pushl %eax; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- CFI_REL_OFFSET eax, 0;\ +- pushl %ebp; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- CFI_REL_OFFSET ebp, 0;\ +- pushl %edi; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- CFI_REL_OFFSET edi, 0;\ +- pushl %esi; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- CFI_REL_OFFSET esi, 0;\ +- pushl %edx; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- CFI_REL_OFFSET edx, 0;\ +- pushl %ecx; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- CFI_REL_OFFSET ecx, 0;\ +- pushl %ebx; \ +- CFI_ADJUST_CFA_OFFSET 4;\ +- CFI_REL_OFFSET ebx, 0;\ +- movl $(__USER_DS), %edx; \ +- movl %edx, %ds; \ +- movl %edx, %es; \ +- movl $(__KERNEL_PERCPU), %edx; \ ++/* ++ * User gs save/restore ++ * ++ * %gs is used for userland TLS and kernel only uses it for stack ++ * canary which is required to be at %gs:20 by gcc. Read the comment ++ * at the top of stackprotector.h for more info. ++ * ++ * Local labels 98 and 99 are used. ++ */ ++#ifdef CONFIG_X86_32_LAZY_GS ++ ++ /* unfortunately push/pop can't be no-op */ ++.macro PUSH_GS ++ pushl $0 ++ CFI_ADJUST_CFA_OFFSET 4 ++.endm ++.macro POP_GS pop=0 ++ addl $(4 + \pop), %esp ++ CFI_ADJUST_CFA_OFFSET -(4 + \pop) ++.endm ++.macro POP_GS_EX ++.endm ++ ++ /* all the rest are no-op */ ++.macro PTGS_TO_GS ++.endm ++.macro PTGS_TO_GS_EX ++.endm ++.macro GS_TO_REG reg ++.endm ++.macro REG_TO_PTGS reg ++.endm ++.macro SET_KERNEL_GS reg ++.endm ++ ++#else /* CONFIG_X86_32_LAZY_GS */ ++ ++.macro PUSH_GS ++ pushl %gs ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET gs, 0*/ ++.endm ++ ++.macro POP_GS pop=0 ++98: popl %gs ++ CFI_ADJUST_CFA_OFFSET -4 ++ /*CFI_RESTORE gs*/ ++ .if \pop <> 0 ++ add $\pop, %esp ++ CFI_ADJUST_CFA_OFFSET -\pop ++ .endif ++.endm ++.macro POP_GS_EX ++.pushsection .fixup, "ax" ++99: movl $0, (%esp) ++ jmp 98b ++.section __ex_table, "a" ++ .align 4 ++ .long 98b, 99b ++.popsection ++.endm ++ ++.macro PTGS_TO_GS ++98: mov PT_GS(%esp), %gs ++.endm ++.macro PTGS_TO_GS_EX ++.pushsection .fixup, "ax" ++99: movl $0, PT_GS(%esp) ++ jmp 98b ++.section __ex_table, "a" ++ .align 4 ++ .long 98b, 99b ++.popsection ++.endm ++ ++.macro GS_TO_REG reg ++ movl %gs, \reg ++ /*CFI_REGISTER gs, \reg*/ ++.endm ++.macro REG_TO_PTGS reg ++ movl \reg, PT_GS(%esp) ++ /*CFI_REL_OFFSET gs, PT_GS*/ ++.endm ++.macro SET_KERNEL_GS reg ++ movl $(__KERNEL_STACK_CANARY), \reg ++ movl \reg, %gs ++.endm ++ ++#endif /* CONFIG_X86_32_LAZY_GS */ ++ ++.macro SAVE_ALL ++ cld ++ PUSH_GS ++ pushl %fs ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET fs, 0;*/ ++ pushl %es ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET es, 0;*/ ++ pushl %ds ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET ds, 0;*/ ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET eax, 0 ++ pushl %ebp ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ebp, 0 ++ pushl %edi ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET edi, 0 ++ pushl %esi ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET esi, 0 ++ pushl %edx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET edx, 0 ++ pushl %ecx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ecx, 0 ++ pushl %ebx ++ CFI_ADJUST_CFA_OFFSET 4 ++ CFI_REL_OFFSET ebx, 0 ++ movl $(__USER_DS), %edx ++ movl 
%edx, %ds ++ movl %edx, %es ++ movl $(__KERNEL_PERCPU), %edx + movl %edx, %fs ++ SET_KERNEL_GS %edx ++.endm + +-#define RESTORE_INT_REGS \ +- popl %ebx; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- CFI_RESTORE ebx;\ +- popl %ecx; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- CFI_RESTORE ecx;\ +- popl %edx; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- CFI_RESTORE edx;\ +- popl %esi; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- CFI_RESTORE esi;\ +- popl %edi; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- CFI_RESTORE edi;\ +- popl %ebp; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- CFI_RESTORE ebp;\ +- popl %eax; \ +- CFI_ADJUST_CFA_OFFSET -4;\ ++.macro RESTORE_INT_REGS ++ popl %ebx ++ CFI_ADJUST_CFA_OFFSET -4 ++ CFI_RESTORE ebx ++ popl %ecx ++ CFI_ADJUST_CFA_OFFSET -4 ++ CFI_RESTORE ecx ++ popl %edx ++ CFI_ADJUST_CFA_OFFSET -4 ++ CFI_RESTORE edx ++ popl %esi ++ CFI_ADJUST_CFA_OFFSET -4 ++ CFI_RESTORE esi ++ popl %edi ++ CFI_ADJUST_CFA_OFFSET -4 ++ CFI_RESTORE edi ++ popl %ebp ++ CFI_ADJUST_CFA_OFFSET -4 ++ CFI_RESTORE ebp ++ popl %eax ++ CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE eax ++.endm + +-#define RESTORE_REGS \ +- RESTORE_INT_REGS; \ +-1: popl %ds; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- /*CFI_RESTORE ds;*/\ +-2: popl %es; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- /*CFI_RESTORE es;*/\ +-3: popl %fs; \ +- CFI_ADJUST_CFA_OFFSET -4;\ +- /*CFI_RESTORE fs;*/\ +-.pushsection .fixup,"ax"; \ +-4: movl $0,(%esp); \ +- jmp 1b; \ +-5: movl $0,(%esp); \ +- jmp 2b; \ +-6: movl $0,(%esp); \ +- jmp 3b; \ +-.section __ex_table,"a";\ +- .align 4; \ +- .long 1b,4b; \ +- .long 2b,5b; \ +- .long 3b,6b; \ ++.macro RESTORE_REGS pop=0 ++ RESTORE_INT_REGS ++1: popl %ds ++ CFI_ADJUST_CFA_OFFSET -4 ++ /*CFI_RESTORE ds;*/ ++2: popl %es ++ CFI_ADJUST_CFA_OFFSET -4 ++ /*CFI_RESTORE es;*/ ++3: popl %fs ++ CFI_ADJUST_CFA_OFFSET -4 ++ /*CFI_RESTORE fs;*/ ++ POP_GS \pop ++.pushsection .fixup, "ax" ++4: movl $0, (%esp) ++ jmp 1b ++5: movl $0, (%esp) ++ jmp 2b ++6: movl $0, (%esp) ++ jmp 3b ++.section __ex_table, "a" ++ .align 4 ++ .long 1b, 4b ++ .long 2b, 5b ++ .long 3b, 6b + .popsection ++ POP_GS_EX ++.endm + +-#define RING0_INT_FRAME \ +- CFI_STARTPROC simple;\ +- CFI_SIGNAL_FRAME;\ +- CFI_DEF_CFA esp, 3*4;\ +- /*CFI_OFFSET cs, -2*4;*/\ ++.macro RING0_INT_FRAME ++ CFI_STARTPROC simple ++ CFI_SIGNAL_FRAME ++ CFI_DEF_CFA esp, 3*4 ++ /*CFI_OFFSET cs, -2*4;*/ + CFI_OFFSET eip, -3*4 ++.endm + +-#define RING0_EC_FRAME \ +- CFI_STARTPROC simple;\ +- CFI_SIGNAL_FRAME;\ +- CFI_DEF_CFA esp, 4*4;\ +- /*CFI_OFFSET cs, -2*4;*/\ ++.macro RING0_EC_FRAME ++ CFI_STARTPROC simple ++ CFI_SIGNAL_FRAME ++ CFI_DEF_CFA esp, 4*4 ++ /*CFI_OFFSET cs, -2*4;*/ + CFI_OFFSET eip, -3*4 ++.endm + +-#define RING0_PTREGS_FRAME \ +- CFI_STARTPROC simple;\ +- CFI_SIGNAL_FRAME;\ +- CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ +- /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ +- CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ +- /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ +- /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ +- CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ +- CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ +- CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ +- CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ +- CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ +- CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ ++.macro RING0_PTREGS_FRAME ++ CFI_STARTPROC simple ++ CFI_SIGNAL_FRAME ++ CFI_DEF_CFA esp, PT_OLDESP-PT_EBX ++ /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/ ++ CFI_OFFSET eip, PT_EIP-PT_OLDESP ++ /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/ ++ /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/ ++ CFI_OFFSET eax, PT_EAX-PT_OLDESP ++ CFI_OFFSET ebp, PT_EBP-PT_OLDESP ++ CFI_OFFSET edi, PT_EDI-PT_OLDESP ++ CFI_OFFSET esi, PT_ESI-PT_OLDESP ++ CFI_OFFSET edx, PT_EDX-PT_OLDESP 
++ CFI_OFFSET ecx, PT_ECX-PT_OLDESP + CFI_OFFSET ebx, PT_EBX-PT_OLDESP ++.endm + + ENTRY(ret_from_fork) + CFI_STARTPROC +@@ -270,14 +371,18 @@ END(ret_from_exception) + #ifdef CONFIG_PREEMPT + ENTRY(resume_kernel) + DISABLE_INTERRUPTS(CLBR_ANY) ++ cmpl $0, kernel_preemption ++ jz restore_nocheck + cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? + jnz restore_nocheck + need_resched: + movl TI_flags(%ebp), %ecx # need_resched set ? + testb $_TIF_NEED_RESCHED, %cl +- jz restore_all ++ jz restore_nocheck + testl $X86_EFLAGS_IF,PT_EFLAGS(%esp) # interrupts off (exception path) ? +- jz restore_all ++ jz restore_nocheck ++ DISABLE_INTERRUPTS(CLBR_ANY) ++ + call preempt_schedule_irq + jmp need_resched + END(resume_kernel) +@@ -341,8 +446,7 @@ sysenter_past_esp: + + GET_THREAD_INFO(%ebp) + +- /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ +- testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) ++ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) + jnz sysenter_audit + sysenter_do_call: + cmpl $(nr_syscalls), %eax +@@ -353,7 +457,7 @@ sysenter_do_call: + DISABLE_INTERRUPTS(CLBR_ANY) + TRACE_IRQS_OFF + movl TI_flags(%ebp), %ecx +- testw $_TIF_ALLWORK_MASK, %cx ++ testl $_TIF_ALLWORK_MASK, %ecx + jne sysexit_audit + sysenter_exit: + /* if something modifies registers it must also disable sysexit */ +@@ -362,11 +466,12 @@ sysenter_exit: + xorl %ebp,%ebp + TRACE_IRQS_ON + 1: mov PT_FS(%esp), %fs ++ PTGS_TO_GS + ENABLE_INTERRUPTS_SYSEXIT + + #ifdef CONFIG_AUDITSYSCALL + sysenter_audit: +- testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) ++ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) + jnz syscall_trace_entry + addl $4,%esp + CFI_ADJUST_CFA_OFFSET -4 +@@ -383,7 +488,7 @@ sysenter_audit: + jmp sysenter_do_call + + sysexit_audit: +- testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx ++ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx + jne syscall_exit_work + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_ANY) +@@ -396,7 +501,7 @@ sysexit_audit: + DISABLE_INTERRUPTS(CLBR_ANY) + TRACE_IRQS_OFF + movl TI_flags(%ebp), %ecx +- testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx ++ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx + jne syscall_exit_work + movl PT_EAX(%esp),%eax /* reload syscall return value */ + jmp sysenter_exit +@@ -410,6 +515,7 @@ sysexit_audit: + .align 4 + .long 1b,2b + .popsection ++ PTGS_TO_GS_EX + ENDPROC(ia32_sysenter_target) + + # system call handler stub +@@ -420,8 +526,7 @@ ENTRY(system_call) + SAVE_ALL + GET_THREAD_INFO(%ebp) + # system call tracing in operation / emulation +- /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ +- testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) ++ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) + jnz syscall_trace_entry + cmpl $(nr_syscalls), %eax + jae syscall_badsys +@@ -435,7 +540,7 @@ syscall_exit: + # between sampling and the iret + TRACE_IRQS_OFF + movl TI_flags(%ebp), %ecx +- testw $_TIF_ALLWORK_MASK, %cx # current->work ++ testl $_TIF_ALLWORK_MASK, %ecx # current->work + jne syscall_exit_work + + restore_all: +@@ -452,8 +557,7 @@ restore_all: + restore_nocheck: + TRACE_IRQS_IRET + restore_nocheck_notrace: +- RESTORE_REGS +- addl $4, %esp # skip orig_eax/error_code ++ RESTORE_REGS 4 # skip orig_eax/error_code + CFI_ADJUST_CFA_OFFSET -4 + irq_return: + INTERRUPT_RETURN +@@ -513,20 +617,19 @@ ENDPROC(system_call) + ALIGN + RING0_PTREGS_FRAME # can't unwind into user space anyway + work_pending: +- testb $_TIF_NEED_RESCHED, %cl ++ testl 
$(_TIF_NEED_RESCHED), %ecx + jz work_notifysig + work_resched: +- call schedule ++ call __schedule + LOCKDEP_SYS_EXIT + DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt + # setting need_resched or sigpending + # between sampling and the iret +- TRACE_IRQS_OFF + movl TI_flags(%ebp), %ecx + andl $_TIF_WORK_MASK, %ecx # is there any work to be done other + # than syscall tracing? + jz restore_all +- testb $_TIF_NEED_RESCHED, %cl ++ testl $(_TIF_NEED_RESCHED), %ecx + jnz work_resched + + work_notifysig: # deal with pending signals and +@@ -571,7 +674,7 @@ END(syscall_trace_entry) + # perform syscall exit tracing + ALIGN + syscall_exit_work: +- testb $_TIF_WORK_SYSCALL_EXIT, %cl ++ testl $_TIF_WORK_SYSCALL_EXIT, %ecx + jz work_pending + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call +@@ -595,28 +698,50 @@ syscall_badsys: + END(syscall_badsys) + CFI_ENDPROC + +-#define FIXUP_ESPFIX_STACK \ +- /* since we are on a wrong stack, we cant make it a C code :( */ \ +- PER_CPU(gdt_page, %ebx); \ +- GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ +- addl %esp, %eax; \ +- pushl $__KERNEL_DS; \ +- CFI_ADJUST_CFA_OFFSET 4; \ +- pushl %eax; \ +- CFI_ADJUST_CFA_OFFSET 4; \ +- lss (%esp), %esp; \ +- CFI_ADJUST_CFA_OFFSET -8; +-#define UNWIND_ESPFIX_STACK \ +- movl %ss, %eax; \ +- /* see if on espfix stack */ \ +- cmpw $__ESPFIX_SS, %ax; \ +- jne 27f; \ +- movl $__KERNEL_DS, %eax; \ +- movl %eax, %ds; \ +- movl %eax, %es; \ +- /* switch to normal stack */ \ +- FIXUP_ESPFIX_STACK; \ +-27:; ++/* ++ * System calls that need a pt_regs pointer. ++ */ ++#define PTREGSCALL(name) \ ++ ALIGN; \ ++ptregs_##name: \ ++ leal 4(%esp),%eax; \ ++ jmp sys_##name; ++ ++PTREGSCALL(iopl) ++PTREGSCALL(fork) ++PTREGSCALL(clone) ++PTREGSCALL(vfork) ++PTREGSCALL(execve) ++PTREGSCALL(sigaltstack) ++PTREGSCALL(sigreturn) ++PTREGSCALL(rt_sigreturn) ++PTREGSCALL(vm86) ++PTREGSCALL(vm86old) ++ ++.macro FIXUP_ESPFIX_STACK ++ /* since we are on a wrong stack, we cant make it a C code :( */ ++ PER_CPU(gdt_page, %ebx) ++ GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) ++ addl %esp, %eax ++ pushl $__KERNEL_DS ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl %eax ++ CFI_ADJUST_CFA_OFFSET 4 ++ lss (%esp), %esp ++ CFI_ADJUST_CFA_OFFSET -8 ++.endm ++.macro UNWIND_ESPFIX_STACK ++ movl %ss, %eax ++ /* see if on espfix stack */ ++ cmpw $__ESPFIX_SS, %ax ++ jne 27f ++ movl $__KERNEL_DS, %eax ++ movl %eax, %ds ++ movl %eax, %es ++ /* switch to normal stack */ ++ FIXUP_ESPFIX_STACK ++27: ++.endm + + /* + * Build the entry stubs and pointer table with some assembler magic. +@@ -672,7 +797,7 @@ common_interrupt: + ENDPROC(common_interrupt) + CFI_ENDPROC + +-#define BUILD_INTERRUPT(name, nr) \ ++#define BUILD_INTERRUPT3(name, nr, fn) \ + ENTRY(name) \ + RING0_INT_FRAME; \ + pushl $~(nr); \ +@@ -680,13 +805,15 @@ ENTRY(name) \ + SAVE_ALL; \ + TRACE_IRQS_OFF \ + movl %esp,%eax; \ +- call smp_##name; \ ++ call fn; \ + jmp ret_from_intr; \ + CFI_ENDPROC; \ + ENDPROC(name) + ++#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) ++ + /* The include is where all of the SMP etc. 
interrupts come from */ +-#include "entry_arch.h" ++#include + + ENTRY(coprocessor_error) + RING0_INT_FRAME +@@ -1068,7 +1195,10 @@ ENTRY(page_fault) + CFI_ADJUST_CFA_OFFSET 4 + ALIGN + error_code: +- /* the function address is in %fs's slot on the stack */ ++ /* the function address is in %gs's slot on the stack */ ++ pushl %fs ++ CFI_ADJUST_CFA_OFFSET 4 ++ /*CFI_REL_OFFSET fs, 0*/ + pushl %es + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET es, 0*/ +@@ -1097,20 +1227,15 @@ error_code: + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + cld +- pushl %fs +- CFI_ADJUST_CFA_OFFSET 4 +- /*CFI_REL_OFFSET fs, 0*/ + movl $(__KERNEL_PERCPU), %ecx + movl %ecx, %fs + UNWIND_ESPFIX_STACK +- popl %ecx +- CFI_ADJUST_CFA_OFFSET -4 +- /*CFI_REGISTER es, ecx*/ +- movl PT_FS(%esp), %edi # get the function address ++ GS_TO_REG %ecx ++ movl PT_GS(%esp), %edi # get the function address + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart +- mov %ecx, PT_FS(%esp) +- /*CFI_REL_OFFSET fs, ES*/ ++ REG_TO_PTGS %ecx ++ SET_KERNEL_GS %ecx + movl $(__USER_DS), %ecx + movl %ecx, %ds + movl %ecx, %es +@@ -1134,26 +1259,27 @@ END(page_fault) + * by hand onto the new stack - while updating the return eip past + * the instruction that would have done it for sysenter. + */ +-#define FIX_STACK(offset, ok, label) \ +- cmpw $__KERNEL_CS,4(%esp); \ +- jne ok; \ +-label: \ +- movl TSS_sysenter_sp0+offset(%esp),%esp; \ +- CFI_DEF_CFA esp, 0; \ +- CFI_UNDEFINED eip; \ +- pushfl; \ +- CFI_ADJUST_CFA_OFFSET 4; \ +- pushl $__KERNEL_CS; \ +- CFI_ADJUST_CFA_OFFSET 4; \ +- pushl $sysenter_past_esp; \ +- CFI_ADJUST_CFA_OFFSET 4; \ ++.macro FIX_STACK offset ok label ++ cmpw $__KERNEL_CS, 4(%esp) ++ jne \ok ++\label: ++ movl TSS_sysenter_sp0 + \offset(%esp), %esp ++ CFI_DEF_CFA esp, 0 ++ CFI_UNDEFINED eip ++ pushfl ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $__KERNEL_CS ++ CFI_ADJUST_CFA_OFFSET 4 ++ pushl $sysenter_past_esp ++ CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET eip, 0 ++.endm + + ENTRY(debug) + RING0_INT_FRAME + cmpl $ia32_sysenter_target,(%esp) + jne debug_stack_correct +- FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) ++ FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn + debug_stack_correct: + pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 +@@ -1211,7 +1337,7 @@ nmi_stack_correct: + + nmi_stack_fixup: + RING0_INT_FRAME +- FIX_STACK(12,nmi_stack_correct, 1) ++ FIX_STACK 12, nmi_stack_correct, 1 + jmp nmi_stack_correct + + nmi_debug_stack_check: +@@ -1222,7 +1348,7 @@ nmi_debug_stack_check: + jb nmi_stack_correct + cmpl $debug_esp_fix_insn,(%esp) + ja nmi_stack_correct +- FIX_STACK(24,nmi_stack_correct, 1) ++ FIX_STACK 24, nmi_stack_correct, 1 + jmp nmi_stack_correct + + nmi_espfix_stack: +@@ -1234,7 +1360,7 @@ nmi_espfix_stack: + CFI_ADJUST_CFA_OFFSET 4 + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 +- addw $4, (%esp) ++ addl $4, (%esp) + /* copy the iret frame of 12 bytes */ + .rept 3 + pushl 16(%esp) +Index: linux-2.6-tip/arch/x86/kernel/entry_64.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/entry_64.S ++++ linux-2.6-tip/arch/x86/kernel/entry_64.S +@@ -48,10 +48,11 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include ++#include + + /* Avoid __ASSEMBLER__'ifying just for this. 
*/ + #include +@@ -76,20 +77,17 @@ ENTRY(ftrace_caller) + movq 8(%rbp), %rsi + subq $MCOUNT_INSN_SIZE, %rdi + +-.globl ftrace_call +-ftrace_call: ++GLOBAL(ftrace_call) + call ftrace_stub + + MCOUNT_RESTORE_FRAME + + #ifdef CONFIG_FUNCTION_GRAPH_TRACER +-.globl ftrace_graph_call +-ftrace_graph_call: ++GLOBAL(ftrace_graph_call) + jmp ftrace_stub + #endif + +-.globl ftrace_stub +-ftrace_stub: ++GLOBAL(ftrace_stub) + retq + END(ftrace_caller) + +@@ -109,8 +107,7 @@ ENTRY(mcount) + jnz ftrace_graph_caller + #endif + +-.globl ftrace_stub +-ftrace_stub: ++GLOBAL(ftrace_stub) + retq + + trace: +@@ -147,9 +144,7 @@ ENTRY(ftrace_graph_caller) + retq + END(ftrace_graph_caller) + +- +-.globl return_to_handler +-return_to_handler: ++GLOBAL(return_to_handler) + subq $80, %rsp + + movq %rax, (%rsp) +@@ -187,6 +182,7 @@ return_to_handler: + ENTRY(native_usergs_sysret64) + swapgs + sysretq ++ENDPROC(native_usergs_sysret64) + #endif /* CONFIG_PARAVIRT */ + + +@@ -209,7 +205,7 @@ ENTRY(native_usergs_sysret64) + + /* %rsp:at FRAMEEND */ + .macro FIXUP_TOP_OF_STACK tmp offset=0 +- movq %gs:pda_oldrsp,\tmp ++ movq PER_CPU_VAR(old_rsp),\tmp + movq \tmp,RSP+\offset(%rsp) + movq $__USER_DS,SS+\offset(%rsp) + movq $__USER_CS,CS+\offset(%rsp) +@@ -220,7 +216,7 @@ ENTRY(native_usergs_sysret64) + + .macro RESTORE_TOP_OF_STACK tmp offset=0 + movq RSP+\offset(%rsp),\tmp +- movq \tmp,%gs:pda_oldrsp ++ movq \tmp,PER_CPU_VAR(old_rsp) + movq EFLAGS+\offset(%rsp),\tmp + movq \tmp,R11+\offset(%rsp) + .endm +@@ -336,15 +332,15 @@ ENTRY(save_args) + je 1f + SWAPGS + /* +- * irqcount is used to check if a CPU is already on an interrupt stack ++ * irq_count is used to check if a CPU is already on an interrupt stack + * or not. While this is essentially redundant with preempt_count it is + * a little cheaper to use a separate counter in the PDA (short of + * moving irq_enter into assembly, which would be too much work) + */ +-1: incl %gs:pda_irqcount ++1: incl PER_CPU_VAR(irq_count) + jne 2f + popq_cfi %rax /* move return address... */ +- mov %gs:pda_irqstackptr,%rsp ++ mov PER_CPU_VAR(irq_stack_ptr),%rsp + EMPTY_FRAME 0 + pushq_cfi %rbp /* backlink for unwinder */ + pushq_cfi %rax /* ... to the new stack */ +@@ -372,6 +368,7 @@ ENTRY(save_rest) + END(save_rest) + + /* save complete stack frame */ ++ .pushsection .kprobes.text, "ax" + ENTRY(save_paranoid) + XCPT_FRAME 1 RDI+8 + cld +@@ -400,6 +397,7 @@ ENTRY(save_paranoid) + 1: ret + CFI_ENDPROC + END(save_paranoid) ++ .popsection + + /* + * A newly forked process directly context switches into this address. +@@ -409,6 +407,8 @@ END(save_paranoid) + ENTRY(ret_from_fork) + DEFAULT_FRAME + ++ LOCK ; btr $TIF_FORK,TI_flags(%r8) ++ + push kernel_eflags(%rip) + CFI_ADJUST_CFA_OFFSET 8 + popf # reset kernel eflags +@@ -418,7 +418,6 @@ ENTRY(ret_from_fork) + + GET_THREAD_INFO(%rcx) + +- CFI_REMEMBER_STATE + RESTORE_REST + + testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? 
+@@ -430,7 +429,6 @@ ENTRY(ret_from_fork) + RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET + jmp ret_from_sys_call # go to the SYSRET fastpath + +- CFI_RESTORE_STATE + CFI_ENDPROC + END(ret_from_fork) + +@@ -468,7 +466,7 @@ END(ret_from_fork) + ENTRY(system_call) + CFI_STARTPROC simple + CFI_SIGNAL_FRAME +- CFI_DEF_CFA rsp,PDA_STACKOFFSET ++ CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET + CFI_REGISTER rip,rcx + /*CFI_REGISTER rflags,r11*/ + SWAPGS_UNSAFE_STACK +@@ -479,8 +477,8 @@ ENTRY(system_call) + */ + ENTRY(system_call_after_swapgs) + +- movq %rsp,%gs:pda_oldrsp +- movq %gs:pda_kernelstack,%rsp ++ movq %rsp,PER_CPU_VAR(old_rsp) ++ movq PER_CPU_VAR(kernel_stack),%rsp + /* + * No need to follow this irqs off/on section - it's straight + * and short: +@@ -523,7 +521,7 @@ sysret_check: + CFI_REGISTER rip,rcx + RESTORE_ARGS 0,-ARG_SKIP,1 + /*CFI_REGISTER rflags,r11*/ +- movq %gs:pda_oldrsp, %rsp ++ movq PER_CPU_VAR(old_rsp), %rsp + USERGS_SYSRET64 + + CFI_RESTORE_STATE +@@ -630,16 +628,14 @@ tracesys: + * Syscall return path ending with IRET. + * Has correct top of stack, but partial stack frame. + */ +- .globl int_ret_from_sys_call +- .globl int_with_check +-int_ret_from_sys_call: ++GLOBAL(int_ret_from_sys_call) + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF + testl $3,CS-ARGOFFSET(%rsp) + je retint_restore_args + movl $_TIF_ALLWORK_MASK,%edi + /* edi: mask to check */ +-int_with_check: ++GLOBAL(int_with_check) + LOCKDEP_SYS_EXIT_IRQ + GET_THREAD_INFO(%rcx) + movl TI_flags(%rcx),%edx +@@ -833,11 +829,11 @@ common_interrupt: + XCPT_FRAME + addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ + interrupt do_IRQ +- /* 0(%rsp): oldrsp-ARGOFFSET */ ++ /* 0(%rsp): old_rsp-ARGOFFSET */ + ret_from_intr: + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF +- decl %gs:pda_irqcount ++ decl PER_CPU_VAR(irq_count) + leaveq + CFI_DEF_CFA_REGISTER rsp + CFI_ADJUST_CFA_OFFSET -8 +@@ -982,10 +978,14 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ + irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt + #endif + ++#ifdef CONFIG_X86_UV + apicinterrupt UV_BAU_MESSAGE \ + uv_bau_message_intr1 uv_bau_message_interrupt ++#endif + apicinterrupt LOCAL_TIMER_VECTOR \ + apic_timer_interrupt smp_apic_timer_interrupt ++apicinterrupt GENERIC_INTERRUPT_VECTOR \ ++ generic_interrupt smp_generic_interrupt + + #ifdef CONFIG_SMP + apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ +@@ -1025,6 +1025,11 @@ apicinterrupt ERROR_APIC_VECTOR \ + apicinterrupt SPURIOUS_APIC_VECTOR \ + spurious_interrupt smp_spurious_interrupt + ++#ifdef CONFIG_PERF_COUNTERS ++apicinterrupt LOCAL_PERF_VECTOR \ ++ perf_counter_interrupt smp_perf_counter_interrupt ++#endif ++ + /* + * Exception entry points. 
+ */ +@@ -1073,10 +1078,10 @@ ENTRY(\sym) + TRACE_IRQS_OFF + movq %rsp,%rdi /* pt_regs pointer */ + xorl %esi,%esi /* no error code */ +- movq %gs:pda_data_offset, %rbp +- subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) ++ PER_CPU(init_tss, %rbp) ++ subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) + call \do_sym +- addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) ++ addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp) + jmp paranoid_exit /* %ebx: no swapgs flag */ + CFI_ENDPROC + END(\sym) +@@ -1138,7 +1143,7 @@ ENTRY(native_load_gs_index) + CFI_STARTPROC + pushf + CFI_ADJUST_CFA_OFFSET 8 +- DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) ++ DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI) + SWAPGS + gs_change: + movl %edi,%gs +@@ -1260,14 +1265,14 @@ ENTRY(call_softirq) + CFI_REL_OFFSET rbp,0 + mov %rsp,%rbp + CFI_DEF_CFA_REGISTER rbp +- incl %gs:pda_irqcount +- cmove %gs:pda_irqstackptr,%rsp ++ incl PER_CPU_VAR(irq_count) ++ cmove PER_CPU_VAR(irq_stack_ptr),%rsp + push %rbp # backlink for old unwinder + call __do_softirq + leaveq + CFI_DEF_CFA_REGISTER rsp + CFI_ADJUST_CFA_OFFSET -8 +- decl %gs:pda_irqcount ++ decl PER_CPU_VAR(irq_count) + ret + CFI_ENDPROC + END(call_softirq) +@@ -1297,15 +1302,15 @@ ENTRY(xen_do_hypervisor_callback) # do + movq %rdi, %rsp # we don't return, adjust the stack frame + CFI_ENDPROC + DEFAULT_FRAME +-11: incl %gs:pda_irqcount ++11: incl PER_CPU_VAR(irq_count) + movq %rsp,%rbp + CFI_DEF_CFA_REGISTER rbp +- cmovzq %gs:pda_irqstackptr,%rsp ++ cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp + pushq %rbp # backlink for old unwinder + call xen_evtchn_do_upcall + popq %rsp + CFI_DEF_CFA_REGISTER rsp +- decl %gs:pda_irqcount ++ decl PER_CPU_VAR(irq_count) + jmp error_exit + CFI_ENDPROC + END(do_hypervisor_callback) +Index: linux-2.6-tip/arch/x86/kernel/es7000_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/es7000_32.c ++++ /dev/null +@@ -1,378 +0,0 @@ +-/* +- * Written by: Garry Forsgren, Unisys Corporation +- * Natalie Protasevich, Unisys Corporation +- * This file contains the code to configure and interface +- * with Unisys ES7000 series hardware system manager. +- * +- * Copyright (c) 2003 Unisys Corporation. All Rights Reserved. +- * +- * This program is free software; you can redistribute it and/or modify it +- * under the terms of version 2 of the GNU General Public License as +- * published by the Free Software Foundation. +- * +- * This program is distributed in the hope that it would be useful, but +- * WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +- * +- * You should have received a copy of the GNU General Public License along +- * with this program; if not, write the Free Software Foundation, Inc., 59 +- * Temple Place - Suite 330, Boston MA 02111-1307, USA. 
+- * +- * Contact information: Unisys Corporation, Township Line & Union Meeting +- * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: +- * +- * http://www.unisys.com +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-/* +- * ES7000 chipsets +- */ +- +-#define NON_UNISYS 0 +-#define ES7000_CLASSIC 1 +-#define ES7000_ZORRO 2 +- +- +-#define MIP_REG 1 +-#define MIP_PSAI_REG 4 +- +-#define MIP_BUSY 1 +-#define MIP_SPIN 0xf0000 +-#define MIP_VALID 0x0100000000000000ULL +-#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff) +- +-#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff) +- +-struct mip_reg_info { +- unsigned long long mip_info; +- unsigned long long delivery_info; +- unsigned long long host_reg; +- unsigned long long mip_reg; +-}; +- +-struct part_info { +- unsigned char type; +- unsigned char length; +- unsigned char part_id; +- unsigned char apic_mode; +- unsigned long snum; +- char ptype[16]; +- char sname[64]; +- char pname[64]; +-}; +- +-struct psai { +- unsigned long long entry_type; +- unsigned long long addr; +- unsigned long long bep_addr; +-}; +- +-struct es7000_mem_info { +- unsigned char type; +- unsigned char length; +- unsigned char resv[6]; +- unsigned long long start; +- unsigned long long size; +-}; +- +-struct es7000_oem_table { +- unsigned long long hdr; +- struct mip_reg_info mip; +- struct part_info pif; +- struct es7000_mem_info shm; +- struct psai psai; +-}; +- +-#ifdef CONFIG_ACPI +- +-struct oem_table { +- struct acpi_table_header Header; +- u32 OEMTableAddr; +- u32 OEMTableSize; +-}; +- +-extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); +-extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr); +-#endif +- +-struct mip_reg { +- unsigned long long off_0; +- unsigned long long off_8; +- unsigned long long off_10; +- unsigned long long off_18; +- unsigned long long off_20; +- unsigned long long off_28; +- unsigned long long off_30; +- unsigned long long off_38; +-}; +- +-#define MIP_SW_APIC 0x1020b +-#define MIP_FUNC(VALUE) (VALUE & 0xff) +- +-/* +- * ES7000 Globals +- */ +- +-static volatile unsigned long *psai = NULL; +-static struct mip_reg *mip_reg; +-static struct mip_reg *host_reg; +-static int mip_port; +-static unsigned long mip_addr, host_addr; +- +-int es7000_plat; +- +-/* +- * GSI override for ES7000 platforms. 
+- */ +- +-static unsigned int base; +- +-static int +-es7000_rename_gsi(int ioapic, int gsi) +-{ +- if (es7000_plat == ES7000_ZORRO) +- return gsi; +- +- if (!base) { +- int i; +- for (i = 0; i < nr_ioapics; i++) +- base += nr_ioapic_registers[i]; +- } +- +- if (!ioapic && (gsi < 16)) +- gsi += base; +- return gsi; +-} +- +-static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) +-{ +- unsigned long vect = 0, psaival = 0; +- +- if (psai == NULL) +- return -1; +- +- vect = ((unsigned long)__pa(eip)/0x1000) << 16; +- psaival = (0x1000000 | vect | cpu); +- +- while (*psai & 0x1000000) +- ; +- +- *psai = psaival; +- +- return 0; +-} +- +-static void noop_wait_for_deassert(atomic_t *deassert_not_used) +-{ +-} +- +-static int __init es7000_update_genapic(void) +-{ +- genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; +- +- /* MPENTIUMIII */ +- if (boot_cpu_data.x86 == 6 && +- (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) { +- es7000_update_genapic_to_cluster(); +- genapic->wait_for_init_deassert = noop_wait_for_deassert; +- genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; +- } +- +- return 0; +-} +- +-void __init +-setup_unisys(void) +-{ +- /* +- * Determine the generation of the ES7000 currently running. +- * +- * es7000_plat = 1 if the machine is a 5xx ES7000 box +- * es7000_plat = 2 if the machine is a x86_64 ES7000 box +- * +- */ +- if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) +- es7000_plat = ES7000_ZORRO; +- else +- es7000_plat = ES7000_CLASSIC; +- ioapic_renumber_irq = es7000_rename_gsi; +- +- x86_quirks->update_genapic = es7000_update_genapic; +-} +- +-/* +- * Parse the OEM Table +- */ +- +-int __init +-parse_unisys_oem (char *oemptr) +-{ +- int i; +- int success = 0; +- unsigned char type, size; +- unsigned long val; +- char *tp = NULL; +- struct psai *psaip = NULL; +- struct mip_reg_info *mi; +- struct mip_reg *host, *mip; +- +- tp = oemptr; +- +- tp += 8; +- +- for (i=0; i <= 6; i++) { +- type = *tp++; +- size = *tp++; +- tp -= 2; +- switch (type) { +- case MIP_REG: +- mi = (struct mip_reg_info *)tp; +- val = MIP_RD_LO(mi->host_reg); +- host_addr = val; +- host = (struct mip_reg *)val; +- host_reg = __va(host); +- val = MIP_RD_LO(mi->mip_reg); +- mip_port = MIP_PORT(mi->mip_info); +- mip_addr = val; +- mip = (struct mip_reg *)val; +- mip_reg = __va(mip); +- pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", +- (unsigned long)host_reg); +- pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", +- (unsigned long)mip_reg); +- success++; +- break; +- case MIP_PSAI_REG: +- psaip = (struct psai *)tp; +- if (tp != NULL) { +- if (psaip->addr) +- psai = __va(psaip->addr); +- else +- psai = NULL; +- success++; +- } +- break; +- default: +- break; +- } +- tp += size; +- } +- +- if (success < 2) { +- es7000_plat = NON_UNISYS; +- } else +- setup_unisys(); +- return es7000_plat; +-} +- +-#ifdef CONFIG_ACPI +-static unsigned long oem_addrX; +-static unsigned long oem_size; +-int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) +-{ +- struct acpi_table_header *header = NULL; +- int i = 0; +- +- while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { +- if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { +- struct oem_table *t = (struct oem_table *)header; +- +- oem_addrX = t->OEMTableAddr; +- oem_size = t->OEMTableSize; +- +- *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, +- oem_size); +- return 0; +- } +- } +- return -1; +-} +- +-void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) +-{ +-} +-#endif +- +-static void 
+-es7000_spin(int n) +-{ +- int i = 0; +- +- while (i++ < n) +- rep_nop(); +-} +- +-static int __init +-es7000_mip_write(struct mip_reg *mip_reg) +-{ +- int status = 0; +- int spin; +- +- spin = MIP_SPIN; +- while (((unsigned long long)host_reg->off_38 & +- (unsigned long long)MIP_VALID) != 0) { +- if (--spin <= 0) { +- printk("es7000_mip_write: Timeout waiting for Host Valid Flag"); +- return -1; +- } +- es7000_spin(MIP_SPIN); +- } +- +- memcpy(host_reg, mip_reg, sizeof(struct mip_reg)); +- outb(1, mip_port); +- +- spin = MIP_SPIN; +- +- while (((unsigned long long)mip_reg->off_38 & +- (unsigned long long)MIP_VALID) == 0) { +- if (--spin <= 0) { +- printk("es7000_mip_write: Timeout waiting for MIP Valid Flag"); +- return -1; +- } +- es7000_spin(MIP_SPIN); +- } +- +- status = ((unsigned long long)mip_reg->off_0 & +- (unsigned long long)0xffff0000000000ULL) >> 48; +- mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 & +- (unsigned long long)~MIP_VALID); +- return status; +-} +- +-void __init +-es7000_sw_apic(void) +-{ +- if (es7000_plat) { +- int mip_status; +- struct mip_reg es7000_mip_reg; +- +- printk("ES7000: Enabling APIC mode.\n"); +- memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); +- es7000_mip_reg.off_0 = MIP_SW_APIC; +- es7000_mip_reg.off_38 = (MIP_VALID); +- while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) +- printk("es7000_sw_apic: command failed, status = %x\n", +- mip_status); +- return; +- } +-} +Index: linux-2.6-tip/arch/x86/kernel/ftrace.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/ftrace.c ++++ linux-2.6-tip/arch/x86/kernel/ftrace.c +@@ -18,6 +18,7 @@ + #include + #include + ++#include + #include + #include + #include +@@ -26,6 +27,18 @@ + + #ifdef CONFIG_DYNAMIC_FTRACE + ++int ftrace_arch_code_modify_prepare(void) ++{ ++ set_kernel_text_rw(); ++ return 0; ++} ++ ++int ftrace_arch_code_modify_post_process(void) ++{ ++ set_kernel_text_ro(); ++ return 0; ++} ++ + union ftrace_code_union { + char code[MCOUNT_INSN_SIZE]; + struct { +@@ -66,11 +79,11 @@ static unsigned char *ftrace_call_replac + * + * 1) Put the instruction pointer into the IP buffer + * and the new code into the "code" buffer. +- * 2) Set a flag that says we are modifying code +- * 3) Wait for any running NMIs to finish. +- * 4) Write the code +- * 5) clear the flag. +- * 6) Wait for any running NMIs to finish. ++ * 2) Wait for any running NMIs to finish and set a flag that says ++ * we are modifying code, it is done in an atomic operation. ++ * 3) Write the code ++ * 4) clear the flag. ++ * 5) Wait for any running NMIs to finish. + * + * If an NMI is executed, the first thing it does is to call + * "ftrace_nmi_enter". This will check if the flag is set to write +@@ -82,9 +95,9 @@ static unsigned char *ftrace_call_replac + * are the same as what exists. 
+ */ + +-static atomic_t in_nmi = ATOMIC_INIT(0); ++#define MOD_CODE_WRITE_FLAG (1 << 31) /* set when NMI should do the write */ ++static atomic_t nmi_running = ATOMIC_INIT(0); + static int mod_code_status; /* holds return value of text write */ +-static int mod_code_write; /* set when NMI should do the write */ + static void *mod_code_ip; /* holds the IP to write to */ + static void *mod_code_newcode; /* holds the text to write to the IP */ + +@@ -101,6 +114,20 @@ int ftrace_arch_read_dyn_info(char *buf, + return r; + } + ++static void clear_mod_flag(void) ++{ ++ int old = atomic_read(&nmi_running); ++ ++ for (;;) { ++ int new = old & ~MOD_CODE_WRITE_FLAG; ++ ++ if (old == new) ++ break; ++ ++ old = atomic_cmpxchg(&nmi_running, old, new); ++ } ++} ++ + static void ftrace_mod_code(void) + { + /* +@@ -111,37 +138,52 @@ static void ftrace_mod_code(void) + */ + mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, + MCOUNT_INSN_SIZE); ++ ++ /* if we fail, then kill any new writers */ ++ if (mod_code_status) ++ clear_mod_flag(); + } + + void ftrace_nmi_enter(void) + { +- atomic_inc(&in_nmi); +- /* Must have in_nmi seen before reading write flag */ +- smp_mb(); +- if (mod_code_write) { ++ if (atomic_inc_return(&nmi_running) & MOD_CODE_WRITE_FLAG) { ++ smp_rmb(); + ftrace_mod_code(); + atomic_inc(&nmi_update_count); + } ++ /* Must have previous changes seen before executions */ ++ smp_mb(); + } + + void ftrace_nmi_exit(void) + { +- /* Finish all executions before clearing in_nmi */ +- smp_wmb(); +- atomic_dec(&in_nmi); ++ /* Finish all executions before clearing nmi_running */ ++ smp_mb(); ++ atomic_dec(&nmi_running); ++} ++ ++static void wait_for_nmi_and_set_mod_flag(void) ++{ ++ if (!atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)) ++ return; ++ ++ do { ++ cpu_relax(); ++ } while (atomic_cmpxchg(&nmi_running, 0, MOD_CODE_WRITE_FLAG)); ++ ++ nmi_wait_count++; + } + + static void wait_for_nmi(void) + { +- int waited = 0; ++ if (!atomic_read(&nmi_running)) ++ return; + +- while (atomic_read(&in_nmi)) { +- waited = 1; ++ do { + cpu_relax(); +- } ++ } while (atomic_read(&nmi_running)); + +- if (waited) +- nmi_wait_count++; ++ nmi_wait_count++; + } + + static int +@@ -151,14 +193,9 @@ do_ftrace_mod_code(unsigned long ip, voi + mod_code_newcode = new_code; + + /* The buffers need to be visible before we let NMIs write them */ +- smp_wmb(); +- +- mod_code_write = 1; +- +- /* Make sure write bit is visible before we wait on NMIs */ + smp_mb(); + +- wait_for_nmi(); ++ wait_for_nmi_and_set_mod_flag(); + + /* Make sure all running NMIs have finished before we write the code */ + smp_mb(); +@@ -166,13 +203,9 @@ do_ftrace_mod_code(unsigned long ip, voi + ftrace_mod_code(); + + /* Make sure the write happens before clearing the bit */ +- smp_wmb(); +- +- mod_code_write = 0; +- +- /* make sure NMIs see the cleared bit */ + smp_mb(); + ++ clear_mod_flag(); + wait_for_nmi(); + + return mod_code_status; +@@ -368,100 +401,8 @@ int ftrace_disable_ftrace_graph_caller(v + return ftrace_mod_jmp(ip, old_offset, new_offset); + } + +-#else /* CONFIG_DYNAMIC_FTRACE */ +- +-/* +- * These functions are picked from those used on +- * this page for dynamic ftrace. They have been +- * simplified to ignore all traces in NMI context. 
+- */ +-static atomic_t in_nmi; +- +-void ftrace_nmi_enter(void) +-{ +- atomic_inc(&in_nmi); +-} +- +-void ftrace_nmi_exit(void) +-{ +- atomic_dec(&in_nmi); +-} +- + #endif /* !CONFIG_DYNAMIC_FTRACE */ + +-/* Add a function return address to the trace stack on thread info.*/ +-static int push_return_trace(unsigned long ret, unsigned long long time, +- unsigned long func, int *depth) +-{ +- int index; +- +- if (!current->ret_stack) +- return -EBUSY; +- +- /* The return trace stack is full */ +- if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { +- atomic_inc(¤t->trace_overrun); +- return -EBUSY; +- } +- +- index = ++current->curr_ret_stack; +- barrier(); +- current->ret_stack[index].ret = ret; +- current->ret_stack[index].func = func; +- current->ret_stack[index].calltime = time; +- *depth = index; +- +- return 0; +-} +- +-/* Retrieve a function return address to the trace stack on thread info.*/ +-static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) +-{ +- int index; +- +- index = current->curr_ret_stack; +- +- if (unlikely(index < 0)) { +- ftrace_graph_stop(); +- WARN_ON(1); +- /* Might as well panic, otherwise we have no where to go */ +- *ret = (unsigned long)panic; +- return; +- } +- +- *ret = current->ret_stack[index].ret; +- trace->func = current->ret_stack[index].func; +- trace->calltime = current->ret_stack[index].calltime; +- trace->overrun = atomic_read(¤t->trace_overrun); +- trace->depth = index; +- barrier(); +- current->curr_ret_stack--; +- +-} +- +-/* +- * Send the trace to the ring-buffer. +- * @return the original return address. +- */ +-unsigned long ftrace_return_to_handler(void) +-{ +- struct ftrace_graph_ret trace; +- unsigned long ret; +- +- pop_return_trace(&trace, &ret); +- trace.rettime = cpu_clock(raw_smp_processor_id()); +- ftrace_graph_return(&trace); +- +- if (unlikely(!ret)) { +- ftrace_graph_stop(); +- WARN_ON(1); +- /* Might as well panic. What else to do? */ +- ret = (unsigned long)panic; +- } +- +- return ret; +-} +- + /* + * Hook the return address and push it in the stack of return addrs + * in current thread info. 
+@@ -469,14 +410,13 @@ unsigned long ftrace_return_to_handler(v + void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) + { + unsigned long old; +- unsigned long long calltime; + int faulted; + struct ftrace_graph_ent trace; + unsigned long return_hooker = (unsigned long) + &return_to_handler; + + /* Nmi's are currently unsupported */ +- if (unlikely(atomic_read(&in_nmi))) ++ if (unlikely(in_nmi())) + return; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) +@@ -512,17 +452,7 @@ void prepare_ftrace_return(unsigned long + return; + } + +- if (unlikely(!__kernel_text_address(old))) { +- ftrace_graph_stop(); +- *parent = old; +- WARN_ON(1); +- return; +- } +- +- calltime = cpu_clock(raw_smp_processor_id()); +- +- if (push_return_trace(old, calltime, +- self_addr, &trace.depth) == -EBUSY) { ++ if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) { + *parent = old; + return; + } +@@ -536,3 +466,66 @@ void prepare_ftrace_return(unsigned long + } + } + #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ ++ ++#ifdef CONFIG_FTRACE_SYSCALLS ++ ++extern unsigned long __start_syscalls_metadata[]; ++extern unsigned long __stop_syscalls_metadata[]; ++extern unsigned long *sys_call_table; ++ ++static struct syscall_metadata **syscalls_metadata; ++ ++static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) ++{ ++ struct syscall_metadata *start; ++ struct syscall_metadata *stop; ++ char str[KSYM_SYMBOL_LEN]; ++ ++ ++ start = (struct syscall_metadata *)__start_syscalls_metadata; ++ stop = (struct syscall_metadata *)__stop_syscalls_metadata; ++ kallsyms_lookup((unsigned long) syscall, NULL, NULL, NULL, str); ++ ++ for ( ; start < stop; start++) { ++ if (start->name && !strcmp(start->name, str)) ++ return start; ++ } ++ return NULL; ++} ++ ++struct syscall_metadata *syscall_nr_to_meta(int nr) ++{ ++ if (!syscalls_metadata || nr >= FTRACE_SYSCALL_MAX || nr < 0) ++ return NULL; ++ ++ return syscalls_metadata[nr]; ++} ++ ++void arch_init_ftrace_syscalls(void) ++{ ++ int i; ++ struct syscall_metadata *meta; ++ unsigned long **psys_syscall_table = &sys_call_table; ++ static atomic_t refs; ++ ++ if (atomic_inc_return(&refs) != 1) ++ goto end; ++ ++ syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * ++ FTRACE_SYSCALL_MAX, GFP_KERNEL); ++ if (!syscalls_metadata) { ++ WARN_ON(1); ++ return; ++ } ++ ++ for (i = 0; i < FTRACE_SYSCALL_MAX; i++) { ++ meta = find_syscall_meta(psys_syscall_table[i]); ++ syscalls_metadata[i] = meta; ++ } ++ return; ++ ++ /* Paranoid: avoid overflow */ ++end: ++ atomic_dec(&refs); ++} ++#endif +Index: linux-2.6-tip/arch/x86/kernel/genapic_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/genapic_64.c ++++ /dev/null +@@ -1,82 +0,0 @@ +-/* +- * Copyright 2004 James Cleverdon, IBM. +- * Subject to the GNU Public License, v.2 +- * +- * Generic APIC sub-arch probe layer. +- * +- * Hacked for x86-64 by James Cleverdon from i386 architecture code by +- * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and +- * James Cleverdon. 
+- */ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +- +-extern struct genapic apic_flat; +-extern struct genapic apic_physflat; +-extern struct genapic apic_x2xpic_uv_x; +-extern struct genapic apic_x2apic_phys; +-extern struct genapic apic_x2apic_cluster; +- +-struct genapic __read_mostly *genapic = &apic_flat; +- +-static struct genapic *apic_probe[] __initdata = { +- &apic_x2apic_uv_x, +- &apic_x2apic_phys, +- &apic_x2apic_cluster, +- &apic_physflat, +- NULL, +-}; +- +-/* +- * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. +- */ +-void __init setup_apic_routing(void) +-{ +- if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) { +- if (!intr_remapping_enabled) +- genapic = &apic_flat; +- } +- +- if (genapic == &apic_flat) { +- if (max_physical_apicid >= 8) +- genapic = &apic_physflat; +- printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); +- } +- +- if (x86_quirks->update_genapic) +- x86_quirks->update_genapic(); +-} +- +-/* Same for both flat and physical. */ +- +-void apic_send_IPI_self(int vector) +-{ +- __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); +-} +- +-int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +-{ +- int i; +- +- for (i = 0; apic_probe[i]; ++i) { +- if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { +- genapic = apic_probe[i]; +- printk(KERN_INFO "Setting APIC routing to %s.\n", +- genapic->name); +- return 1; +- } +- } +- return 0; +-} +Index: linux-2.6-tip/arch/x86/kernel/genapic_flat_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/genapic_flat_64.c ++++ /dev/null +@@ -1,307 +0,0 @@ +-/* +- * Copyright 2004 James Cleverdon, IBM. +- * Subject to the GNU Public License, v.2 +- * +- * Flat APIC subarch code. +- * +- * Hacked for x86-64 by James Cleverdon from i386 architecture code by +- * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and +- * James Cleverdon. +- */ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#ifdef CONFIG_ACPI +-#include +-#endif +- +-static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +-{ +- return 1; +-} +- +-static const struct cpumask *flat_target_cpus(void) +-{ +- return cpu_online_mask; +-} +- +-static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) +-{ +- /* Careful. Some cpus do not strictly honor the set of cpus +- * specified in the interrupt destination when using lowest +- * priority interrupt delivery mode. +- * +- * In particular there was a hyperthreading cpu observed to +- * deliver interrupts to the wrong hyperthread when only one +- * hyperthread was specified in the interrupt desitination. +- */ +- cpumask_clear(retmask); +- cpumask_bits(retmask)[0] = APIC_ALL_CPUS; +-} +- +-/* +- * Set up the logical destination ID. +- * +- * Intel recommends to set DFR, LDR and TPR before enabling +- * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel +- * document number 292116). So here it goes... 
+- */ +-static void flat_init_apic_ldr(void) +-{ +- unsigned long val; +- unsigned long num, id; +- +- num = smp_processor_id(); +- id = 1UL << num; +- apic_write(APIC_DFR, APIC_DFR_FLAT); +- val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; +- val |= SET_APIC_LOGICAL_ID(id); +- apic_write(APIC_LDR, val); +-} +- +-static inline void _flat_send_IPI_mask(unsigned long mask, int vector) +-{ +- unsigned long flags; +- +- local_irq_save(flags); +- __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); +- local_irq_restore(flags); +-} +- +-static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) +-{ +- unsigned long mask = cpumask_bits(cpumask)[0]; +- +- _flat_send_IPI_mask(mask, vector); +-} +- +-static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, +- int vector) +-{ +- unsigned long mask = cpumask_bits(cpumask)[0]; +- int cpu = smp_processor_id(); +- +- if (cpu < BITS_PER_LONG) +- clear_bit(cpu, &mask); +- _flat_send_IPI_mask(mask, vector); +-} +- +-static void flat_send_IPI_allbutself(int vector) +-{ +- int cpu = smp_processor_id(); +-#ifdef CONFIG_HOTPLUG_CPU +- int hotplug = 1; +-#else +- int hotplug = 0; +-#endif +- if (hotplug || vector == NMI_VECTOR) { +- if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { +- unsigned long mask = cpumask_bits(cpu_online_mask)[0]; +- +- if (cpu < BITS_PER_LONG) +- clear_bit(cpu, &mask); +- +- _flat_send_IPI_mask(mask, vector); +- } +- } else if (num_online_cpus() > 1) { +- __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); +- } +-} +- +-static void flat_send_IPI_all(int vector) +-{ +- if (vector == NMI_VECTOR) +- flat_send_IPI_mask(cpu_online_mask, vector); +- else +- __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); +-} +- +-static unsigned int get_apic_id(unsigned long x) +-{ +- unsigned int id; +- +- id = (((x)>>24) & 0xFFu); +- return id; +-} +- +-static unsigned long set_apic_id(unsigned int id) +-{ +- unsigned long x; +- +- x = ((id & 0xFFu)<<24); +- return x; +-} +- +-static unsigned int read_xapic_id(void) +-{ +- unsigned int id; +- +- id = get_apic_id(apic_read(APIC_ID)); +- return id; +-} +- +-static int flat_apic_id_registered(void) +-{ +- return physid_isset(read_xapic_id(), phys_cpu_present_map); +-} +- +-static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask) +-{ +- return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; +-} +- +-static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, +- const struct cpumask *andmask) +-{ +- unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; +- unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS; +- +- return mask1 & mask2; +-} +- +-static unsigned int phys_pkg_id(int index_msb) +-{ +- return hard_smp_processor_id() >> index_msb; +-} +- +-struct genapic apic_flat = { +- .name = "flat", +- .acpi_madt_oem_check = flat_acpi_madt_oem_check, +- .int_delivery_mode = dest_LowestPrio, +- .int_dest_mode = (APIC_DEST_LOGICAL != 0), +- .target_cpus = flat_target_cpus, +- .vector_allocation_domain = flat_vector_allocation_domain, +- .apic_id_registered = flat_apic_id_registered, +- .init_apic_ldr = flat_init_apic_ldr, +- .send_IPI_all = flat_send_IPI_all, +- .send_IPI_allbutself = flat_send_IPI_allbutself, +- .send_IPI_mask = flat_send_IPI_mask, +- .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, +- .send_IPI_self = apic_send_IPI_self, +- .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, +- .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, +- .phys_pkg_id = phys_pkg_id, +- .get_apic_id 
= get_apic_id, +- .set_apic_id = set_apic_id, +- .apic_id_mask = (0xFFu<<24), +-}; +- +-/* +- * Physflat mode is used when there are more than 8 CPUs on a AMD system. +- * We cannot use logical delivery in this case because the mask +- * overflows, so use physical mode. +- */ +-static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +-{ +-#ifdef CONFIG_ACPI +- /* +- * Quirk: some x86_64 machines can only use physical APIC mode +- * regardless of how many processors are present (x86_64 ES7000 +- * is an example). +- */ +- if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && +- (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { +- printk(KERN_DEBUG "system APIC only can use physical flat"); +- return 1; +- } +-#endif +- +- return 0; +-} +- +-static const struct cpumask *physflat_target_cpus(void) +-{ +- return cpu_online_mask; +-} +- +-static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) +-{ +- cpumask_clear(retmask); +- cpumask_set_cpu(cpu, retmask); +-} +- +-static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) +-{ +- send_IPI_mask_sequence(cpumask, vector); +-} +- +-static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, +- int vector) +-{ +- send_IPI_mask_allbutself(cpumask, vector); +-} +- +-static void physflat_send_IPI_allbutself(int vector) +-{ +- send_IPI_mask_allbutself(cpu_online_mask, vector); +-} +- +-static void physflat_send_IPI_all(int vector) +-{ +- physflat_send_IPI_mask(cpu_online_mask, vector); +-} +- +-static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) +-{ +- int cpu; +- +- /* +- * We're using fixed IRQ delivery, can only return one phys APIC ID. +- * May as well be the first. +- */ +- cpu = cpumask_first(cpumask); +- if ((unsigned)cpu < nr_cpu_ids) +- return per_cpu(x86_cpu_to_apicid, cpu); +- else +- return BAD_APICID; +-} +- +-static unsigned int +-physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, +- const struct cpumask *andmask) +-{ +- int cpu; +- +- /* +- * We're using fixed IRQ delivery, can only return one phys APIC ID. +- * May as well be the first. 
+- */ +- for_each_cpu_and(cpu, cpumask, andmask) +- if (cpumask_test_cpu(cpu, cpu_online_mask)) +- break; +- if (cpu < nr_cpu_ids) +- return per_cpu(x86_cpu_to_apicid, cpu); +- return BAD_APICID; +-} +- +-struct genapic apic_physflat = { +- .name = "physical flat", +- .acpi_madt_oem_check = physflat_acpi_madt_oem_check, +- .int_delivery_mode = dest_Fixed, +- .int_dest_mode = (APIC_DEST_PHYSICAL != 0), +- .target_cpus = physflat_target_cpus, +- .vector_allocation_domain = physflat_vector_allocation_domain, +- .apic_id_registered = flat_apic_id_registered, +- .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ +- .send_IPI_all = physflat_send_IPI_all, +- .send_IPI_allbutself = physflat_send_IPI_allbutself, +- .send_IPI_mask = physflat_send_IPI_mask, +- .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, +- .send_IPI_self = apic_send_IPI_self, +- .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, +- .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, +- .phys_pkg_id = phys_pkg_id, +- .get_apic_id = get_apic_id, +- .set_apic_id = set_apic_id, +- .apic_id_mask = (0xFFu<<24), +-}; +Index: linux-2.6-tip/arch/x86/kernel/genx2apic_cluster.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/genx2apic_cluster.c ++++ /dev/null +@@ -1,198 +0,0 @@ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +- +-DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); +- +-static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +-{ +- if (cpu_has_x2apic) +- return 1; +- +- return 0; +-} +- +-/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ +- +-static const struct cpumask *x2apic_target_cpus(void) +-{ +- return cpumask_of(0); +-} +- +-/* +- * for now each logical cpu is in its own vector allocation domain. +- */ +-static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) +-{ +- cpumask_clear(retmask); +- cpumask_set_cpu(cpu, retmask); +-} +- +-static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, +- unsigned int dest) +-{ +- unsigned long cfg; +- +- cfg = __prepare_ICR(0, vector, dest); +- +- /* +- * send the IPI. +- */ +- x2apic_icr_write(cfg, apicid); +-} +- +-/* +- * for now, we send the IPI's one by one in the cpumask. +- * TBD: Based on the cpu mask, we can send the IPI's to the cluster group +- * at once. We have 16 cpu's in a cluster. This will minimize IPI register +- * writes. 
+- */ +-static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) +-{ +- unsigned long flags; +- unsigned long query_cpu; +- +- local_irq_save(flags); +- for_each_cpu(query_cpu, mask) +- __x2apic_send_IPI_dest( +- per_cpu(x86_cpu_to_logical_apicid, query_cpu), +- vector, APIC_DEST_LOGICAL); +- local_irq_restore(flags); +-} +- +-static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, +- int vector) +-{ +- unsigned long flags; +- unsigned long query_cpu; +- unsigned long this_cpu = smp_processor_id(); +- +- local_irq_save(flags); +- for_each_cpu(query_cpu, mask) +- if (query_cpu != this_cpu) +- __x2apic_send_IPI_dest( +- per_cpu(x86_cpu_to_logical_apicid, query_cpu), +- vector, APIC_DEST_LOGICAL); +- local_irq_restore(flags); +-} +- +-static void x2apic_send_IPI_allbutself(int vector) +-{ +- unsigned long flags; +- unsigned long query_cpu; +- unsigned long this_cpu = smp_processor_id(); +- +- local_irq_save(flags); +- for_each_online_cpu(query_cpu) +- if (query_cpu != this_cpu) +- __x2apic_send_IPI_dest( +- per_cpu(x86_cpu_to_logical_apicid, query_cpu), +- vector, APIC_DEST_LOGICAL); +- local_irq_restore(flags); +-} +- +-static void x2apic_send_IPI_all(int vector) +-{ +- x2apic_send_IPI_mask(cpu_online_mask, vector); +-} +- +-static int x2apic_apic_id_registered(void) +-{ +- return 1; +-} +- +-static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) +-{ +- int cpu; +- +- /* +- * We're using fixed IRQ delivery, can only return one logical APIC ID. +- * May as well be the first. +- */ +- cpu = cpumask_first(cpumask); +- if ((unsigned)cpu < nr_cpu_ids) +- return per_cpu(x86_cpu_to_logical_apicid, cpu); +- else +- return BAD_APICID; +-} +- +-static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, +- const struct cpumask *andmask) +-{ +- int cpu; +- +- /* +- * We're using fixed IRQ delivery, can only return one logical APIC ID. +- * May as well be the first. 
+- */ +- for_each_cpu_and(cpu, cpumask, andmask) +- if (cpumask_test_cpu(cpu, cpu_online_mask)) +- break; +- if (cpu < nr_cpu_ids) +- return per_cpu(x86_cpu_to_logical_apicid, cpu); +- return BAD_APICID; +-} +- +-static unsigned int get_apic_id(unsigned long x) +-{ +- unsigned int id; +- +- id = x; +- return id; +-} +- +-static unsigned long set_apic_id(unsigned int id) +-{ +- unsigned long x; +- +- x = id; +- return x; +-} +- +-static unsigned int phys_pkg_id(int index_msb) +-{ +- return current_cpu_data.initial_apicid >> index_msb; +-} +- +-static void x2apic_send_IPI_self(int vector) +-{ +- apic_write(APIC_SELF_IPI, vector); +-} +- +-static void init_x2apic_ldr(void) +-{ +- int cpu = smp_processor_id(); +- +- per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); +- return; +-} +- +-struct genapic apic_x2apic_cluster = { +- .name = "cluster x2apic", +- .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, +- .int_delivery_mode = dest_LowestPrio, +- .int_dest_mode = (APIC_DEST_LOGICAL != 0), +- .target_cpus = x2apic_target_cpus, +- .vector_allocation_domain = x2apic_vector_allocation_domain, +- .apic_id_registered = x2apic_apic_id_registered, +- .init_apic_ldr = init_x2apic_ldr, +- .send_IPI_all = x2apic_send_IPI_all, +- .send_IPI_allbutself = x2apic_send_IPI_allbutself, +- .send_IPI_mask = x2apic_send_IPI_mask, +- .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, +- .send_IPI_self = x2apic_send_IPI_self, +- .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, +- .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, +- .phys_pkg_id = phys_pkg_id, +- .get_apic_id = get_apic_id, +- .set_apic_id = set_apic_id, +- .apic_id_mask = (0xFFFFFFFFu), +-}; +Index: linux-2.6-tip/arch/x86/kernel/genx2apic_phys.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/genx2apic_phys.c ++++ /dev/null +@@ -1,194 +0,0 @@ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +- +-static int x2apic_phys; +- +-static int set_x2apic_phys_mode(char *arg) +-{ +- x2apic_phys = 1; +- return 0; +-} +-early_param("x2apic_phys", set_x2apic_phys_mode); +- +-static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +-{ +- if (cpu_has_x2apic && x2apic_phys) +- return 1; +- +- return 0; +-} +- +-/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ +- +-static const struct cpumask *x2apic_target_cpus(void) +-{ +- return cpumask_of(0); +-} +- +-static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) +-{ +- cpumask_clear(retmask); +- cpumask_set_cpu(cpu, retmask); +-} +- +-static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, +- unsigned int dest) +-{ +- unsigned long cfg; +- +- cfg = __prepare_ICR(0, vector, dest); +- +- /* +- * send the IPI. 
+- */ +- x2apic_icr_write(cfg, apicid); +-} +- +-static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) +-{ +- unsigned long flags; +- unsigned long query_cpu; +- +- local_irq_save(flags); +- for_each_cpu(query_cpu, mask) { +- __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), +- vector, APIC_DEST_PHYSICAL); +- } +- local_irq_restore(flags); +-} +- +-static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, +- int vector) +-{ +- unsigned long flags; +- unsigned long query_cpu; +- unsigned long this_cpu = smp_processor_id(); +- +- local_irq_save(flags); +- for_each_cpu(query_cpu, mask) { +- if (query_cpu != this_cpu) +- __x2apic_send_IPI_dest( +- per_cpu(x86_cpu_to_apicid, query_cpu), +- vector, APIC_DEST_PHYSICAL); +- } +- local_irq_restore(flags); +-} +- +-static void x2apic_send_IPI_allbutself(int vector) +-{ +- unsigned long flags; +- unsigned long query_cpu; +- unsigned long this_cpu = smp_processor_id(); +- +- local_irq_save(flags); +- for_each_online_cpu(query_cpu) +- if (query_cpu != this_cpu) +- __x2apic_send_IPI_dest( +- per_cpu(x86_cpu_to_apicid, query_cpu), +- vector, APIC_DEST_PHYSICAL); +- local_irq_restore(flags); +-} +- +-static void x2apic_send_IPI_all(int vector) +-{ +- x2apic_send_IPI_mask(cpu_online_mask, vector); +-} +- +-static int x2apic_apic_id_registered(void) +-{ +- return 1; +-} +- +-static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) +-{ +- int cpu; +- +- /* +- * We're using fixed IRQ delivery, can only return one phys APIC ID. +- * May as well be the first. +- */ +- cpu = cpumask_first(cpumask); +- if ((unsigned)cpu < nr_cpu_ids) +- return per_cpu(x86_cpu_to_apicid, cpu); +- else +- return BAD_APICID; +-} +- +-static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, +- const struct cpumask *andmask) +-{ +- int cpu; +- +- /* +- * We're using fixed IRQ delivery, can only return one phys APIC ID. +- * May as well be the first. 
+- */ +- for_each_cpu_and(cpu, cpumask, andmask) +- if (cpumask_test_cpu(cpu, cpu_online_mask)) +- break; +- if (cpu < nr_cpu_ids) +- return per_cpu(x86_cpu_to_apicid, cpu); +- return BAD_APICID; +-} +- +-static unsigned int get_apic_id(unsigned long x) +-{ +- unsigned int id; +- +- id = x; +- return id; +-} +- +-static unsigned long set_apic_id(unsigned int id) +-{ +- unsigned long x; +- +- x = id; +- return x; +-} +- +-static unsigned int phys_pkg_id(int index_msb) +-{ +- return current_cpu_data.initial_apicid >> index_msb; +-} +- +-static void x2apic_send_IPI_self(int vector) +-{ +- apic_write(APIC_SELF_IPI, vector); +-} +- +-static void init_x2apic_ldr(void) +-{ +- return; +-} +- +-struct genapic apic_x2apic_phys = { +- .name = "physical x2apic", +- .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, +- .int_delivery_mode = dest_Fixed, +- .int_dest_mode = (APIC_DEST_PHYSICAL != 0), +- .target_cpus = x2apic_target_cpus, +- .vector_allocation_domain = x2apic_vector_allocation_domain, +- .apic_id_registered = x2apic_apic_id_registered, +- .init_apic_ldr = init_x2apic_ldr, +- .send_IPI_all = x2apic_send_IPI_all, +- .send_IPI_allbutself = x2apic_send_IPI_allbutself, +- .send_IPI_mask = x2apic_send_IPI_mask, +- .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, +- .send_IPI_self = x2apic_send_IPI_self, +- .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, +- .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, +- .phys_pkg_id = phys_pkg_id, +- .get_apic_id = get_apic_id, +- .set_apic_id = set_apic_id, +- .apic_id_mask = (0xFFFFFFFFu), +-}; +Index: linux-2.6-tip/arch/x86/kernel/genx2apic_uv_x.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/genx2apic_uv_x.c ++++ /dev/null +@@ -1,600 +0,0 @@ +-/* +- * This file is subject to the terms and conditions of the GNU General Public +- * License. See the file "COPYING" in the main directory of this archive +- * for more details. +- * +- * SGI UV APIC functions (note: not an Intel compatible APIC) +- * +- * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-DEFINE_PER_CPU(int, x2apic_extra_bits); +- +-static enum uv_system_type uv_system_type; +- +-static int uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) +-{ +- if (!strcmp(oem_id, "SGI")) { +- if (!strcmp(oem_table_id, "UVL")) +- uv_system_type = UV_LEGACY_APIC; +- else if (!strcmp(oem_table_id, "UVX")) +- uv_system_type = UV_X2APIC; +- else if (!strcmp(oem_table_id, "UVH")) { +- uv_system_type = UV_NON_UNIQUE_APIC; +- return 1; +- } +- } +- return 0; +-} +- +-enum uv_system_type get_uv_system_type(void) +-{ +- return uv_system_type; +-} +- +-int is_uv_system(void) +-{ +- return uv_system_type != UV_NONE; +-} +-EXPORT_SYMBOL_GPL(is_uv_system); +- +-DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); +-EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); +- +-struct uv_blade_info *uv_blade_info; +-EXPORT_SYMBOL_GPL(uv_blade_info); +- +-short *uv_node_to_blade; +-EXPORT_SYMBOL_GPL(uv_node_to_blade); +- +-short *uv_cpu_to_blade; +-EXPORT_SYMBOL_GPL(uv_cpu_to_blade); +- +-short uv_possible_blades; +-EXPORT_SYMBOL_GPL(uv_possible_blades); +- +-unsigned long sn_rtc_cycles_per_second; +-EXPORT_SYMBOL(sn_rtc_cycles_per_second); +- +-/* Start with all IRQs pointing to boot CPU. 
IRQ balancing will shift them. */ +- +-static const struct cpumask *uv_target_cpus(void) +-{ +- return cpumask_of(0); +-} +- +-static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) +-{ +- cpumask_clear(retmask); +- cpumask_set_cpu(cpu, retmask); +-} +- +-int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) +-{ +- unsigned long val; +- int pnode; +- +- pnode = uv_apicid_to_pnode(phys_apicid); +- val = (1UL << UVH_IPI_INT_SEND_SHFT) | +- (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | +- (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | +- APIC_DM_INIT; +- uv_write_global_mmr64(pnode, UVH_IPI_INT, val); +- mdelay(10); +- +- val = (1UL << UVH_IPI_INT_SEND_SHFT) | +- (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | +- (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | +- APIC_DM_STARTUP; +- uv_write_global_mmr64(pnode, UVH_IPI_INT, val); +- return 0; +-} +- +-static void uv_send_IPI_one(int cpu, int vector) +-{ +- unsigned long val, apicid, lapicid; +- int pnode; +- +- apicid = per_cpu(x86_cpu_to_apicid, cpu); +- lapicid = apicid & 0x3f; /* ZZZ macro needed */ +- pnode = uv_apicid_to_pnode(apicid); +- val = +- (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << +- UVH_IPI_INT_APIC_ID_SHFT) | +- (vector << UVH_IPI_INT_VECTOR_SHFT); +- uv_write_global_mmr64(pnode, UVH_IPI_INT, val); +-} +- +-static void uv_send_IPI_mask(const struct cpumask *mask, int vector) +-{ +- unsigned int cpu; +- +- for_each_cpu(cpu, mask) +- uv_send_IPI_one(cpu, vector); +-} +- +-static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +-{ +- unsigned int cpu; +- unsigned int this_cpu = smp_processor_id(); +- +- for_each_cpu(cpu, mask) +- if (cpu != this_cpu) +- uv_send_IPI_one(cpu, vector); +-} +- +-static void uv_send_IPI_allbutself(int vector) +-{ +- unsigned int cpu; +- unsigned int this_cpu = smp_processor_id(); +- +- for_each_online_cpu(cpu) +- if (cpu != this_cpu) +- uv_send_IPI_one(cpu, vector); +-} +- +-static void uv_send_IPI_all(int vector) +-{ +- uv_send_IPI_mask(cpu_online_mask, vector); +-} +- +-static int uv_apic_id_registered(void) +-{ +- return 1; +-} +- +-static void uv_init_apic_ldr(void) +-{ +-} +- +-static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) +-{ +- int cpu; +- +- /* +- * We're using fixed IRQ delivery, can only return one phys APIC ID. +- * May as well be the first. +- */ +- cpu = cpumask_first(cpumask); +- if ((unsigned)cpu < nr_cpu_ids) +- return per_cpu(x86_cpu_to_apicid, cpu); +- else +- return BAD_APICID; +-} +- +-static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, +- const struct cpumask *andmask) +-{ +- int cpu; +- +- /* +- * We're using fixed IRQ delivery, can only return one phys APIC ID. +- * May as well be the first. +- */ +- for_each_cpu_and(cpu, cpumask, andmask) +- if (cpumask_test_cpu(cpu, cpu_online_mask)) +- break; +- if (cpu < nr_cpu_ids) +- return per_cpu(x86_cpu_to_apicid, cpu); +- return BAD_APICID; +-} +- +-static unsigned int get_apic_id(unsigned long x) +-{ +- unsigned int id; +- +- WARN_ON(preemptible() && num_online_cpus() > 1); +- id = x | __get_cpu_var(x2apic_extra_bits); +- +- return id; +-} +- +-static unsigned long set_apic_id(unsigned int id) +-{ +- unsigned long x; +- +- /* maskout x2apic_extra_bits ? 
*/ +- x = id; +- return x; +-} +- +-static unsigned int uv_read_apic_id(void) +-{ +- +- return get_apic_id(apic_read(APIC_ID)); +-} +- +-static unsigned int phys_pkg_id(int index_msb) +-{ +- return uv_read_apic_id() >> index_msb; +-} +- +-static void uv_send_IPI_self(int vector) +-{ +- apic_write(APIC_SELF_IPI, vector); +-} +- +-struct genapic apic_x2apic_uv_x = { +- .name = "UV large system", +- .acpi_madt_oem_check = uv_acpi_madt_oem_check, +- .int_delivery_mode = dest_Fixed, +- .int_dest_mode = (APIC_DEST_PHYSICAL != 0), +- .target_cpus = uv_target_cpus, +- .vector_allocation_domain = uv_vector_allocation_domain, +- .apic_id_registered = uv_apic_id_registered, +- .init_apic_ldr = uv_init_apic_ldr, +- .send_IPI_all = uv_send_IPI_all, +- .send_IPI_allbutself = uv_send_IPI_allbutself, +- .send_IPI_mask = uv_send_IPI_mask, +- .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, +- .send_IPI_self = uv_send_IPI_self, +- .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, +- .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, +- .phys_pkg_id = phys_pkg_id, +- .get_apic_id = get_apic_id, +- .set_apic_id = set_apic_id, +- .apic_id_mask = (0xFFFFFFFFu), +-}; +- +-static __cpuinit void set_x2apic_extra_bits(int pnode) +-{ +- __get_cpu_var(x2apic_extra_bits) = (pnode << 6); +-} +- +-/* +- * Called on boot cpu. +- */ +-static __init int boot_pnode_to_blade(int pnode) +-{ +- int blade; +- +- for (blade = 0; blade < uv_num_possible_blades(); blade++) +- if (pnode == uv_blade_info[blade].pnode) +- return blade; +- BUG(); +-} +- +-struct redir_addr { +- unsigned long redirect; +- unsigned long alias; +-}; +- +-#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT +- +-static __initdata struct redir_addr redir_addrs[] = { +- {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG}, +- {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG}, +- {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG}, +-}; +- +-static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) +-{ +- union uvh_si_alias0_overlay_config_u alias; +- union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect; +- int i; +- +- for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { +- alias.v = uv_read_local_mmr(redir_addrs[i].alias); +- if (alias.s.base == 0) { +- *size = (1UL << alias.s.m_alias); +- redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); +- *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; +- return; +- } +- } +- BUG(); +-} +- +-static __init void map_low_mmrs(void) +-{ +- init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); +- init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); +-} +- +-enum map_type {map_wb, map_uc}; +- +-static __init void map_high(char *id, unsigned long base, int shift, +- int max_pnode, enum map_type map_type) +-{ +- unsigned long bytes, paddr; +- +- paddr = base << shift; +- bytes = (1UL << shift) * (max_pnode + 1); +- printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, +- paddr + bytes); +- if (map_type == map_uc) +- init_extra_mapping_uc(paddr, bytes); +- else +- init_extra_mapping_wb(paddr, bytes); +- +-} +-static __init void map_gru_high(int max_pnode) +-{ +- union uvh_rh_gam_gru_overlay_config_mmr_u gru; +- int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; +- +- gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); +- if (gru.s.enable) +- map_high("GRU", gru.s.base, shift, max_pnode, map_wb); +-} +- +-static __init void map_config_high(int 
max_pnode) +-{ +- union uvh_rh_gam_cfg_overlay_config_mmr_u cfg; +- int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT; +- +- cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); +- if (cfg.s.enable) +- map_high("CONFIG", cfg.s.base, shift, max_pnode, map_uc); +-} +- +-static __init void map_mmr_high(int max_pnode) +-{ +- union uvh_rh_gam_mmr_overlay_config_mmr_u mmr; +- int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT; +- +- mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); +- if (mmr.s.enable) +- map_high("MMR", mmr.s.base, shift, max_pnode, map_uc); +-} +- +-static __init void map_mmioh_high(int max_pnode) +-{ +- union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; +- int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; +- +- mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); +- if (mmioh.s.enable) +- map_high("MMIOH", mmioh.s.base, shift, max_pnode, map_uc); +-} +- +-static __init void uv_rtc_init(void) +-{ +- long status; +- u64 ticks_per_sec; +- +- status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, +- &ticks_per_sec); +- if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) { +- printk(KERN_WARNING +- "unable to determine platform RTC clock frequency, " +- "guessing.\n"); +- /* BIOS gives wrong value for clock freq. so guess */ +- sn_rtc_cycles_per_second = 1000000000000UL / 30000UL; +- } else +- sn_rtc_cycles_per_second = ticks_per_sec; +-} +- +-/* +- * percpu heartbeat timer +- */ +-static void uv_heartbeat(unsigned long ignored) +-{ +- struct timer_list *timer = &uv_hub_info->scir.timer; +- unsigned char bits = uv_hub_info->scir.state; +- +- /* flip heartbeat bit */ +- bits ^= SCIR_CPU_HEARTBEAT; +- +- /* is this cpu idle? */ +- if (idle_cpu(raw_smp_processor_id())) +- bits &= ~SCIR_CPU_ACTIVITY; +- else +- bits |= SCIR_CPU_ACTIVITY; +- +- /* update system controller interface reg */ +- uv_set_scir_bits(bits); +- +- /* enable next timer period */ +- mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL); +-} +- +-static void __cpuinit uv_heartbeat_enable(int cpu) +-{ +- if (!uv_cpu_hub_info(cpu)->scir.enabled) { +- struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; +- +- uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); +- setup_timer(timer, uv_heartbeat, cpu); +- timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; +- add_timer_on(timer, cpu); +- uv_cpu_hub_info(cpu)->scir.enabled = 1; +- } +- +- /* check boot cpu */ +- if (!uv_cpu_hub_info(0)->scir.enabled) +- uv_heartbeat_enable(0); +-} +- +-#ifdef CONFIG_HOTPLUG_CPU +-static void __cpuinit uv_heartbeat_disable(int cpu) +-{ +- if (uv_cpu_hub_info(cpu)->scir.enabled) { +- uv_cpu_hub_info(cpu)->scir.enabled = 0; +- del_timer(&uv_cpu_hub_info(cpu)->scir.timer); +- } +- uv_set_cpu_scir_bits(cpu, 0xff); +-} +- +-/* +- * cpu hotplug notifier +- */ +-static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self, +- unsigned long action, void *hcpu) +-{ +- long cpu = (long)hcpu; +- +- switch (action) { +- case CPU_ONLINE: +- uv_heartbeat_enable(cpu); +- break; +- case CPU_DOWN_PREPARE: +- uv_heartbeat_disable(cpu); +- break; +- default: +- break; +- } +- return NOTIFY_OK; +-} +- +-static __init void uv_scir_register_cpu_notifier(void) +-{ +- hotcpu_notifier(uv_scir_cpu_notify, 0); +-} +- +-#else /* !CONFIG_HOTPLUG_CPU */ +- +-static __init void uv_scir_register_cpu_notifier(void) +-{ +-} +- +-static __init int uv_init_heartbeat(void) +-{ +- int cpu; +- +- if (is_uv_system()) +- for_each_online_cpu(cpu) +- uv_heartbeat_enable(cpu); +- return 0; +-} 
+- +-late_initcall(uv_init_heartbeat); +- +-#endif /* !CONFIG_HOTPLUG_CPU */ +- +-/* +- * Called on each cpu to initialize the per_cpu UV data area. +- * ZZZ hotplug not supported yet +- */ +-void __cpuinit uv_cpu_init(void) +-{ +- /* CPU 0 initilization will be done via uv_system_init. */ +- if (!uv_blade_info) +- return; +- +- uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; +- +- if (get_uv_system_type() == UV_NON_UNIQUE_APIC) +- set_x2apic_extra_bits(uv_hub_info->pnode); +-} +- +- +-void __init uv_system_init(void) +-{ +- union uvh_si_addr_map_config_u m_n_config; +- union uvh_node_id_u node_id; +- unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; +- int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; +- int max_pnode = 0; +- unsigned long mmr_base, present; +- +- map_low_mmrs(); +- +- m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); +- m_val = m_n_config.s.m_skt; +- n_val = m_n_config.s.n_skt; +- mmr_base = +- uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & +- ~UV_MMR_ENABLE; +- printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); +- +- for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) +- uv_possible_blades += +- hweight64(uv_read_local_mmr( UVH_NODE_PRESENT_TABLE + i * 8)); +- printk(KERN_DEBUG "UV: Found %d blades\n", uv_num_possible_blades()); +- +- bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); +- uv_blade_info = kmalloc(bytes, GFP_KERNEL); +- +- get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); +- +- bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); +- uv_node_to_blade = kmalloc(bytes, GFP_KERNEL); +- memset(uv_node_to_blade, 255, bytes); +- +- bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus(); +- uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL); +- memset(uv_cpu_to_blade, 255, bytes); +- +- blade = 0; +- for (i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) { +- present = uv_read_local_mmr(UVH_NODE_PRESENT_TABLE + i * 8); +- for (j = 0; j < 64; j++) { +- if (!test_bit(j, &present)) +- continue; +- uv_blade_info[blade].pnode = (i * 64 + j); +- uv_blade_info[blade].nr_possible_cpus = 0; +- uv_blade_info[blade].nr_online_cpus = 0; +- blade++; +- } +- } +- +- node_id.v = uv_read_local_mmr(UVH_NODE_ID); +- gnode_upper = (((unsigned long)node_id.s.node_id) & +- ~((1 << n_val) - 1)) << m_val; +- +- uv_bios_init(); +- uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, +- &sn_coherency_id, &sn_region_size); +- uv_rtc_init(); +- +- for_each_present_cpu(cpu) { +- nid = cpu_to_node(cpu); +- pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); +- blade = boot_pnode_to_blade(pnode); +- lcpu = uv_blade_info[blade].nr_possible_cpus; +- uv_blade_info[blade].nr_possible_cpus++; +- +- uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; +- uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; +- uv_cpu_hub_info(cpu)->m_val = m_val; +- uv_cpu_hub_info(cpu)->n_val = m_val; +- uv_cpu_hub_info(cpu)->numa_blade_id = blade; +- uv_cpu_hub_info(cpu)->blade_processor_id = lcpu; +- uv_cpu_hub_info(cpu)->pnode = pnode; +- uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) - 1; +- uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; +- uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; +- uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; +- uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; +- uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; +- uv_node_to_blade[nid] = blade; +- uv_cpu_to_blade[cpu] = blade; +- max_pnode = max(pnode, max_pnode); +- +- printk(KERN_DEBUG 
"UV: cpu %d, apicid 0x%x, pnode %d, nid %d, " +- "lcpu %d, blade %d\n", +- cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, +- lcpu, blade); +- } +- +- map_gru_high(max_pnode); +- map_mmr_high(max_pnode); +- map_config_high(max_pnode); +- map_mmioh_high(max_pnode); +- +- uv_cpu_init(); +- uv_scir_register_cpu_notifier(); +- proc_mkdir("sgi_uv", NULL); +-} +Index: linux-2.6-tip/arch/x86/kernel/head32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/head32.c ++++ linux-2.6-tip/arch/x86/kernel/head32.c +@@ -18,7 +18,7 @@ void __init i386_start_kernel(void) + { + reserve_trampoline_memory(); + +- reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); ++ reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); + + #ifdef CONFIG_BLK_DEV_INITRD + /* Reserve INITRD */ +@@ -29,9 +29,6 @@ void __init i386_start_kernel(void) + reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); + } + #endif +- reserve_early(init_pg_tables_start, init_pg_tables_end, +- "INIT_PG_TABLE"); +- + reserve_ebda_region(); + + /* +Index: linux-2.6-tip/arch/x86/kernel/head64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/head64.c ++++ linux-2.6-tip/arch/x86/kernel/head64.c +@@ -26,32 +26,15 @@ + #include + #include + +-/* boot cpu pda */ +-static struct x8664_pda _boot_cpu_pda; +- +-#ifdef CONFIG_SMP +-/* +- * We install an empty cpu_pda pointer table to indicate to early users +- * (numa_set_node) that the cpu_pda pointer table for cpus other than +- * the boot cpu is not yet setup. +- */ +-static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; +-#else +-static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; +-#endif +- +-void __init x86_64_init_pda(void) +-{ +- _cpu_pda = __cpu_pda; +- cpu_pda(0) = &_boot_cpu_pda; +- pda_init(0); +-} +- + static void __init zap_identity_mappings(void) + { + pgd_t *pgd = pgd_offset_k(0UL); + pgd_clear(pgd); +- __flush_tlb_all(); ++ /* ++ * preempt_disable/enable does not work this early in the ++ * bootup yet: ++ */ ++ write_cr3(read_cr3()); + } + + /* Don't add a printk in there. printk relies on the PDA which is not initialized +@@ -112,8 +95,6 @@ void __init x86_64_start_kernel(char * r + if (console_loglevel == 10) + early_printk("Kernel alive\n"); + +- x86_64_init_pda(); +- + x86_64_start_reservations(real_mode_data); + } + +@@ -123,7 +104,7 @@ void __init x86_64_start_reservations(ch + + reserve_trampoline_memory(); + +- reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); ++ reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); + + #ifdef CONFIG_BLK_DEV_INITRD + /* Reserve INITRD */ +Index: linux-2.6-tip/arch/x86/kernel/head_32.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/head_32.S ++++ linux-2.6-tip/arch/x86/kernel/head_32.S +@@ -11,14 +11,15 @@ + #include + #include + #include +-#include +-#include ++#include ++#include + #include + #include + #include + #include + #include + #include ++#include + + /* Physical address */ + #define pa(X) ((X) - __PAGE_OFFSET) +@@ -37,42 +38,40 @@ + #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id + + /* +- * This is how much memory *in addition to the memory covered up to +- * and including _end* we need mapped initially. ++ * This is how much memory in addition to the memory covered up to ++ * and including _end we need mapped initially. 
+ * We need: +- * - one bit for each possible page, but only in low memory, which means +- * 2^32/4096/8 = 128K worst case (4G/4G split.) +- * - enough space to map all low memory, which means +- * (2^32/4096) / 1024 pages (worst case, non PAE) +- * (2^32/4096) / 512 + 4 pages (worst case for PAE) +- * - a few pages for allocator use before the kernel pagetable has +- * been set up ++ * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) ++ * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) + * + * Modulo rounding, each megabyte assigned here requires a kilobyte of + * memory, which is currently unreclaimed. + * + * This should be a multiple of a page. ++ * ++ * KERNEL_IMAGE_SIZE should be greater than pa(_end) ++ * and small than max_low_pfn, otherwise will waste some page table entries + */ +-LOW_PAGES = 1<<(32-PAGE_SHIFT_asm) +- +-/* +- * To preserve the DMA pool in PAGEALLOC kernels, we'll allocate +- * pagetables from above the 16MB DMA limit, so we'll have to set +- * up pagetables 16MB more (worst-case): +- */ +-#ifdef CONFIG_DEBUG_PAGEALLOC +-LOW_PAGES = LOW_PAGES + 0x1000000 +-#endif + + #if PTRS_PER_PMD > 1 +-PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD ++#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) + #else +-PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD) ++#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) + #endif +-BOOTBITMAP_SIZE = LOW_PAGES / 8 +-ALLOCATOR_SLOP = 4 + +-INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm ++/* Enough space to fit pagetables for the low memory linear map */ ++MAPPING_BEYOND_END = \ ++ PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT ++ ++/* ++ * Worst-case size of the kernel mapping we need to make: ++ * the worst-case size of the kernel itself, plus the extra we need ++ * to map for the linear map. ++ */ ++KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT ++ ++INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm ++RESERVE_BRK(pagetables, INIT_MAP_SIZE) + + /* + * 32-bit kernel entrypoint; only used by the boot CPU. On entry, +@@ -165,10 +164,10 @@ num_subarch_entries = (. - subarch_entri + + /* + * Initialize page tables. This creates a PDE and a set of page +- * tables, which are located immediately beyond _end. The variable +- * init_pg_tables_end is set up to point to the first "safe" location. ++ * tables, which are located immediately beyond __brk_base. The variable ++ * _brk_end is set up to point to the first "safe" location. + * Mappings are created both at virtual address 0 (identity mapping) +- * and PAGE_OFFSET for up to _end+sizeof(page tables)+INIT_MAP_BEYOND_END. ++ * and PAGE_OFFSET for up to _end. + * + * Note that the stack is not yet set up! + */ +@@ -189,8 +188,7 @@ default_entry: + + xorl %ebx,%ebx /* %ebx is kept at zero */ + +- movl $pa(pg0), %edi +- movl %edi, pa(init_pg_tables_start) ++ movl $pa(__brk_base), %edi + movl $pa(swapper_pg_pmd), %edx + movl $PTE_IDENT_ATTR, %eax + 10: +@@ -208,14 +206,14 @@ default_entry: + loop 11b + + /* +- * End condition: we must map up to and including INIT_MAP_BEYOND_END +- * bytes beyond the end of our own page tables. ++ * End condition: we must map up to the end + MAPPING_BEYOND_END. 
+ */ +- leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp ++ movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp + cmpl %ebp,%eax + jb 10b + 1: +- movl %edi,pa(init_pg_tables_end) ++ addl $__PAGE_OFFSET, %edi ++ movl %edi, pa(_brk_end) + shrl $12, %eax + movl %eax, pa(max_pfn_mapped) + +@@ -226,8 +224,7 @@ default_entry: + + page_pde_offset = (__PAGE_OFFSET >> 20); + +- movl $pa(pg0), %edi +- movl %edi, pa(init_pg_tables_start) ++ movl $pa(__brk_base), %edi + movl $pa(swapper_pg_dir), %edx + movl $PTE_IDENT_ATTR, %eax + 10: +@@ -241,14 +238,13 @@ page_pde_offset = (__PAGE_OFFSET >> 20); + addl $0x1000,%eax + loop 11b + /* +- * End condition: we must map up to and including INIT_MAP_BEYOND_END +- * bytes beyond the end of our own page tables; the +0x007 is +- * the attribute bits ++ * End condition: we must map up to the end + MAPPING_BEYOND_END. + */ +- leal (INIT_MAP_BEYOND_END+PTE_IDENT_ATTR)(%edi),%ebp ++ movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp + cmpl %ebp,%eax + jb 10b +- movl %edi,pa(init_pg_tables_end) ++ addl $__PAGE_OFFSET, %edi ++ movl %edi, pa(_brk_end) + shrl $12, %eax + movl %eax, pa(max_pfn_mapped) + +@@ -429,14 +425,34 @@ is386: movl $2,%ecx # set MP + ljmp $(__KERNEL_CS),$1f + 1: movl $(__KERNEL_DS),%eax # reload all the segment registers + movl %eax,%ss # after changing gdt. +- movl %eax,%fs # gets reset once there's real percpu + + movl $(__USER_DS),%eax # DS/ES contains default USER segment + movl %eax,%ds + movl %eax,%es + +- xorl %eax,%eax # Clear GS and LDT ++ movl $(__KERNEL_PERCPU), %eax ++ movl %eax,%fs # set this cpu's percpu ++ ++#ifdef CONFIG_CC_STACKPROTECTOR ++ /* ++ * The linker can't handle this by relocation. Manually set ++ * base address in stack canary segment descriptor. ++ */ ++ cmpb $0,ready ++ jne 1f ++ movl $per_cpu__gdt_page,%eax ++ movl $per_cpu__stack_canary,%ecx ++ subl $20, %ecx ++ movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) ++ shrl $16, %ecx ++ movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) ++ movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax) ++1: ++#endif ++ movl $(__KERNEL_STACK_CANARY),%eax + movl %eax,%gs ++ ++ xorl %eax,%eax # Clear LDT + lldt %ax + + cld # gcc2 wants the direction flag cleared at all times +@@ -446,8 +462,6 @@ is386: movl $2,%ecx # set MP + movb $1, ready + cmpb $0,%cl # the first CPU calls start_kernel + je 1f +- movl $(__KERNEL_PERCPU), %eax +- movl %eax,%fs # set this cpu's percpu + movl (stack_start), %esp + 1: + #endif /* CONFIG_SMP */ +@@ -548,12 +562,8 @@ early_fault: + pushl %eax + pushl %edx /* trapno */ + pushl $fault_msg +-#ifdef CONFIG_EARLY_PRINTK +- call early_printk +-#else + call printk + #endif +-#endif + call dump_stack + hlt_loop: + hlt +@@ -580,12 +590,12 @@ ignore_int: + pushl 32(%esp) + pushl 40(%esp) + pushl $int_msg +-#ifdef CONFIG_EARLY_PRINTK +- call early_printk +-#else + call printk +-#endif ++ ++ call dump_stack ++ + addl $(5*4),%esp ++ call dump_stack + popl %ds + popl %es + popl %edx +@@ -622,6 +632,7 @@ swapper_pg_fixmap: + .fill 1024,4,0 + ENTRY(empty_zero_page) + .fill 4096,1,0 ++ + /* + * This starts the data section. 
+ */ +@@ -660,7 +671,7 @@ early_recursion_flag: + .long 0 + + int_msg: +- .asciz "Unknown interrupt or fault at EIP %p %p %p\n" ++ .asciz "Unknown interrupt or fault at: %p %p %p\n" + + fault_msg: + /* fault info: */ +Index: linux-2.6-tip/arch/x86/kernel/head_64.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/head_64.S ++++ linux-2.6-tip/arch/x86/kernel/head_64.S +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_PARAVIRT + #include +@@ -226,12 +227,15 @@ ENTRY(secondary_startup_64) + movl %eax,%fs + movl %eax,%gs + +- /* +- * Setup up a dummy PDA. this is just for some early bootup code +- * that does in_interrupt() +- */ ++ /* Set up %gs. ++ * ++ * The base of %gs always points to the bottom of the irqstack ++ * union. If the stack protector canary is enabled, it is ++ * located at %gs:40. Note that, on SMP, the boot cpu uses ++ * init data section till per cpu areas are set up. ++ */ + movl $MSR_GS_BASE,%ecx +- movq $empty_zero_page,%rax ++ movq initial_gs(%rip),%rax + movq %rax,%rdx + shrq $32,%rdx + wrmsr +@@ -257,6 +261,8 @@ ENTRY(secondary_startup_64) + .align 8 + ENTRY(initial_code) + .quad x86_64_start_kernel ++ ENTRY(initial_gs) ++ .quad INIT_PER_CPU_VAR(irq_stack_union) + __FINITDATA + + ENTRY(stack_start) +@@ -323,8 +329,6 @@ early_idt_ripmsg: + #endif /* CONFIG_EARLY_PRINTK */ + .previous + +-.balign PAGE_SIZE +- + #define NEXT_PAGE(name) \ + .balign PAGE_SIZE; \ + ENTRY(name) +@@ -401,7 +405,8 @@ NEXT_PAGE(level2_spare_pgt) + .globl early_gdt_descr + early_gdt_descr: + .word GDT_ENTRIES*8-1 +- .quad per_cpu__gdt_page ++early_gdt_descr_base: ++ .quad INIT_PER_CPU_VAR(gdt_page) + + ENTRY(phys_base) + /* This must match the first entry in level2_kernel_pgt */ +@@ -412,7 +417,7 @@ ENTRY(phys_base) + .section .bss, "aw", @nobits + .align L1_CACHE_BYTES + ENTRY(idt_table) +- .skip 256 * 16 ++ .skip IDT_ENTRIES * 16 + + .section .bss.page_aligned, "aw", @nobits + .align PAGE_SIZE +Index: linux-2.6-tip/arch/x86/kernel/hpet.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/hpet.c ++++ linux-2.6-tip/arch/x86/kernel/hpet.c +@@ -80,6 +80,7 @@ static inline void hpet_clear_mapping(vo + */ + static int boot_hpet_disable; + int hpet_force_user; ++static int hpet_verbose; + + static int __init hpet_setup(char *str) + { +@@ -88,6 +89,8 @@ static int __init hpet_setup(char *str) + boot_hpet_disable = 1; + if (!strncmp("force", str, 5)) + hpet_force_user = 1; ++ if (!strncmp("verbose", str, 7)) ++ hpet_verbose = 1; + } + return 1; + } +@@ -119,6 +122,43 @@ int is_hpet_enabled(void) + } + EXPORT_SYMBOL_GPL(is_hpet_enabled); + ++static void _hpet_print_config(const char *function, int line) ++{ ++ u32 i, timers, l, h; ++ printk(KERN_INFO "hpet: %s(%d):\n", function, line); ++ l = hpet_readl(HPET_ID); ++ h = hpet_readl(HPET_PERIOD); ++ timers = ((l & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT) + 1; ++ printk(KERN_INFO "hpet: ID: 0x%x, PERIOD: 0x%x\n", l, h); ++ l = hpet_readl(HPET_CFG); ++ h = hpet_readl(HPET_STATUS); ++ printk(KERN_INFO "hpet: CFG: 0x%x, STATUS: 0x%x\n", l, h); ++ l = hpet_readl(HPET_COUNTER); ++ h = hpet_readl(HPET_COUNTER+4); ++ printk(KERN_INFO "hpet: COUNTER_l: 0x%x, COUNTER_h: 0x%x\n", l, h); ++ ++ for (i = 0; i < timers; i++) { ++ l = hpet_readl(HPET_Tn_CFG(i)); ++ h = hpet_readl(HPET_Tn_CFG(i)+4); ++ printk(KERN_INFO "hpet: T%d: CFG_l: 0x%x, CFG_h: 0x%x\n", ++ i, l, h); ++ l = hpet_readl(HPET_Tn_CMP(i)); ++ h = 
hpet_readl(HPET_Tn_CMP(i)+4); ++ printk(KERN_INFO "hpet: T%d: CMP_l: 0x%x, CMP_h: 0x%x\n", ++ i, l, h); ++ l = hpet_readl(HPET_Tn_ROUTE(i)); ++ h = hpet_readl(HPET_Tn_ROUTE(i)+4); ++ printk(KERN_INFO "hpet: T%d ROUTE_l: 0x%x, ROUTE_h: 0x%x\n", ++ i, l, h); ++ } ++} ++ ++#define hpet_print_config() \ ++do { \ ++ if (hpet_verbose) \ ++ _hpet_print_config(__FUNCTION__, __LINE__); \ ++} while (0) ++ + /* + * When the hpet driver (/dev/hpet) is enabled, we need to reserve + * timer 0 and timer 1 in case of RTC emulation. +@@ -301,6 +341,7 @@ static void hpet_set_mode(enum clock_eve + */ + hpet_writel((unsigned long) delta, HPET_Tn_CMP(timer)); + hpet_start_counter(); ++ hpet_print_config(); + break; + + case CLOCK_EVT_MODE_ONESHOT: +@@ -327,6 +368,7 @@ static void hpet_set_mode(enum clock_eve + irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); + enable_irq(hdev->irq); + } ++ hpet_print_config(); + break; + } + } +@@ -468,7 +510,8 @@ static int hpet_setup_irq(struct hpet_de + { + + if (request_irq(dev->irq, hpet_interrupt_handler, +- IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev)) ++ IRQF_DISABLED | IRQF_NOBALANCING | IRQF_TIMER, ++ dev->name, dev)) + return -1; + + disable_irq(dev->irq); +@@ -545,6 +588,7 @@ static void hpet_msi_capability_lookup(u + + num_timers = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT); + num_timers++; /* Value read out starts from 0 */ ++ hpet_print_config(); + + hpet_devs = kzalloc(sizeof(struct hpet_dev) * num_timers, GFP_KERNEL); + if (!hpet_devs) +@@ -812,6 +856,7 @@ int __init hpet_enable(void) + * information and the number of channels + */ + id = hpet_readl(HPET_ID); ++ hpet_print_config(); + + #ifdef CONFIG_HPET_EMULATE_RTC + /* +@@ -864,6 +909,7 @@ static __init int hpet_late_init(void) + return -ENODEV; + + hpet_reserve_platform_timers(hpet_readl(HPET_ID)); ++ hpet_print_config(); + + for_each_online_cpu(cpu) { + hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); +Index: linux-2.6-tip/arch/x86/kernel/i8253.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/i8253.c ++++ linux-2.6-tip/arch/x86/kernel/i8253.c +@@ -3,19 +3,19 @@ + * + */ + #include +-#include + #include ++#include + #include + #include +-#include ++#include ++#include ++#include + +-#include +-#include + #include +-#include + #include ++#include + +-DEFINE_SPINLOCK(i8253_lock); ++DEFINE_RAW_SPINLOCK(i8253_lock); + EXPORT_SYMBOL(i8253_lock); + + #ifdef CONFIG_X86_32 +@@ -40,7 +40,7 @@ static void init_pit_timer(enum clock_ev + { + spin_lock(&i8253_lock); + +- switch(mode) { ++ switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + /* binary, mode 2, LSB/MSB, ch 0 */ + outb_pit(0x34, PIT_MODE); +@@ -95,7 +95,7 @@ static int pit_next_event(unsigned long + * registered. This mechanism replaces the previous #ifdef LOCAL_APIC - + * !using_apic_timer decisions in do_timer_interrupt_hook() + */ +-static struct clock_event_device pit_clockevent = { ++static struct clock_event_device pit_ce = { + .name = "pit", + .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, + .set_mode = init_pit_timer, +@@ -114,15 +114,13 @@ void __init setup_pit_timer(void) + * Start pit with the boot cpu mask and make it global after the + * IO_APIC has been initialized. 
+ */ +- pit_clockevent.cpumask = cpumask_of(smp_processor_id()); +- pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, +- pit_clockevent.shift); +- pit_clockevent.max_delta_ns = +- clockevent_delta2ns(0x7FFF, &pit_clockevent); +- pit_clockevent.min_delta_ns = +- clockevent_delta2ns(0xF, &pit_clockevent); +- clockevents_register_device(&pit_clockevent); +- global_clock_event = &pit_clockevent; ++ pit_ce.cpumask = cpumask_of(smp_processor_id()); ++ pit_ce.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, pit_ce.shift); ++ pit_ce.max_delta_ns = clockevent_delta2ns(0x7FFF, &pit_ce); ++ pit_ce.min_delta_ns = clockevent_delta2ns(0xF, &pit_ce); ++ ++ clockevents_register_device(&pit_ce); ++ global_clock_event = &pit_ce; + } + + #ifndef CONFIG_X86_64 +@@ -133,11 +131,11 @@ void __init setup_pit_timer(void) + */ + static cycle_t pit_read(void) + { ++ static int old_count; ++ static u32 old_jifs; + unsigned long flags; + int count; + u32 jifs; +- static int old_count; +- static u32 old_jifs; + + spin_lock_irqsave(&i8253_lock, flags); + /* +@@ -179,9 +177,9 @@ static cycle_t pit_read(void) + * Previous attempts to handle these cases intelligently were + * buggy, so we just do the simple thing now. + */ +- if (count > old_count && jifs == old_jifs) { ++ if (count > old_count && jifs == old_jifs) + count = old_count; +- } ++ + old_count = count; + old_jifs = jifs; + +@@ -192,13 +190,13 @@ static cycle_t pit_read(void) + return (cycle_t)(jifs * LATCH) + count; + } + +-static struct clocksource clocksource_pit = { +- .name = "pit", +- .rating = 110, +- .read = pit_read, +- .mask = CLOCKSOURCE_MASK(32), +- .mult = 0, +- .shift = 20, ++static struct clocksource pit_cs = { ++ .name = "pit", ++ .rating = 110, ++ .read = pit_read, ++ .mask = CLOCKSOURCE_MASK(32), ++ .mult = 0, ++ .shift = 20, + }; + + static void pit_disable_clocksource(void) +@@ -206,9 +204,9 @@ static void pit_disable_clocksource(void + /* + * Use mult to check whether it is registered or not + */ +- if (clocksource_pit.mult) { +- clocksource_unregister(&clocksource_pit); +- clocksource_pit.mult = 0; ++ if (pit_cs.mult) { ++ clocksource_unregister(&pit_cs); ++ pit_cs.mult = 0; + } + } + +@@ -222,13 +220,13 @@ static int __init init_pit_clocksource(v + * - when local APIC timer is active (PIT is switched off) + */ + if (num_possible_cpus() > 1 || is_hpet_enabled() || +- pit_clockevent.mode != CLOCK_EVT_MODE_PERIODIC) ++ pit_ce.mode != CLOCK_EVT_MODE_PERIODIC) + return 0; + +- clocksource_pit.mult = clocksource_hz2mult(CLOCK_TICK_RATE, +- clocksource_pit.shift); +- return clocksource_register(&clocksource_pit); ++ pit_cs.mult = clocksource_hz2mult(CLOCK_TICK_RATE, pit_cs.shift); ++ ++ return clocksource_register(&pit_cs); + } + arch_initcall(init_pit_clocksource); + +-#endif ++#endif /* !CONFIG_X86_64 */ +Index: linux-2.6-tip/arch/x86/kernel/i8259.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/i8259.c ++++ linux-2.6-tip/arch/x86/kernel/i8259.c +@@ -22,7 +22,6 @@ + #include + #include + #include +-#include + #include + + /* +@@ -33,8 +32,8 @@ + */ + + static int i8259A_auto_eoi; +-DEFINE_SPINLOCK(i8259A_lock); + static void mask_and_ack_8259A(unsigned int); ++DEFINE_RAW_SPINLOCK(i8259A_lock); + + struct irq_chip i8259A_chip = { + .name = "XT-PIC", +@@ -169,6 +168,8 @@ static void mask_and_ack_8259A(unsigned + */ + if (cached_irq_mask & irqmask) + goto spurious_8259A_irq; ++ if (irq & 8) ++ outb(0x60+(irq&7), PIC_SLAVE_CMD); /* 'Specific EOI' to slave */ + cached_irq_mask |= 
irqmask; + + handle_real_irq: +@@ -329,10 +330,10 @@ void init_8259A(int auto_eoi) + /* 8259A-1 (the master) has a slave on IR2 */ + outb_pic(1U << PIC_CASCADE_IR, PIC_MASTER_IMR); + +- if (auto_eoi) /* master does Auto EOI */ +- outb_pic(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); +- else /* master expects normal EOI */ +- outb_pic(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); ++ if (!auto_eoi) /* master expects normal EOI */ ++ outb_p(MASTER_ICW4_DEFAULT, PIC_MASTER_IMR); ++ else /* master does Auto EOI */ ++ outb_p(MASTER_ICW4_DEFAULT | PIC_ICW4_AEOI, PIC_MASTER_IMR); + + outb_pic(0x11, PIC_SLAVE_CMD); /* ICW1: select 8259A-2 init */ + +Index: linux-2.6-tip/arch/x86/kernel/io_apic.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/io_apic.c ++++ /dev/null +@@ -1,4182 +0,0 @@ +-/* +- * Intel IO-APIC support for multi-Pentium hosts. +- * +- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo +- * +- * Many thanks to Stig Venaas for trying out countless experimental +- * patches and reporting/debugging problems patiently! +- * +- * (c) 1999, Multiple IO-APIC support, developed by +- * Ken-ichi Yaku and +- * Hidemi Kishimoto , +- * further tested and cleaned up by Zach Brown +- * and Ingo Molnar +- * +- * Fixes +- * Maciej W. Rozycki : Bits for genuine 82489DX APICs; +- * thanks to Eric Gilmore +- * and Rolf G. Tews +- * for testing these extensively +- * Paul Diefenbaugh : Added full ACPI support +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include /* time_after() */ +-#ifdef CONFIG_ACPI +-#include +-#endif +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +- +-#define __apicdebuginit(type) static type __init +- +-/* +- * Is the SiS APIC rmw bug present ? +- * -1 = don't know, 0 = no, 1 = yes +- */ +-int sis_apic_bug = -1; +- +-static DEFINE_SPINLOCK(ioapic_lock); +-static DEFINE_SPINLOCK(vector_lock); +- +-/* +- * # of IRQ routing registers +- */ +-int nr_ioapic_registers[MAX_IO_APICS]; +- +-/* I/O APIC entries */ +-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; +-int nr_ioapics; +- +-/* MP IRQ source entries */ +-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; +- +-/* # of MP IRQ source entries */ +-int mp_irq_entries; +- +-#if defined (CONFIG_MCA) || defined (CONFIG_EISA) +-int mp_bus_id_to_type[MAX_MP_BUSSES]; +-#endif +- +-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES); +- +-int skip_ioapic_setup; +- +-static int __init parse_noapic(char *str) +-{ +- /* disable IO-APIC */ +- disable_ioapic_setup(); +- return 0; +-} +-early_param("noapic", parse_noapic); +- +-struct irq_pin_list; +- +-/* +- * This is performance-critical, we want to do it O(1) +- * +- * the indexing order of this array favors 1:1 mappings +- * between pins and IRQs. 
+- */ +- +-struct irq_pin_list { +- int apic, pin; +- struct irq_pin_list *next; +-}; +- +-static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) +-{ +- struct irq_pin_list *pin; +- int node; +- +- node = cpu_to_node(cpu); +- +- pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); +- +- return pin; +-} +- +-struct irq_cfg { +- struct irq_pin_list *irq_2_pin; +- cpumask_var_t domain; +- cpumask_var_t old_domain; +- unsigned move_cleanup_count; +- u8 vector; +- u8 move_in_progress : 1; +-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC +- u8 move_desc_pending : 1; +-#endif +-}; +- +-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ +-#ifdef CONFIG_SPARSE_IRQ +-static struct irq_cfg irq_cfgx[] = { +-#else +-static struct irq_cfg irq_cfgx[NR_IRQS] = { +-#endif +- [0] = { .vector = IRQ0_VECTOR, }, +- [1] = { .vector = IRQ1_VECTOR, }, +- [2] = { .vector = IRQ2_VECTOR, }, +- [3] = { .vector = IRQ3_VECTOR, }, +- [4] = { .vector = IRQ4_VECTOR, }, +- [5] = { .vector = IRQ5_VECTOR, }, +- [6] = { .vector = IRQ6_VECTOR, }, +- [7] = { .vector = IRQ7_VECTOR, }, +- [8] = { .vector = IRQ8_VECTOR, }, +- [9] = { .vector = IRQ9_VECTOR, }, +- [10] = { .vector = IRQ10_VECTOR, }, +- [11] = { .vector = IRQ11_VECTOR, }, +- [12] = { .vector = IRQ12_VECTOR, }, +- [13] = { .vector = IRQ13_VECTOR, }, +- [14] = { .vector = IRQ14_VECTOR, }, +- [15] = { .vector = IRQ15_VECTOR, }, +-}; +- +-int __init arch_early_irq_init(void) +-{ +- struct irq_cfg *cfg; +- struct irq_desc *desc; +- int count; +- int i; +- +- cfg = irq_cfgx; +- count = ARRAY_SIZE(irq_cfgx); +- +- for (i = 0; i < count; i++) { +- desc = irq_to_desc(i); +- desc->chip_data = &cfg[i]; +- alloc_bootmem_cpumask_var(&cfg[i].domain); +- alloc_bootmem_cpumask_var(&cfg[i].old_domain); +- if (i < NR_IRQS_LEGACY) +- cpumask_setall(cfg[i].domain); +- } +- +- return 0; +-} +- +-#ifdef CONFIG_SPARSE_IRQ +-static struct irq_cfg *irq_cfg(unsigned int irq) +-{ +- struct irq_cfg *cfg = NULL; +- struct irq_desc *desc; +- +- desc = irq_to_desc(irq); +- if (desc) +- cfg = desc->chip_data; +- +- return cfg; +-} +- +-static struct irq_cfg *get_one_free_irq_cfg(int cpu) +-{ +- struct irq_cfg *cfg; +- int node; +- +- node = cpu_to_node(cpu); +- +- cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); +- if (cfg) { +- if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { +- kfree(cfg); +- cfg = NULL; +- } else if (!alloc_cpumask_var_node(&cfg->old_domain, +- GFP_ATOMIC, node)) { +- free_cpumask_var(cfg->domain); +- kfree(cfg); +- cfg = NULL; +- } else { +- cpumask_clear(cfg->domain); +- cpumask_clear(cfg->old_domain); +- } +- } +- +- return cfg; +-} +- +-int arch_init_chip_data(struct irq_desc *desc, int cpu) +-{ +- struct irq_cfg *cfg; +- +- cfg = desc->chip_data; +- if (!cfg) { +- desc->chip_data = get_one_free_irq_cfg(cpu); +- if (!desc->chip_data) { +- printk(KERN_ERR "can not alloc irq_cfg\n"); +- BUG_ON(1); +- } +- } +- +- return 0; +-} +- +-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC +- +-static void +-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) +-{ +- struct irq_pin_list *old_entry, *head, *tail, *entry; +- +- cfg->irq_2_pin = NULL; +- old_entry = old_cfg->irq_2_pin; +- if (!old_entry) +- return; +- +- entry = get_one_free_irq_2_pin(cpu); +- if (!entry) +- return; +- +- entry->apic = old_entry->apic; +- entry->pin = old_entry->pin; +- head = entry; +- tail = entry; +- old_entry = old_entry->next; +- while (old_entry) { +- entry = get_one_free_irq_2_pin(cpu); +- if (!entry) { +- entry = head; +- while (entry) { +- head = entry->next; +- 
kfree(entry); +- entry = head; +- } +- /* still use the old one */ +- return; +- } +- entry->apic = old_entry->apic; +- entry->pin = old_entry->pin; +- tail->next = entry; +- tail = entry; +- old_entry = old_entry->next; +- } +- +- tail->next = NULL; +- cfg->irq_2_pin = head; +-} +- +-static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) +-{ +- struct irq_pin_list *entry, *next; +- +- if (old_cfg->irq_2_pin == cfg->irq_2_pin) +- return; +- +- entry = old_cfg->irq_2_pin; +- +- while (entry) { +- next = entry->next; +- kfree(entry); +- entry = next; +- } +- old_cfg->irq_2_pin = NULL; +-} +- +-void arch_init_copy_chip_data(struct irq_desc *old_desc, +- struct irq_desc *desc, int cpu) +-{ +- struct irq_cfg *cfg; +- struct irq_cfg *old_cfg; +- +- cfg = get_one_free_irq_cfg(cpu); +- +- if (!cfg) +- return; +- +- desc->chip_data = cfg; +- +- old_cfg = old_desc->chip_data; +- +- memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); +- +- init_copy_irq_2_pin(old_cfg, cfg, cpu); +-} +- +-static void free_irq_cfg(struct irq_cfg *old_cfg) +-{ +- kfree(old_cfg); +-} +- +-void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) +-{ +- struct irq_cfg *old_cfg, *cfg; +- +- old_cfg = old_desc->chip_data; +- cfg = desc->chip_data; +- +- if (old_cfg == cfg) +- return; +- +- if (old_cfg) { +- free_irq_2_pin(old_cfg, cfg); +- free_irq_cfg(old_cfg); +- old_desc->chip_data = NULL; +- } +-} +- +-static void +-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) +-{ +- struct irq_cfg *cfg = desc->chip_data; +- +- if (!cfg->move_in_progress) { +- /* it means that domain is not changed */ +- if (!cpumask_intersects(&desc->affinity, mask)) +- cfg->move_desc_pending = 1; +- } +-} +-#endif +- +-#else +-static struct irq_cfg *irq_cfg(unsigned int irq) +-{ +- return irq < nr_irqs ? irq_cfgx + irq : NULL; +-} +- +-#endif +- +-#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC +-static inline void +-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) +-{ +-} +-#endif +- +-struct io_apic { +- unsigned int index; +- unsigned int unused[3]; +- unsigned int data; +-}; +- +-static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) +-{ +- return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) +- + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); +-} +- +-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +-{ +- struct io_apic __iomem *io_apic = io_apic_base(apic); +- writel(reg, &io_apic->index); +- return readl(&io_apic->data); +-} +- +-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +-{ +- struct io_apic __iomem *io_apic = io_apic_base(apic); +- writel(reg, &io_apic->index); +- writel(value, &io_apic->data); +-} +- +-/* +- * Re-write a value: to be used for read-modify-write +- * cycles where the read already set up the index register. 
+- * +- * Older SiS APIC requires we rewrite the index register +- */ +-static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) +-{ +- struct io_apic __iomem *io_apic = io_apic_base(apic); +- +- if (sis_apic_bug) +- writel(reg, &io_apic->index); +- writel(value, &io_apic->data); +-} +- +-static bool io_apic_level_ack_pending(struct irq_cfg *cfg) +-{ +- struct irq_pin_list *entry; +- unsigned long flags; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- entry = cfg->irq_2_pin; +- for (;;) { +- unsigned int reg; +- int pin; +- +- if (!entry) +- break; +- pin = entry->pin; +- reg = io_apic_read(entry->apic, 0x10 + pin*2); +- /* Is the remote IRR bit set? */ +- if (reg & IO_APIC_REDIR_REMOTE_IRR) { +- spin_unlock_irqrestore(&ioapic_lock, flags); +- return true; +- } +- if (!entry->next) +- break; +- entry = entry->next; +- } +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- return false; +-} +- +-union entry_union { +- struct { u32 w1, w2; }; +- struct IO_APIC_route_entry entry; +-}; +- +-static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) +-{ +- union entry_union eu; +- unsigned long flags; +- spin_lock_irqsave(&ioapic_lock, flags); +- eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); +- eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- return eu.entry; +-} +- +-/* +- * When we write a new IO APIC routing entry, we need to write the high +- * word first! If the mask bit in the low word is clear, we will enable +- * the interrupt, and we need to make sure the entry is fully populated +- * before that happens. +- */ +-static void +-__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +-{ +- union entry_union eu; +- eu.entry = e; +- io_apic_write(apic, 0x11 + 2*pin, eu.w2); +- io_apic_write(apic, 0x10 + 2*pin, eu.w1); +-} +- +-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) +-{ +- unsigned long flags; +- spin_lock_irqsave(&ioapic_lock, flags); +- __ioapic_write_entry(apic, pin, e); +- spin_unlock_irqrestore(&ioapic_lock, flags); +-} +- +-/* +- * When we mask an IO APIC routing entry, we need to write the low +- * word first, in order to set the mask bit before we change the +- * high bits! 
+- */ +-static void ioapic_mask_entry(int apic, int pin) +-{ +- unsigned long flags; +- union entry_union eu = { .entry.mask = 1 }; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(apic, 0x10 + 2*pin, eu.w1); +- io_apic_write(apic, 0x11 + 2*pin, eu.w2); +- spin_unlock_irqrestore(&ioapic_lock, flags); +-} +- +-#ifdef CONFIG_SMP +-static void send_cleanup_vector(struct irq_cfg *cfg) +-{ +- cpumask_var_t cleanup_mask; +- +- if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { +- unsigned int i; +- cfg->move_cleanup_count = 0; +- for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) +- cfg->move_cleanup_count++; +- for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) +- send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); +- } else { +- cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); +- cfg->move_cleanup_count = cpumask_weight(cleanup_mask); +- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); +- free_cpumask_var(cleanup_mask); +- } +- cfg->move_in_progress = 0; +-} +- +-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) +-{ +- int apic, pin; +- struct irq_pin_list *entry; +- u8 vector = cfg->vector; +- +- entry = cfg->irq_2_pin; +- for (;;) { +- unsigned int reg; +- +- if (!entry) +- break; +- +- apic = entry->apic; +- pin = entry->pin; +-#ifdef CONFIG_INTR_REMAP +- /* +- * With interrupt-remapping, destination information comes +- * from interrupt-remapping table entry. +- */ +- if (!irq_remapped(irq)) +- io_apic_write(apic, 0x11 + pin*2, dest); +-#else +- io_apic_write(apic, 0x11 + pin*2, dest); +-#endif +- reg = io_apic_read(apic, 0x10 + pin*2); +- reg &= ~IO_APIC_REDIR_VECTOR_MASK; +- reg |= vector; +- io_apic_modify(apic, 0x10 + pin*2, reg); +- if (!entry->next) +- break; +- entry = entry->next; +- } +-} +- +-static int +-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); +- +-/* +- * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid +- * of that, or returns BAD_APICID and leaves desc->affinity untouched. +- */ +-static unsigned int +-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) +-{ +- struct irq_cfg *cfg; +- unsigned int irq; +- +- if (!cpumask_intersects(mask, cpu_online_mask)) +- return BAD_APICID; +- +- irq = desc->irq; +- cfg = desc->chip_data; +- if (assign_irq_vector(irq, cfg, mask)) +- return BAD_APICID; +- +- cpumask_and(&desc->affinity, cfg->domain, mask); +- set_extra_move_desc(desc, mask); +- return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); +-} +- +-static void +-set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) +-{ +- struct irq_cfg *cfg; +- unsigned long flags; +- unsigned int dest; +- unsigned int irq; +- +- irq = desc->irq; +- cfg = desc->chip_data; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- dest = set_desc_affinity(desc, mask); +- if (dest != BAD_APICID) { +- /* Only the high 8 bits are valid. */ +- dest = SET_APIC_LOGICAL_ID(dest); +- __target_IO_APIC_irq(irq, dest, cfg); +- } +- spin_unlock_irqrestore(&ioapic_lock, flags); +-} +- +-static void +-set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) +-{ +- struct irq_desc *desc; +- +- desc = irq_to_desc(irq); +- +- set_ioapic_affinity_irq_desc(desc, mask); +-} +-#endif /* CONFIG_SMP */ +- +-/* +- * The common case is 1:1 IRQ<->pin mappings. Sometimes there are +- * shared ISA-space IRQs, so we have to support them. We are super +- * fast in the common case, and fast for shared ISA-space IRQs. 
+- */ +-static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) +-{ +- struct irq_pin_list *entry; +- +- entry = cfg->irq_2_pin; +- if (!entry) { +- entry = get_one_free_irq_2_pin(cpu); +- if (!entry) { +- printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", +- apic, pin); +- return; +- } +- cfg->irq_2_pin = entry; +- entry->apic = apic; +- entry->pin = pin; +- return; +- } +- +- while (entry->next) { +- /* not again, please */ +- if (entry->apic == apic && entry->pin == pin) +- return; +- +- entry = entry->next; +- } +- +- entry->next = get_one_free_irq_2_pin(cpu); +- entry = entry->next; +- entry->apic = apic; +- entry->pin = pin; +-} +- +-/* +- * Reroute an IRQ to a different pin. +- */ +-static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, +- int oldapic, int oldpin, +- int newapic, int newpin) +-{ +- struct irq_pin_list *entry = cfg->irq_2_pin; +- int replaced = 0; +- +- while (entry) { +- if (entry->apic == oldapic && entry->pin == oldpin) { +- entry->apic = newapic; +- entry->pin = newpin; +- replaced = 1; +- /* every one is different, right? */ +- break; +- } +- entry = entry->next; +- } +- +- /* why? call replace before add? */ +- if (!replaced) +- add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); +-} +- +-static inline void io_apic_modify_irq(struct irq_cfg *cfg, +- int mask_and, int mask_or, +- void (*final)(struct irq_pin_list *entry)) +-{ +- int pin; +- struct irq_pin_list *entry; +- +- for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { +- unsigned int reg; +- pin = entry->pin; +- reg = io_apic_read(entry->apic, 0x10 + pin * 2); +- reg &= mask_and; +- reg |= mask_or; +- io_apic_modify(entry->apic, 0x10 + pin * 2, reg); +- if (final) +- final(entry); +- } +-} +- +-static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) +-{ +- io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); +-} +- +-#ifdef CONFIG_X86_64 +-static void io_apic_sync(struct irq_pin_list *entry) +-{ +- /* +- * Synchronize the IO-APIC and the CPU by doing +- * a dummy read from the IO-APIC +- */ +- struct io_apic __iomem *io_apic; +- io_apic = io_apic_base(entry->apic); +- readl(&io_apic->data); +-} +- +-static void __mask_IO_APIC_irq(struct irq_cfg *cfg) +-{ +- io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); +-} +-#else /* CONFIG_X86_32 */ +-static void __mask_IO_APIC_irq(struct irq_cfg *cfg) +-{ +- io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); +-} +- +-static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) +-{ +- io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, +- IO_APIC_REDIR_MASKED, NULL); +-} +- +-static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) +-{ +- io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, +- IO_APIC_REDIR_LEVEL_TRIGGER, NULL); +-} +-#endif /* CONFIG_X86_32 */ +- +-static void mask_IO_APIC_irq_desc(struct irq_desc *desc) +-{ +- struct irq_cfg *cfg = desc->chip_data; +- unsigned long flags; +- +- BUG_ON(!cfg); +- +- spin_lock_irqsave(&ioapic_lock, flags); +- __mask_IO_APIC_irq(cfg); +- spin_unlock_irqrestore(&ioapic_lock, flags); +-} +- +-static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) +-{ +- struct irq_cfg *cfg = desc->chip_data; +- unsigned long flags; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- __unmask_IO_APIC_irq(cfg); +- spin_unlock_irqrestore(&ioapic_lock, flags); +-} +- +-static void mask_IO_APIC_irq(unsigned int irq) +-{ +- struct irq_desc *desc = irq_to_desc(irq); +- +- mask_IO_APIC_irq_desc(desc); +-} +-static void unmask_IO_APIC_irq(unsigned int irq) +-{ 
+- struct irq_desc *desc = irq_to_desc(irq); +- +- unmask_IO_APIC_irq_desc(desc); +-} +- +-static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) +-{ +- struct IO_APIC_route_entry entry; +- +- /* Check delivery_mode to be sure we're not clearing an SMI pin */ +- entry = ioapic_read_entry(apic, pin); +- if (entry.delivery_mode == dest_SMI) +- return; +- /* +- * Disable it in the IO-APIC irq-routing table: +- */ +- ioapic_mask_entry(apic, pin); +-} +- +-static void clear_IO_APIC (void) +-{ +- int apic, pin; +- +- for (apic = 0; apic < nr_ioapics; apic++) +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) +- clear_IO_APIC_pin(apic, pin); +-} +- +-#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32) +-void send_IPI_self(int vector) +-{ +- unsigned int cfg; +- +- /* +- * Wait for idle. +- */ +- apic_wait_icr_idle(); +- cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL; +- /* +- * Send the IPI. The write to APIC_ICR fires this off. +- */ +- apic_write(APIC_ICR, cfg); +-} +-#endif /* !CONFIG_SMP && CONFIG_X86_32*/ +- +-#ifdef CONFIG_X86_32 +-/* +- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to +- * specific CPU-side IRQs. +- */ +- +-#define MAX_PIRQS 8 +-static int pirq_entries [MAX_PIRQS]; +-static int pirqs_enabled; +- +-static int __init ioapic_pirq_setup(char *str) +-{ +- int i, max; +- int ints[MAX_PIRQS+1]; +- +- get_options(str, ARRAY_SIZE(ints), ints); +- +- for (i = 0; i < MAX_PIRQS; i++) +- pirq_entries[i] = -1; +- +- pirqs_enabled = 1; +- apic_printk(APIC_VERBOSE, KERN_INFO +- "PIRQ redirection, working around broken MP-BIOS.\n"); +- max = MAX_PIRQS; +- if (ints[0] < MAX_PIRQS) +- max = ints[0]; +- +- for (i = 0; i < max; i++) { +- apic_printk(APIC_VERBOSE, KERN_DEBUG +- "... PIRQ%d -> IRQ %d\n", i, ints[i+1]); +- /* +- * PIRQs are mapped upside down, usually. 
+- */ +- pirq_entries[MAX_PIRQS-i-1] = ints[i+1]; +- } +- return 1; +-} +- +-__setup("pirq=", ioapic_pirq_setup); +-#endif /* CONFIG_X86_32 */ +- +-#ifdef CONFIG_INTR_REMAP +-/* I/O APIC RTE contents at the OS boot up */ +-static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; +- +-/* +- * Saves and masks all the unmasked IO-APIC RTE's +- */ +-int save_mask_IO_APIC_setup(void) +-{ +- union IO_APIC_reg_01 reg_01; +- unsigned long flags; +- int apic, pin; +- +- /* +- * The number of IO-APIC IRQ registers (== #pins): +- */ +- for (apic = 0; apic < nr_ioapics; apic++) { +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_01.raw = io_apic_read(apic, 1); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- nr_ioapic_registers[apic] = reg_01.bits.entries+1; +- } +- +- for (apic = 0; apic < nr_ioapics; apic++) { +- early_ioapic_entries[apic] = +- kzalloc(sizeof(struct IO_APIC_route_entry) * +- nr_ioapic_registers[apic], GFP_KERNEL); +- if (!early_ioapic_entries[apic]) +- goto nomem; +- } +- +- for (apic = 0; apic < nr_ioapics; apic++) +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { +- struct IO_APIC_route_entry entry; +- +- entry = early_ioapic_entries[apic][pin] = +- ioapic_read_entry(apic, pin); +- if (!entry.mask) { +- entry.mask = 1; +- ioapic_write_entry(apic, pin, entry); +- } +- } +- +- return 0; +- +-nomem: +- while (apic >= 0) +- kfree(early_ioapic_entries[apic--]); +- memset(early_ioapic_entries, 0, +- ARRAY_SIZE(early_ioapic_entries)); +- +- return -ENOMEM; +-} +- +-void restore_IO_APIC_setup(void) +-{ +- int apic, pin; +- +- for (apic = 0; apic < nr_ioapics; apic++) { +- if (!early_ioapic_entries[apic]) +- break; +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) +- ioapic_write_entry(apic, pin, +- early_ioapic_entries[apic][pin]); +- kfree(early_ioapic_entries[apic]); +- early_ioapic_entries[apic] = NULL; +- } +-} +- +-void reinit_intr_remapped_IO_APIC(int intr_remapping) +-{ +- /* +- * for now plain restore of previous settings. +- * TBD: In the case of OS enabling interrupt-remapping, +- * IO-APIC RTE's need to be setup to point to interrupt-remapping +- * table entries. for now, do a plain restore, and wait for +- * the setup_IO_APIC_irqs() to do proper initialization. +- */ +- restore_IO_APIC_setup(); +-} +-#endif +- +-/* +- * Find the IRQ entry number of a certain pin. 
+- */ +-static int find_irq_entry(int apic, int pin, int type) +-{ +- int i; +- +- for (i = 0; i < mp_irq_entries; i++) +- if (mp_irqs[i].mp_irqtype == type && +- (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || +- mp_irqs[i].mp_dstapic == MP_APIC_ALL) && +- mp_irqs[i].mp_dstirq == pin) +- return i; +- +- return -1; +-} +- +-/* +- * Find the pin to which IRQ[irq] (ISA) is connected +- */ +-static int __init find_isa_irq_pin(int irq, int type) +-{ +- int i; +- +- for (i = 0; i < mp_irq_entries; i++) { +- int lbus = mp_irqs[i].mp_srcbus; +- +- if (test_bit(lbus, mp_bus_not_pci) && +- (mp_irqs[i].mp_irqtype == type) && +- (mp_irqs[i].mp_srcbusirq == irq)) +- +- return mp_irqs[i].mp_dstirq; +- } +- return -1; +-} +- +-static int __init find_isa_irq_apic(int irq, int type) +-{ +- int i; +- +- for (i = 0; i < mp_irq_entries; i++) { +- int lbus = mp_irqs[i].mp_srcbus; +- +- if (test_bit(lbus, mp_bus_not_pci) && +- (mp_irqs[i].mp_irqtype == type) && +- (mp_irqs[i].mp_srcbusirq == irq)) +- break; +- } +- if (i < mp_irq_entries) { +- int apic; +- for(apic = 0; apic < nr_ioapics; apic++) { +- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) +- return apic; +- } +- } +- +- return -1; +-} +- +-/* +- * Find a specific PCI IRQ entry. +- * Not an __init, possibly needed by modules +- */ +-static int pin_2_irq(int idx, int apic, int pin); +- +-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) +-{ +- int apic, i, best_guess = -1; +- +- apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", +- bus, slot, pin); +- if (test_bit(bus, mp_bus_not_pci)) { +- apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); +- return -1; +- } +- for (i = 0; i < mp_irq_entries; i++) { +- int lbus = mp_irqs[i].mp_srcbus; +- +- for (apic = 0; apic < nr_ioapics; apic++) +- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || +- mp_irqs[i].mp_dstapic == MP_APIC_ALL) +- break; +- +- if (!test_bit(lbus, mp_bus_not_pci) && +- !mp_irqs[i].mp_irqtype && +- (bus == lbus) && +- (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { +- int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); +- +- if (!(apic || IO_APIC_IRQ(irq))) +- continue; +- +- if (pin == (mp_irqs[i].mp_srcbusirq & 3)) +- return irq; +- /* +- * Use the first all-but-pin matching entry as a +- * best-guess fuzzy result for broken mptables. +- */ +- if (best_guess < 0) +- best_guess = irq; +- } +- } +- return best_guess; +-} +- +-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); +- +-#if defined(CONFIG_EISA) || defined(CONFIG_MCA) +-/* +- * EISA Edge/Level control register, ELCR +- */ +-static int EISA_ELCR(unsigned int irq) +-{ +- if (irq < NR_IRQS_LEGACY) { +- unsigned int port = 0x4d0 + (irq >> 3); +- return (inb(port) >> (irq & 7)) & 1; +- } +- apic_printk(APIC_VERBOSE, KERN_INFO +- "Broken MPtable reports ISA irq %d\n", irq); +- return 0; +-} +- +-#endif +- +-/* ISA interrupts are always polarity zero edge triggered, +- * when listed as conforming in the MP table. */ +- +-#define default_ISA_trigger(idx) (0) +-#define default_ISA_polarity(idx) (0) +- +-/* EISA interrupts are always polarity zero and can be edge or level +- * trigger depending on the ELCR value. 
If an interrupt is listed as +- * EISA conforming in the MP table, that means its trigger type must +- * be read in from the ELCR */ +- +-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) +-#define default_EISA_polarity(idx) default_ISA_polarity(idx) +- +-/* PCI interrupts are always polarity one level triggered, +- * when listed as conforming in the MP table. */ +- +-#define default_PCI_trigger(idx) (1) +-#define default_PCI_polarity(idx) (1) +- +-/* MCA interrupts are always polarity zero level triggered, +- * when listed as conforming in the MP table. */ +- +-#define default_MCA_trigger(idx) (1) +-#define default_MCA_polarity(idx) default_ISA_polarity(idx) +- +-static int MPBIOS_polarity(int idx) +-{ +- int bus = mp_irqs[idx].mp_srcbus; +- int polarity; +- +- /* +- * Determine IRQ line polarity (high active or low active): +- */ +- switch (mp_irqs[idx].mp_irqflag & 3) +- { +- case 0: /* conforms, ie. bus-type dependent polarity */ +- if (test_bit(bus, mp_bus_not_pci)) +- polarity = default_ISA_polarity(idx); +- else +- polarity = default_PCI_polarity(idx); +- break; +- case 1: /* high active */ +- { +- polarity = 0; +- break; +- } +- case 2: /* reserved */ +- { +- printk(KERN_WARNING "broken BIOS!!\n"); +- polarity = 1; +- break; +- } +- case 3: /* low active */ +- { +- polarity = 1; +- break; +- } +- default: /* invalid */ +- { +- printk(KERN_WARNING "broken BIOS!!\n"); +- polarity = 1; +- break; +- } +- } +- return polarity; +-} +- +-static int MPBIOS_trigger(int idx) +-{ +- int bus = mp_irqs[idx].mp_srcbus; +- int trigger; +- +- /* +- * Determine IRQ trigger mode (edge or level sensitive): +- */ +- switch ((mp_irqs[idx].mp_irqflag>>2) & 3) +- { +- case 0: /* conforms, ie. bus-type dependent */ +- if (test_bit(bus, mp_bus_not_pci)) +- trigger = default_ISA_trigger(idx); +- else +- trigger = default_PCI_trigger(idx); +-#if defined(CONFIG_EISA) || defined(CONFIG_MCA) +- switch (mp_bus_id_to_type[bus]) { +- case MP_BUS_ISA: /* ISA pin */ +- { +- /* set before the switch */ +- break; +- } +- case MP_BUS_EISA: /* EISA pin */ +- { +- trigger = default_EISA_trigger(idx); +- break; +- } +- case MP_BUS_PCI: /* PCI pin */ +- { +- /* set before the switch */ +- break; +- } +- case MP_BUS_MCA: /* MCA pin */ +- { +- trigger = default_MCA_trigger(idx); +- break; +- } +- default: +- { +- printk(KERN_WARNING "broken BIOS!!\n"); +- trigger = 1; +- break; +- } +- } +-#endif +- break; +- case 1: /* edge */ +- { +- trigger = 0; +- break; +- } +- case 2: /* reserved */ +- { +- printk(KERN_WARNING "broken BIOS!!\n"); +- trigger = 1; +- break; +- } +- case 3: /* level */ +- { +- trigger = 1; +- break; +- } +- default: /* invalid */ +- { +- printk(KERN_WARNING "broken BIOS!!\n"); +- trigger = 0; +- break; +- } +- } +- return trigger; +-} +- +-static inline int irq_polarity(int idx) +-{ +- return MPBIOS_polarity(idx); +-} +- +-static inline int irq_trigger(int idx) +-{ +- return MPBIOS_trigger(idx); +-} +- +-int (*ioapic_renumber_irq)(int ioapic, int irq); +-static int pin_2_irq(int idx, int apic, int pin) +-{ +- int irq, i; +- int bus = mp_irqs[idx].mp_srcbus; +- +- /* +- * Debugging check, we are in big trouble if this message pops up! 
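MPBIOS_polarity()/MPBIOS_trigger() above decode the low four bits of mp_irqflag: bits 0-1 select polarity (0 = bus default, 1 = active high, 3 = active low) and bits 2-3 select trigger (0 = bus default, 1 = edge, 3 = level), with the bus defaults being edge/active-high for ISA and level/active-low for PCI. A compact standalone restatement of that decode, using the same encoding as the code above (1 = active low / level, 0 = active high / edge):

#include <stdbool.h>
#include <stdio.h>

static void decode_irqflag(unsigned irqflag, bool bus_is_pci,
                           int *polarity, int *trigger)
{
    switch (irqflag & 3) {              /* bits 0-1: polarity */
    case 0:  *polarity = bus_is_pci ? 1 : 0; break;  /* bus default */
    case 1:  *polarity = 0; break;                   /* active high */
    case 3:  *polarity = 1; break;                   /* active low */
    default: *polarity = 1; break;                   /* reserved */
    }
    switch ((irqflag >> 2) & 3) {       /* bits 2-3: trigger */
    case 0:  *trigger = bus_is_pci ? 1 : 0; break;   /* bus default */
    case 1:  *trigger = 0; break;                    /* edge */
    case 3:  *trigger = 1; break;                    /* level */
    default: *trigger = 1; break;                    /* reserved */
    }
}

int main(void)
{
    int pol, trig;

    decode_irqflag(0x0, true, &pol, &trig);   /* "conforms" on a PCI bus */
    printf("PCI default: polarity=%d trigger=%d\n", pol, trig);   /* 1 1 */
    return 0;
}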
+- */ +- if (mp_irqs[idx].mp_dstirq != pin) +- printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); +- +- if (test_bit(bus, mp_bus_not_pci)) { +- irq = mp_irqs[idx].mp_srcbusirq; +- } else { +- /* +- * PCI IRQs are mapped in order +- */ +- i = irq = 0; +- while (i < apic) +- irq += nr_ioapic_registers[i++]; +- irq += pin; +- /* +- * For MPS mode, so far only needed by ES7000 platform +- */ +- if (ioapic_renumber_irq) +- irq = ioapic_renumber_irq(apic, irq); +- } +- +-#ifdef CONFIG_X86_32 +- /* +- * PCI IRQ command line redirection. Yes, limits are hardcoded. +- */ +- if ((pin >= 16) && (pin <= 23)) { +- if (pirq_entries[pin-16] != -1) { +- if (!pirq_entries[pin-16]) { +- apic_printk(APIC_VERBOSE, KERN_DEBUG +- "disabling PIRQ%d\n", pin-16); +- } else { +- irq = pirq_entries[pin-16]; +- apic_printk(APIC_VERBOSE, KERN_DEBUG +- "using PIRQ%d -> IRQ %d\n", +- pin-16, irq); +- } +- } +- } +-#endif +- +- return irq; +-} +- +-void lock_vector_lock(void) +-{ +- /* Used to the online set of cpus does not change +- * during assign_irq_vector. +- */ +- spin_lock(&vector_lock); +-} +- +-void unlock_vector_lock(void) +-{ +- spin_unlock(&vector_lock); +-} +- +-static int +-__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +-{ +- /* +- * NOTE! The local APIC isn't very good at handling +- * multiple interrupts at the same interrupt level. +- * As the interrupt level is determined by taking the +- * vector number and shifting that right by 4, we +- * want to spread these out a bit so that they don't +- * all fall in the same interrupt level. +- * +- * Also, we've got to be careful not to trash gate +- * 0x80, because int 0x80 is hm, kind of importantish. ;) +- */ +- static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; +- unsigned int old_vector; +- int cpu, err; +- cpumask_var_t tmp_mask; +- +- if ((cfg->move_in_progress) || cfg->move_cleanup_count) +- return -EBUSY; +- +- if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) +- return -ENOMEM; +- +- old_vector = cfg->vector; +- if (old_vector) { +- cpumask_and(tmp_mask, mask, cpu_online_mask); +- cpumask_and(tmp_mask, cfg->domain, tmp_mask); +- if (!cpumask_empty(tmp_mask)) { +- free_cpumask_var(tmp_mask); +- return 0; +- } +- } +- +- /* Only try and allocate irqs on cpus that are present */ +- err = -ENOSPC; +- for_each_cpu_and(cpu, mask, cpu_online_mask) { +- int new_cpu; +- int vector, offset; +- +- vector_allocation_domain(cpu, tmp_mask); +- +- vector = current_vector; +- offset = current_offset; +-next: +- vector += 8; +- if (vector >= first_system_vector) { +- /* If out of vectors on large boxen, must share them. */ +- offset = (offset + 1) % 8; +- vector = FIRST_DEVICE_VECTOR + offset; +- } +- if (unlikely(current_vector == vector)) +- continue; +- +- if (test_bit(vector, used_vectors)) +- goto next; +- +- for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) +- if (per_cpu(vector_irq, new_cpu)[vector] != -1) +- goto next; +- /* Found one! 
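The search in __assign_irq_vector() above steps through candidate vectors eight at a time so consecutive IRQs land in different interrupt priority levels (the level is the vector shifted right by four), and wraps back to the start with a rotating offset when it runs off the end. A standalone sketch of just that search over a boolean in-use table (the start vector, table size and stride are illustrative constants, not the kernel's; the termination counter replaces the kernel's own loop-exit checks):

#include <stdbool.h>
#include <stdio.h>

#define FIRST_VECTOR    0x20    /* illustrative, not the kernel's value */
#define NR_VECTORS      256
#define STRIDE          8       /* spreads vectors across priority levels */

static bool used[NR_VECTORS];

/* Returns an unused vector, or -1 if the table is exhausted. */
static int alloc_vector(void)
{
    static int current_vector = FIRST_VECTOR, current_offset = 0;
    int vector = current_vector;
    int offset = current_offset;
    int tried = 0;

    while (tried++ < NR_VECTORS) {
        vector += STRIDE;
        if (vector >= NR_VECTORS) {
            /* out of room: rotate the offset and wrap around */
            offset = (offset + 1) % STRIDE;
            vector = FIRST_VECTOR + offset;
        }
        if (used[vector])
            continue;
        used[vector] = true;
        current_vector = vector;
        current_offset = offset;
        return vector;
    }
    return -1;
}

int main(void)
{
    int a = alloc_vector();
    int b = alloc_vector();

    printf("first two vectors: 0x%x 0x%x\n", a, b);   /* 0x28 0x30 */
    return 0;
}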
*/ +- current_vector = vector; +- current_offset = offset; +- if (old_vector) { +- cfg->move_in_progress = 1; +- cpumask_copy(cfg->old_domain, cfg->domain); +- } +- for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) +- per_cpu(vector_irq, new_cpu)[vector] = irq; +- cfg->vector = vector; +- cpumask_copy(cfg->domain, tmp_mask); +- err = 0; +- break; +- } +- free_cpumask_var(tmp_mask); +- return err; +-} +- +-static int +-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +-{ +- int err; +- unsigned long flags; +- +- spin_lock_irqsave(&vector_lock, flags); +- err = __assign_irq_vector(irq, cfg, mask); +- spin_unlock_irqrestore(&vector_lock, flags); +- return err; +-} +- +-static void __clear_irq_vector(int irq, struct irq_cfg *cfg) +-{ +- int cpu, vector; +- +- BUG_ON(!cfg->vector); +- +- vector = cfg->vector; +- for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) +- per_cpu(vector_irq, cpu)[vector] = -1; +- +- cfg->vector = 0; +- cpumask_clear(cfg->domain); +- +- if (likely(!cfg->move_in_progress)) +- return; +- for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { +- for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; +- vector++) { +- if (per_cpu(vector_irq, cpu)[vector] != irq) +- continue; +- per_cpu(vector_irq, cpu)[vector] = -1; +- break; +- } +- } +- cfg->move_in_progress = 0; +-} +- +-void __setup_vector_irq(int cpu) +-{ +- /* Initialize vector_irq on a new cpu */ +- /* This function must be called with vector_lock held */ +- int irq, vector; +- struct irq_cfg *cfg; +- struct irq_desc *desc; +- +- /* Mark the inuse vectors */ +- for_each_irq_desc(irq, desc) { +- cfg = desc->chip_data; +- if (!cpumask_test_cpu(cpu, cfg->domain)) +- continue; +- vector = cfg->vector; +- per_cpu(vector_irq, cpu)[vector] = irq; +- } +- /* Mark the free vectors */ +- for (vector = 0; vector < NR_VECTORS; ++vector) { +- irq = per_cpu(vector_irq, cpu)[vector]; +- if (irq < 0) +- continue; +- +- cfg = irq_cfg(irq); +- if (!cpumask_test_cpu(cpu, cfg->domain)) +- per_cpu(vector_irq, cpu)[vector] = -1; +- } +-} +- +-static struct irq_chip ioapic_chip; +-#ifdef CONFIG_INTR_REMAP +-static struct irq_chip ir_ioapic_chip; +-#endif +- +-#define IOAPIC_AUTO -1 +-#define IOAPIC_EDGE 0 +-#define IOAPIC_LEVEL 1 +- +-#ifdef CONFIG_X86_32 +-static inline int IO_APIC_irq_trigger(int irq) +-{ +- int apic, idx, pin; +- +- for (apic = 0; apic < nr_ioapics; apic++) { +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { +- idx = find_irq_entry(apic, pin, mp_INT); +- if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin))) +- return irq_trigger(idx); +- } +- } +- /* +- * nonexistent IRQs are edge default +- */ +- return 0; +-} +-#else +-static inline int IO_APIC_irq_trigger(int irq) +-{ +- return 1; +-} +-#endif +- +-static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) +-{ +- +- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || +- trigger == IOAPIC_LEVEL) +- desc->status |= IRQ_LEVEL; +- else +- desc->status &= ~IRQ_LEVEL; +- +-#ifdef CONFIG_INTR_REMAP +- if (irq_remapped(irq)) { +- desc->status |= IRQ_MOVE_PCNTXT; +- if (trigger) +- set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, +- handle_fasteoi_irq, +- "fasteoi"); +- else +- set_irq_chip_and_handler_name(irq, &ir_ioapic_chip, +- handle_edge_irq, "edge"); +- return; +- } +-#endif +- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || +- trigger == IOAPIC_LEVEL) +- set_irq_chip_and_handler_name(irq, &ioapic_chip, +- handle_fasteoi_irq, +- "fasteoi"); +- else +- 
set_irq_chip_and_handler_name(irq, &ioapic_chip, +- handle_edge_irq, "edge"); +-} +- +-static int setup_ioapic_entry(int apic, int irq, +- struct IO_APIC_route_entry *entry, +- unsigned int destination, int trigger, +- int polarity, int vector) +-{ +- /* +- * add it to the IO-APIC irq-routing table: +- */ +- memset(entry,0,sizeof(*entry)); +- +-#ifdef CONFIG_INTR_REMAP +- if (intr_remapping_enabled) { +- struct intel_iommu *iommu = map_ioapic_to_ir(apic); +- struct irte irte; +- struct IR_IO_APIC_route_entry *ir_entry = +- (struct IR_IO_APIC_route_entry *) entry; +- int index; +- +- if (!iommu) +- panic("No mapping iommu for ioapic %d\n", apic); +- +- index = alloc_irte(iommu, irq, 1); +- if (index < 0) +- panic("Failed to allocate IRTE for ioapic %d\n", apic); +- +- memset(&irte, 0, sizeof(irte)); +- +- irte.present = 1; +- irte.dst_mode = INT_DEST_MODE; +- irte.trigger_mode = trigger; +- irte.dlvry_mode = INT_DELIVERY_MODE; +- irte.vector = vector; +- irte.dest_id = IRTE_DEST(destination); +- +- modify_irte(irq, &irte); +- +- ir_entry->index2 = (index >> 15) & 0x1; +- ir_entry->zero = 0; +- ir_entry->format = 1; +- ir_entry->index = (index & 0x7fff); +- } else +-#endif +- { +- entry->delivery_mode = INT_DELIVERY_MODE; +- entry->dest_mode = INT_DEST_MODE; +- entry->dest = destination; +- } +- +- entry->mask = 0; /* enable IRQ */ +- entry->trigger = trigger; +- entry->polarity = polarity; +- entry->vector = vector; +- +- /* Mask level triggered irqs. +- * Use IRQ_DELAYED_DISABLE for edge triggered irqs. +- */ +- if (trigger) +- entry->mask = 1; +- return 0; +-} +- +-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, +- int trigger, int polarity) +-{ +- struct irq_cfg *cfg; +- struct IO_APIC_route_entry entry; +- unsigned int dest; +- +- if (!IO_APIC_IRQ(irq)) +- return; +- +- cfg = desc->chip_data; +- +- if (assign_irq_vector(irq, cfg, TARGET_CPUS)) +- return; +- +- dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); +- +- apic_printk(APIC_VERBOSE,KERN_DEBUG +- "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " +- "IRQ %d Mode:%i Active:%i)\n", +- apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, +- irq, trigger, polarity); +- +- +- if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, +- dest, trigger, polarity, cfg->vector)) { +- printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", +- mp_ioapics[apic].mp_apicid, pin); +- __clear_irq_vector(irq, cfg); +- return; +- } +- +- ioapic_register_intr(irq, desc, trigger); +- if (irq < NR_IRQS_LEGACY) +- disable_8259A_irq(irq); +- +- ioapic_write_entry(apic, pin, entry); +-} +- +-static void __init setup_IO_APIC_irqs(void) +-{ +- int apic, pin, idx, irq; +- int notcon = 0; +- struct irq_desc *desc; +- struct irq_cfg *cfg; +- int cpu = boot_cpu_id; +- +- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); +- +- for (apic = 0; apic < nr_ioapics; apic++) { +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { +- +- idx = find_irq_entry(apic, pin, mp_INT); +- if (idx == -1) { +- if (!notcon) { +- notcon = 1; +- apic_printk(APIC_VERBOSE, +- KERN_DEBUG " %d-%d", +- mp_ioapics[apic].mp_apicid, +- pin); +- } else +- apic_printk(APIC_VERBOSE, " %d-%d", +- mp_ioapics[apic].mp_apicid, +- pin); +- continue; +- } +- if (notcon) { +- apic_printk(APIC_VERBOSE, +- " (apicid-pin) not connected\n"); +- notcon = 0; +- } +- +- irq = pin_2_irq(idx, apic, pin); +-#ifdef CONFIG_X86_32 +- if (multi_timer_check(apic, irq)) +- continue; +-#endif +- desc = irq_to_desc_alloc_cpu(irq, cpu); +- if 
(!desc) { +- printk(KERN_INFO "can not get irq_desc for %d\n", irq); +- continue; +- } +- cfg = desc->chip_data; +- add_pin_to_irq_cpu(cfg, cpu, apic, pin); +- +- setup_IO_APIC_irq(apic, pin, irq, desc, +- irq_trigger(idx), irq_polarity(idx)); +- } +- } +- +- if (notcon) +- apic_printk(APIC_VERBOSE, +- " (apicid-pin) not connected\n"); +-} +- +-/* +- * Set up the timer pin, possibly with the 8259A-master behind. +- */ +-static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, +- int vector) +-{ +- struct IO_APIC_route_entry entry; +- +-#ifdef CONFIG_INTR_REMAP +- if (intr_remapping_enabled) +- return; +-#endif +- +- memset(&entry, 0, sizeof(entry)); +- +- /* +- * We use logical delivery to get the timer IRQ +- * to the first CPU. +- */ +- entry.dest_mode = INT_DEST_MODE; +- entry.mask = 1; /* mask IRQ now */ +- entry.dest = cpu_mask_to_apicid(TARGET_CPUS); +- entry.delivery_mode = INT_DELIVERY_MODE; +- entry.polarity = 0; +- entry.trigger = 0; +- entry.vector = vector; +- +- /* +- * The timer IRQ doesn't have to know that behind the +- * scene we may have a 8259A-master in AEOI mode ... +- */ +- set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); +- +- /* +- * Add it to the IO-APIC irq-routing table: +- */ +- ioapic_write_entry(apic, pin, entry); +-} +- +- +-__apicdebuginit(void) print_IO_APIC(void) +-{ +- int apic, i; +- union IO_APIC_reg_00 reg_00; +- union IO_APIC_reg_01 reg_01; +- union IO_APIC_reg_02 reg_02; +- union IO_APIC_reg_03 reg_03; +- unsigned long flags; +- struct irq_cfg *cfg; +- struct irq_desc *desc; +- unsigned int irq; +- +- if (apic_verbosity == APIC_QUIET) +- return; +- +- printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); +- for (i = 0; i < nr_ioapics; i++) +- printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", +- mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); +- +- /* +- * We are a bit conservative about what we expect. We have to +- * know about every hardware change ASAP. +- */ +- printk(KERN_INFO "testing the IO APIC.......................\n"); +- +- for (apic = 0; apic < nr_ioapics; apic++) { +- +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_00.raw = io_apic_read(apic, 0); +- reg_01.raw = io_apic_read(apic, 1); +- if (reg_01.bits.version >= 0x10) +- reg_02.raw = io_apic_read(apic, 2); +- if (reg_01.bits.version >= 0x20) +- reg_03.raw = io_apic_read(apic, 3); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- printk("\n"); +- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); +- printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); +- printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); +- printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); +- printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); +- +- printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)®_01); +- printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); +- +- printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); +- printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); +- +- /* +- * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, +- * but the value of reg_02 is read as the previous read register +- * value, so ignore it if reg_02 == reg_01. +- */ +- if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { +- printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); +- printk(KERN_DEBUG "....... 
: arbitration: %02X\n", reg_02.bits.arbitration); +- } +- +- /* +- * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02 +- * or reg_03, but the value of reg_0[23] is read as the previous read +- * register value, so ignore it if reg_03 == reg_0[12]. +- */ +- if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw && +- reg_03.raw != reg_01.raw) { +- printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); +- printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); +- } +- +- printk(KERN_DEBUG ".... IRQ redirection table:\n"); +- +- printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" +- " Stat Dmod Deli Vect: \n"); +- +- for (i = 0; i <= reg_01.bits.entries; i++) { +- struct IO_APIC_route_entry entry; +- +- entry = ioapic_read_entry(apic, i); +- +- printk(KERN_DEBUG " %02x %03X ", +- i, +- entry.dest +- ); +- +- printk("%1d %1d %1d %1d %1d %1d %1d %02X\n", +- entry.mask, +- entry.trigger, +- entry.irr, +- entry.polarity, +- entry.delivery_status, +- entry.dest_mode, +- entry.delivery_mode, +- entry.vector +- ); +- } +- } +- printk(KERN_DEBUG "IRQ to pin mappings:\n"); +- for_each_irq_desc(irq, desc) { +- struct irq_pin_list *entry; +- +- cfg = desc->chip_data; +- entry = cfg->irq_2_pin; +- if (!entry) +- continue; +- printk(KERN_DEBUG "IRQ%d ", irq); +- for (;;) { +- printk("-> %d:%d", entry->apic, entry->pin); +- if (!entry->next) +- break; +- entry = entry->next; +- } +- printk("\n"); +- } +- +- printk(KERN_INFO ".................................... done.\n"); +- +- return; +-} +- +-__apicdebuginit(void) print_APIC_bitfield(int base) +-{ +- unsigned int v; +- int i, j; +- +- if (apic_verbosity == APIC_QUIET) +- return; +- +- printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG); +- for (i = 0; i < 8; i++) { +- v = apic_read(base + i*0x10); +- for (j = 0; j < 32; j++) { +- if (v & (1< 3) /* Due to the Pentium erratum 3AP. */ +- apic_write(APIC_ESR, 0); +- +- v = apic_read(APIC_ESR); +- printk(KERN_DEBUG "... APIC ESR: %08x\n", v); +- } +- +- icr = apic_icr_read(); +- printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr); +- printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32)); +- +- v = apic_read(APIC_LVTT); +- printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); +- +- if (maxlvt > 3) { /* PC is LVT#4. */ +- v = apic_read(APIC_LVTPC); +- printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v); +- } +- v = apic_read(APIC_LVT0); +- printk(KERN_DEBUG "... APIC LVT0: %08x\n", v); +- v = apic_read(APIC_LVT1); +- printk(KERN_DEBUG "... APIC LVT1: %08x\n", v); +- +- if (maxlvt > 2) { /* ERR is LVT#3. */ +- v = apic_read(APIC_LVTERR); +- printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v); +- } +- +- v = apic_read(APIC_TMICT); +- printk(KERN_DEBUG "... APIC TMICT: %08x\n", v); +- v = apic_read(APIC_TMCCT); +- printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); +- v = apic_read(APIC_TDCR); +- printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); +- printk("\n"); +-} +- +-__apicdebuginit(void) print_all_local_APICs(void) +-{ +- int cpu; +- +- preempt_disable(); +- for_each_online_cpu(cpu) +- smp_call_function_single(cpu, print_local_APIC, NULL, 1); +- preempt_enable(); +-} +- +-__apicdebuginit(void) print_PIC(void) +-{ +- unsigned int v; +- unsigned long flags; +- +- if (apic_verbosity == APIC_QUIET) +- return; +- +- printk(KERN_DEBUG "\nprinting PIC contents\n"); +- +- spin_lock_irqsave(&i8259A_lock, flags); +- +- v = inb(0xa1) << 8 | inb(0x21); +- printk(KERN_DEBUG "... PIC IMR: %04x\n", v); +- +- v = inb(0xa0) << 8 | inb(0x20); +- printk(KERN_DEBUG "... 
PIC IRR: %04x\n", v); +- +- outb(0x0b,0xa0); +- outb(0x0b,0x20); +- v = inb(0xa0) << 8 | inb(0x20); +- outb(0x0a,0xa0); +- outb(0x0a,0x20); +- +- spin_unlock_irqrestore(&i8259A_lock, flags); +- +- printk(KERN_DEBUG "... PIC ISR: %04x\n", v); +- +- v = inb(0x4d1) << 8 | inb(0x4d0); +- printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); +-} +- +-__apicdebuginit(int) print_all_ICs(void) +-{ +- print_PIC(); +- print_all_local_APICs(); +- print_IO_APIC(); +- +- return 0; +-} +- +-fs_initcall(print_all_ICs); +- +- +-/* Where if anywhere is the i8259 connect in external int mode */ +-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; +- +-void __init enable_IO_APIC(void) +-{ +- union IO_APIC_reg_01 reg_01; +- int i8259_apic, i8259_pin; +- int apic; +- unsigned long flags; +- +-#ifdef CONFIG_X86_32 +- int i; +- if (!pirqs_enabled) +- for (i = 0; i < MAX_PIRQS; i++) +- pirq_entries[i] = -1; +-#endif +- +- /* +- * The number of IO-APIC IRQ registers (== #pins): +- */ +- for (apic = 0; apic < nr_ioapics; apic++) { +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_01.raw = io_apic_read(apic, 1); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- nr_ioapic_registers[apic] = reg_01.bits.entries+1; +- } +- for(apic = 0; apic < nr_ioapics; apic++) { +- int pin; +- /* See if any of the pins is in ExtINT mode */ +- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { +- struct IO_APIC_route_entry entry; +- entry = ioapic_read_entry(apic, pin); +- +- /* If the interrupt line is enabled and in ExtInt mode +- * I have found the pin where the i8259 is connected. +- */ +- if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { +- ioapic_i8259.apic = apic; +- ioapic_i8259.pin = pin; +- goto found_i8259; +- } +- } +- } +- found_i8259: +- /* Look to see what if the MP table has reported the ExtINT */ +- /* If we could not find the appropriate pin by looking at the ioapic +- * the i8259 probably is not connected the ioapic but give the +- * mptable a chance anyway. +- */ +- i8259_pin = find_isa_irq_pin(0, mp_ExtINT); +- i8259_apic = find_isa_irq_apic(0, mp_ExtINT); +- /* Trust the MP table if nothing is setup in the hardware */ +- if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { +- printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); +- ioapic_i8259.pin = i8259_pin; +- ioapic_i8259.apic = i8259_apic; +- } +- /* Complain if the MP table and the hardware disagree */ +- if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && +- (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) +- { +- printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); +- } +- +- /* +- * Do not trust the IO-APIC being empty at bootup +- */ +- clear_IO_APIC(); +-} +- +-/* +- * Not an __init, needed by the reboot code +- */ +-void disable_IO_APIC(void) +-{ +- /* +- * Clear the IO-APIC before rebooting: +- */ +- clear_IO_APIC(); +- +- /* +- * If the i8259 is routed through an IOAPIC +- * Put that IOAPIC in virtual wire mode +- * so legacy interrupts can be delivered. 
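enable_IO_APIC() above discovers where the legacy 8259A hangs off an IO-APIC by scanning every pin for an entry that is both unmasked and in ExtINT delivery mode, and only falls back to the MP table if the scan finds nothing. A small standalone sketch of that scan over mock routing entries (struct layout, sizes and the ExtINT constant are placeholders):

#include <stdio.h>

#define DELIVERY_EXTINT 7   /* placeholder for dest_ExtINT */
#define NR_APICS        2
#define NR_PINS         24

struct mock_rte { int mask; int delivery_mode; };

static struct mock_rte rte[NR_APICS][NR_PINS];

/* Find the first unmasked ExtINT pin; -1/-1 if the 8259A is not wired up. */
static void find_i8259_pin(int *apic_out, int *pin_out)
{
    int apic, pin;

    *apic_out = *pin_out = -1;
    for (apic = 0; apic < NR_APICS; apic++) {
        for (pin = 0; pin < NR_PINS; pin++) {
            if (rte[apic][pin].mask == 0 &&
                rte[apic][pin].delivery_mode == DELIVERY_EXTINT) {
                *apic_out = apic;
                *pin_out = pin;
                return;
            }
        }
    }
}

int main(void)
{
    int apic, pin;

    rte[0][0].delivery_mode = DELIVERY_EXTINT;  /* pretend pin 0.0 is ExtINT */
    find_i8259_pin(&apic, &pin);
    printf("i8259 at apic %d pin %d\n", apic, pin);
    return 0;
}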
+- */ +- if (ioapic_i8259.pin != -1) { +- struct IO_APIC_route_entry entry; +- +- memset(&entry, 0, sizeof(entry)); +- entry.mask = 0; /* Enabled */ +- entry.trigger = 0; /* Edge */ +- entry.irr = 0; +- entry.polarity = 0; /* High */ +- entry.delivery_status = 0; +- entry.dest_mode = 0; /* Physical */ +- entry.delivery_mode = dest_ExtINT; /* ExtInt */ +- entry.vector = 0; +- entry.dest = read_apic_id(); +- +- /* +- * Add it to the IO-APIC irq-routing table: +- */ +- ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); +- } +- +- disconnect_bsp_APIC(ioapic_i8259.pin != -1); +-} +- +-#ifdef CONFIG_X86_32 +-/* +- * function to set the IO-APIC physical IDs based on the +- * values stored in the MPC table. +- * +- * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 +- */ +- +-static void __init setup_ioapic_ids_from_mpc(void) +-{ +- union IO_APIC_reg_00 reg_00; +- physid_mask_t phys_id_present_map; +- int apic; +- int i; +- unsigned char old_id; +- unsigned long flags; +- +- if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) +- return; +- +- /* +- * Don't check I/O APIC IDs for xAPIC systems. They have +- * no meaning without the serial APIC bus. +- */ +- if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) +- || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) +- return; +- /* +- * This is broken; anything with a real cpu count has to +- * circumvent this idiocy regardless. +- */ +- phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); +- +- /* +- * Set the IOAPIC ID to the value stored in the MPC table. +- */ +- for (apic = 0; apic < nr_ioapics; apic++) { +- +- /* Read the register 0 value */ +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_00.raw = io_apic_read(apic, 0); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- old_id = mp_ioapics[apic].mp_apicid; +- +- if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { +- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", +- apic, mp_ioapics[apic].mp_apicid); +- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", +- reg_00.bits.ID); +- mp_ioapics[apic].mp_apicid = reg_00.bits.ID; +- } +- +- /* +- * Sanity check, is the ID really free? Every APIC in a +- * system must have a unique ID or we get lots of nice +- * 'stuck on smp_invalidate_needed IPI wait' messages. +- */ +- if (check_apicid_used(phys_id_present_map, +- mp_ioapics[apic].mp_apicid)) { +- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", +- apic, mp_ioapics[apic].mp_apicid); +- for (i = 0; i < get_physical_broadcast(); i++) +- if (!physid_isset(i, phys_id_present_map)) +- break; +- if (i >= get_physical_broadcast()) +- panic("Max APIC ID exceeded!\n"); +- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", +- i); +- physid_set(i, phys_id_present_map); +- mp_ioapics[apic].mp_apicid = i; +- } else { +- physid_mask_t tmp; +- tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); +- apic_printk(APIC_VERBOSE, "Setting %d in the " +- "phys_id_present_map\n", +- mp_ioapics[apic].mp_apicid); +- physids_or(phys_id_present_map, phys_id_present_map, tmp); +- } +- +- +- /* +- * We need to adjust the IRQ routing table +- * if the ID changed. +- */ +- if (old_id != mp_ioapics[apic].mp_apicid) +- for (i = 0; i < mp_irq_entries; i++) +- if (mp_irqs[i].mp_dstapic == old_id) +- mp_irqs[i].mp_dstapic +- = mp_ioapics[apic].mp_apicid; +- +- /* +- * Read the right value from the MPC table and +- * write it into the ID register. 
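When setup_ioapic_ids_from_mpc() above finds that an IO-APIC's ID from the MPC table is already taken, it scans the physical-ID present map for the first clear bit and claims that ID instead. The same first-free-bit search as a standalone sketch over a plain bitmask (the 15-ID limit is an assumption for the example, standing in for get_physical_broadcast()):

#include <stdio.h>

#define MAX_ID  15  /* assumed broadcast limit for the sketch */

/* Return the lowest ID not set in *map and mark it used, or -1 if full. */
static int claim_free_id(unsigned int *map)
{
    int id;

    for (id = 0; id < MAX_ID; id++) {
        if (!(*map & (1u << id))) {
            *map |= 1u << id;
            return id;
        }
    }
    return -1;  /* the "Max APIC ID exceeded" case in the code above */
}

int main(void)
{
    unsigned int present = 0x0b;    /* IDs 0, 1 and 3 already in use */

    printf("fixed up to %d\n", claim_free_id(&present));   /* prints 2 */
    return 0;
}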
+- */ +- apic_printk(APIC_VERBOSE, KERN_INFO +- "...changing IO-APIC physical APIC ID to %d ...", +- mp_ioapics[apic].mp_apicid); +- +- reg_00.bits.ID = mp_ioapics[apic].mp_apicid; +- spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(apic, 0, reg_00.raw); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- /* +- * Sanity check +- */ +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_00.raw = io_apic_read(apic, 0); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) +- printk("could not set ID!\n"); +- else +- apic_printk(APIC_VERBOSE, " ok.\n"); +- } +-} +-#endif +- +-int no_timer_check __initdata; +- +-static int __init notimercheck(char *s) +-{ +- no_timer_check = 1; +- return 1; +-} +-__setup("no_timer_check", notimercheck); +- +-/* +- * There is a nasty bug in some older SMP boards, their mptable lies +- * about the timer IRQ. We do the following to work around the situation: +- * +- * - timer IRQ defaults to IO-APIC IRQ +- * - if this function detects that timer IRQs are defunct, then we fall +- * back to ISA timer IRQs +- */ +-static int __init timer_irq_works(void) +-{ +- unsigned long t1 = jiffies; +- unsigned long flags; +- +- if (no_timer_check) +- return 1; +- +- local_save_flags(flags); +- local_irq_enable(); +- /* Let ten ticks pass... */ +- mdelay((10 * 1000) / HZ); +- local_irq_restore(flags); +- +- /* +- * Expect a few ticks at least, to be sure some possible +- * glue logic does not lock up after one or two first +- * ticks in a non-ExtINT mode. Also the local APIC +- * might have cached one ExtINT interrupt. Finally, at +- * least one tick may be lost due to delays. +- */ +- +- /* jiffies wrap? */ +- if (time_after(jiffies, t1 + 4)) +- return 1; +- return 0; +-} +- +-/* +- * In the SMP+IOAPIC case it might happen that there are an unspecified +- * number of pending IRQ events unhandled. These cases are very rare, +- * so we 'resend' these IRQs via IPIs, to the same CPU. It's much +- * better to do it this way as thus we do not have to be aware of +- * 'pending' interrupts in the IRQ path, except at this point. +- */ +-/* +- * Edge triggered needs to resend any interrupt +- * that was delayed but this is now handled in the device +- * independent code. +- */ +- +-/* +- * Starting up a edge-triggered IO-APIC interrupt is +- * nasty - we need to make sure that we get the edge. +- * If it is already asserted for some reason, we need +- * return 1 to indicate that is was pending. +- * +- * This is not complete - we should be able to fake +- * an edge even if it isn't on the 8259A... 
+- */ +- +-static unsigned int startup_ioapic_irq(unsigned int irq) +-{ +- int was_pending = 0; +- unsigned long flags; +- struct irq_cfg *cfg; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- if (irq < NR_IRQS_LEGACY) { +- disable_8259A_irq(irq); +- if (i8259A_irq_pending(irq)) +- was_pending = 1; +- } +- cfg = irq_cfg(irq); +- __unmask_IO_APIC_irq(cfg); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- return was_pending; +-} +- +-#ifdef CONFIG_X86_64 +-static int ioapic_retrigger_irq(unsigned int irq) +-{ +- +- struct irq_cfg *cfg = irq_cfg(irq); +- unsigned long flags; +- +- spin_lock_irqsave(&vector_lock, flags); +- send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); +- spin_unlock_irqrestore(&vector_lock, flags); +- +- return 1; +-} +-#else +-static int ioapic_retrigger_irq(unsigned int irq) +-{ +- send_IPI_self(irq_cfg(irq)->vector); +- +- return 1; +-} +-#endif +- +-/* +- * Level and edge triggered IO-APIC interrupts need different handling, +- * so we use two separate IRQ descriptors. Edge triggered IRQs can be +- * handled with the level-triggered descriptor, but that one has slightly +- * more overhead. Level-triggered interrupts cannot be handled with the +- * edge-triggered handler, without risking IRQ storms and other ugly +- * races. +- */ +- +-#ifdef CONFIG_SMP +- +-#ifdef CONFIG_INTR_REMAP +-static void ir_irq_migration(struct work_struct *work); +- +-static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); +- +-/* +- * Migrate the IO-APIC irq in the presence of intr-remapping. +- * +- * For edge triggered, irq migration is a simple atomic update(of vector +- * and cpu destination) of IRTE and flush the hardware cache. +- * +- * For level triggered, we need to modify the io-apic RTE aswell with the update +- * vector information, along with modifying IRTE with vector and destination. +- * So irq migration for level triggered is little bit more complex compared to +- * edge triggered migration. But the good news is, we use the same algorithm +- * for level triggered migration as we have today, only difference being, +- * we now initiate the irq migration from process context instead of the +- * interrupt context. +- * +- * In future, when we do a directed EOI (combined with cpu EOI broadcast +- * suppression) to the IO-APIC, level triggered irq migration will also be +- * as simple as edge triggered migration and we can do the irq migration +- * with a simple atomic update to IO-APIC RTE. +- */ +-static void +-migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) +-{ +- struct irq_cfg *cfg; +- struct irte irte; +- int modify_ioapic_rte; +- unsigned int dest; +- unsigned long flags; +- unsigned int irq; +- +- if (!cpumask_intersects(mask, cpu_online_mask)) +- return; +- +- irq = desc->irq; +- if (get_irte(irq, &irte)) +- return; +- +- cfg = desc->chip_data; +- if (assign_irq_vector(irq, cfg, mask)) +- return; +- +- set_extra_move_desc(desc, mask); +- +- dest = cpu_mask_to_apicid_and(cfg->domain, mask); +- +- modify_ioapic_rte = desc->status & IRQ_LEVEL; +- if (modify_ioapic_rte) { +- spin_lock_irqsave(&ioapic_lock, flags); +- __target_IO_APIC_irq(irq, dest, cfg); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- } +- +- irte.vector = cfg->vector; +- irte.dest_id = IRTE_DEST(dest); +- +- /* +- * Modified the IRTE and flushes the Interrupt entry cache. 
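timer_irq_works(), shown a little earlier, decides the timer is alive if more than a handful of ticks elapsed while interrupts were briefly enabled; the comparison has to survive the tick counter wrapping, which is why it goes through time_after() rather than a plain '>'. A standalone sketch of that wrap-safe comparison (the 4-tick threshold mirrors the code above; the signed-difference trick is the usual way such helpers are written, stated here as an assumption rather than a quote of the kernel header):

#include <stdbool.h>
#include <stdio.h>

/* True if tick counter a is later than b, even if the counter wrapped. */
static bool ticks_after(unsigned long a, unsigned long b)
{
    return (long)(b - a) < 0;
}

/* The shape of the check in timer_irq_works(): did we see more than 4 ticks? */
static bool timer_looks_alive(unsigned long t1, unsigned long now)
{
    return ticks_after(now, t1 + 4);
}

int main(void)
{
    /* Near the wrap point: now has wrapped past zero, t1 has not. */
    unsigned long t1 = (unsigned long)-3, now = 7;

    printf("%d\n", timer_looks_alive(t1, now));   /* prints 1 */
    return 0;
}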
+- */ +- modify_irte(irq, &irte); +- +- if (cfg->move_in_progress) +- send_cleanup_vector(cfg); +- +- cpumask_copy(&desc->affinity, mask); +-} +- +-static int migrate_irq_remapped_level_desc(struct irq_desc *desc) +-{ +- int ret = -1; +- struct irq_cfg *cfg = desc->chip_data; +- +- mask_IO_APIC_irq_desc(desc); +- +- if (io_apic_level_ack_pending(cfg)) { +- /* +- * Interrupt in progress. Migrating irq now will change the +- * vector information in the IO-APIC RTE and that will confuse +- * the EOI broadcast performed by cpu. +- * So, delay the irq migration to the next instance. +- */ +- schedule_delayed_work(&ir_migration_work, 1); +- goto unmask; +- } +- +- /* everthing is clear. we have right of way */ +- migrate_ioapic_irq_desc(desc, &desc->pending_mask); +- +- ret = 0; +- desc->status &= ~IRQ_MOVE_PENDING; +- cpumask_clear(&desc->pending_mask); +- +-unmask: +- unmask_IO_APIC_irq_desc(desc); +- +- return ret; +-} +- +-static void ir_irq_migration(struct work_struct *work) +-{ +- unsigned int irq; +- struct irq_desc *desc; +- +- for_each_irq_desc(irq, desc) { +- if (desc->status & IRQ_MOVE_PENDING) { +- unsigned long flags; +- +- spin_lock_irqsave(&desc->lock, flags); +- if (!desc->chip->set_affinity || +- !(desc->status & IRQ_MOVE_PENDING)) { +- desc->status &= ~IRQ_MOVE_PENDING; +- spin_unlock_irqrestore(&desc->lock, flags); +- continue; +- } +- +- desc->chip->set_affinity(irq, &desc->pending_mask); +- spin_unlock_irqrestore(&desc->lock, flags); +- } +- } +-} +- +-/* +- * Migrates the IRQ destination in the process context. +- */ +-static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, +- const struct cpumask *mask) +-{ +- if (desc->status & IRQ_LEVEL) { +- desc->status |= IRQ_MOVE_PENDING; +- cpumask_copy(&desc->pending_mask, mask); +- migrate_irq_remapped_level_desc(desc); +- return; +- } +- +- migrate_ioapic_irq_desc(desc, mask); +-} +-static void set_ir_ioapic_affinity_irq(unsigned int irq, +- const struct cpumask *mask) +-{ +- struct irq_desc *desc = irq_to_desc(irq); +- +- set_ir_ioapic_affinity_irq_desc(desc, mask); +-} +-#endif +- +-asmlinkage void smp_irq_move_cleanup_interrupt(void) +-{ +- unsigned vector, me; +- +- ack_APIC_irq(); +- exit_idle(); +- irq_enter(); +- +- me = smp_processor_id(); +- for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { +- unsigned int irq; +- unsigned int irr; +- struct irq_desc *desc; +- struct irq_cfg *cfg; +- irq = __get_cpu_var(vector_irq)[vector]; +- +- if (irq == -1) +- continue; +- +- desc = irq_to_desc(irq); +- if (!desc) +- continue; +- +- cfg = irq_cfg(irq); +- spin_lock(&desc->lock); +- if (!cfg->move_cleanup_count) +- goto unlock; +- +- if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) +- goto unlock; +- +- irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); +- /* +- * Check if the vector that needs to be cleanedup is +- * registered at the cpu's IRR. If so, then this is not +- * the best time to clean it up. Lets clean it up in the +- * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR +- * to myself. 
+- */ +- if (irr & (1 << (vector % 32))) { +- send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); +- goto unlock; +- } +- __get_cpu_var(vector_irq)[vector] = -1; +- cfg->move_cleanup_count--; +-unlock: +- spin_unlock(&desc->lock); +- } +- +- irq_exit(); +-} +- +-static void irq_complete_move(struct irq_desc **descp) +-{ +- struct irq_desc *desc = *descp; +- struct irq_cfg *cfg = desc->chip_data; +- unsigned vector, me; +- +- if (likely(!cfg->move_in_progress)) { +-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC +- if (likely(!cfg->move_desc_pending)) +- return; +- +- /* domain has not changed, but affinity did */ +- me = smp_processor_id(); +- if (cpu_isset(me, desc->affinity)) { +- *descp = desc = move_irq_desc(desc, me); +- /* get the new one */ +- cfg = desc->chip_data; +- cfg->move_desc_pending = 0; +- } +-#endif +- return; +- } +- +- vector = ~get_irq_regs()->orig_ax; +- me = smp_processor_id(); +- +- if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) { +-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC +- *descp = desc = move_irq_desc(desc, me); +- /* get the new one */ +- cfg = desc->chip_data; +-#endif +- send_cleanup_vector(cfg); +- } +-} +-#else +-static inline void irq_complete_move(struct irq_desc **descp) {} +-#endif +- +-#ifdef CONFIG_INTR_REMAP +-static void ack_x2apic_level(unsigned int irq) +-{ +- ack_x2APIC_irq(); +-} +- +-static void ack_x2apic_edge(unsigned int irq) +-{ +- ack_x2APIC_irq(); +-} +- +-#endif +- +-static void ack_apic_edge(unsigned int irq) +-{ +- struct irq_desc *desc = irq_to_desc(irq); +- +- irq_complete_move(&desc); +- move_native_irq(irq); +- ack_APIC_irq(); +-} +- +-atomic_t irq_mis_count; +- +-static void ack_apic_level(unsigned int irq) +-{ +- struct irq_desc *desc = irq_to_desc(irq); +- +-#ifdef CONFIG_X86_32 +- unsigned long v; +- int i; +-#endif +- struct irq_cfg *cfg; +- int do_unmask_irq = 0; +- +- irq_complete_move(&desc); +-#ifdef CONFIG_GENERIC_PENDING_IRQ +- /* If we are moving the irq we need to mask it */ +- if (unlikely(desc->status & IRQ_MOVE_PENDING)) { +- do_unmask_irq = 1; +- mask_IO_APIC_irq_desc(desc); +- } +-#endif +- +-#ifdef CONFIG_X86_32 +- /* +- * It appears there is an erratum which affects at least version 0x11 +- * of I/O APIC (that's the 82093AA and cores integrated into various +- * chipsets). Under certain conditions a level-triggered interrupt is +- * erroneously delivered as edge-triggered one but the respective IRR +- * bit gets set nevertheless. As a result the I/O unit expects an EOI +- * message but it will never arrive and further interrupts are blocked +- * from the source. The exact reason is so far unknown, but the +- * phenomenon was observed when two consecutive interrupt requests +- * from a given source get delivered to the same CPU and the source is +- * temporarily disabled in between. +- * +- * A workaround is to simulate an EOI message manually. We achieve it +- * by setting the trigger mode to edge and then to level when the edge +- * trigger mode gets detected in the TMR of a local APIC for a +- * level-triggered interrupt. We mask the source for the time of the +- * operation to prevent an edge-triggered interrupt escaping meanwhile. +- * The idea is from Manfred Spraul. --macro +- */ +- cfg = desc->chip_data; +- i = cfg->vector; +- +- v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); +-#endif +- +- /* +- * We must acknowledge the irq before we move it or the acknowledge will +- * not propagate properly. 
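The 82093AA erratum workaround above needs to know whether a given vector's bit is set in the local APIC's TMR, which is laid out as a row of 32-bit registers spaced 0x10 apart; the expression APIC_TMR + ((i & ~0x1f) >> 1) plus the bit 1 << (i & 0x1f) is exactly that index arithmetic. A standalone sketch of the same vector-to-(register, bit) mapping (APIC_TMR_BASE is a placeholder offset for the example):

#include <stdio.h>

#define APIC_TMR_BASE   0x180   /* placeholder register offset for the sketch */

/* 256 vector bits live in eight 32-bit registers spaced 0x10 apart. */
static unsigned int tmr_reg_offset(unsigned int vector)
{
    return APIC_TMR_BASE + ((vector & ~0x1fu) >> 1);
}

static unsigned int tmr_bit(unsigned int vector)
{
    return 1u << (vector & 0x1f);
}

int main(void)
{
    unsigned int v = 0x31;  /* vector 49: second register, bit 17 */

    printf("offset=0x%x bit=0x%x\n", tmr_reg_offset(v), tmr_bit(v));
    return 0;
}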
+- */ +- ack_APIC_irq(); +- +- /* Now we can move and renable the irq */ +- if (unlikely(do_unmask_irq)) { +- /* Only migrate the irq if the ack has been received. +- * +- * On rare occasions the broadcast level triggered ack gets +- * delayed going to ioapics, and if we reprogram the +- * vector while Remote IRR is still set the irq will never +- * fire again. +- * +- * To prevent this scenario we read the Remote IRR bit +- * of the ioapic. This has two effects. +- * - On any sane system the read of the ioapic will +- * flush writes (and acks) going to the ioapic from +- * this cpu. +- * - We get to see if the ACK has actually been delivered. +- * +- * Based on failed experiments of reprogramming the +- * ioapic entry from outside of irq context starting +- * with masking the ioapic entry and then polling until +- * Remote IRR was clear before reprogramming the +- * ioapic I don't trust the Remote IRR bit to be +- * completey accurate. +- * +- * However there appears to be no other way to plug +- * this race, so if the Remote IRR bit is not +- * accurate and is causing problems then it is a hardware bug +- * and you can go talk to the chipset vendor about it. +- */ +- cfg = desc->chip_data; +- if (!io_apic_level_ack_pending(cfg)) +- move_masked_irq(irq); +- unmask_IO_APIC_irq_desc(desc); +- } +- +-#ifdef CONFIG_X86_32 +- if (!(v & (1 << (i & 0x1f)))) { +- atomic_inc(&irq_mis_count); +- spin_lock(&ioapic_lock); +- __mask_and_edge_IO_APIC_irq(cfg); +- __unmask_and_level_IO_APIC_irq(cfg); +- spin_unlock(&ioapic_lock); +- } +-#endif +-} +- +-static struct irq_chip ioapic_chip __read_mostly = { +- .name = "IO-APIC", +- .startup = startup_ioapic_irq, +- .mask = mask_IO_APIC_irq, +- .unmask = unmask_IO_APIC_irq, +- .ack = ack_apic_edge, +- .eoi = ack_apic_level, +-#ifdef CONFIG_SMP +- .set_affinity = set_ioapic_affinity_irq, +-#endif +- .retrigger = ioapic_retrigger_irq, +-}; +- +-#ifdef CONFIG_INTR_REMAP +-static struct irq_chip ir_ioapic_chip __read_mostly = { +- .name = "IR-IO-APIC", +- .startup = startup_ioapic_irq, +- .mask = mask_IO_APIC_irq, +- .unmask = unmask_IO_APIC_irq, +- .ack = ack_x2apic_edge, +- .eoi = ack_x2apic_level, +-#ifdef CONFIG_SMP +- .set_affinity = set_ir_ioapic_affinity_irq, +-#endif +- .retrigger = ioapic_retrigger_irq, +-}; +-#endif +- +-static inline void init_IO_APIC_traps(void) +-{ +- int irq; +- struct irq_desc *desc; +- struct irq_cfg *cfg; +- +- /* +- * NOTE! The local APIC isn't very good at handling +- * multiple interrupts at the same interrupt level. +- * As the interrupt level is determined by taking the +- * vector number and shifting that right by 4, we +- * want to spread these out a bit so that they don't +- * all fall in the same interrupt level. +- * +- * Also, we've got to be careful not to trash gate +- * 0x80, because int 0x80 is hm, kind of importantish. ;) +- */ +- for_each_irq_desc(irq, desc) { +- cfg = desc->chip_data; +- if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { +- /* +- * Hmm.. We don't have an entry for this, +- * so default to an old-fashioned 8259 +- * interrupt if we can.. +- */ +- if (irq < NR_IRQS_LEGACY) +- make_8259A_irq(irq); +- else +- /* Strange. Oh, well.. 
*/ +- desc->chip = &no_irq_chip; +- } +- } +-} +- +-/* +- * The local APIC irq-chip implementation: +- */ +- +-static void mask_lapic_irq(unsigned int irq) +-{ +- unsigned long v; +- +- v = apic_read(APIC_LVT0); +- apic_write(APIC_LVT0, v | APIC_LVT_MASKED); +-} +- +-static void unmask_lapic_irq(unsigned int irq) +-{ +- unsigned long v; +- +- v = apic_read(APIC_LVT0); +- apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); +-} +- +-static void ack_lapic_irq(unsigned int irq) +-{ +- ack_APIC_irq(); +-} +- +-static struct irq_chip lapic_chip __read_mostly = { +- .name = "local-APIC", +- .mask = mask_lapic_irq, +- .unmask = unmask_lapic_irq, +- .ack = ack_lapic_irq, +-}; +- +-static void lapic_register_intr(int irq, struct irq_desc *desc) +-{ +- desc->status &= ~IRQ_LEVEL; +- set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, +- "edge"); +-} +- +-static void __init setup_nmi(void) +-{ +- /* +- * Dirty trick to enable the NMI watchdog ... +- * We put the 8259A master into AEOI mode and +- * unmask on all local APICs LVT0 as NMI. +- * +- * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') +- * is from Maciej W. Rozycki - so we do not have to EOI from +- * the NMI handler or the timer interrupt. +- */ +- apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); +- +- enable_NMI_through_LVT0(); +- +- apic_printk(APIC_VERBOSE, " done.\n"); +-} +- +-/* +- * This looks a bit hackish but it's about the only one way of sending +- * a few INTA cycles to 8259As and any associated glue logic. ICR does +- * not support the ExtINT mode, unfortunately. We need to send these +- * cycles as some i82489DX-based boards have glue logic that keeps the +- * 8259A interrupt line asserted until INTA. --macro +- */ +-static inline void __init unlock_ExtINT_logic(void) +-{ +- int apic, pin, i; +- struct IO_APIC_route_entry entry0, entry1; +- unsigned char save_control, save_freq_select; +- +- pin = find_isa_irq_pin(8, mp_INT); +- if (pin == -1) { +- WARN_ON_ONCE(1); +- return; +- } +- apic = find_isa_irq_apic(8, mp_INT); +- if (apic == -1) { +- WARN_ON_ONCE(1); +- return; +- } +- +- entry0 = ioapic_read_entry(apic, pin); +- clear_IO_APIC_pin(apic, pin); +- +- memset(&entry1, 0, sizeof(entry1)); +- +- entry1.dest_mode = 0; /* physical delivery */ +- entry1.mask = 0; /* unmask IRQ now */ +- entry1.dest = hard_smp_processor_id(); +- entry1.delivery_mode = dest_ExtINT; +- entry1.polarity = entry0.polarity; +- entry1.trigger = 0; +- entry1.vector = 0; +- +- ioapic_write_entry(apic, pin, entry1); +- +- save_control = CMOS_READ(RTC_CONTROL); +- save_freq_select = CMOS_READ(RTC_FREQ_SELECT); +- CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6, +- RTC_FREQ_SELECT); +- CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL); +- +- i = 100; +- while (i-- > 0) { +- mdelay(10); +- if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF) +- i -= 10; +- } +- +- CMOS_WRITE(save_control, RTC_CONTROL); +- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); +- clear_IO_APIC_pin(apic, pin); +- +- ioapic_write_entry(apic, pin, entry0); +-} +- +-static int disable_timer_pin_1 __initdata; +-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */ +-static int __init disable_timer_pin_setup(char *arg) +-{ +- disable_timer_pin_1 = 1; +- return 0; +-} +-early_param("disable_timer_pin_1", disable_timer_pin_setup); +- +-int timer_through_8259 __initdata; +- +-/* +- * This code may look a bit paranoid, but it's supposed to cooperate with +- * a wide range of boards and BIOS bugs. 
Fortunately only the timer IRQ +- * is so screwy. Thanks to Brian Perkins for testing/hacking this beast +- * fanatically on his truly buggy board. +- * +- * FIXME: really need to revamp this for all platforms. +- */ +-static inline void __init check_timer(void) +-{ +- struct irq_desc *desc = irq_to_desc(0); +- struct irq_cfg *cfg = desc->chip_data; +- int cpu = boot_cpu_id; +- int apic1, pin1, apic2, pin2; +- unsigned long flags; +- unsigned int ver; +- int no_pin1 = 0; +- +- local_irq_save(flags); +- +- ver = apic_read(APIC_LVR); +- ver = GET_APIC_VERSION(ver); +- +- /* +- * get/set the timer IRQ vector: +- */ +- disable_8259A_irq(0); +- assign_irq_vector(0, cfg, TARGET_CPUS); +- +- /* +- * As IRQ0 is to be enabled in the 8259A, the virtual +- * wire has to be disabled in the local APIC. Also +- * timer interrupts need to be acknowledged manually in +- * the 8259A for the i82489DX when using the NMI +- * watchdog as that APIC treats NMIs as level-triggered. +- * The AEOI mode will finish them in the 8259A +- * automatically. +- */ +- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); +- init_8259A(1); +-#ifdef CONFIG_X86_32 +- timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); +-#endif +- +- pin1 = find_isa_irq_pin(0, mp_INT); +- apic1 = find_isa_irq_apic(0, mp_INT); +- pin2 = ioapic_i8259.pin; +- apic2 = ioapic_i8259.apic; +- +- apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " +- "apic1=%d pin1=%d apic2=%d pin2=%d\n", +- cfg->vector, apic1, pin1, apic2, pin2); +- +- /* +- * Some BIOS writers are clueless and report the ExtINTA +- * I/O APIC input from the cascaded 8259A as the timer +- * interrupt input. So just in case, if only one pin +- * was found above, try it both directly and through the +- * 8259A. +- */ +- if (pin1 == -1) { +-#ifdef CONFIG_INTR_REMAP +- if (intr_remapping_enabled) +- panic("BIOS bug: timer not connected to IO-APIC"); +-#endif +- pin1 = pin2; +- apic1 = apic2; +- no_pin1 = 1; +- } else if (pin2 == -1) { +- pin2 = pin1; +- apic2 = apic1; +- } +- +- if (pin1 != -1) { +- /* +- * Ok, does IRQ0 through the IOAPIC work? +- */ +- if (no_pin1) { +- add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); +- setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); +- } +- unmask_IO_APIC_irq_desc(desc); +- if (timer_irq_works()) { +- if (nmi_watchdog == NMI_IO_APIC) { +- setup_nmi(); +- enable_8259A_irq(0); +- } +- if (disable_timer_pin_1 > 0) +- clear_IO_APIC_pin(0, pin1); +- goto out; +- } +-#ifdef CONFIG_INTR_REMAP +- if (intr_remapping_enabled) +- panic("timer doesn't work through Interrupt-remapped IO-APIC"); +-#endif +- clear_IO_APIC_pin(apic1, pin1); +- if (!no_pin1) +- apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " +- "8254 timer not connected to IO-APIC\n"); +- +- apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " +- "(IRQ0) through the 8259A ...\n"); +- apic_printk(APIC_QUIET, KERN_INFO +- "..... (found apic %d pin %d) ...\n", apic2, pin2); +- /* +- * legacy devices should be connected to IO APIC #0 +- */ +- replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); +- setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); +- unmask_IO_APIC_irq_desc(desc); +- enable_8259A_irq(0); +- if (timer_irq_works()) { +- apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); +- timer_through_8259 = 1; +- if (nmi_watchdog == NMI_IO_APIC) { +- disable_8259A_irq(0); +- setup_nmi(); +- enable_8259A_irq(0); +- } +- goto out; +- } +- /* +- * Cleanup, just in case ... 
+- */ +- disable_8259A_irq(0); +- clear_IO_APIC_pin(apic2, pin2); +- apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); +- } +- +- if (nmi_watchdog == NMI_IO_APIC) { +- apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " +- "through the IO-APIC - disabling NMI Watchdog!\n"); +- nmi_watchdog = NMI_NONE; +- } +-#ifdef CONFIG_X86_32 +- timer_ack = 0; +-#endif +- +- apic_printk(APIC_QUIET, KERN_INFO +- "...trying to set up timer as Virtual Wire IRQ...\n"); +- +- lapic_register_intr(0, desc); +- apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ +- enable_8259A_irq(0); +- +- if (timer_irq_works()) { +- apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); +- goto out; +- } +- disable_8259A_irq(0); +- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); +- apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); +- +- apic_printk(APIC_QUIET, KERN_INFO +- "...trying to set up timer as ExtINT IRQ...\n"); +- +- init_8259A(0); +- make_8259A_irq(0); +- apic_write(APIC_LVT0, APIC_DM_EXTINT); +- +- unlock_ExtINT_logic(); +- +- if (timer_irq_works()) { +- apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); +- goto out; +- } +- apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); +- panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " +- "report. Then try booting with the 'noapic' option.\n"); +-out: +- local_irq_restore(flags); +-} +- +-/* +- * Traditionally ISA IRQ2 is the cascade IRQ, and is not available +- * to devices. However there may be an I/O APIC pin available for +- * this interrupt regardless. The pin may be left unconnected, but +- * typically it will be reused as an ExtINT cascade interrupt for +- * the master 8259A. In the MPS case such a pin will normally be +- * reported as an ExtINT interrupt in the MP table. With ACPI +- * there is no provision for ExtINT interrupts, and in the absence +- * of an override it would be treated as an ordinary ISA I/O APIC +- * interrupt, that is edge-triggered and unmasked by default. We +- * used to do this, but it caused problems on some systems because +- * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using +- * the same ExtINT cascade interrupt to drive the local APIC of the +- * bootstrap processor. Therefore we refrain from routing IRQ2 to +- * the I/O APIC in all cases now. No actual device should request +- * it anyway. --macro +- */ +-#define PIC_IRQS (1 << PIC_CASCADE_IR) +- +-void __init setup_IO_APIC(void) +-{ +- +-#ifdef CONFIG_X86_32 +- enable_IO_APIC(); +-#else +- /* +- * calling enable_IO_APIC() is moved to setup_local_APIC for BP +- */ +-#endif +- +- io_apic_irqs = ~PIC_IRQS; +- +- apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); +- /* +- * Set up IO-APIC IRQ routing. +- */ +-#ifdef CONFIG_X86_32 +- if (!acpi_ioapic) +- setup_ioapic_ids_from_mpc(); +-#endif +- sync_Arb_IDs(); +- setup_IO_APIC_irqs(); +- init_IO_APIC_traps(); +- check_timer(); +-} +- +-/* +- * Called after all the initialization is done. 
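check_timer() above is essentially a fixed fallback chain: try IRQ0 through the IO-APIC pin reported by the MP table, then through the 8259A cascade pin, then as a local-APIC virtual-wire IRQ, then as raw ExtINT, and panic only when every attempt fails. The control flow reads more clearly as a strategy table; a standalone sketch of that restructuring (the strategy bodies are stubs, not the real setup steps):

#include <stdbool.h>
#include <stdio.h>

/* Each stub stands in for one of the setups tried by check_timer(). */
static bool try_ioapic_pin(void)   { return false; }
static bool try_through_8259(void) { return false; }
static bool try_virtual_wire(void) { return true;  }
static bool try_extint(void)       { return false; }

static const struct {
    const char *name;
    bool (*attempt)(void);
} timer_strategies[] = {
    { "IO-APIC pin",      try_ioapic_pin   },
    { "through 8259A",    try_through_8259 },
    { "virtual wire IRQ", try_virtual_wire },
    { "ExtINT",           try_extint       },
};

int main(void)
{
    size_t i;

    for (i = 0; i < sizeof(timer_strategies) / sizeof(timer_strategies[0]); i++) {
        printf("...trying timer as %s\n", timer_strategies[i].name);
        if (timer_strategies[i].attempt()) {
            printf("..... works.\n");
            return 0;
        }
        printf("..... failed.\n");
    }
    fprintf(stderr, "IO-APIC + timer doesn't work!\n");
    return 1;
}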
If we didnt find any +- * APIC bugs then we can allow the modify fast path +- */ +- +-static int __init io_apic_bug_finalize(void) +-{ +- if (sis_apic_bug == -1) +- sis_apic_bug = 0; +- return 0; +-} +- +-late_initcall(io_apic_bug_finalize); +- +-struct sysfs_ioapic_data { +- struct sys_device dev; +- struct IO_APIC_route_entry entry[0]; +-}; +-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS]; +- +-static int ioapic_suspend(struct sys_device *dev, pm_message_t state) +-{ +- struct IO_APIC_route_entry *entry; +- struct sysfs_ioapic_data *data; +- int i; +- +- data = container_of(dev, struct sysfs_ioapic_data, dev); +- entry = data->entry; +- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) +- *entry = ioapic_read_entry(dev->id, i); +- +- return 0; +-} +- +-static int ioapic_resume(struct sys_device *dev) +-{ +- struct IO_APIC_route_entry *entry; +- struct sysfs_ioapic_data *data; +- unsigned long flags; +- union IO_APIC_reg_00 reg_00; +- int i; +- +- data = container_of(dev, struct sysfs_ioapic_data, dev); +- entry = data->entry; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_00.raw = io_apic_read(dev->id, 0); +- if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { +- reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; +- io_apic_write(dev->id, 0, reg_00.raw); +- } +- spin_unlock_irqrestore(&ioapic_lock, flags); +- for (i = 0; i < nr_ioapic_registers[dev->id]; i++) +- ioapic_write_entry(dev->id, i, entry[i]); +- +- return 0; +-} +- +-static struct sysdev_class ioapic_sysdev_class = { +- .name = "ioapic", +- .suspend = ioapic_suspend, +- .resume = ioapic_resume, +-}; +- +-static int __init ioapic_init_sysfs(void) +-{ +- struct sys_device * dev; +- int i, size, error; +- +- error = sysdev_class_register(&ioapic_sysdev_class); +- if (error) +- return error; +- +- for (i = 0; i < nr_ioapics; i++ ) { +- size = sizeof(struct sys_device) + nr_ioapic_registers[i] +- * sizeof(struct IO_APIC_route_entry); +- mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); +- if (!mp_ioapic_data[i]) { +- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); +- continue; +- } +- dev = &mp_ioapic_data[i]->dev; +- dev->id = i; +- dev->cls = &ioapic_sysdev_class; +- error = sysdev_register(dev); +- if (error) { +- kfree(mp_ioapic_data[i]); +- mp_ioapic_data[i] = NULL; +- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); +- continue; +- } +- } +- +- return 0; +-} +- +-device_initcall(ioapic_init_sysfs); +- +-/* +- * Dynamic irq allocate and deallocation +- */ +-unsigned int create_irq_nr(unsigned int irq_want) +-{ +- /* Allocate an unused irq */ +- unsigned int irq; +- unsigned int new; +- unsigned long flags; +- struct irq_cfg *cfg_new = NULL; +- int cpu = boot_cpu_id; +- struct irq_desc *desc_new = NULL; +- +- irq = 0; +- spin_lock_irqsave(&vector_lock, flags); +- for (new = irq_want; new < NR_IRQS; new++) { +- if (platform_legacy_irq(new)) +- continue; +- +- desc_new = irq_to_desc_alloc_cpu(new, cpu); +- if (!desc_new) { +- printk(KERN_INFO "can not get irq_desc for %d\n", new); +- continue; +- } +- cfg_new = desc_new->chip_data; +- +- if (cfg_new->vector != 0) +- continue; +- if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) +- irq = new; +- break; +- } +- spin_unlock_irqrestore(&vector_lock, flags); +- +- if (irq > 0) { +- dynamic_irq_init(irq); +- /* restore it, in case dynamic_irq_init clear it */ +- if (desc_new) +- desc_new->chip_data = cfg_new; +- } +- return irq; +-} +- +-static int nr_irqs_gsi = NR_IRQS_LEGACY; +-int create_irq(void) +-{ +- unsigned int irq_want; +- 
int irq; +- +- irq_want = nr_irqs_gsi; +- irq = create_irq_nr(irq_want); +- +- if (irq == 0) +- irq = -1; +- +- return irq; +-} +- +-void destroy_irq(unsigned int irq) +-{ +- unsigned long flags; +- struct irq_cfg *cfg; +- struct irq_desc *desc; +- +- /* store it, in case dynamic_irq_cleanup clear it */ +- desc = irq_to_desc(irq); +- cfg = desc->chip_data; +- dynamic_irq_cleanup(irq); +- /* connect back irq_cfg */ +- if (desc) +- desc->chip_data = cfg; +- +-#ifdef CONFIG_INTR_REMAP +- free_irte(irq); +-#endif +- spin_lock_irqsave(&vector_lock, flags); +- __clear_irq_vector(irq, cfg); +- spin_unlock_irqrestore(&vector_lock, flags); +-} +- +-/* +- * MSI message composition +- */ +-#ifdef CONFIG_PCI_MSI +-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) +-{ +- struct irq_cfg *cfg; +- int err; +- unsigned dest; +- +- cfg = irq_cfg(irq); +- err = assign_irq_vector(irq, cfg, TARGET_CPUS); +- if (err) +- return err; +- +- dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); +- +-#ifdef CONFIG_INTR_REMAP +- if (irq_remapped(irq)) { +- struct irte irte; +- int ir_index; +- u16 sub_handle; +- +- ir_index = map_irq_to_irte_handle(irq, &sub_handle); +- BUG_ON(ir_index == -1); +- +- memset (&irte, 0, sizeof(irte)); +- +- irte.present = 1; +- irte.dst_mode = INT_DEST_MODE; +- irte.trigger_mode = 0; /* edge */ +- irte.dlvry_mode = INT_DELIVERY_MODE; +- irte.vector = cfg->vector; +- irte.dest_id = IRTE_DEST(dest); +- +- modify_irte(irq, &irte); +- +- msg->address_hi = MSI_ADDR_BASE_HI; +- msg->data = sub_handle; +- msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT | +- MSI_ADDR_IR_SHV | +- MSI_ADDR_IR_INDEX1(ir_index) | +- MSI_ADDR_IR_INDEX2(ir_index); +- } else +-#endif +- { +- msg->address_hi = MSI_ADDR_BASE_HI; +- msg->address_lo = +- MSI_ADDR_BASE_LO | +- ((INT_DEST_MODE == 0) ? +- MSI_ADDR_DEST_MODE_PHYSICAL: +- MSI_ADDR_DEST_MODE_LOGICAL) | +- ((INT_DELIVERY_MODE != dest_LowestPrio) ? +- MSI_ADDR_REDIRECTION_CPU: +- MSI_ADDR_REDIRECTION_LOWPRI) | +- MSI_ADDR_DEST_ID(dest); +- +- msg->data = +- MSI_DATA_TRIGGER_EDGE | +- MSI_DATA_LEVEL_ASSERT | +- ((INT_DELIVERY_MODE != dest_LowestPrio) ? +- MSI_DATA_DELIVERY_FIXED: +- MSI_DATA_DELIVERY_LOWPRI) | +- MSI_DATA_VECTOR(cfg->vector); +- } +- return err; +-} +- +-#ifdef CONFIG_SMP +-static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) +-{ +- struct irq_desc *desc = irq_to_desc(irq); +- struct irq_cfg *cfg; +- struct msi_msg msg; +- unsigned int dest; +- +- dest = set_desc_affinity(desc, mask); +- if (dest == BAD_APICID) +- return; +- +- cfg = desc->chip_data; +- +- read_msi_msg_desc(desc, &msg); +- +- msg.data &= ~MSI_DATA_VECTOR_MASK; +- msg.data |= MSI_DATA_VECTOR(cfg->vector); +- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; +- msg.address_lo |= MSI_ADDR_DEST_ID(dest); +- +- write_msi_msg_desc(desc, &msg); +-} +-#ifdef CONFIG_INTR_REMAP +-/* +- * Migrate the MSI irq to another cpumask. This migration is +- * done in the process context using interrupt-remapping hardware. +- */ +-static void +-ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) +-{ +- struct irq_desc *desc = irq_to_desc(irq); +- struct irq_cfg *cfg = desc->chip_data; +- unsigned int dest; +- struct irte irte; +- +- if (get_irte(irq, &irte)) +- return; +- +- dest = set_desc_affinity(desc, mask); +- if (dest == BAD_APICID) +- return; +- +- irte.vector = cfg->vector; +- irte.dest_id = IRTE_DEST(dest); +- +- /* +- * atomically update the IRTE with the new destination and vector. 
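set_msi_irq_affinity() above retargets an already-composed MSI message by clearing the vector field in msg.data and the destination-ID field in msg.address_lo with their masks, then OR-ing in the new values, leaving every other bit untouched. A standalone sketch of that read-modify-write on a mock message (the mask and shift values are invented for the example, not the real MSI layout):

#include <stdio.h>
#include <stdint.h>

/* Invented field layout, for the sketch only. */
#define DATA_VECTOR_MASK    0x000000ffu
#define ADDR_DEST_ID_MASK   0x000ff000u
#define ADDR_DEST_ID_SHIFT  12

struct mock_msi_msg { uint32_t address_lo; uint32_t data; };

/* Point an existing message at a new vector/destination, preserving
 * every other bit (trigger mode, delivery mode, base address, ...). */
static void retarget(struct mock_msi_msg *msg, uint8_t vector, uint8_t dest)
{
    msg->data &= ~DATA_VECTOR_MASK;
    msg->data |= vector;
    msg->address_lo &= ~ADDR_DEST_ID_MASK;
    msg->address_lo |= (uint32_t)dest << ADDR_DEST_ID_SHIFT;
}

int main(void)
{
    struct mock_msi_msg msg = { .address_lo = 0xfee00000u, .data = 0x4031u };

    retarget(&msg, 0x41, 0x02);
    printf("address_lo=%#x data=%#x\n", msg.address_lo, msg.data);
    return 0;
}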
+- */ +- modify_irte(irq, &irte); +- +- /* +- * After this point, all the interrupts will start arriving +- * at the new destination. So, time to cleanup the previous +- * vector allocation. +- */ +- if (cfg->move_in_progress) +- send_cleanup_vector(cfg); +-} +- +-#endif +-#endif /* CONFIG_SMP */ +- +-/* +- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices, +- * which implement the MSI or MSI-X Capability Structure. +- */ +-static struct irq_chip msi_chip = { +- .name = "PCI-MSI", +- .unmask = unmask_msi_irq, +- .mask = mask_msi_irq, +- .ack = ack_apic_edge, +-#ifdef CONFIG_SMP +- .set_affinity = set_msi_irq_affinity, +-#endif +- .retrigger = ioapic_retrigger_irq, +-}; +- +-#ifdef CONFIG_INTR_REMAP +-static struct irq_chip msi_ir_chip = { +- .name = "IR-PCI-MSI", +- .unmask = unmask_msi_irq, +- .mask = mask_msi_irq, +- .ack = ack_x2apic_edge, +-#ifdef CONFIG_SMP +- .set_affinity = ir_set_msi_irq_affinity, +-#endif +- .retrigger = ioapic_retrigger_irq, +-}; +- +-/* +- * Map the PCI dev to the corresponding remapping hardware unit +- * and allocate 'nvec' consecutive interrupt-remapping table entries +- * in it. +- */ +-static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) +-{ +- struct intel_iommu *iommu; +- int index; +- +- iommu = map_dev_to_ir(dev); +- if (!iommu) { +- printk(KERN_ERR +- "Unable to map PCI %s to iommu\n", pci_name(dev)); +- return -ENOENT; +- } +- +- index = alloc_irte(iommu, irq, nvec); +- if (index < 0) { +- printk(KERN_ERR +- "Unable to allocate %d IRTE for PCI %s\n", nvec, +- pci_name(dev)); +- return -ENOSPC; +- } +- return index; +-} +-#endif +- +-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) +-{ +- int ret; +- struct msi_msg msg; +- +- ret = msi_compose_msg(dev, irq, &msg); +- if (ret < 0) +- return ret; +- +- set_irq_msi(irq, msidesc); +- write_msi_msg(irq, &msg); +- +-#ifdef CONFIG_INTR_REMAP +- if (irq_remapped(irq)) { +- struct irq_desc *desc = irq_to_desc(irq); +- /* +- * irq migration in process context +- */ +- desc->status |= IRQ_MOVE_PCNTXT; +- set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge"); +- } else +-#endif +- set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); +- +- dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq); +- +- return 0; +-} +- +-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) +-{ +- unsigned int irq; +- int ret; +- unsigned int irq_want; +- +- irq_want = nr_irqs_gsi; +- irq = create_irq_nr(irq_want); +- if (irq == 0) +- return -1; +- +-#ifdef CONFIG_INTR_REMAP +- if (!intr_remapping_enabled) +- goto no_ir; +- +- ret = msi_alloc_irte(dev, irq, 1); +- if (ret < 0) +- goto error; +-no_ir: +-#endif +- ret = setup_msi_irq(dev, msidesc, irq); +- if (ret < 0) { +- destroy_irq(irq); +- return ret; +- } +- return 0; +- +-#ifdef CONFIG_INTR_REMAP +-error: +- destroy_irq(irq); +- return ret; +-#endif +-} +- +-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) +-{ +- unsigned int irq; +- int ret, sub_handle; +- struct msi_desc *msidesc; +- unsigned int irq_want; +- +-#ifdef CONFIG_INTR_REMAP +- struct intel_iommu *iommu = 0; +- int index = 0; +-#endif +- +- irq_want = nr_irqs_gsi; +- sub_handle = 0; +- list_for_each_entry(msidesc, &dev->msi_list, list) { +- irq = create_irq_nr(irq_want); +- irq_want++; +- if (irq == 0) +- return -1; +-#ifdef CONFIG_INTR_REMAP +- if (!intr_remapping_enabled) +- goto no_ir; +- +- if (!sub_handle) { +- /* +- * allocate the consecutive block of IRTE's +- * for 'nvec' +- */ +- 
index = msi_alloc_irte(dev, irq, nvec); +- if (index < 0) { +- ret = index; +- goto error; +- } +- } else { +- iommu = map_dev_to_ir(dev); +- if (!iommu) { +- ret = -ENOENT; +- goto error; +- } +- /* +- * setup the mapping between the irq and the IRTE +- * base index, the sub_handle pointing to the +- * appropriate interrupt remap table entry. +- */ +- set_irte_irq(irq, iommu, index, sub_handle); +- } +-no_ir: +-#endif +- ret = setup_msi_irq(dev, msidesc, irq); +- if (ret < 0) +- goto error; +- sub_handle++; +- } +- return 0; +- +-error: +- destroy_irq(irq); +- return ret; +-} +- +-void arch_teardown_msi_irq(unsigned int irq) +-{ +- destroy_irq(irq); +-} +- +-#ifdef CONFIG_DMAR +-#ifdef CONFIG_SMP +-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +-{ +- struct irq_desc *desc = irq_to_desc(irq); +- struct irq_cfg *cfg; +- struct msi_msg msg; +- unsigned int dest; +- +- dest = set_desc_affinity(desc, mask); +- if (dest == BAD_APICID) +- return; +- +- cfg = desc->chip_data; +- +- dmar_msi_read(irq, &msg); +- +- msg.data &= ~MSI_DATA_VECTOR_MASK; +- msg.data |= MSI_DATA_VECTOR(cfg->vector); +- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; +- msg.address_lo |= MSI_ADDR_DEST_ID(dest); +- +- dmar_msi_write(irq, &msg); +-} +- +-#endif /* CONFIG_SMP */ +- +-struct irq_chip dmar_msi_type = { +- .name = "DMAR_MSI", +- .unmask = dmar_msi_unmask, +- .mask = dmar_msi_mask, +- .ack = ack_apic_edge, +-#ifdef CONFIG_SMP +- .set_affinity = dmar_msi_set_affinity, +-#endif +- .retrigger = ioapic_retrigger_irq, +-}; +- +-int arch_setup_dmar_msi(unsigned int irq) +-{ +- int ret; +- struct msi_msg msg; +- +- ret = msi_compose_msg(NULL, irq, &msg); +- if (ret < 0) +- return ret; +- dmar_msi_write(irq, &msg); +- set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, +- "edge"); +- return 0; +-} +-#endif +- +-#ifdef CONFIG_HPET_TIMER +- +-#ifdef CONFIG_SMP +-static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) +-{ +- struct irq_desc *desc = irq_to_desc(irq); +- struct irq_cfg *cfg; +- struct msi_msg msg; +- unsigned int dest; +- +- dest = set_desc_affinity(desc, mask); +- if (dest == BAD_APICID) +- return; +- +- cfg = desc->chip_data; +- +- hpet_msi_read(irq, &msg); +- +- msg.data &= ~MSI_DATA_VECTOR_MASK; +- msg.data |= MSI_DATA_VECTOR(cfg->vector); +- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; +- msg.address_lo |= MSI_ADDR_DEST_ID(dest); +- +- hpet_msi_write(irq, &msg); +-} +- +-#endif /* CONFIG_SMP */ +- +-struct irq_chip hpet_msi_type = { +- .name = "HPET_MSI", +- .unmask = hpet_msi_unmask, +- .mask = hpet_msi_mask, +- .ack = ack_apic_edge, +-#ifdef CONFIG_SMP +- .set_affinity = hpet_msi_set_affinity, +-#endif +- .retrigger = ioapic_retrigger_irq, +-}; +- +-int arch_setup_hpet_msi(unsigned int irq) +-{ +- int ret; +- struct msi_msg msg; +- +- ret = msi_compose_msg(NULL, irq, &msg); +- if (ret < 0) +- return ret; +- +- hpet_msi_write(irq, &msg); +- set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq, +- "edge"); +- +- return 0; +-} +-#endif +- +-#endif /* CONFIG_PCI_MSI */ +-/* +- * Hypertransport interrupt support +- */ +-#ifdef CONFIG_HT_IRQ +- +-#ifdef CONFIG_SMP +- +-static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) +-{ +- struct ht_irq_msg msg; +- fetch_ht_irq_msg(irq, &msg); +- +- msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); +- msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); +- +- msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); +- msg.address_hi |= 
HT_IRQ_HIGH_DEST_ID(dest); +- +- write_ht_irq_msg(irq, &msg); +-} +- +-static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) +-{ +- struct irq_desc *desc = irq_to_desc(irq); +- struct irq_cfg *cfg; +- unsigned int dest; +- +- dest = set_desc_affinity(desc, mask); +- if (dest == BAD_APICID) +- return; +- +- cfg = desc->chip_data; +- +- target_ht_irq(irq, dest, cfg->vector); +-} +- +-#endif +- +-static struct irq_chip ht_irq_chip = { +- .name = "PCI-HT", +- .mask = mask_ht_irq, +- .unmask = unmask_ht_irq, +- .ack = ack_apic_edge, +-#ifdef CONFIG_SMP +- .set_affinity = set_ht_irq_affinity, +-#endif +- .retrigger = ioapic_retrigger_irq, +-}; +- +-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) +-{ +- struct irq_cfg *cfg; +- int err; +- +- cfg = irq_cfg(irq); +- err = assign_irq_vector(irq, cfg, TARGET_CPUS); +- if (!err) { +- struct ht_irq_msg msg; +- unsigned dest; +- +- dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); +- +- msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); +- +- msg.address_lo = +- HT_IRQ_LOW_BASE | +- HT_IRQ_LOW_DEST_ID(dest) | +- HT_IRQ_LOW_VECTOR(cfg->vector) | +- ((INT_DEST_MODE == 0) ? +- HT_IRQ_LOW_DM_PHYSICAL : +- HT_IRQ_LOW_DM_LOGICAL) | +- HT_IRQ_LOW_RQEOI_EDGE | +- ((INT_DELIVERY_MODE != dest_LowestPrio) ? +- HT_IRQ_LOW_MT_FIXED : +- HT_IRQ_LOW_MT_ARBITRATED) | +- HT_IRQ_LOW_IRQ_MASKED; +- +- write_ht_irq_msg(irq, &msg); +- +- set_irq_chip_and_handler_name(irq, &ht_irq_chip, +- handle_edge_irq, "edge"); +- +- dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq); +- } +- return err; +-} +-#endif /* CONFIG_HT_IRQ */ +- +-#ifdef CONFIG_X86_64 +-/* +- * Re-target the irq to the specified CPU and enable the specified MMR located +- * on the specified blade to allow the sending of MSIs to the specified CPU. +- */ +-int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, +- unsigned long mmr_offset) +-{ +- const struct cpumask *eligible_cpu = cpumask_of(cpu); +- struct irq_cfg *cfg; +- int mmr_pnode; +- unsigned long mmr_value; +- struct uv_IO_APIC_route_entry *entry; +- unsigned long flags; +- int err; +- +- cfg = irq_cfg(irq); +- +- err = assign_irq_vector(irq, cfg, eligible_cpu); +- if (err != 0) +- return err; +- +- spin_lock_irqsave(&vector_lock, flags); +- set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, +- irq_name); +- spin_unlock_irqrestore(&vector_lock, flags); +- +- mmr_value = 0; +- entry = (struct uv_IO_APIC_route_entry *)&mmr_value; +- BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); +- +- entry->vector = cfg->vector; +- entry->delivery_mode = INT_DELIVERY_MODE; +- entry->dest_mode = INT_DEST_MODE; +- entry->polarity = 0; +- entry->trigger = 0; +- entry->mask = 0; +- entry->dest = cpu_mask_to_apicid(eligible_cpu); +- +- mmr_pnode = uv_blade_to_pnode(mmr_blade); +- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +- +- return irq; +-} +- +-/* +- * Disable the specified MMR located on the specified blade so that MSIs are +- * longer allowed to be sent. 
+- */ +-void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) +-{ +- unsigned long mmr_value; +- struct uv_IO_APIC_route_entry *entry; +- int mmr_pnode; +- +- mmr_value = 0; +- entry = (struct uv_IO_APIC_route_entry *)&mmr_value; +- BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); +- +- entry->mask = 1; +- +- mmr_pnode = uv_blade_to_pnode(mmr_blade); +- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); +-} +-#endif /* CONFIG_X86_64 */ +- +-int __init io_apic_get_redir_entries (int ioapic) +-{ +- union IO_APIC_reg_01 reg_01; +- unsigned long flags; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_01.raw = io_apic_read(ioapic, 1); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- return reg_01.bits.entries; +-} +- +-void __init probe_nr_irqs_gsi(void) +-{ +- int nr = 0; +- +- nr = acpi_probe_gsi(); +- if (nr > nr_irqs_gsi) { +- nr_irqs_gsi = nr; +- } else { +- /* for acpi=off or acpi is not compiled in */ +- int idx; +- +- nr = 0; +- for (idx = 0; idx < nr_ioapics; idx++) +- nr += io_apic_get_redir_entries(idx) + 1; +- +- if (nr > nr_irqs_gsi) +- nr_irqs_gsi = nr; +- } +- +- printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); +-} +- +-/* -------------------------------------------------------------------------- +- ACPI-based IOAPIC Configuration +- -------------------------------------------------------------------------- */ +- +-#ifdef CONFIG_ACPI +- +-#ifdef CONFIG_X86_32 +-int __init io_apic_get_unique_id(int ioapic, int apic_id) +-{ +- union IO_APIC_reg_00 reg_00; +- static physid_mask_t apic_id_map = PHYSID_MASK_NONE; +- physid_mask_t tmp; +- unsigned long flags; +- int i = 0; +- +- /* +- * The P4 platform supports up to 256 APIC IDs on two separate APIC +- * buses (one for LAPICs, one for IOAPICs), where predecessors only +- * supports up to 16 on one shared APIC bus. +- * +- * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full +- * advantage of new APIC bus architecture. +- */ +- +- if (physids_empty(apic_id_map)) +- apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); +- +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_00.raw = io_apic_read(ioapic, 0); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- if (apic_id >= get_physical_broadcast()) { +- printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " +- "%d\n", ioapic, apic_id, reg_00.bits.ID); +- apic_id = reg_00.bits.ID; +- } +- +- /* +- * Every APIC in a system must have a unique ID or we get lots of nice +- * 'stuck on smp_invalidate_needed IPI wait' messages. 
+- */ +- if (check_apicid_used(apic_id_map, apic_id)) { +- +- for (i = 0; i < get_physical_broadcast(); i++) { +- if (!check_apicid_used(apic_id_map, i)) +- break; +- } +- +- if (i == get_physical_broadcast()) +- panic("Max apic_id exceeded!\n"); +- +- printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, " +- "trying %d\n", ioapic, apic_id, i); +- +- apic_id = i; +- } +- +- tmp = apicid_to_cpu_present(apic_id); +- physids_or(apic_id_map, apic_id_map, tmp); +- +- if (reg_00.bits.ID != apic_id) { +- reg_00.bits.ID = apic_id; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- io_apic_write(ioapic, 0, reg_00.raw); +- reg_00.raw = io_apic_read(ioapic, 0); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- /* Sanity check */ +- if (reg_00.bits.ID != apic_id) { +- printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic); +- return -1; +- } +- } +- +- apic_printk(APIC_VERBOSE, KERN_INFO +- "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); +- +- return apic_id; +-} +- +-int __init io_apic_get_version(int ioapic) +-{ +- union IO_APIC_reg_01 reg_01; +- unsigned long flags; +- +- spin_lock_irqsave(&ioapic_lock, flags); +- reg_01.raw = io_apic_read(ioapic, 1); +- spin_unlock_irqrestore(&ioapic_lock, flags); +- +- return reg_01.bits.version; +-} +-#endif +- +-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) +-{ +- struct irq_desc *desc; +- struct irq_cfg *cfg; +- int cpu = boot_cpu_id; +- +- if (!IO_APIC_IRQ(irq)) { +- apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", +- ioapic); +- return -EINVAL; +- } +- +- desc = irq_to_desc_alloc_cpu(irq, cpu); +- if (!desc) { +- printk(KERN_INFO "can not get irq_desc %d\n", irq); +- return 0; +- } +- +- /* +- * IRQs < 16 are already in the irq_2_pin[] map +- */ +- if (irq >= NR_IRQS_LEGACY) { +- cfg = desc->chip_data; +- add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); +- } +- +- setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); +- +- return 0; +-} +- +- +-int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) +-{ +- int i; +- +- if (skip_ioapic_setup) +- return -1; +- +- for (i = 0; i < mp_irq_entries; i++) +- if (mp_irqs[i].mp_irqtype == mp_INT && +- mp_irqs[i].mp_srcbusirq == bus_irq) +- break; +- if (i >= mp_irq_entries) +- return -1; +- +- *trigger = irq_trigger(i); +- *polarity = irq_polarity(i); +- return 0; +-} +- +-#endif /* CONFIG_ACPI */ +- +-/* +- * This function currently is only a helper for the i386 smp boot process where +- * we need to reprogram the ioredtbls to cater for the cpus which have come online +- * so mask in all cases should simply be TARGET_CPUS +- */ +-#ifdef CONFIG_SMP +-void __init setup_ioapic_dest(void) +-{ +- int pin, ioapic, irq, irq_entry; +- struct irq_desc *desc; +- struct irq_cfg *cfg; +- const struct cpumask *mask; +- +- if (skip_ioapic_setup == 1) +- return; +- +- for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { +- for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { +- irq_entry = find_irq_entry(ioapic, pin, mp_INT); +- if (irq_entry == -1) +- continue; +- irq = pin_2_irq(irq_entry, ioapic, pin); +- +- /* setup_IO_APIC_irqs could fail to get vector for some device +- * when you have too many devices, because at that time only boot +- * cpu is online. 
+- */ +- desc = irq_to_desc(irq); +- cfg = desc->chip_data; +- if (!cfg->vector) { +- setup_IO_APIC_irq(ioapic, pin, irq, desc, +- irq_trigger(irq_entry), +- irq_polarity(irq_entry)); +- continue; +- +- } +- +- /* +- * Honour affinities which have been set in early boot +- */ +- if (desc->status & +- (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) +- mask = &desc->affinity; +- else +- mask = TARGET_CPUS; +- +-#ifdef CONFIG_INTR_REMAP +- if (intr_remapping_enabled) +- set_ir_ioapic_affinity_irq_desc(desc, mask); +- else +-#endif +- set_ioapic_affinity_irq_desc(desc, mask); +- } +- +- } +-} +-#endif +- +-#define IOAPIC_RESOURCE_NAME_SIZE 11 +- +-static struct resource *ioapic_resources; +- +-static struct resource * __init ioapic_setup_resources(void) +-{ +- unsigned long n; +- struct resource *res; +- char *mem; +- int i; +- +- if (nr_ioapics <= 0) +- return NULL; +- +- n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource); +- n *= nr_ioapics; +- +- mem = alloc_bootmem(n); +- res = (void *)mem; +- +- if (mem != NULL) { +- mem += sizeof(struct resource) * nr_ioapics; +- +- for (i = 0; i < nr_ioapics; i++) { +- res[i].name = mem; +- res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; +- sprintf(mem, "IOAPIC %u", i); +- mem += IOAPIC_RESOURCE_NAME_SIZE; +- } +- } +- +- ioapic_resources = res; +- +- return res; +-} +- +-void __init ioapic_init_mappings(void) +-{ +- unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; +- struct resource *ioapic_res; +- int i; +- +- ioapic_res = ioapic_setup_resources(); +- for (i = 0; i < nr_ioapics; i++) { +- if (smp_found_config) { +- ioapic_phys = mp_ioapics[i].mp_apicaddr; +-#ifdef CONFIG_X86_32 +- if (!ioapic_phys) { +- printk(KERN_ERR +- "WARNING: bogus zero IO-APIC " +- "address found in MPTABLE, " +- "disabling IO/APIC support!\n"); +- smp_found_config = 0; +- skip_ioapic_setup = 1; +- goto fake_ioapic_page; +- } +-#endif +- } else { +-#ifdef CONFIG_X86_32 +-fake_ioapic_page: +-#endif +- ioapic_phys = (unsigned long) +- alloc_bootmem_pages(PAGE_SIZE); +- ioapic_phys = __pa(ioapic_phys); +- } +- set_fixmap_nocache(idx, ioapic_phys); +- apic_printk(APIC_VERBOSE, +- "mapped IOAPIC to %08lx (%08lx)\n", +- __fix_to_virt(idx), ioapic_phys); +- idx++; +- +- if (ioapic_res != NULL) { +- ioapic_res->start = ioapic_phys; +- ioapic_res->end = ioapic_phys + (4 * 1024) - 1; +- ioapic_res++; +- } +- } +-} +- +-static int __init ioapic_insert_resources(void) +-{ +- int i; +- struct resource *r = ioapic_resources; +- +- if (!r) { +- printk(KERN_ERR +- "IO APIC resources could be not be allocated.\n"); +- return -1; +- } +- +- for (i = 0; i < nr_ioapics; i++) { +- insert_resource(&iomem_resource, r); +- r++; +- } +- +- return 0; +-} +- +-/* Insert the IO APIC resources after PCI initialization has occured to handle +- * IO APICS that are mapped in on a BAR in PCI space. 
*/ +-late_initcall(ioapic_insert_resources); +Index: linux-2.6-tip/arch/x86/kernel/io_delay.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/io_delay.c ++++ linux-2.6-tip/arch/x86/kernel/io_delay.c +@@ -7,10 +7,10 @@ + */ + #include + #include +-#include + #include ++#include + #include +-#include ++#include + + int io_delay_type __read_mostly = CONFIG_DEFAULT_IO_DELAY_TYPE; + +@@ -47,8 +47,7 @@ EXPORT_SYMBOL(native_io_delay); + static int __init dmi_io_delay_0xed_port(const struct dmi_system_id *id) + { + if (io_delay_type == CONFIG_IO_DELAY_TYPE_0X80) { +- printk(KERN_NOTICE "%s: using 0xed I/O delay port\n", +- id->ident); ++ pr_notice("%s: using 0xed I/O delay port\n", id->ident); + io_delay_type = CONFIG_IO_DELAY_TYPE_0XED; + } + +@@ -64,40 +63,40 @@ static struct dmi_system_id __initdata i + .callback = dmi_io_delay_0xed_port, + .ident = "Compaq Presario V6000", + .matches = { +- DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), +- DMI_MATCH(DMI_BOARD_NAME, "30B7") ++ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), ++ DMI_MATCH(DMI_BOARD_NAME, "30B7") + } + }, + { + .callback = dmi_io_delay_0xed_port, + .ident = "HP Pavilion dv9000z", + .matches = { +- DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), +- DMI_MATCH(DMI_BOARD_NAME, "30B9") ++ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), ++ DMI_MATCH(DMI_BOARD_NAME, "30B9") + } + }, + { + .callback = dmi_io_delay_0xed_port, + .ident = "HP Pavilion dv6000", + .matches = { +- DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), +- DMI_MATCH(DMI_BOARD_NAME, "30B8") ++ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), ++ DMI_MATCH(DMI_BOARD_NAME, "30B8") + } + }, + { + .callback = dmi_io_delay_0xed_port, + .ident = "HP Pavilion tx1000", + .matches = { +- DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), +- DMI_MATCH(DMI_BOARD_NAME, "30BF") ++ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), ++ DMI_MATCH(DMI_BOARD_NAME, "30BF") + } + }, + { + .callback = dmi_io_delay_0xed_port, + .ident = "Presario F700", + .matches = { +- DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), +- DMI_MATCH(DMI_BOARD_NAME, "30D3") ++ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), ++ DMI_MATCH(DMI_BOARD_NAME, "30D3") + } + }, + { } +Index: linux-2.6-tip/arch/x86/kernel/ioport.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/ioport.c ++++ linux-2.6-tip/arch/x86/kernel/ioport.c +@@ -85,19 +85,8 @@ asmlinkage long sys_ioperm(unsigned long + + t->io_bitmap_max = bytes; + +-#ifdef CONFIG_X86_32 +- /* +- * Sets the lazy trigger so that the next I/O operation will +- * reload the correct bitmap. +- * Reset the owner so that a process switch will not set +- * tss->io_bitmap_base to IO_BITMAP_OFFSET. 
+- */ +- tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; +- tss->io_bitmap_owner = NULL; +-#else + /* Update the TSS: */ + memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); +-#endif + + put_cpu(); + +@@ -131,9 +120,8 @@ static int do_iopl(unsigned int level, s + } + + #ifdef CONFIG_X86_32 +-asmlinkage long sys_iopl(unsigned long regsp) ++long sys_iopl(struct pt_regs *regs) + { +- struct pt_regs *regs = (struct pt_regs *)®sp; + unsigned int level = regs->bx; + struct thread_struct *t = ¤t->thread; + int rc; +Index: linux-2.6-tip/arch/x86/kernel/ipi.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/ipi.c ++++ /dev/null +@@ -1,190 +0,0 @@ +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +- +-#ifdef CONFIG_X86_32 +-#include +-#include +- +-/* +- * the following functions deal with sending IPIs between CPUs. +- * +- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too. +- */ +- +-static inline int __prepare_ICR(unsigned int shortcut, int vector) +-{ +- unsigned int icr = shortcut | APIC_DEST_LOGICAL; +- +- switch (vector) { +- default: +- icr |= APIC_DM_FIXED | vector; +- break; +- case NMI_VECTOR: +- icr |= APIC_DM_NMI; +- break; +- } +- return icr; +-} +- +-static inline int __prepare_ICR2(unsigned int mask) +-{ +- return SET_APIC_DEST_FIELD(mask); +-} +- +-void __send_IPI_shortcut(unsigned int shortcut, int vector) +-{ +- /* +- * Subtle. In the case of the 'never do double writes' workaround +- * we have to lock out interrupts to be safe. As we don't care +- * of the value read we use an atomic rmw access to avoid costly +- * cli/sti. Otherwise we use an even cheaper single atomic write +- * to the APIC. +- */ +- unsigned int cfg; +- +- /* +- * Wait for idle. +- */ +- apic_wait_icr_idle(); +- +- /* +- * No need to touch the target chip field +- */ +- cfg = __prepare_ICR(shortcut, vector); +- +- /* +- * Send the IPI. The write to APIC_ICR fires this off. +- */ +- apic_write(APIC_ICR, cfg); +-} +- +-void send_IPI_self(int vector) +-{ +- __send_IPI_shortcut(APIC_DEST_SELF, vector); +-} +- +-/* +- * This is used to send an IPI with no shorthand notation (the destination is +- * specified in bits 56 to 63 of the ICR). +- */ +-static inline void __send_IPI_dest_field(unsigned long mask, int vector) +-{ +- unsigned long cfg; +- +- /* +- * Wait for idle. +- */ +- if (unlikely(vector == NMI_VECTOR)) +- safe_apic_wait_icr_idle(); +- else +- apic_wait_icr_idle(); +- +- /* +- * prepare target chip field +- */ +- cfg = __prepare_ICR2(mask); +- apic_write(APIC_ICR2, cfg); +- +- /* +- * program the ICR +- */ +- cfg = __prepare_ICR(0, vector); +- +- /* +- * Send the IPI. The write to APIC_ICR fires this off. +- */ +- apic_write(APIC_ICR, cfg); +-} +- +-/* +- * This is only used on smaller machines. +- */ +-void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) +-{ +- unsigned long mask = cpumask_bits(cpumask)[0]; +- unsigned long flags; +- +- local_irq_save(flags); +- WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); +- __send_IPI_dest_field(mask, vector); +- local_irq_restore(flags); +-} +- +-void send_IPI_mask_sequence(const struct cpumask *mask, int vector) +-{ +- unsigned long flags; +- unsigned int query_cpu; +- +- /* +- * Hack. The clustered APIC addressing mode doesn't allow us to send +- * to an arbitrary mask, so I do a unicasts to each CPU instead. 
This +- * should be modified to do 1 message per cluster ID - mbligh +- */ +- +- local_irq_save(flags); +- for_each_cpu(query_cpu, mask) +- __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); +- local_irq_restore(flags); +-} +- +-void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) +-{ +- unsigned long flags; +- unsigned int query_cpu; +- unsigned int this_cpu = smp_processor_id(); +- +- /* See Hack comment above */ +- +- local_irq_save(flags); +- for_each_cpu(query_cpu, mask) +- if (query_cpu != this_cpu) +- __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), +- vector); +- local_irq_restore(flags); +-} +- +-/* must come after the send_IPI functions above for inlining */ +-static int convert_apicid_to_cpu(int apic_id) +-{ +- int i; +- +- for_each_possible_cpu(i) { +- if (per_cpu(x86_cpu_to_apicid, i) == apic_id) +- return i; +- } +- return -1; +-} +- +-int safe_smp_processor_id(void) +-{ +- int apicid, cpuid; +- +- if (!boot_cpu_has(X86_FEATURE_APIC)) +- return 0; +- +- apicid = hard_smp_processor_id(); +- if (apicid == BAD_APICID) +- return 0; +- +- cpuid = convert_apicid_to_cpu(apicid); +- +- return cpuid >= 0 ? cpuid : 0; +-} +-#endif +Index: linux-2.6-tip/arch/x86/kernel/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/irq.c ++++ linux-2.6-tip/arch/x86/kernel/irq.c +@@ -6,13 +6,18 @@ + #include + #include + #include ++#include + + #include + #include + #include ++#include + + atomic_t irq_err_count; + ++/* Function pointer for generic interrupt vector handling */ ++void (*generic_interrupt_extension)(void) = NULL; ++ + /* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. +@@ -36,63 +41,68 @@ void ack_bad_irq(unsigned int irq) + #endif + } + +-#ifdef CONFIG_X86_32 +-# define irq_stats(x) (&per_cpu(irq_stat, x)) +-#else +-# define irq_stats(x) cpu_pda(x) +-#endif ++#define irq_stats(x) (&per_cpu(irq_stat, x)) + /* + * /proc/interrupts printing: + */ +-static int show_other_interrupts(struct seq_file *p) ++static int show_other_interrupts(struct seq_file *p, int prec) + { + int j; + +- seq_printf(p, "NMI: "); ++ seq_printf(p, "%*s: ", prec, "NMI"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->__nmi_count); + seq_printf(p, " Non-maskable interrupts\n"); + #ifdef CONFIG_X86_LOCAL_APIC +- seq_printf(p, "LOC: "); ++ seq_printf(p, "%*s: ", prec, "LOC"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs); + seq_printf(p, " Local timer interrupts\n"); ++ ++ seq_printf(p, "%*s: ", prec, "SPU"); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); ++ seq_printf(p, " Spurious interrupts\n"); ++ seq_printf(p, "CNT: "); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); ++ seq_printf(p, " Performance counter interrupts\n"); + #endif ++ if (generic_interrupt_extension) { ++ seq_printf(p, "PLT: "); ++ for_each_online_cpu(j) ++ seq_printf(p, "%10u ", irq_stats(j)->generic_irqs); ++ seq_printf(p, " Platform interrupts\n"); ++ } + #ifdef CONFIG_SMP +- seq_printf(p, "RES: "); ++ seq_printf(p, "%*s: ", prec, "RES"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count); + seq_printf(p, " Rescheduling interrupts\n"); +- seq_printf(p, "CAL: "); ++ seq_printf(p, "%*s: ", prec, "CAL"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_call_count); + seq_printf(p, " Function call 
interrupts\n"); +- seq_printf(p, "TLB: "); ++ seq_printf(p, "%*s: ", prec, "TLB"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); + seq_printf(p, " TLB shootdowns\n"); + #endif + #ifdef CONFIG_X86_MCE +- seq_printf(p, "TRM: "); ++ seq_printf(p, "%*s: ", prec, "TRM"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); + seq_printf(p, " Thermal event interrupts\n"); + # ifdef CONFIG_X86_64 +- seq_printf(p, "THR: "); ++ seq_printf(p, "%*s: ", prec, "THR"); + for_each_online_cpu(j) + seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); + seq_printf(p, " Threshold APIC interrupts\n"); + # endif + #endif +-#ifdef CONFIG_X86_LOCAL_APIC +- seq_printf(p, "SPU: "); +- for_each_online_cpu(j) +- seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); +- seq_printf(p, " Spurious interrupts\n"); +-#endif +- seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); ++ seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); + #if defined(CONFIG_X86_IO_APIC) +- seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count)); ++ seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); + #endif + return 0; + } +@@ -100,19 +110,22 @@ static int show_other_interrupts(struct + int show_interrupts(struct seq_file *p, void *v) + { + unsigned long flags, any_count = 0; +- int i = *(loff_t *) v, j; ++ int i = *(loff_t *) v, j, prec; + struct irqaction *action; + struct irq_desc *desc; + + if (i > nr_irqs) + return 0; + ++ for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec) ++ j *= 10; ++ + if (i == nr_irqs) +- return show_other_interrupts(p); ++ return show_other_interrupts(p, prec); + + /* print header */ + if (i == 0) { +- seq_printf(p, " "); ++ seq_printf(p, "%*s", prec + 8, ""); + for_each_online_cpu(j) + seq_printf(p, "CPU%-8d", j); + seq_putc(p, '\n'); +@@ -123,23 +136,15 @@ int show_interrupts(struct seq_file *p, + return 0; + + spin_lock_irqsave(&desc->lock, flags); +-#ifndef CONFIG_SMP +- any_count = kstat_irqs(i); +-#else + for_each_online_cpu(j) + any_count |= kstat_irqs_cpu(i, j); +-#endif + action = desc->action; + if (!action && !any_count) + goto out; + +- seq_printf(p, "%3d: ", i); +-#ifndef CONFIG_SMP +- seq_printf(p, "%10u ", kstat_irqs(i)); +-#else ++ seq_printf(p, "%*d: ", prec, i); + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); +-#endif + seq_printf(p, " %8s", desc->chip->name); + seq_printf(p, "-%-8s", desc->name); + +@@ -164,7 +169,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu) + + #ifdef CONFIG_X86_LOCAL_APIC + sum += irq_stats(cpu)->apic_timer_irqs; ++ sum += irq_stats(cpu)->irq_spurious_count; ++ sum += irq_stats(cpu)->apic_perf_irqs; + #endif ++ if (generic_interrupt_extension) ++ sum += irq_stats(cpu)->generic_irqs; + #ifdef CONFIG_SMP + sum += irq_stats(cpu)->irq_resched_count; + sum += irq_stats(cpu)->irq_call_count; +@@ -176,9 +185,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu) + sum += irq_stats(cpu)->irq_threshold_count; + #endif + #endif +-#ifdef CONFIG_X86_LOCAL_APIC +- sum += irq_stats(cpu)->irq_spurious_count; +-#endif + return sum; + } + +@@ -192,4 +198,63 @@ u64 arch_irq_stat(void) + return sum; + } + ++ ++/* ++ * do_IRQ handles all normal device IRQ's (the special ++ * SMP cross-CPU interrupts have their own specific ++ * handlers). 
++ */ ++unsigned int __irq_entry do_IRQ(struct pt_regs *regs) ++{ ++ struct pt_regs *old_regs = set_irq_regs(regs); ++ ++ /* high bit used in ret_from_ code */ ++ unsigned vector = ~regs->orig_ax; ++ unsigned irq; ++ ++ exit_idle(); ++ irq_enter(); ++ ++ irq = __get_cpu_var(vector_irq)[vector]; ++ ++ if (!handle_irq(irq, regs)) { ++#ifdef CONFIG_X86_64 ++ if (!disable_apic) ++ ack_APIC_irq(); ++#endif ++ ++ if (printk_ratelimit()) ++ printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n", ++ __func__, smp_processor_id(), vector, irq); ++ } ++ ++ irq_exit(); ++ ++ set_irq_regs(old_regs); ++ return 1; ++} ++ ++/* ++ * Handler for GENERIC_INTERRUPT_VECTOR. ++ */ ++void smp_generic_interrupt(struct pt_regs *regs) ++{ ++ struct pt_regs *old_regs = set_irq_regs(regs); ++ ++ ack_APIC_irq(); ++ ++ exit_idle(); ++ ++ irq_enter(); ++ ++ inc_irq_stat(generic_irqs); ++ ++ if (generic_interrupt_extension) ++ generic_interrupt_extension(); ++ ++ irq_exit(); ++ ++ set_irq_regs(old_regs); ++} ++ + EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); +Index: linux-2.6-tip/arch/x86/kernel/irq_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/irq_32.c ++++ linux-2.6-tip/arch/x86/kernel/irq_32.c +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + #include + +@@ -55,13 +56,13 @@ static inline void print_stack_overflow( + union irq_ctx { + struct thread_info tinfo; + u32 stack[THREAD_SIZE/sizeof(u32)]; +-}; ++} __attribute__((aligned(PAGE_SIZE))); + +-static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; +-static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; ++static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); ++static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); + +-static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; +-static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; ++static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, hardirq_stack); ++static DEFINE_PER_CPU_PAGE_ALIGNED(union irq_ctx, softirq_stack); + + static void call_on_stack(void *func, void *stack) + { +@@ -81,7 +82,7 @@ execute_on_irq_stack(int overflow, struc + u32 *isp, arg1, arg2; + + curctx = (union irq_ctx *) current_thread_info(); +- irqctx = hardirq_ctx[smp_processor_id()]; ++ irqctx = __get_cpu_var(hardirq_ctx); + + /* + * this is where we switch to the IRQ stack. 
However, if we are +@@ -125,34 +126,34 @@ void __cpuinit irq_ctx_init(int cpu) + { + union irq_ctx *irqctx; + +- if (hardirq_ctx[cpu]) ++ if (per_cpu(hardirq_ctx, cpu)) + return; + +- irqctx = (union irq_ctx*) &hardirq_stack[cpu*THREAD_SIZE]; ++ irqctx = &per_cpu(hardirq_stack, cpu); + irqctx->tinfo.task = NULL; + irqctx->tinfo.exec_domain = NULL; + irqctx->tinfo.cpu = cpu; + irqctx->tinfo.preempt_count = HARDIRQ_OFFSET; + irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); + +- hardirq_ctx[cpu] = irqctx; ++ per_cpu(hardirq_ctx, cpu) = irqctx; + +- irqctx = (union irq_ctx *) &softirq_stack[cpu*THREAD_SIZE]; ++ irqctx = &per_cpu(softirq_stack, cpu); + irqctx->tinfo.task = NULL; + irqctx->tinfo.exec_domain = NULL; + irqctx->tinfo.cpu = cpu; + irqctx->tinfo.preempt_count = 0; + irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); + +- softirq_ctx[cpu] = irqctx; ++ per_cpu(softirq_ctx, cpu) = irqctx; + + printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", +- cpu, hardirq_ctx[cpu], softirq_ctx[cpu]); ++ cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); + } + + void irq_ctx_exit(int cpu) + { +- hardirq_ctx[cpu] = NULL; ++ per_cpu(hardirq_ctx, cpu) = NULL; + } + + asmlinkage void do_softirq(void) +@@ -169,7 +170,7 @@ asmlinkage void do_softirq(void) + + if (local_softirq_pending()) { + curctx = current_thread_info(); +- irqctx = softirq_ctx[smp_processor_id()]; ++ irqctx = __get_cpu_var(softirq_ctx); + irqctx->tinfo.task = curctx->task; + irqctx->tinfo.previous_esp = current_stack_pointer; + +@@ -191,33 +192,16 @@ static inline int + execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } + #endif + +-/* +- * do_IRQ handles all normal device IRQ's (the special +- * SMP cross-CPU interrupts have their own specific +- * handlers). +- */ +-unsigned int do_IRQ(struct pt_regs *regs) ++bool handle_irq(unsigned irq, struct pt_regs *regs) + { +- struct pt_regs *old_regs; +- /* high bit used in ret_from_ code */ +- int overflow; +- unsigned vector = ~regs->orig_ax; + struct irq_desc *desc; +- unsigned irq; +- +- +- old_regs = set_irq_regs(regs); +- irq_enter(); +- irq = __get_cpu_var(vector_irq)[vector]; ++ int overflow; + + overflow = check_stack_overflow(); + + desc = irq_to_desc(irq); +- if (unlikely(!desc)) { +- printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n", +- __func__, irq, vector, smp_processor_id()); +- BUG(); +- } ++ if (unlikely(!desc)) ++ return false; + + if (!execute_on_irq_stack(overflow, desc, irq)) { + if (unlikely(overflow)) +@@ -225,13 +209,10 @@ unsigned int do_IRQ(struct pt_regs *regs + desc->handle_irq(irq, desc); + } + +- irq_exit(); +- set_irq_regs(old_regs); +- return 1; ++ return true; + } + + #ifdef CONFIG_HOTPLUG_CPU +-#include + + /* A cpu has been removed from cpu_online_mask. Reset irq affinities. 
*/ + void fixup_irqs(void) +@@ -248,7 +229,7 @@ void fixup_irqs(void) + if (irq == 2) + continue; + +- affinity = &desc->affinity; ++ affinity = desc->affinity; + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + printk("Breaking affinity for irq %i\n", irq); + affinity = cpu_all_mask; +Index: linux-2.6-tip/arch/x86/kernel/irq_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/irq_64.c ++++ linux-2.6-tip/arch/x86/kernel/irq_64.c +@@ -18,6 +18,13 @@ + #include + #include + #include ++#include ++ ++DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); ++EXPORT_PER_CPU_SYMBOL(irq_stat); ++ ++DEFINE_PER_CPU(struct pt_regs *, irq_regs); ++EXPORT_PER_CPU_SYMBOL(irq_regs); + + /* + * Probabilistic stack overflow check: +@@ -41,42 +48,18 @@ static inline void stack_overflow_check( + #endif + } + +-/* +- * do_IRQ handles all normal device IRQ's (the special +- * SMP cross-CPU interrupts have their own specific +- * handlers). +- */ +-asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) ++bool handle_irq(unsigned irq, struct pt_regs *regs) + { +- struct pt_regs *old_regs = set_irq_regs(regs); + struct irq_desc *desc; + +- /* high bit used in ret_from_ code */ +- unsigned vector = ~regs->orig_ax; +- unsigned irq; +- +- exit_idle(); +- irq_enter(); +- irq = __get_cpu_var(vector_irq)[vector]; +- + stack_overflow_check(regs); + + desc = irq_to_desc(irq); +- if (likely(desc)) +- generic_handle_irq_desc(irq, desc); +- else { +- if (!disable_apic) +- ack_APIC_irq(); +- +- if (printk_ratelimit()) +- printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n", +- __func__, smp_processor_id(), vector); +- } +- +- irq_exit(); ++ if (unlikely(!desc)) ++ return false; + +- set_irq_regs(old_regs); +- return 1; ++ generic_handle_irq_desc(irq, desc); ++ return true; + } + + #ifdef CONFIG_HOTPLUG_CPU +@@ -100,7 +83,7 @@ void fixup_irqs(void) + /* interrupt's are disabled at this point */ + spin_lock(&desc->lock); + +- affinity = &desc->affinity; ++ affinity = desc->affinity; + if (!irq_has_action(irq) || + cpumask_equal(affinity, cpu_online_mask)) { + spin_unlock(&desc->lock); +Index: linux-2.6-tip/arch/x86/kernel/irqinit_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/irqinit_32.c ++++ linux-2.6-tip/arch/x86/kernel/irqinit_32.c +@@ -18,7 +18,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + +@@ -50,6 +50,7 @@ static irqreturn_t math_error_irq(int cp + */ + static struct irqaction fpu_irq = { + .handler = math_error_irq, ++ .flags = IRQF_NODELAY, + .mask = CPU_MASK_NONE, + .name = "fpu", + }; +@@ -78,6 +79,16 @@ void __init init_ISA_irqs(void) + } + } + ++/* ++ * IRQ2 is cascade interrupt to second interrupt controller ++ */ ++static struct irqaction irq2 = { ++ .handler = no_action, ++ .flags = IRQF_NODELAY, ++ .mask = CPU_MASK_NONE, ++ .name = "cascade", ++}; ++ + DEFINE_PER_CPU(vector_irq_t, vector_irq) = { + [0 ... IRQ0_VECTOR - 1] = -1, + [IRQ0_VECTOR] = 0, +@@ -111,28 +122,8 @@ int vector_used_by_percpu_irq(unsigned i + return 0; + } + +-/* Overridden in paravirt.c */ +-void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); +- +-void __init native_init_IRQ(void) ++static void __init smp_intr_init(void) + { +- int i; +- +- /* all the set up before the call gates are initialised */ +- pre_intr_init_hook(); +- +- /* +- * Cover the whole vector space, no vector can escape +- * us. 
(some of these will be overridden and become +- * 'special' SMP interrupts) +- */ +- for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { +- /* SYSCALL_VECTOR was reserved in trap_init. */ +- if (i != SYSCALL_VECTOR) +- set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); +- } +- +- + #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) + /* + * The reschedule interrupt is a CPU-to-CPU reschedule-helper +@@ -140,8 +131,15 @@ void __init native_init_IRQ(void) + */ + alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); + +- /* IPI for invalidation */ +- alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); ++ /* IPIs for invalidation */ ++ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); ++ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); ++ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); ++ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); ++ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); ++ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); ++ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); ++ alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); + + /* IPI for generic function call */ + alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); +@@ -154,25 +152,65 @@ void __init native_init_IRQ(void) + set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); + set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); + #endif ++} ++ ++static void __init apic_intr_init(void) ++{ ++ smp_intr_init(); + + #ifdef CONFIG_X86_LOCAL_APIC + /* self generated IPI for local APIC timer */ + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + ++ /* generic IPI for platform specific use */ ++ alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); ++ + /* IPI vectors for APIC spurious and error interrupts */ + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); +-#endif ++# ifdef CONFIG_PERF_COUNTERS ++ alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); ++# endif + +-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) ++# ifdef CONFIG_X86_MCE_P4THERMAL + /* thermal monitor LVT interrupt */ + alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); ++# endif + #endif ++} ++ ++/* Overridden in paravirt.c */ ++void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); ++ ++void __init native_init_IRQ(void) ++{ ++ int i; ++ ++ /* Execute any quirks before the call gates are initialised: */ ++ x86_quirk_pre_intr_init(); + +- /* setup after call gates are initialised (usually add in +- * the architecture specific gates) ++ apic_intr_init(); ++ ++ /* ++ * Cover the whole vector space, no vector can escape ++ * us. (some of these will be overridden and become ++ * 'special' SMP interrupts) ++ */ ++ for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { ++ int vector = FIRST_EXTERNAL_VECTOR + i; ++ /* SYSCALL_VECTOR was reserved in trap_init. */ ++ if (!test_bit(vector, used_vectors)) ++ set_intr_gate(vector, interrupt[i]); ++ } ++ ++ if (!acpi_ioapic) ++ setup_irq(2, &irq2); ++ ++ /* ++ * Call quirks after call gates are initialised (usually add in ++ * the architecture specific gates): + */ +- intr_init_hook(); ++ x86_quirk_intr_init(); + + /* + * External FPU? 
Set up irq13 if so, for +Index: linux-2.6-tip/arch/x86/kernel/irqinit_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/irqinit_64.c ++++ linux-2.6-tip/arch/x86/kernel/irqinit_64.c +@@ -147,9 +147,17 @@ static void __init apic_intr_init(void) + /* self generated IPI for local APIC timer */ + alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); + ++ /* generic IPI for platform specific use */ ++ alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); ++ + /* IPI vectors for APIC spurious and error interrupts */ + alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); + alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); ++ ++ /* Performance monitoring interrupt: */ ++#ifdef CONFIG_PERF_COUNTERS ++ alloc_intr_gate(LOCAL_PERF_VECTOR, perf_counter_interrupt); ++#endif + } + + void __init native_init_IRQ(void) +@@ -157,6 +165,9 @@ void __init native_init_IRQ(void) + int i; + + init_ISA_irqs(); ++ ++ apic_intr_init(); ++ + /* + * Cover the whole vector space, no vector can escape + * us. (some of these will be overridden and become +@@ -164,12 +175,10 @@ void __init native_init_IRQ(void) + */ + for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { + int vector = FIRST_EXTERNAL_VECTOR + i; +- if (vector != IA32_SYSCALL_VECTOR) ++ if (!test_bit(vector, used_vectors)) + set_intr_gate(vector, interrupt[i]); + } + +- apic_intr_init(); +- + if (!acpi_ioapic) + setup_irq(2, &irq2); + } +Index: linux-2.6-tip/arch/x86/kernel/kdebugfs.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/kdebugfs.c ++++ linux-2.6-tip/arch/x86/kernel/kdebugfs.c +@@ -8,11 +8,11 @@ + */ + #include + #include +-#include ++#include + #include ++#include + #include + #include +-#include + + #include + +@@ -26,9 +26,8 @@ struct setup_data_node { + u32 len; + }; + +-static ssize_t +-setup_data_read(struct file *file, char __user *user_buf, size_t count, +- loff_t *ppos) ++static ssize_t setup_data_read(struct file *file, char __user *user_buf, ++ size_t count, loff_t *ppos) + { + struct setup_data_node *node = file->private_data; + unsigned long remain; +@@ -39,20 +38,21 @@ setup_data_read(struct file *file, char + + if (pos < 0) + return -EINVAL; ++ + if (pos >= node->len) + return 0; + + if (count > node->len - pos) + count = node->len - pos; ++ + pa = node->paddr + sizeof(struct setup_data) + pos; + pg = pfn_to_page((pa + count - 1) >> PAGE_SHIFT); + if (PageHighMem(pg)) { + p = ioremap_cache(pa, count); + if (!p) + return -ENXIO; +- } else { ++ } else + p = __va(pa); +- } + + remain = copy_to_user(user_buf, p, count); + +@@ -70,12 +70,13 @@ setup_data_read(struct file *file, char + static int setup_data_open(struct inode *inode, struct file *file) + { + file->private_data = inode->i_private; ++ + return 0; + } + + static const struct file_operations fops_setup_data = { +- .read = setup_data_read, +- .open = setup_data_open, ++ .read = setup_data_read, ++ .open = setup_data_open, + }; + + static int __init +@@ -84,57 +85,50 @@ create_setup_data_node(struct dentry *pa + { + struct dentry *d, *type, *data; + char buf[16]; +- int error; + + sprintf(buf, "%d", no); + d = debugfs_create_dir(buf, parent); +- if (!d) { +- error = -ENOMEM; +- goto err_return; +- } ++ if (!d) ++ return -ENOMEM; ++ + type = debugfs_create_x32("type", S_IRUGO, d, &node->type); +- if (!type) { +- error = -ENOMEM; ++ if (!type) + goto err_dir; +- } ++ + data = debugfs_create_file("data", S_IRUGO, d, node, 
&fops_setup_data); +- if (!data) { +- error = -ENOMEM; ++ if (!data) + goto err_type; +- } ++ + return 0; + + err_type: + debugfs_remove(type); + err_dir: + debugfs_remove(d); +-err_return: +- return error; ++ return -ENOMEM; + } + + static int __init create_setup_data_nodes(struct dentry *parent) + { + struct setup_data_node *node; + struct setup_data *data; +- int error, no = 0; ++ int error = -ENOMEM; + struct dentry *d; + struct page *pg; + u64 pa_data; ++ int no = 0; + + d = debugfs_create_dir("setup_data", parent); +- if (!d) { +- error = -ENOMEM; +- goto err_return; +- } ++ if (!d) ++ return -ENOMEM; + + pa_data = boot_params.hdr.setup_data; + + while (pa_data) { + node = kmalloc(sizeof(*node), GFP_KERNEL); +- if (!node) { +- error = -ENOMEM; ++ if (!node) + goto err_dir; +- } ++ + pg = pfn_to_page((pa_data+sizeof(*data)-1) >> PAGE_SHIFT); + if (PageHighMem(pg)) { + data = ioremap_cache(pa_data, sizeof(*data)); +@@ -143,9 +137,8 @@ static int __init create_setup_data_node + error = -ENXIO; + goto err_dir; + } +- } else { ++ } else + data = __va(pa_data); +- } + + node->paddr = pa_data; + node->type = data->type; +@@ -159,11 +152,11 @@ static int __init create_setup_data_node + goto err_dir; + no++; + } ++ + return 0; + + err_dir: + debugfs_remove(d); +-err_return: + return error; + } + +@@ -175,28 +168,26 @@ static struct debugfs_blob_wrapper boot_ + static int __init boot_params_kdebugfs_init(void) + { + struct dentry *dbp, *version, *data; +- int error; ++ int error = -ENOMEM; + + dbp = debugfs_create_dir("boot_params", NULL); +- if (!dbp) { +- error = -ENOMEM; +- goto err_return; +- } ++ if (!dbp) ++ return -ENOMEM; ++ + version = debugfs_create_x16("version", S_IRUGO, dbp, + &boot_params.hdr.version); +- if (!version) { +- error = -ENOMEM; ++ if (!version) + goto err_dir; +- } ++ + data = debugfs_create_blob("data", S_IRUGO, dbp, + &boot_params_blob); +- if (!data) { +- error = -ENOMEM; ++ if (!data) + goto err_version; +- } ++ + error = create_setup_data_nodes(dbp); + if (error) + goto err_data; ++ + return 0; + + err_data: +@@ -205,10 +196,9 @@ err_version: + debugfs_remove(version); + err_dir: + debugfs_remove(dbp); +-err_return: + return error; + } +-#endif ++#endif /* CONFIG_DEBUG_BOOT_PARAMS */ + + static int __init arch_kdebugfs_init(void) + { +Index: linux-2.6-tip/arch/x86/kernel/kgdb.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/kgdb.c ++++ linux-2.6-tip/arch/x86/kernel/kgdb.c +@@ -46,7 +46,7 @@ + #include + #include + +-#include ++#include + + /* + * Put the error code here just in case the user cares: +@@ -347,7 +347,7 @@ void kgdb_post_primary_code(struct pt_re + */ + void kgdb_roundup_cpus(unsigned long flags) + { +- send_IPI_allbutself(APIC_DM_NMI); ++ apic->send_IPI_allbutself(APIC_DM_NMI); + } + #endif + +Index: linux-2.6-tip/arch/x86/kernel/kprobes.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/kprobes.c ++++ linux-2.6-tip/arch/x86/kernel/kprobes.c +@@ -193,7 +193,7 @@ static int __kprobes can_boost(kprobe_op + kprobe_opcode_t opcode; + kprobe_opcode_t *orig_opcodes = opcodes; + +- if (search_exception_tables(opcodes)) ++ if (search_exception_tables((unsigned long)opcodes)) + return 0; /* Page fault may occur on this address. 
*/ + + retry: +@@ -454,7 +454,7 @@ static void __kprobes setup_singlestep(s + /* Boost up -- we can execute copied instructions directly */ + reset_current_kprobe(); + regs->ip = (unsigned long)p->ainsn.insn; +- preempt_enable_no_resched(); ++ preempt_enable(); + return; + } + #endif +@@ -480,7 +480,7 @@ static int __kprobes reenter_kprobe(stru + arch_disarm_kprobe(p); + regs->ip = (unsigned long)p->addr; + reset_current_kprobe(); +- preempt_enable_no_resched(); ++ preempt_enable(); + break; + #endif + case KPROBE_HIT_ACTIVE: +@@ -576,7 +576,7 @@ static int __kprobes kprobe_handler(stru + } + } /* else: not a kprobe fault; let the kernel handle it */ + +- preempt_enable_no_resched(); ++ preempt_enable(); + return 0; + } + +@@ -638,13 +638,13 @@ static void __used __kprobes kretprobe_t + #else + " pushf\n" + /* +- * Skip cs, ip, orig_ax. ++ * Skip cs, ip, orig_ax and gs. + * trampoline_handler() will plug in these values + */ +- " subl $12, %esp\n" ++ " subl $16, %esp\n" + " pushl %fs\n" +- " pushl %ds\n" + " pushl %es\n" ++ " pushl %ds\n" + " pushl %eax\n" + " pushl %ebp\n" + " pushl %edi\n" +@@ -655,10 +655,10 @@ static void __used __kprobes kretprobe_t + " movl %esp, %eax\n" + " call trampoline_handler\n" + /* Move flags to cs */ +- " movl 52(%esp), %edx\n" +- " movl %edx, 48(%esp)\n" ++ " movl 56(%esp), %edx\n" ++ " movl %edx, 52(%esp)\n" + /* Replace saved flags with true return address. */ +- " movl %eax, 52(%esp)\n" ++ " movl %eax, 56(%esp)\n" + " popl %ebx\n" + " popl %ecx\n" + " popl %edx\n" +@@ -666,8 +666,8 @@ static void __used __kprobes kretprobe_t + " popl %edi\n" + " popl %ebp\n" + " popl %eax\n" +- /* Skip ip, orig_ax, es, ds, fs */ +- " addl $20, %esp\n" ++ /* Skip ds, es, fs, gs, orig_ax and ip */ ++ " addl $24, %esp\n" + " popf\n" + #endif + " ret\n"); +@@ -691,6 +691,7 @@ static __used __kprobes void *trampoline + regs->cs = __KERNEL_CS; + #else + regs->cs = __KERNEL_CS | get_kernel_rpl(); ++ regs->gs = 0; + #endif + regs->ip = trampoline_address; + regs->orig_ax = ~0UL; +@@ -875,7 +876,7 @@ static int __kprobes post_kprobe_handler + } + reset_current_kprobe(); + out: +- preempt_enable_no_resched(); ++ preempt_enable(); + + /* + * if somebody else is singlestepping across a probe point, flags +@@ -909,7 +910,7 @@ int __kprobes kprobe_fault_handler(struc + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); +- preempt_enable_no_resched(); ++ preempt_enable(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: +@@ -1050,7 +1051,7 @@ int __kprobes longjmp_break_handler(stru + memcpy((kprobe_opcode_t *)(kcb->jprobe_saved_sp), + kcb->jprobes_stack, + MIN_STACK_SIZE(kcb->jprobe_saved_sp)); +- preempt_enable_no_resched(); ++ preempt_enable(); + return 1; + } + return 0; +Index: linux-2.6-tip/arch/x86/kernel/kvm.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/kvm.c ++++ linux-2.6-tip/arch/x86/kernel/kvm.c +@@ -138,12 +138,6 @@ static void kvm_set_pte_atomic(pte_t *pt + kvm_mmu_write(ptep, pte_val(pte)); + } + +-static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr, +- pte_t *ptep, pte_t pte) +-{ +- kvm_mmu_write(ptep, pte_val(pte)); +-} +- + static void kvm_pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) + { +@@ -220,7 +214,6 @@ static void paravirt_ops_setup(void) + #if PAGETABLE_LEVELS >= 3 + #ifdef CONFIG_X86_PAE + pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic; +- pv_mmu_ops.set_pte_present = kvm_set_pte_present; + pv_mmu_ops.pte_clear = kvm_pte_clear; + 
pv_mmu_ops.pmd_clear = kvm_pmd_clear; + #endif +Index: linux-2.6-tip/arch/x86/kernel/kvmclock.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/kvmclock.c ++++ linux-2.6-tip/arch/x86/kernel/kvmclock.c +@@ -19,7 +19,6 @@ + #include + #include + #include +-#include + #include + #include + #include +Index: linux-2.6-tip/arch/x86/kernel/machine_kexec_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/machine_kexec_32.c ++++ linux-2.6-tip/arch/x86/kernel/machine_kexec_32.c +@@ -14,12 +14,12 @@ + #include + #include + #include ++#include + + #include + #include + #include + #include +-#include + #include + #include + #include +@@ -63,7 +63,7 @@ static void load_segments(void) + "\tmovl %%eax,%%fs\n" + "\tmovl %%eax,%%gs\n" + "\tmovl %%eax,%%ss\n" +- ::: "eax", "memory"); ++ : : : "eax", "memory"); + #undef STR + #undef __STR + } +@@ -121,7 +121,7 @@ static void machine_kexec_page_table_set + static void machine_kexec_prepare_page_tables(struct kimage *image) + { + void *control_page; +- pmd_t *pmd = 0; ++ pmd_t *pmd = NULL; + + control_page = page_address(image->control_code_page); + #ifdef CONFIG_X86_PAE +@@ -205,7 +205,8 @@ void machine_kexec(struct kimage *image) + + if (image->preserve_context) { + #ifdef CONFIG_X86_IO_APIC +- /* We need to put APICs in legacy mode so that we can ++ /* ++ * We need to put APICs in legacy mode so that we can + * get timer interrupts in second kernel. kexec/kdump + * paths already have calls to disable_IO_APIC() in + * one form or other. kexec jump path also need +@@ -227,7 +228,8 @@ void machine_kexec(struct kimage *image) + page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) + << PAGE_SHIFT); + +- /* The segment registers are funny things, they have both a ++ /* ++ * The segment registers are funny things, they have both a + * visible and an invisible part. Whenever the visible part is + * set to a specific selector, the invisible part is loaded + * with from a table in memory. At no other time is the +@@ -237,11 +239,12 @@ void machine_kexec(struct kimage *image) + * segments, before I zap the gdt with an invalid value. + */ + load_segments(); +- /* The gdt & idt are now invalid. ++ /* ++ * The gdt & idt are now invalid. + * If you want to load them you must set up your own idt & gdt. 
+ */ +- set_gdt(phys_to_virt(0),0); +- set_idt(phys_to_virt(0),0); ++ set_gdt(phys_to_virt(0), 0); ++ set_idt(phys_to_virt(0), 0); + + /* now call it */ + image->start = relocate_kernel_ptr((unsigned long)image->head, +Index: linux-2.6-tip/arch/x86/kernel/machine_kexec_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/machine_kexec_64.c ++++ linux-2.6-tip/arch/x86/kernel/machine_kexec_64.c +@@ -12,20 +12,47 @@ + #include + #include + #include ++#include ++#include + + #include + #include + #include +-#include + +-#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) +-static u64 kexec_pgd[512] PAGE_ALIGNED; +-static u64 kexec_pud0[512] PAGE_ALIGNED; +-static u64 kexec_pmd0[512] PAGE_ALIGNED; +-static u64 kexec_pte0[512] PAGE_ALIGNED; +-static u64 kexec_pud1[512] PAGE_ALIGNED; +-static u64 kexec_pmd1[512] PAGE_ALIGNED; +-static u64 kexec_pte1[512] PAGE_ALIGNED; ++static int init_one_level2_page(struct kimage *image, pgd_t *pgd, ++ unsigned long addr) ++{ ++ pud_t *pud; ++ pmd_t *pmd; ++ struct page *page; ++ int result = -ENOMEM; ++ ++ addr &= PMD_MASK; ++ pgd += pgd_index(addr); ++ if (!pgd_present(*pgd)) { ++ page = kimage_alloc_control_pages(image, 0); ++ if (!page) ++ goto out; ++ pud = (pud_t *)page_address(page); ++ memset(pud, 0, PAGE_SIZE); ++ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); ++ } ++ pud = pud_offset(pgd, addr); ++ if (!pud_present(*pud)) { ++ page = kimage_alloc_control_pages(image, 0); ++ if (!page) ++ goto out; ++ pmd = (pmd_t *)page_address(page); ++ memset(pmd, 0, PAGE_SIZE); ++ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); ++ } ++ pmd = pmd_offset(pud, addr); ++ if (!pmd_present(*pmd)) ++ set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC)); ++ result = 0; ++out: ++ return result; ++} + + static void init_level2_page(pmd_t *level2p, unsigned long addr) + { +@@ -92,9 +119,8 @@ static int init_level4_page(struct kimag + } + level3p = (pud_t *)page_address(page); + result = init_level3_page(image, level3p, addr, last_addr); +- if (result) { ++ if (result) + goto out; +- } + set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE)); + addr += PGDIR_SIZE; + } +@@ -107,12 +133,72 @@ out: + return result; + } + ++static void free_transition_pgtable(struct kimage *image) ++{ ++ free_page((unsigned long)image->arch.pud); ++ free_page((unsigned long)image->arch.pmd); ++ free_page((unsigned long)image->arch.pte); ++} ++ ++static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) ++{ ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ unsigned long vaddr, paddr; ++ int result = -ENOMEM; ++ ++ vaddr = (unsigned long)relocate_kernel; ++ paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE); ++ pgd += pgd_index(vaddr); ++ if (!pgd_present(*pgd)) { ++ pud = (pud_t *)get_zeroed_page(GFP_KERNEL); ++ if (!pud) ++ goto err; ++ image->arch.pud = pud; ++ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE)); ++ } ++ pud = pud_offset(pgd, vaddr); ++ if (!pud_present(*pud)) { ++ pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL); ++ if (!pmd) ++ goto err; ++ image->arch.pmd = pmd; ++ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); ++ } ++ pmd = pmd_offset(pud, vaddr); ++ if (!pmd_present(*pmd)) { ++ pte = (pte_t *)get_zeroed_page(GFP_KERNEL); ++ if (!pte) ++ goto err; ++ image->arch.pte = pte; ++ set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); ++ } ++ pte = pte_offset_kernel(pmd, vaddr); ++ set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); ++ return 0; ++err: ++ free_transition_pgtable(image); ++ 
return result; ++} ++ + + static int init_pgtable(struct kimage *image, unsigned long start_pgtable) + { + pgd_t *level4p; ++ int result; + level4p = (pgd_t *)__va(start_pgtable); +- return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); ++ result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); ++ if (result) ++ return result; ++ /* ++ * image->start may be outside 0 ~ max_pfn, for example when ++ * jump back to original kernel from kexeced kernel ++ */ ++ result = init_one_level2_page(image, level4p, image->start); ++ if (result) ++ return result; ++ return init_transition_pgtable(image, level4p); + } + + static void set_idt(void *newidt, u16 limit) +@@ -174,7 +260,7 @@ int machine_kexec_prepare(struct kimage + + void machine_kexec_cleanup(struct kimage *image) + { +- return; ++ free_transition_pgtable(image); + } + + /* +@@ -185,36 +271,45 @@ void machine_kexec(struct kimage *image) + { + unsigned long page_list[PAGES_NR]; + void *control_page; ++ int save_ftrace_enabled; + +- tracer_disable(); ++#ifdef CONFIG_KEXEC_JUMP ++ if (kexec_image->preserve_context) ++ save_processor_state(); ++#endif ++ ++ save_ftrace_enabled = __ftrace_enabled_save(); + + /* Interrupts aren't acceptable while we reboot */ + local_irq_disable(); + ++ if (image->preserve_context) { ++#ifdef CONFIG_X86_IO_APIC ++ /* ++ * We need to put APICs in legacy mode so that we can ++ * get timer interrupts in second kernel. kexec/kdump ++ * paths already have calls to disable_IO_APIC() in ++ * one form or other. kexec jump path also need ++ * one. ++ */ ++ disable_IO_APIC(); ++#endif ++ } ++ + control_page = page_address(image->control_code_page) + PAGE_SIZE; +- memcpy(control_page, relocate_kernel, PAGE_SIZE); ++ memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); + + page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); +- page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; +- page_list[PA_PGD] = virt_to_phys(&kexec_pgd); +- page_list[VA_PGD] = (unsigned long)kexec_pgd; +- page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0); +- page_list[VA_PUD_0] = (unsigned long)kexec_pud0; +- page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0); +- page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; +- page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0); +- page_list[VA_PTE_0] = (unsigned long)kexec_pte0; +- page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1); +- page_list[VA_PUD_1] = (unsigned long)kexec_pud1; +- page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1); +- page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; +- page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1); +- page_list[VA_PTE_1] = (unsigned long)kexec_pte1; +- ++ page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; + page_list[PA_TABLE_PAGE] = + (unsigned long)__pa(page_address(image->control_code_page)); + +- /* The segment registers are funny things, they have both a ++ if (image->type == KEXEC_TYPE_DEFAULT) ++ page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) ++ << PAGE_SHIFT); ++ ++ /* ++ * The segment registers are funny things, they have both a + * visible and an invisible part. Whenever the visible part is + * set to a specific selector, the invisible part is loaded + * with from a table in memory. At no other time is the +@@ -224,15 +319,25 @@ void machine_kexec(struct kimage *image) + * segments, before I zap the gdt with an invalid value. + */ + load_segments(); +- /* The gdt & idt are now invalid. ++ /* ++ * The gdt & idt are now invalid. + * If you want to load them you must set up your own idt & gdt. 
+ */ +- set_gdt(phys_to_virt(0),0); +- set_idt(phys_to_virt(0),0); ++ set_gdt(phys_to_virt(0), 0); ++ set_idt(phys_to_virt(0), 0); + + /* now call it */ +- relocate_kernel((unsigned long)image->head, (unsigned long)page_list, +- image->start); ++ image->start = relocate_kernel((unsigned long)image->head, ++ (unsigned long)page_list, ++ image->start, ++ image->preserve_context); ++ ++#ifdef CONFIG_KEXEC_JUMP ++ if (kexec_image->preserve_context) ++ restore_processor_state(); ++#endif ++ ++ __ftrace_enabled_restore(save_ftrace_enabled); + } + + void arch_crash_save_vmcoreinfo(void) +Index: linux-2.6-tip/arch/x86/kernel/mca_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/mca_32.c ++++ linux-2.6-tip/arch/x86/kernel/mca_32.c +@@ -51,7 +51,6 @@ + #include + #include + #include +-#include + + static unsigned char which_scsi; + +@@ -474,6 +473,4 @@ void __kprobes mca_handle_nmi(void) + * adapter was responsible for the error. + */ + bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback); +- +- mca_nmi_hook(); +-} /* mca_handle_nmi */ ++} +Index: linux-2.6-tip/arch/x86/kernel/microcode_amd.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/microcode_amd.c ++++ linux-2.6-tip/arch/x86/kernel/microcode_amd.c +@@ -12,31 +12,30 @@ + * + * Licensed under the terms of the GNU General Public + * License version 2. See file COPYING for details. +-*/ +- ++ */ ++#include + #include +-#include +-#include +-#include +-#include +-#include +-#include +-#include + #include ++#include + #include +-#include +-#include ++#include ++#include ++#include ++#include ++#include ++#include + #include ++#include ++#include ++#include + #include +-#include +-#include + #include +-#include +-#include ++#include ++#include + +-#include +-#include + #include ++#include ++#include + + MODULE_DESCRIPTION("AMD Microcode Update Driver"); + MODULE_AUTHOR("Peter Oruba"); +@@ -72,8 +71,8 @@ struct microcode_header_amd { + } __attribute__((packed)); + + struct microcode_amd { +- struct microcode_header_amd hdr; +- unsigned int mpb[0]; ++ struct microcode_header_amd hdr; ++ unsigned int mpb[0]; + }; + + #define UCODE_MAX_SIZE 2048 +@@ -81,7 +80,7 @@ struct microcode_amd { + #define UCODE_CONTAINER_HEADER_SIZE 12 + + /* serialize access to the physical write */ +-static DEFINE_SPINLOCK(microcode_update_lock); ++static DEFINE_RAW_SPINLOCK(microcode_update_lock); + + static struct equiv_cpu_entry *equiv_cpu_table; + +@@ -184,8 +183,8 @@ static int get_ucode_data(void *to, cons + return 0; + } + +-static void *get_next_ucode(const u8 *buf, unsigned int size, +- unsigned int *mc_size) ++static void * ++get_next_ucode(const u8 *buf, unsigned int size, unsigned int *mc_size) + { + unsigned int total_size; + u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; +@@ -223,7 +222,6 @@ static void *get_next_ucode(const u8 *bu + return mc; + } + +- + static int install_equiv_cpu_table(const u8 *buf) + { + u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; +@@ -372,4 +370,3 @@ struct microcode_ops * __init init_amd_m + { + return µcode_amd_ops; + } +- +Index: linux-2.6-tip/arch/x86/kernel/microcode_core.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/microcode_core.c ++++ linux-2.6-tip/arch/x86/kernel/microcode_core.c +@@ -70,67 +70,78 @@ + * Fix sigmatch() macro to handle old CPUs with pf == 0. + * Thanks to Stuart Swales for pointing out this bug. 
+ */ ++#include + #include +-#include +-#include +-#include ++#include ++#include + #include ++#include + #include +-#include +-#include ++#include + #include +-#include +-#include +-#include +-#include ++#include ++#include + #include ++#include ++#include ++#include + #include +-#include +-#include ++#include ++#include + +-#include +-#include +-#include + #include ++#include ++#include + + MODULE_DESCRIPTION("Microcode Update Driver"); + MODULE_AUTHOR("Tigran Aivazian "); + MODULE_LICENSE("GPL"); + +-#define MICROCODE_VERSION "2.00" ++#define MICROCODE_VERSION "2.00" + +-static struct microcode_ops *microcode_ops; ++static struct microcode_ops *microcode_ops; + + /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ + static DEFINE_MUTEX(microcode_mutex); + +-struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; ++struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; + EXPORT_SYMBOL_GPL(ucode_cpu_info); + + #ifdef CONFIG_MICROCODE_OLD_INTERFACE ++struct update_for_cpu { ++ const void __user *buf; ++ size_t size; ++}; ++ ++static long update_for_cpu(void *_ufc) ++{ ++ struct update_for_cpu *ufc = _ufc; ++ int error; ++ ++ error = microcode_ops->request_microcode_user(smp_processor_id(), ++ ufc->buf, ufc->size); ++ if (error < 0) ++ return error; ++ if (!error) ++ microcode_ops->apply_microcode(smp_processor_id()); ++ return error; ++} ++ + static int do_microcode_update(const void __user *buf, size_t size) + { +- cpumask_t old; + int error = 0; + int cpu; +- +- old = current->cpus_allowed; ++ struct update_for_cpu ufc = { .buf = buf, .size = size }; + + for_each_online_cpu(cpu) { + struct ucode_cpu_info *uci = ucode_cpu_info + cpu; + + if (!uci->valid) + continue; +- +- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); +- error = microcode_ops->request_microcode_user(cpu, buf, size); ++ error = work_on_cpu(cpu, update_for_cpu, &ufc); + if (error < 0) +- goto out; +- if (!error) +- microcode_ops->apply_microcode(cpu); ++ break; + } +-out: +- set_cpus_allowed_ptr(current, &old); + return error; + } + +@@ -198,18 +209,33 @@ static void microcode_dev_exit(void) + + MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); + #else +-#define microcode_dev_init() 0 +-#define microcode_dev_exit() do { } while (0) ++#define microcode_dev_init() 0 ++#define microcode_dev_exit() do { } while (0) + #endif + + /* fake device for request_firmware */ +-static struct platform_device *microcode_pdev; ++static struct platform_device *microcode_pdev; ++ ++static long reload_for_cpu(void *unused) ++{ ++ struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id(); ++ int err = 0; ++ ++ mutex_lock(µcode_mutex); ++ if (uci->valid) { ++ err = microcode_ops->request_microcode_fw(smp_processor_id(), ++ µcode_pdev->dev); ++ if (!err) ++ microcode_ops->apply_microcode(smp_processor_id()); ++ } ++ mutex_unlock(µcode_mutex); ++ return err; ++} + + static ssize_t reload_store(struct sys_device *dev, + struct sysdev_attribute *attr, + const char *buf, size_t sz) + { +- struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; + char *end; + unsigned long val = simple_strtoul(buf, &end, 0); + int err = 0; +@@ -218,21 +244,9 @@ static ssize_t reload_store(struct sys_d + if (end == buf) + return -EINVAL; + if (val == 1) { +- cpumask_t old = current->cpus_allowed; +- + get_online_cpus(); +- if (cpu_online(cpu)) { +- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); +- mutex_lock(µcode_mutex); +- if (uci->valid) { +- err = microcode_ops->request_microcode_fw(cpu, +- µcode_pdev->dev); +- if (!err) +- microcode_ops->apply_microcode(cpu); 
+- } +- mutex_unlock(µcode_mutex); +- set_cpus_allowed_ptr(current, &old); +- } ++ if (cpu_online(cpu)) ++ err = work_on_cpu(cpu, reload_for_cpu, NULL); + put_online_cpus(); + } + if (err) +@@ -268,8 +282,8 @@ static struct attribute *mc_default_attr + }; + + static struct attribute_group mc_attr_group = { +- .attrs = mc_default_attrs, +- .name = "microcode", ++ .attrs = mc_default_attrs, ++ .name = "microcode", + }; + + static void __microcode_fini_cpu(int cpu) +@@ -328,9 +342,9 @@ static int microcode_resume_cpu(int cpu) + return 0; + } + +-static void microcode_update_cpu(int cpu) ++static long microcode_update_cpu(void *unused) + { +- struct ucode_cpu_info *uci = ucode_cpu_info + cpu; ++ struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id(); + int err = 0; + + /* +@@ -338,30 +352,27 @@ static void microcode_update_cpu(int cpu + * otherwise just request a firmware: + */ + if (uci->valid) { +- err = microcode_resume_cpu(cpu); +- } else { +- collect_cpu_info(cpu); ++ err = microcode_resume_cpu(smp_processor_id()); ++ } else { ++ collect_cpu_info(smp_processor_id()); + if (uci->valid && system_state == SYSTEM_RUNNING) +- err = microcode_ops->request_microcode_fw(cpu, ++ err = microcode_ops->request_microcode_fw( ++ smp_processor_id(), + µcode_pdev->dev); + } + if (!err) +- microcode_ops->apply_microcode(cpu); ++ microcode_ops->apply_microcode(smp_processor_id()); ++ return err; + } + +-static void microcode_init_cpu(int cpu) ++static int microcode_init_cpu(int cpu) + { +- cpumask_t old = current->cpus_allowed; +- +- set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); +- /* We should bind the task to the CPU */ +- BUG_ON(raw_smp_processor_id() != cpu); +- ++ int err; + mutex_lock(µcode_mutex); +- microcode_update_cpu(cpu); ++ err = work_on_cpu(cpu, microcode_update_cpu, NULL); + mutex_unlock(µcode_mutex); + +- set_cpus_allowed_ptr(current, &old); ++ return err; + } + + static int mc_sysdev_add(struct sys_device *sys_dev) +@@ -379,8 +390,18 @@ static int mc_sysdev_add(struct sys_devi + if (err) + return err; + +- microcode_init_cpu(cpu); +- return 0; ++ err = microcode_init_cpu(cpu); ++#if 0 ++ /* ++ * While it looks correct, it's broken as we remove the sysfs ++ * entry in sysdev_remove below again. The error handling in ++ * this file is completely wreckaged and we have multiple ++ * hotplug handling via notifier and sysdev as well. Sigh. 
++ */ ++ if (err) ++ sysfs_remove_group(&sys_dev->kobj, &mc_attr_group); ++#endif ++ return err; + } + + static int mc_sysdev_remove(struct sys_device *sys_dev) +@@ -404,14 +425,14 @@ static int mc_sysdev_resume(struct sys_d + return 0; + + /* only CPU 0 will apply ucode here */ +- microcode_update_cpu(0); ++ microcode_update_cpu(NULL); + return 0; + } + + static struct sysdev_driver mc_sysdev_driver = { +- .add = mc_sysdev_add, +- .remove = mc_sysdev_remove, +- .resume = mc_sysdev_resume, ++ .add = mc_sysdev_add, ++ .remove = mc_sysdev_remove, ++ .resume = mc_sysdev_resume, + }; + + static __cpuinit int +@@ -424,7 +445,9 @@ mc_cpu_callback(struct notifier_block *n + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: +- microcode_init_cpu(cpu); ++ if (microcode_init_cpu(cpu)) ++ printk(KERN_ERR "microcode: failed to init CPU%d\n", ++ cpu); + case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: + pr_debug("microcode: CPU%d added\n", cpu); +@@ -448,7 +471,7 @@ mc_cpu_callback(struct notifier_block *n + } + + static struct notifier_block __refdata mc_cpu_notifier = { +- .notifier_call = mc_cpu_callback, ++ .notifier_call = mc_cpu_callback, + }; + + static int __init microcode_init(void) +Index: linux-2.6-tip/arch/x86/kernel/microcode_intel.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/microcode_intel.c ++++ linux-2.6-tip/arch/x86/kernel/microcode_intel.c +@@ -70,28 +70,28 @@ + * Fix sigmatch() macro to handle old CPUs with pf == 0. + * Thanks to Stuart Swales for pointing out this bug. + */ ++#include + #include +-#include +-#include +-#include ++#include ++#include + #include ++#include + #include +-#include +-#include ++#include + #include +-#include +-#include +-#include +-#include ++#include ++#include + #include ++#include ++#include ++#include + #include +-#include +-#include ++#include ++#include + +-#include +-#include +-#include + #include ++#include ++#include + + MODULE_DESCRIPTION("Microcode Update Driver"); + MODULE_AUTHOR("Tigran Aivazian "); +@@ -129,12 +129,13 @@ struct extended_sigtable { + struct extended_signature sigs[0]; + }; + +-#define DEFAULT_UCODE_DATASIZE (2000) ++#define DEFAULT_UCODE_DATASIZE (2000) + #define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) + #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) + #define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) + #define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) + #define DWSIZE (sizeof(u32)) ++ + #define get_totalsize(mc) \ + (((struct microcode_intel *)mc)->hdr.totalsize ? \ + ((struct microcode_intel *)mc)->hdr.totalsize : \ +@@ -150,7 +151,7 @@ struct extended_sigtable { + #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) + + /* serialize access to the physical write to MSR 0x79 */ +-static DEFINE_SPINLOCK(microcode_update_lock); ++static DEFINE_RAW_SPINLOCK(microcode_update_lock); + + static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) + { +@@ -196,31 +197,32 @@ static inline int update_match_cpu(struc + return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; + } + +-static inline int +-update_match_revision(struct microcode_header_intel *mc_header, int rev) ++static inline int ++update_match_revision(struct microcode_header_intel *mc_header, int rev) + { + return (mc_header->rev <= rev) ? 
0 : 1; + } + + static int microcode_sanity_check(void *mc) + { ++ unsigned long total_size, data_size, ext_table_size; + struct microcode_header_intel *mc_header = mc; + struct extended_sigtable *ext_header = NULL; +- struct extended_signature *ext_sig; +- unsigned long total_size, data_size, ext_table_size; + int sum, orig_sum, ext_sigcount = 0, i; ++ struct extended_signature *ext_sig; + + total_size = get_totalsize(mc_header); + data_size = get_datasize(mc_header); ++ + if (data_size + MC_HEADER_SIZE > total_size) { + printk(KERN_ERR "microcode: error! " +- "Bad data size in microcode data file\n"); ++ "Bad data size in microcode data file\n"); + return -EINVAL; + } + + if (mc_header->ldrver != 1 || mc_header->hdrver != 1) { + printk(KERN_ERR "microcode: error! " +- "Unknown microcode update format\n"); ++ "Unknown microcode update format\n"); + return -EINVAL; + } + ext_table_size = total_size - (MC_HEADER_SIZE + data_size); +@@ -318,11 +320,15 @@ get_matching_microcode(struct cpu_signat + + static void apply_microcode(int cpu) + { ++ struct microcode_intel *mc_intel; ++ struct ucode_cpu_info *uci; + unsigned long flags; + unsigned int val[2]; +- int cpu_num = raw_smp_processor_id(); +- struct ucode_cpu_info *uci = ucode_cpu_info + cpu; +- struct microcode_intel *mc_intel = uci->mc; ++ int cpu_num; ++ ++ cpu_num = raw_smp_processor_id(); ++ uci = ucode_cpu_info + cpu; ++ mc_intel = uci->mc; + + /* We should bind the task to the CPU */ + BUG_ON(cpu_num != cpu); +@@ -348,15 +354,17 @@ static void apply_microcode(int cpu) + spin_unlock_irqrestore(µcode_update_lock, flags); + if (val[1] != mc_intel->hdr.rev) { + printk(KERN_ERR "microcode: CPU%d update from revision " +- "0x%x to 0x%x failed\n", cpu_num, uci->cpu_sig.rev, val[1]); ++ "0x%x to 0x%x failed\n", ++ cpu_num, uci->cpu_sig.rev, val[1]); + return; + } + printk(KERN_INFO "microcode: CPU%d updated from revision " +- "0x%x to 0x%x, date = %04x-%02x-%02x \n", ++ "0x%x to 0x%x, date = %04x-%02x-%02x \n", + cpu_num, uci->cpu_sig.rev, val[1], + mc_intel->hdr.date & 0xffff, + mc_intel->hdr.date >> 24, + (mc_intel->hdr.date >> 16) & 0xff); ++ + uci->cpu_sig.rev = val[1]; + } + +@@ -404,18 +412,23 @@ static int generic_load_microcode(int cp + leftover -= mc_size; + } + +- if (new_mc) { +- if (!leftover) { +- if (uci->mc) +- vfree(uci->mc); +- uci->mc = (struct microcode_intel *)new_mc; +- pr_debug("microcode: CPU%d found a matching microcode update with" +- " version 0x%x (current=0x%x)\n", +- cpu, new_rev, uci->cpu_sig.rev); +- } else +- vfree(new_mc); ++ if (!new_mc) ++ goto out; ++ ++ if (leftover) { ++ vfree(new_mc); ++ goto out; + } + ++ if (uci->mc) ++ vfree(uci->mc); ++ uci->mc = (struct microcode_intel *)new_mc; ++ ++ pr_debug("microcode: CPU%d found a matching microcode update with" ++ " version 0x%x (current=0x%x)\n", ++ cpu, new_rev, uci->cpu_sig.rev); ++ ++ out: + return (int)leftover; + } + +@@ -442,8 +455,8 @@ static int request_microcode_fw(int cpu, + return ret; + } + +- ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, +- &get_ucode_fw); ++ ret = generic_load_microcode(cpu, (void *)firmware->data, ++ firmware->size, &get_ucode_fw); + + release_firmware(firmware); + +@@ -460,7 +473,7 @@ static int request_microcode_user(int cp + /* We should bind the task to the CPU */ + BUG_ON(cpu != raw_smp_processor_id()); + +- return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); ++ return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); + } + + static void microcode_fini_cpu(int 
cpu) +Index: linux-2.6-tip/arch/x86/kernel/mmconf-fam10h_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/mmconf-fam10h_64.c ++++ linux-2.6-tip/arch/x86/kernel/mmconf-fam10h_64.c +@@ -226,7 +226,7 @@ static int __devinit set_check_enable_am + return 0; + } + +-static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { ++static const struct dmi_system_id __cpuinitconst mmconf_dmi_table[] = { + { + .callback = set_check_enable_amd_mmconf, + .ident = "Sun Microsystems Machine", +Index: linux-2.6-tip/arch/x86/kernel/module_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/module_32.c ++++ linux-2.6-tip/arch/x86/kernel/module_32.c +@@ -42,7 +42,7 @@ void module_free(struct module *mod, voi + { + vfree(module_region); + /* FIXME: If module_region == mod->init_region, trim exception +- table entries. */ ++ table entries. */ + } + + /* We don't need anything special. */ +@@ -113,13 +113,13 @@ int module_finalize(const Elf_Ehdr *hdr, + *para = NULL; + char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + +- for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { ++ for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { + if (!strcmp(".text", secstrings + s->sh_name)) + text = s; + if (!strcmp(".altinstructions", secstrings + s->sh_name)) + alt = s; + if (!strcmp(".smp_locks", secstrings + s->sh_name)) +- locks= s; ++ locks = s; + if (!strcmp(".parainstructions", secstrings + s->sh_name)) + para = s; + } +Index: linux-2.6-tip/arch/x86/kernel/module_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/module_64.c ++++ linux-2.6-tip/arch/x86/kernel/module_64.c +@@ -30,14 +30,14 @@ + #include + #include + +-#define DEBUGP(fmt...) ++#define DEBUGP(fmt...) + + #ifndef CONFIG_UML + void module_free(struct module *mod, void *module_region) + { + vfree(module_region); + /* FIXME: If module_region == mod->init_region, trim exception +- table entries. */ ++ table entries. 
*/ + } + + void *module_alloc(unsigned long size) +@@ -77,7 +77,7 @@ int apply_relocate_add(Elf64_Shdr *sechd + Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; + Elf64_Sym *sym; + void *loc; +- u64 val; ++ u64 val; + + DEBUGP("Applying relocate section %u to %u\n", relsec, + sechdrs[relsec].sh_info); +@@ -91,11 +91,11 @@ int apply_relocate_add(Elf64_Shdr *sechd + sym = (Elf64_Sym *)sechdrs[symindex].sh_addr + + ELF64_R_SYM(rel[i].r_info); + +- DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", +- (int)ELF64_R_TYPE(rel[i].r_info), +- sym->st_value, rel[i].r_addend, (u64)loc); ++ DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", ++ (int)ELF64_R_TYPE(rel[i].r_info), ++ sym->st_value, rel[i].r_addend, (u64)loc); + +- val = sym->st_value + rel[i].r_addend; ++ val = sym->st_value + rel[i].r_addend; + + switch (ELF64_R_TYPE(rel[i].r_info)) { + case R_X86_64_NONE: +@@ -113,16 +113,16 @@ int apply_relocate_add(Elf64_Shdr *sechd + if ((s64)val != *(s32 *)loc) + goto overflow; + break; +- case R_X86_64_PC32: ++ case R_X86_64_PC32: + val -= (u64)loc; + *(u32 *)loc = val; + #if 0 + if ((s64)val != *(s32 *)loc) +- goto overflow; ++ goto overflow; + #endif + break; + default: +- printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n", ++ printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n", + me->name, ELF64_R_TYPE(rel[i].r_info)); + return -ENOEXEC; + } +@@ -130,7 +130,7 @@ int apply_relocate_add(Elf64_Shdr *sechd + return 0; + + overflow: +- printk(KERN_ERR "overflow in relocation type %d val %Lx\n", ++ printk(KERN_ERR "overflow in relocation type %d val %Lx\n", + (int)ELF64_R_TYPE(rel[i].r_info), val); + printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", + me->name); +@@ -143,13 +143,13 @@ int apply_relocate(Elf_Shdr *sechdrs, + unsigned int relsec, + struct module *me) + { +- printk("non add relocation not supported\n"); ++ printk(KERN_ERR "non add relocation not supported\n"); + return -ENOSYS; +-} ++} + + int module_finalize(const Elf_Ehdr *hdr, +- const Elf_Shdr *sechdrs, +- struct module *me) ++ const Elf_Shdr *sechdrs, ++ struct module *me) + { + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, + *para = NULL; +@@ -161,7 +161,7 @@ int module_finalize(const Elf_Ehdr *hdr, + if (!strcmp(".altinstructions", secstrings + s->sh_name)) + alt = s; + if (!strcmp(".smp_locks", secstrings + s->sh_name)) +- locks= s; ++ locks = s; + if (!strcmp(".parainstructions", secstrings + s->sh_name)) + para = s; + } +Index: linux-2.6-tip/arch/x86/kernel/mpparse.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/mpparse.c ++++ linux-2.6-tip/arch/x86/kernel/mpparse.c +@@ -3,7 +3,7 @@ + * compliant MP-table parsing routines. + * + * (c) 1995 Alan Cox, Building #3 +- * (c) 1998, 1999, 2000 Ingo Molnar ++ * (c) 1998, 1999, 2000, 2009 Ingo Molnar + * (c) 2008 Alexey Starikovskiy + */ + +@@ -29,12 +29,7 @@ + #include + #include + +-#include +-#ifdef CONFIG_X86_32 +-#include +-#include +-#endif +- ++#include + /* + * Checksum an MP configuration block. 
+ */ +@@ -114,9 +109,6 @@ static void __init MP_bus_info(struct mp + } else + printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); + } +-#endif +- +-#ifdef CONFIG_X86_IO_APIC + + static int bad_ioapic(unsigned long address) + { +@@ -144,11 +136,11 @@ static void __init MP_ioapic_info(struct + if (bad_ioapic(m->apicaddr)) + return; + +- mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr; +- mp_ioapics[nr_ioapics].mp_apicid = m->apicid; +- mp_ioapics[nr_ioapics].mp_type = m->type; +- mp_ioapics[nr_ioapics].mp_apicver = m->apicver; +- mp_ioapics[nr_ioapics].mp_flags = m->flags; ++ mp_ioapics[nr_ioapics].apicaddr = m->apicaddr; ++ mp_ioapics[nr_ioapics].apicid = m->apicid; ++ mp_ioapics[nr_ioapics].type = m->type; ++ mp_ioapics[nr_ioapics].apicver = m->apicver; ++ mp_ioapics[nr_ioapics].flags = m->flags; + nr_ioapics++; + } + +@@ -160,55 +152,55 @@ static void print_MP_intsrc_info(struct + m->srcbusirq, m->dstapic, m->dstirq); + } + +-static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) ++static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) + { + apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," + " IRQ %02x, APIC ID %x, APIC INT %02x\n", +- mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, +- (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, +- mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); ++ mp_irq->irqtype, mp_irq->irqflag & 3, ++ (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus, ++ mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq); + } + + static void __init assign_to_mp_irq(struct mpc_intsrc *m, +- struct mp_config_intsrc *mp_irq) ++ struct mpc_intsrc *mp_irq) + { +- mp_irq->mp_dstapic = m->dstapic; +- mp_irq->mp_type = m->type; +- mp_irq->mp_irqtype = m->irqtype; +- mp_irq->mp_irqflag = m->irqflag; +- mp_irq->mp_srcbus = m->srcbus; +- mp_irq->mp_srcbusirq = m->srcbusirq; +- mp_irq->mp_dstirq = m->dstirq; ++ mp_irq->dstapic = m->dstapic; ++ mp_irq->type = m->type; ++ mp_irq->irqtype = m->irqtype; ++ mp_irq->irqflag = m->irqflag; ++ mp_irq->srcbus = m->srcbus; ++ mp_irq->srcbusirq = m->srcbusirq; ++ mp_irq->dstirq = m->dstirq; + } + +-static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, ++static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq, + struct mpc_intsrc *m) + { +- m->dstapic = mp_irq->mp_dstapic; +- m->type = mp_irq->mp_type; +- m->irqtype = mp_irq->mp_irqtype; +- m->irqflag = mp_irq->mp_irqflag; +- m->srcbus = mp_irq->mp_srcbus; +- m->srcbusirq = mp_irq->mp_srcbusirq; +- m->dstirq = mp_irq->mp_dstirq; ++ m->dstapic = mp_irq->dstapic; ++ m->type = mp_irq->type; ++ m->irqtype = mp_irq->irqtype; ++ m->irqflag = mp_irq->irqflag; ++ m->srcbus = mp_irq->srcbus; ++ m->srcbusirq = mp_irq->srcbusirq; ++ m->dstirq = mp_irq->dstirq; + } + +-static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, ++static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq, + struct mpc_intsrc *m) + { +- if (mp_irq->mp_dstapic != m->dstapic) ++ if (mp_irq->dstapic != m->dstapic) + return 1; +- if (mp_irq->mp_type != m->type) ++ if (mp_irq->type != m->type) + return 2; +- if (mp_irq->mp_irqtype != m->irqtype) ++ if (mp_irq->irqtype != m->irqtype) + return 3; +- if (mp_irq->mp_irqflag != m->irqflag) ++ if (mp_irq->irqflag != m->irqflag) + return 4; +- if (mp_irq->mp_srcbus != m->srcbus) ++ if (mp_irq->srcbus != m->srcbus) + return 5; +- if (mp_irq->mp_srcbusirq != m->srcbusirq) ++ if (mp_irq->srcbusirq != m->srcbusirq) + return 6; +- if (mp_irq->mp_dstirq != m->dstirq) ++ if (mp_irq->dstirq != 
m->dstirq) + return 7; + + return 0; +@@ -229,8 +221,12 @@ static void __init MP_intsrc_info(struct + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!!\n"); + } ++#else /* CONFIG_X86_IO_APIC */ ++static inline void __init MP_bus_info(struct mpc_bus *m) {} ++static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {} ++static inline void __init MP_intsrc_info(struct mpc_intsrc *m) {} ++#endif /* CONFIG_X86_IO_APIC */ + +-#endif + + static void __init MP_lintsrc_info(struct mpc_lintsrc *m) + { +@@ -280,6 +276,20 @@ static int __init smp_check_mpc(struct m + return 1; + } + ++static void skip_entry(unsigned char **ptr, int *count, int size) ++{ ++ *ptr += size; ++ *count += size; ++} ++ ++static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt) ++{ ++ printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n" ++ "type %x\n", *mpt); ++ print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, ++ 1, mpc, mpc->length, 1); ++} ++ + static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) + { + char str[16]; +@@ -292,16 +302,7 @@ static int __init smp_read_mpc(struct mp + return 0; + + #ifdef CONFIG_X86_32 +- /* +- * need to make sure summit and es7000's mps_oem_check is safe to be +- * called early via genericarch 's mps_oem_check +- */ +- if (early) { +-#ifdef CONFIG_X86_NUMAQ +- numaq_mps_oem_check(mpc, oem, str); +-#endif +- } else +- mps_oem_check(mpc, oem, str); ++ generic_mps_oem_check(mpc, oem, str); + #endif + /* save the local APIC address, it might be non-default */ + if (!acpi_lapic) +@@ -324,61 +325,30 @@ static int __init smp_read_mpc(struct mp + while (count < mpc->length) { + switch (*mpt) { + case MP_PROCESSOR: +- { +- struct mpc_cpu *m = (struct mpc_cpu *)mpt; +- /* ACPI may have already provided this data */ +- if (!acpi_lapic) +- MP_processor_info(m); +- mpt += sizeof(*m); +- count += sizeof(*m); +- break; +- } ++ /* ACPI may have already provided this data */ ++ if (!acpi_lapic) ++ MP_processor_info((struct mpc_cpu *)mpt); ++ skip_entry(&mpt, &count, sizeof(struct mpc_cpu)); ++ break; + case MP_BUS: +- { +- struct mpc_bus *m = (struct mpc_bus *)mpt; +-#ifdef CONFIG_X86_IO_APIC +- MP_bus_info(m); +-#endif +- mpt += sizeof(*m); +- count += sizeof(*m); +- break; +- } ++ MP_bus_info((struct mpc_bus *)mpt); ++ skip_entry(&mpt, &count, sizeof(struct mpc_bus)); ++ break; + case MP_IOAPIC: +- { +-#ifdef CONFIG_X86_IO_APIC +- struct mpc_ioapic *m = (struct mpc_ioapic *)mpt; +- MP_ioapic_info(m); +-#endif +- mpt += sizeof(struct mpc_ioapic); +- count += sizeof(struct mpc_ioapic); +- break; +- } ++ MP_ioapic_info((struct mpc_ioapic *)mpt); ++ skip_entry(&mpt, &count, sizeof(struct mpc_ioapic)); ++ break; + case MP_INTSRC: +- { +-#ifdef CONFIG_X86_IO_APIC +- struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; +- +- MP_intsrc_info(m); +-#endif +- mpt += sizeof(struct mpc_intsrc); +- count += sizeof(struct mpc_intsrc); +- break; +- } ++ MP_intsrc_info((struct mpc_intsrc *)mpt); ++ skip_entry(&mpt, &count, sizeof(struct mpc_intsrc)); ++ break; + case MP_LINTSRC: +- { +- struct mpc_lintsrc *m = +- (struct mpc_lintsrc *)mpt; +- MP_lintsrc_info(m); +- mpt += sizeof(*m); +- count += sizeof(*m); +- break; +- } ++ MP_lintsrc_info((struct mpc_lintsrc *)mpt); ++ skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc)); ++ break; + default: + /* wrong mptable */ +- printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); +- printk(KERN_ERR "type %x\n", *mpt); +- print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, +- 
1, mpc, mpc->length, 1); ++ smp_dump_mptable(mpc, mpt); + count = mpc->length; + break; + } +@@ -386,13 +356,13 @@ static int __init smp_read_mpc(struct mp + (*x86_quirks->mpc_record)++; + } + +-#ifdef CONFIG_X86_GENERICARCH +- generic_bigsmp_probe(); ++#ifdef CONFIG_X86_BIGSMP ++ generic_bigsmp_probe(); + #endif + +-#ifdef CONFIG_X86_32 +- setup_apic_routing(); +-#endif ++ if (apic->setup_apic_routing) ++ apic->setup_apic_routing(); ++ + if (!num_processors) + printk(KERN_ERR "MPTABLE: no processors registered!\n"); + return num_processors; +@@ -417,7 +387,7 @@ static void __init construct_default_ioi + intsrc.type = MP_INTSRC; + intsrc.irqflag = 0; /* conforming */ + intsrc.srcbus = 0; +- intsrc.dstapic = mp_ioapics[0].mp_apicid; ++ intsrc.dstapic = mp_ioapics[0].apicid; + + intsrc.irqtype = mp_INT; + +@@ -570,14 +540,76 @@ static inline void __init construct_defa + } + } + +-static struct intel_mp_floating *mpf_found; ++static struct mpf_intel *mpf_found; ++ ++static unsigned long __init get_mpc_size(unsigned long physptr) ++{ ++ struct mpc_table *mpc; ++ unsigned long size; ++ ++ mpc = early_ioremap(physptr, PAGE_SIZE); ++ size = mpc->length; ++ early_iounmap(mpc, PAGE_SIZE); ++ apic_printk(APIC_VERBOSE, " mpc: %lx-%lx\n", physptr, physptr + size); ++ ++ return size; ++} ++ ++static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) ++{ ++ struct mpc_table *mpc; ++ unsigned long size; ++ ++ size = get_mpc_size(mpf->physptr); ++ mpc = early_ioremap(mpf->physptr, size); ++ /* ++ * Read the physical hardware table. Anything here will ++ * override the defaults. ++ */ ++ if (!smp_read_mpc(mpc, early)) { ++#ifdef CONFIG_X86_LOCAL_APIC ++ smp_found_config = 0; ++#endif ++ printk(KERN_ERR "BIOS bug, MP table errors detected!...\n" ++ "... disabling SMP support. (tell your hw vendor)\n"); ++ early_iounmap(mpc, size); ++ return -1; ++ } ++ early_iounmap(mpc, size); ++ ++ if (early) ++ return -1; ++ ++#ifdef CONFIG_X86_IO_APIC ++ /* ++ * If there are no explicit MP IRQ entries, then we are ++ * broken. We set up most of the low 16 IO-APIC pins to ++ * ISA defaults and hope it will work. ++ */ ++ if (!mp_irq_entries) { ++ struct mpc_bus bus; ++ ++ printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " ++ "using default mptable. (tell your hw vendor)\n"); ++ ++ bus.type = MP_BUS; ++ bus.busid = 0; ++ memcpy(bus.bustype, "ISA ", 6); ++ MP_bus_info(&bus); ++ ++ construct_default_ioirq_mptable(0); ++ } ++#endif ++ ++ return 0; ++} + + /* + * Scan the memory blocks for an SMP configuration block. + */ + static void __init __get_smp_config(unsigned int early) + { +- struct intel_mp_floating *mpf = mpf_found; ++ struct mpf_intel *mpf = mpf_found; + + if (!mpf) + return; +@@ -598,9 +630,9 @@ static void __init __get_smp_config(unsi + } + + printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", +- mpf->mpf_specification); ++ mpf->specification); + #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) +- if (mpf->mpf_feature2 & (1 << 7)) { ++ if (mpf->feature2 & (1 << 7)) { + printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); + pic_mode = 1; + } else { +@@ -611,7 +643,7 @@ static void __init __get_smp_config(unsi + /* + * Now see if we need to read further. 
+ */ +- if (mpf->mpf_feature1 != 0) { ++ if (mpf->feature1 != 0) { + if (early) { + /* + * local APIC has default address +@@ -621,49 +653,12 @@ static void __init __get_smp_config(unsi + } + + printk(KERN_INFO "Default MP configuration #%d\n", +- mpf->mpf_feature1); +- construct_default_ISA_mptable(mpf->mpf_feature1); +- +- } else if (mpf->mpf_physptr) { ++ mpf->feature1); ++ construct_default_ISA_mptable(mpf->feature1); + +- /* +- * Read the physical hardware table. Anything here will +- * override the defaults. +- */ +- if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { +-#ifdef CONFIG_X86_LOCAL_APIC +- smp_found_config = 0; +-#endif +- printk(KERN_ERR +- "BIOS bug, MP table errors detected!...\n"); +- printk(KERN_ERR "... disabling SMP support. " +- "(tell your hw vendor)\n"); +- return; +- } +- +- if (early) ++ } else if (mpf->physptr) { ++ if (check_physptr(mpf, early)) + return; +-#ifdef CONFIG_X86_IO_APIC +- /* +- * If there are no explicit MP IRQ entries, then we are +- * broken. We set up most of the low 16 IO-APIC pins to +- * ISA defaults and hope it will work. +- */ +- if (!mp_irq_entries) { +- struct mpc_bus bus; +- +- printk(KERN_ERR "BIOS bug, no explicit IRQ entries, " +- "using default mptable. " +- "(tell your hw vendor)\n"); +- +- bus.type = MP_BUS; +- bus.busid = 0; +- memcpy(bus.bustype, "ISA ", 6); +- MP_bus_info(&bus); +- +- construct_default_ioirq_mptable(0); +- } +-#endif + } else + BUG(); + +@@ -684,54 +679,62 @@ void __init get_smp_config(void) + __get_smp_config(0); + } + ++static void smp_reserve_bootmem(struct mpf_intel *mpf) ++{ ++ unsigned long size = get_mpc_size(mpf->physptr); ++#ifdef CONFIG_X86_32 ++ /* ++ * We cannot access to MPC table to compute table size yet, ++ * as only few megabytes from the bottom is mapped now. ++ * PC-9800's MPC table places on the very last of physical ++ * memory; so that simply reserving PAGE_SIZE from mpf->physptr ++ * yields BUG() in reserve_bootmem. 
++ * also need to make sure physptr is below than max_low_pfn ++ * we don't need reserve the area above max_low_pfn ++ */ ++ unsigned long end = max_low_pfn * PAGE_SIZE; ++ ++ if (mpf->physptr < end) { ++ if (mpf->physptr + size > end) ++ size = end - mpf->physptr; ++ reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); ++ } ++#else ++ reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT); ++#endif ++} ++ + static int __init smp_scan_config(unsigned long base, unsigned long length, + unsigned reserve) + { + unsigned int *bp = phys_to_virt(base); +- struct intel_mp_floating *mpf; ++ struct mpf_intel *mpf; + + apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", + bp, length); + BUILD_BUG_ON(sizeof(*mpf) != 16); + + while (length > 0) { +- mpf = (struct intel_mp_floating *)bp; ++ mpf = (struct mpf_intel *)bp; + if ((*bp == SMP_MAGIC_IDENT) && +- (mpf->mpf_length == 1) && ++ (mpf->length == 1) && + !mpf_checksum((unsigned char *)bp, 16) && +- ((mpf->mpf_specification == 1) +- || (mpf->mpf_specification == 4))) { ++ ((mpf->specification == 1) ++ || (mpf->specification == 4))) { + #ifdef CONFIG_X86_LOCAL_APIC + smp_found_config = 1; + #endif + mpf_found = mpf; + +- printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", +- mpf, virt_to_phys(mpf)); ++ printk(KERN_INFO "found SMP MP-table at [%p] %llx\n", ++ mpf, (u64)virt_to_phys(mpf)); + + if (!reserve) + return 1; +- reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, +- BOOTMEM_DEFAULT); +- if (mpf->mpf_physptr) { +- unsigned long size = PAGE_SIZE; +-#ifdef CONFIG_X86_32 +- /* +- * We cannot access to MPC table to compute +- * table size yet, as only few megabytes from +- * the bottom is mapped now. +- * PC-9800's MPC table places on the very last +- * of physical memory; so that simply reserving +- * PAGE_SIZE from mpg->mpf_physptr yields BUG() +- * in reserve_bootmem. 
+- */ +- unsigned long end = max_low_pfn * PAGE_SIZE; +- if (mpf->mpf_physptr + size > end) +- size = end - mpf->mpf_physptr; +-#endif +- reserve_bootmem_generic(mpf->mpf_physptr, size, ++ reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf), + BOOTMEM_DEFAULT); +- } ++ if (mpf->physptr) ++ smp_reserve_bootmem(mpf); + + return 1; + } +@@ -809,15 +812,15 @@ static int __init get_MP_intsrc_index(s + /* not legacy */ + + for (i = 0; i < mp_irq_entries; i++) { +- if (mp_irqs[i].mp_irqtype != mp_INT) ++ if (mp_irqs[i].irqtype != mp_INT) + continue; + +- if (mp_irqs[i].mp_irqflag != 0x0f) ++ if (mp_irqs[i].irqflag != 0x0f) + continue; + +- if (mp_irqs[i].mp_srcbus != m->srcbus) ++ if (mp_irqs[i].srcbus != m->srcbus) + continue; +- if (mp_irqs[i].mp_srcbusirq != m->srcbusirq) ++ if (mp_irqs[i].srcbusirq != m->srcbusirq) + continue; + if (irq_used[i]) { + /* already claimed */ +@@ -834,7 +837,57 @@ static int __init get_MP_intsrc_index(s + #define SPARE_SLOT_NUM 20 + + static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM]; +-#endif ++ ++static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) ++{ ++ int i; ++ ++ apic_printk(APIC_VERBOSE, "OLD "); ++ print_MP_intsrc_info(m); ++ ++ i = get_MP_intsrc_index(m); ++ if (i > 0) { ++ assign_to_mpc_intsrc(&mp_irqs[i], m); ++ apic_printk(APIC_VERBOSE, "NEW "); ++ print_mp_irq_info(&mp_irqs[i]); ++ return; ++ } ++ if (!i) { ++ /* legacy, do nothing */ ++ return; ++ } ++ if (*nr_m_spare < SPARE_SLOT_NUM) { ++ /* ++ * not found (-1), or duplicated (-2) are invalid entries, ++ * we need to use the slot later ++ */ ++ m_spare[*nr_m_spare] = m; ++ *nr_m_spare += 1; ++ } ++} ++#else /* CONFIG_X86_IO_APIC */ ++static inline void check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {} ++#endif /* CONFIG_X86_IO_APIC */ ++ ++static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, ++ int count) ++{ ++ if (!mpc_new_phys) { ++ pr_info("No spare slots, try to append...take your risk, " ++ "new mpc_length %x\n", count); ++ } else { ++ if (count <= mpc_new_length) ++ pr_info("No spare slots, try to append..., " ++ "new mpc_length %x\n", count); ++ else { ++ pr_err("mpc_new_length %lx is too small\n", ++ mpc_new_length); ++ return -1; ++ } ++ } ++ ++ return 0; ++} + + static int __init replace_intsrc_all(struct mpc_table *mpc, + unsigned long mpc_new_phys, +@@ -842,77 +895,33 @@ static int __init replace_intsrc_all(st + { + #ifdef CONFIG_X86_IO_APIC + int i; +- int nr_m_spare = 0; + #endif +- + int count = sizeof(*mpc); ++ int nr_m_spare = 0; + unsigned char *mpt = ((unsigned char *)mpc) + count; + + printk(KERN_INFO "mpc_length %x\n", mpc->length); + while (count < mpc->length) { + switch (*mpt) { + case MP_PROCESSOR: +- { +- struct mpc_cpu *m = (struct mpc_cpu *)mpt; +- mpt += sizeof(*m); +- count += sizeof(*m); +- break; +- } ++ skip_entry(&mpt, &count, sizeof(struct mpc_cpu)); ++ break; + case MP_BUS: +- { +- struct mpc_bus *m = (struct mpc_bus *)mpt; +- mpt += sizeof(*m); +- count += sizeof(*m); +- break; +- } ++ skip_entry(&mpt, &count, sizeof(struct mpc_bus)); ++ break; + case MP_IOAPIC: +- { +- mpt += sizeof(struct mpc_ioapic); +- count += sizeof(struct mpc_ioapic); +- break; +- } ++ skip_entry(&mpt, &count, sizeof(struct mpc_ioapic)); ++ break; + case MP_INTSRC: +- { +-#ifdef CONFIG_X86_IO_APIC +- struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; +- +- printk(KERN_INFO "OLD "); +- print_MP_intsrc_info(m); +- i = get_MP_intsrc_index(m); +- if (i > 0) { +- assign_to_mpc_intsrc(&mp_irqs[i], m); +- printk(KERN_INFO "NEW 
"); +- print_mp_irq_info(&mp_irqs[i]); +- } else if (!i) { +- /* legacy, do nothing */ +- } else if (nr_m_spare < SPARE_SLOT_NUM) { +- /* +- * not found (-1), or duplicated (-2) +- * are invalid entries, +- * we need to use the slot later +- */ +- m_spare[nr_m_spare] = m; +- nr_m_spare++; +- } +-#endif +- mpt += sizeof(struct mpc_intsrc); +- count += sizeof(struct mpc_intsrc); +- break; +- } ++ check_irq_src((struct mpc_intsrc *)mpt, &nr_m_spare); ++ skip_entry(&mpt, &count, sizeof(struct mpc_intsrc)); ++ break; + case MP_LINTSRC: +- { +- struct mpc_lintsrc *m = +- (struct mpc_lintsrc *)mpt; +- mpt += sizeof(*m); +- count += sizeof(*m); +- break; +- } ++ skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc)); ++ break; + default: + /* wrong mptable */ +- printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"); +- printk(KERN_ERR "type %x\n", *mpt); +- print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16, +- 1, mpc, mpc->length, 1); ++ smp_dump_mptable(mpc, mpt); + goto out; + } + } +@@ -922,30 +931,22 @@ static int __init replace_intsrc_all(st + if (irq_used[i]) + continue; + +- if (mp_irqs[i].mp_irqtype != mp_INT) ++ if (mp_irqs[i].irqtype != mp_INT) + continue; + +- if (mp_irqs[i].mp_irqflag != 0x0f) ++ if (mp_irqs[i].irqflag != 0x0f) + continue; + + if (nr_m_spare > 0) { +- printk(KERN_INFO "*NEW* found "); ++ apic_printk(APIC_VERBOSE, "*NEW* found\n"); + nr_m_spare--; + assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); + m_spare[nr_m_spare] = NULL; + } else { + struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; + count += sizeof(struct mpc_intsrc); +- if (!mpc_new_phys) { +- printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count); +- } else { +- if (count <= mpc_new_length) +- printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count); +- else { +- printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length); +- goto out; +- } +- } ++ if (!check_slot(mpc_new_phys, mpc_new_length, count)) ++ goto out; + assign_to_mpc_intsrc(&mp_irqs[i], m); + mpc->length = count; + mpt += sizeof(struct mpc_intsrc); +@@ -1001,7 +1002,7 @@ static int __init update_mp_table(void) + { + char str[16]; + char oem[10]; +- struct intel_mp_floating *mpf; ++ struct mpf_intel *mpf; + struct mpc_table *mpc, *mpc_new; + + if (!enable_update_mptable) +@@ -1014,19 +1015,19 @@ static int __init update_mp_table(void) + /* + * Now see if we need to go further. 
+ */ +- if (mpf->mpf_feature1 != 0) ++ if (mpf->feature1 != 0) + return 0; + +- if (!mpf->mpf_physptr) ++ if (!mpf->physptr) + return 0; + +- mpc = phys_to_virt(mpf->mpf_physptr); ++ mpc = phys_to_virt(mpf->physptr); + + if (!smp_check_mpc(mpc, oem, str)) + return 0; + +- printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); +- printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); ++ printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf)); ++ printk(KERN_INFO "physptr: %x\n", mpf->physptr); + + if (mpc_new_phys && mpc->length > mpc_new_length) { + mpc_new_phys = 0; +@@ -1047,23 +1048,23 @@ static int __init update_mp_table(void) + } + printk(KERN_INFO "use in-positon replacing\n"); + } else { +- mpf->mpf_physptr = mpc_new_phys; ++ mpf->physptr = mpc_new_phys; + mpc_new = phys_to_virt(mpc_new_phys); + memcpy(mpc_new, mpc, mpc->length); + mpc = mpc_new; + /* check if we can modify that */ +- if (mpc_new_phys - mpf->mpf_physptr) { +- struct intel_mp_floating *mpf_new; ++ if (mpc_new_phys - mpf->physptr) { ++ struct mpf_intel *mpf_new; + /* steal 16 bytes from [0, 1k) */ + printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); + mpf_new = phys_to_virt(0x400 - 16); + memcpy(mpf_new, mpf, 16); + mpf = mpf_new; +- mpf->mpf_physptr = mpc_new_phys; ++ mpf->physptr = mpc_new_phys; + } +- mpf->mpf_checksum = 0; +- mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); +- printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); ++ mpf->checksum = 0; ++ mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16); ++ printk(KERN_INFO "physptr new: %x\n", mpf->physptr); + } + + /* +Index: linux-2.6-tip/arch/x86/kernel/msr.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/msr.c ++++ linux-2.6-tip/arch/x86/kernel/msr.c +@@ -35,10 +35,10 @@ + #include + #include + #include ++#include + + #include + #include +-#include + #include + + static struct class *msr_class; +Index: linux-2.6-tip/arch/x86/kernel/nmi.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/nmi.c ++++ /dev/null +@@ -1,572 +0,0 @@ +-/* +- * NMI watchdog support on APIC systems +- * +- * Started by Ingo Molnar +- * +- * Fixes: +- * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog. +- * Mikael Pettersson : Power Management for local APIC NMI watchdog. +- * Mikael Pettersson : Pentium 4 support for local APIC NMI watchdog. +- * Pavel Machek and +- * Mikael Pettersson : PM converted to driver model. Disable/enable API. 
+- */ +- +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +- +-#include +- +-#include +- +-int unknown_nmi_panic; +-int nmi_watchdog_enabled; +- +-static cpumask_t backtrace_mask = CPU_MASK_NONE; +- +-/* nmi_active: +- * >0: the lapic NMI watchdog is active, but can be disabled +- * <0: the lapic NMI watchdog has not been set up, and cannot +- * be enabled +- * 0: the lapic NMI watchdog is disabled, but can be enabled +- */ +-atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ +-EXPORT_SYMBOL(nmi_active); +- +-unsigned int nmi_watchdog = NMI_NONE; +-EXPORT_SYMBOL(nmi_watchdog); +- +-static int panic_on_timeout; +- +-static unsigned int nmi_hz = HZ; +-static DEFINE_PER_CPU(short, wd_enabled); +-static int endflag __initdata; +- +-static inline unsigned int get_nmi_count(int cpu) +-{ +-#ifdef CONFIG_X86_64 +- return cpu_pda(cpu)->__nmi_count; +-#else +- return nmi_count(cpu); +-#endif +-} +- +-static inline int mce_in_progress(void) +-{ +-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) +- return atomic_read(&mce_entry) > 0; +-#endif +- return 0; +-} +- +-/* +- * Take the local apic timer and PIT/HPET into account. We don't +- * know which one is active, when we have highres/dyntick on +- */ +-static inline unsigned int get_timer_irqs(int cpu) +-{ +-#ifdef CONFIG_X86_64 +- return read_pda(apic_timer_irqs) + read_pda(irq0_irqs); +-#else +- return per_cpu(irq_stat, cpu).apic_timer_irqs + +- per_cpu(irq_stat, cpu).irq0_irqs; +-#endif +-} +- +-#ifdef CONFIG_SMP +-/* +- * The performance counters used by NMI_LOCAL_APIC don't trigger when +- * the CPU is idle. To make sure the NMI watchdog really ticks on all +- * CPUs during the test make them busy. +- */ +-static __init void nmi_cpu_busy(void *data) +-{ +- local_irq_enable_in_hardirq(); +- /* +- * Intentionally don't use cpu_relax here. This is +- * to make sure that the performance counter really ticks, +- * even if there is a simulator or similar that catches the +- * pause instruction. On a real HT machine this is fine because +- * all other CPUs are busy with "useless" delay loops and don't +- * care if they get somewhat less cycles. +- */ +- while (endflag == 0) +- mb(); +-} +-#endif +- +-static void report_broken_nmi(int cpu, int *prev_nmi_count) +-{ +- printk(KERN_CONT "\n"); +- +- printk(KERN_WARNING +- "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", +- cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); +- +- printk(KERN_WARNING +- "Please report this to bugzilla.kernel.org,\n"); +- printk(KERN_WARNING +- "and attach the output of the 'dmesg' command.\n"); +- +- per_cpu(wd_enabled, cpu) = 0; +- atomic_dec(&nmi_active); +-} +- +-static void __acpi_nmi_disable(void *__unused) +-{ +- apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); +-} +- +-int __init check_nmi_watchdog(void) +-{ +- unsigned int *prev_nmi_count; +- int cpu; +- +- if (!nmi_watchdog_active() || !atomic_read(&nmi_active)) +- return 0; +- +- prev_nmi_count = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL); +- if (!prev_nmi_count) +- goto error; +- +- printk(KERN_INFO "Testing NMI watchdog ... 
"); +- +-#ifdef CONFIG_SMP +- if (nmi_watchdog == NMI_LOCAL_APIC) +- smp_call_function(nmi_cpu_busy, (void *)&endflag, 0); +-#endif +- +- for_each_possible_cpu(cpu) +- prev_nmi_count[cpu] = get_nmi_count(cpu); +- local_irq_enable(); +- mdelay((20 * 1000) / nmi_hz); /* wait 20 ticks */ +- +- for_each_online_cpu(cpu) { +- if (!per_cpu(wd_enabled, cpu)) +- continue; +- if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) +- report_broken_nmi(cpu, prev_nmi_count); +- } +- endflag = 1; +- if (!atomic_read(&nmi_active)) { +- kfree(prev_nmi_count); +- atomic_set(&nmi_active, -1); +- goto error; +- } +- printk("OK.\n"); +- +- /* +- * now that we know it works we can reduce NMI frequency to +- * something more reasonable; makes a difference in some configs +- */ +- if (nmi_watchdog == NMI_LOCAL_APIC) +- nmi_hz = lapic_adjust_nmi_hz(1); +- +- kfree(prev_nmi_count); +- return 0; +-error: +- if (nmi_watchdog == NMI_IO_APIC) { +- if (!timer_through_8259) +- disable_8259A_irq(0); +- on_each_cpu(__acpi_nmi_disable, NULL, 1); +- } +- +-#ifdef CONFIG_X86_32 +- timer_ack = 0; +-#endif +- return -1; +-} +- +-static int __init setup_nmi_watchdog(char *str) +-{ +- unsigned int nmi; +- +- if (!strncmp(str, "panic", 5)) { +- panic_on_timeout = 1; +- str = strchr(str, ','); +- if (!str) +- return 1; +- ++str; +- } +- +- if (!strncmp(str, "lapic", 5)) +- nmi_watchdog = NMI_LOCAL_APIC; +- else if (!strncmp(str, "ioapic", 6)) +- nmi_watchdog = NMI_IO_APIC; +- else { +- get_option(&str, &nmi); +- if (nmi >= NMI_INVALID) +- return 0; +- nmi_watchdog = nmi; +- } +- +- return 1; +-} +-__setup("nmi_watchdog=", setup_nmi_watchdog); +- +-/* +- * Suspend/resume support +- */ +-#ifdef CONFIG_PM +- +-static int nmi_pm_active; /* nmi_active before suspend */ +- +-static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) +-{ +- /* only CPU0 goes here, other CPUs should be offline */ +- nmi_pm_active = atomic_read(&nmi_active); +- stop_apic_nmi_watchdog(NULL); +- BUG_ON(atomic_read(&nmi_active) != 0); +- return 0; +-} +- +-static int lapic_nmi_resume(struct sys_device *dev) +-{ +- /* only CPU0 goes here, other CPUs should be offline */ +- if (nmi_pm_active > 0) { +- setup_apic_nmi_watchdog(NULL); +- touch_nmi_watchdog(); +- } +- return 0; +-} +- +-static struct sysdev_class nmi_sysclass = { +- .name = "lapic_nmi", +- .resume = lapic_nmi_resume, +- .suspend = lapic_nmi_suspend, +-}; +- +-static struct sys_device device_lapic_nmi = { +- .id = 0, +- .cls = &nmi_sysclass, +-}; +- +-static int __init init_lapic_nmi_sysfs(void) +-{ +- int error; +- +- /* +- * should really be a BUG_ON but b/c this is an +- * init call, it just doesn't work. 
-dcz +- */ +- if (nmi_watchdog != NMI_LOCAL_APIC) +- return 0; +- +- if (atomic_read(&nmi_active) < 0) +- return 0; +- +- error = sysdev_class_register(&nmi_sysclass); +- if (!error) +- error = sysdev_register(&device_lapic_nmi); +- return error; +-} +- +-/* must come after the local APIC's device_initcall() */ +-late_initcall(init_lapic_nmi_sysfs); +- +-#endif /* CONFIG_PM */ +- +-static void __acpi_nmi_enable(void *__unused) +-{ +- apic_write(APIC_LVT0, APIC_DM_NMI); +-} +- +-/* +- * Enable timer based NMIs on all CPUs: +- */ +-void acpi_nmi_enable(void) +-{ +- if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) +- on_each_cpu(__acpi_nmi_enable, NULL, 1); +-} +- +-/* +- * Disable timer based NMIs on all CPUs: +- */ +-void acpi_nmi_disable(void) +-{ +- if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) +- on_each_cpu(__acpi_nmi_disable, NULL, 1); +-} +- +-/* +- * This function is called as soon the LAPIC NMI watchdog driver has everything +- * in place and it's ready to check if the NMIs belong to the NMI watchdog +- */ +-void cpu_nmi_set_wd_enabled(void) +-{ +- __get_cpu_var(wd_enabled) = 1; +-} +- +-void setup_apic_nmi_watchdog(void *unused) +-{ +- if (__get_cpu_var(wd_enabled)) +- return; +- +- /* cheap hack to support suspend/resume */ +- /* if cpu0 is not active neither should the other cpus */ +- if (smp_processor_id() != 0 && atomic_read(&nmi_active) <= 0) +- return; +- +- switch (nmi_watchdog) { +- case NMI_LOCAL_APIC: +- if (lapic_watchdog_init(nmi_hz) < 0) { +- __get_cpu_var(wd_enabled) = 0; +- return; +- } +- /* FALL THROUGH */ +- case NMI_IO_APIC: +- __get_cpu_var(wd_enabled) = 1; +- atomic_inc(&nmi_active); +- } +-} +- +-void stop_apic_nmi_watchdog(void *unused) +-{ +- /* only support LOCAL and IO APICs for now */ +- if (!nmi_watchdog_active()) +- return; +- if (__get_cpu_var(wd_enabled) == 0) +- return; +- if (nmi_watchdog == NMI_LOCAL_APIC) +- lapic_watchdog_stop(); +- else +- __acpi_nmi_disable(NULL); +- __get_cpu_var(wd_enabled) = 0; +- atomic_dec(&nmi_active); +-} +- +-/* +- * the best way to detect whether a CPU has a 'hard lockup' problem +- * is to check it's local APIC timer IRQ counts. If they are not +- * changing then that CPU has some problem. +- * +- * as these watchdog NMI IRQs are generated on every CPU, we only +- * have to check the current processor. +- * +- * since NMIs don't listen to _any_ locks, we have to be extremely +- * careful not to rely on unsafe variables. The printk might lock +- * up though, so we have to break up any console locks first ... +- * [when there will be more tty-related locks, break them up here too!] +- */ +- +-static DEFINE_PER_CPU(unsigned, last_irq_sum); +-static DEFINE_PER_CPU(local_t, alert_counter); +-static DEFINE_PER_CPU(int, nmi_touch); +- +-void touch_nmi_watchdog(void) +-{ +- if (nmi_watchdog_active()) { +- unsigned cpu; +- +- /* +- * Tell other CPUs to reset their alert counters. We cannot +- * do it ourselves because the alert count increase is not +- * atomic. +- */ +- for_each_present_cpu(cpu) { +- if (per_cpu(nmi_touch, cpu) != 1) +- per_cpu(nmi_touch, cpu) = 1; +- } +- } +- +- /* +- * Tickle the softlockup detector too: +- */ +- touch_softlockup_watchdog(); +-} +-EXPORT_SYMBOL(touch_nmi_watchdog); +- +-notrace __kprobes int +-nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) +-{ +- /* +- * Since current_thread_info()-> is always on the stack, and we +- * always switch the stack NMI-atomically, it's safe to use +- * smp_processor_id(). 
+- */ +- unsigned int sum; +- int touched = 0; +- int cpu = smp_processor_id(); +- int rc = 0; +- +- /* check for other users first */ +- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) +- == NOTIFY_STOP) { +- rc = 1; +- touched = 1; +- } +- +- sum = get_timer_irqs(cpu); +- +- if (__get_cpu_var(nmi_touch)) { +- __get_cpu_var(nmi_touch) = 0; +- touched = 1; +- } +- +- if (cpu_isset(cpu, backtrace_mask)) { +- static DEFINE_SPINLOCK(lock); /* Serialise the printks */ +- +- spin_lock(&lock); +- printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); +- dump_stack(); +- spin_unlock(&lock); +- cpu_clear(cpu, backtrace_mask); +- } +- +- /* Could check oops_in_progress here too, but it's safer not to */ +- if (mce_in_progress()) +- touched = 1; +- +- /* if the none of the timers isn't firing, this cpu isn't doing much */ +- if (!touched && __get_cpu_var(last_irq_sum) == sum) { +- /* +- * Ayiee, looks like this CPU is stuck ... +- * wait a few IRQs (5 seconds) before doing the oops ... +- */ +- local_inc(&__get_cpu_var(alert_counter)); +- if (local_read(&__get_cpu_var(alert_counter)) == 5 * nmi_hz) +- /* +- * die_nmi will return ONLY if NOTIFY_STOP happens.. +- */ +- die_nmi("BUG: NMI Watchdog detected LOCKUP", +- regs, panic_on_timeout); +- } else { +- __get_cpu_var(last_irq_sum) = sum; +- local_set(&__get_cpu_var(alert_counter), 0); +- } +- +- /* see if the nmi watchdog went off */ +- if (!__get_cpu_var(wd_enabled)) +- return rc; +- switch (nmi_watchdog) { +- case NMI_LOCAL_APIC: +- rc |= lapic_wd_event(nmi_hz); +- break; +- case NMI_IO_APIC: +- /* +- * don't know how to accurately check for this. +- * just assume it was a watchdog timer interrupt +- * This matches the old behaviour. +- */ +- rc = 1; +- break; +- } +- return rc; +-} +- +-#ifdef CONFIG_SYSCTL +- +-static void enable_ioapic_nmi_watchdog_single(void *unused) +-{ +- __get_cpu_var(wd_enabled) = 1; +- atomic_inc(&nmi_active); +- __acpi_nmi_enable(NULL); +-} +- +-static void enable_ioapic_nmi_watchdog(void) +-{ +- on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1); +- touch_nmi_watchdog(); +-} +- +-static void disable_ioapic_nmi_watchdog(void) +-{ +- on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); +-} +- +-static int __init setup_unknown_nmi_panic(char *str) +-{ +- unknown_nmi_panic = 1; +- return 1; +-} +-__setup("unknown_nmi_panic", setup_unknown_nmi_panic); +- +-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) +-{ +- unsigned char reason = get_nmi_reason(); +- char buf[64]; +- +- sprintf(buf, "NMI received for unknown reason %02x\n", reason); +- die_nmi(buf, regs, 1); /* Always panic here */ +- return 0; +-} +- +-/* +- * proc handler for /proc/sys/kernel/nmi +- */ +-int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, +- void __user *buffer, size_t *length, loff_t *ppos) +-{ +- int old_state; +- +- nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 
1 : 0; +- old_state = nmi_watchdog_enabled; +- proc_dointvec(table, write, file, buffer, length, ppos); +- if (!!old_state == !!nmi_watchdog_enabled) +- return 0; +- +- if (atomic_read(&nmi_active) < 0 || !nmi_watchdog_active()) { +- printk(KERN_WARNING +- "NMI watchdog is permanently disabled\n"); +- return -EIO; +- } +- +- if (nmi_watchdog == NMI_LOCAL_APIC) { +- if (nmi_watchdog_enabled) +- enable_lapic_nmi_watchdog(); +- else +- disable_lapic_nmi_watchdog(); +- } else if (nmi_watchdog == NMI_IO_APIC) { +- if (nmi_watchdog_enabled) +- enable_ioapic_nmi_watchdog(); +- else +- disable_ioapic_nmi_watchdog(); +- } else { +- printk(KERN_WARNING +- "NMI watchdog doesn't know what hardware to touch\n"); +- return -EIO; +- } +- return 0; +-} +- +-#endif /* CONFIG_SYSCTL */ +- +-int do_nmi_callback(struct pt_regs *regs, int cpu) +-{ +-#ifdef CONFIG_SYSCTL +- if (unknown_nmi_panic) +- return unknown_nmi_panic_callback(regs, cpu); +-#endif +- return 0; +-} +- +-void __trigger_all_cpu_backtrace(void) +-{ +- int i; +- +- backtrace_mask = cpu_online_map; +- /* Wait for up to 10 seconds for all CPUs to do the backtrace */ +- for (i = 0; i < 10 * 1000; i++) { +- if (cpus_empty(backtrace_mask)) +- break; +- mdelay(1); +- } +-} +Index: linux-2.6-tip/arch/x86/kernel/numaq_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/numaq_32.c ++++ /dev/null +@@ -1,293 +0,0 @@ +-/* +- * Written by: Patricia Gaughen, IBM Corporation +- * +- * Copyright (C) 2002, IBM Corp. +- * +- * All rights reserved. +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. +- * +- * This program is distributed in the hope that it will be useful, but +- * WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or +- * NON INFRINGEMENT. See the GNU General Public License for more +- * details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the Free Software +- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +- * +- * Send feedback to +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) +- +-/* +- * Function: smp_dump_qct() +- * +- * Description: gets memory layout from the quad config table. This +- * function also updates node_online_map with the nodes (quads) present. 
+- */ +-static void __init smp_dump_qct(void) +-{ +- int node; +- struct eachquadmem *eq; +- struct sys_cfg_data *scd = +- (struct sys_cfg_data *)__va(SYS_CFG_DATA_PRIV_ADDR); +- +- nodes_clear(node_online_map); +- for_each_node(node) { +- if (scd->quads_present31_0 & (1 << node)) { +- node_set_online(node); +- eq = &scd->eq[node]; +- /* Convert to pages */ +- node_start_pfn[node] = MB_TO_PAGES( +- eq->hi_shrd_mem_start - eq->priv_mem_size); +- node_end_pfn[node] = MB_TO_PAGES( +- eq->hi_shrd_mem_start + eq->hi_shrd_mem_size); +- +- e820_register_active_regions(node, node_start_pfn[node], +- node_end_pfn[node]); +- memory_present(node, +- node_start_pfn[node], node_end_pfn[node]); +- node_remap_size[node] = node_memmap_size_bytes(node, +- node_start_pfn[node], +- node_end_pfn[node]); +- } +- } +-} +- +- +-void __cpuinit numaq_tsc_disable(void) +-{ +- if (!found_numaq) +- return; +- +- if (num_online_nodes() > 1) { +- printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); +- setup_clear_cpu_cap(X86_FEATURE_TSC); +- } +-} +- +-static int __init numaq_pre_time_init(void) +-{ +- numaq_tsc_disable(); +- return 0; +-} +- +-int found_numaq; +-/* +- * Have to match translation table entries to main table entries by counter +- * hence the mpc_record variable .... can't see a less disgusting way of +- * doing this .... +- */ +-struct mpc_config_translation { +- unsigned char mpc_type; +- unsigned char trans_len; +- unsigned char trans_type; +- unsigned char trans_quad; +- unsigned char trans_global; +- unsigned char trans_local; +- unsigned short trans_reserved; +-}; +- +-/* x86_quirks member */ +-static int mpc_record; +-static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] +- __cpuinitdata; +- +-static inline int generate_logical_apicid(int quad, int phys_apicid) +-{ +- return (quad << 4) + (phys_apicid ? 
phys_apicid << 1 : 1); +-} +- +-/* x86_quirks member */ +-static int mpc_apic_id(struct mpc_cpu *m) +-{ +- int quad = translation_table[mpc_record]->trans_quad; +- int logical_apicid = generate_logical_apicid(quad, m->apicid); +- +- printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", +- m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, +- (m->cpufeature & CPU_MODEL_MASK) >> 4, +- m->apicver, quad, logical_apicid); +- return logical_apicid; +-} +- +-int mp_bus_id_to_node[MAX_MP_BUSSES]; +- +-int mp_bus_id_to_local[MAX_MP_BUSSES]; +- +-/* x86_quirks member */ +-static void mpc_oem_bus_info(struct mpc_bus *m, char *name) +-{ +- int quad = translation_table[mpc_record]->trans_quad; +- int local = translation_table[mpc_record]->trans_local; +- +- mp_bus_id_to_node[m->busid] = quad; +- mp_bus_id_to_local[m->busid] = local; +- printk(KERN_INFO "Bus #%d is %s (node %d)\n", +- m->busid, name, quad); +-} +- +-int quad_local_to_mp_bus_id [NR_CPUS/4][4]; +- +-/* x86_quirks member */ +-static void mpc_oem_pci_bus(struct mpc_bus *m) +-{ +- int quad = translation_table[mpc_record]->trans_quad; +- int local = translation_table[mpc_record]->trans_local; +- +- quad_local_to_mp_bus_id[quad][local] = m->busid; +-} +- +-static void __init MP_translation_info(struct mpc_config_translation *m) +-{ +- printk(KERN_INFO +- "Translation: record %d, type %d, quad %d, global %d, local %d\n", +- mpc_record, m->trans_type, m->trans_quad, m->trans_global, +- m->trans_local); +- +- if (mpc_record >= MAX_MPC_ENTRY) +- printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); +- else +- translation_table[mpc_record] = m; /* stash this for later */ +- if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) +- node_set_online(m->trans_quad); +-} +- +-static int __init mpf_checksum(unsigned char *mp, int len) +-{ +- int sum = 0; +- +- while (len--) +- sum += *mp++; +- +- return sum & 0xFF; +-} +- +-/* +- * Read/parse the MPC oem tables +- */ +- +-static void __init smp_read_mpc_oem(struct mpc_oemtable *oemtable, +- unsigned short oemsize) +-{ +- int count = sizeof(*oemtable); /* the header size */ +- unsigned char *oemptr = ((unsigned char *)oemtable) + count; +- +- mpc_record = 0; +- printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", +- oemtable); +- if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { +- printk(KERN_WARNING +- "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", +- oemtable->signature[0], oemtable->signature[1], +- oemtable->signature[2], oemtable->signature[3]); +- return; +- } +- if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) { +- printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); +- return; +- } +- while (count < oemtable->length) { +- switch (*oemptr) { +- case MP_TRANSLATION: +- { +- struct mpc_config_translation *m = +- (struct mpc_config_translation *)oemptr; +- MP_translation_info(m); +- oemptr += sizeof(*m); +- count += sizeof(*m); +- ++mpc_record; +- break; +- } +- default: +- { +- printk(KERN_WARNING +- "Unrecognised OEM table entry type! 
- %d\n", +- (int)*oemptr); +- return; +- } +- } +- } +-} +- +-static int __init numaq_setup_ioapic_ids(void) +-{ +- /* so can skip it */ +- return 1; +-} +- +-static int __init numaq_update_genapic(void) +-{ +- genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; +- +- return 0; +-} +- +-static struct x86_quirks numaq_x86_quirks __initdata = { +- .arch_pre_time_init = numaq_pre_time_init, +- .arch_time_init = NULL, +- .arch_pre_intr_init = NULL, +- .arch_memory_setup = NULL, +- .arch_intr_init = NULL, +- .arch_trap_init = NULL, +- .mach_get_smp_config = NULL, +- .mach_find_smp_config = NULL, +- .mpc_record = &mpc_record, +- .mpc_apic_id = mpc_apic_id, +- .mpc_oem_bus_info = mpc_oem_bus_info, +- .mpc_oem_pci_bus = mpc_oem_pci_bus, +- .smp_read_mpc_oem = smp_read_mpc_oem, +- .setup_ioapic_ids = numaq_setup_ioapic_ids, +- .update_genapic = numaq_update_genapic, +-}; +- +-void numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +-{ +- if (strncmp(oem, "IBM NUMA", 8)) +- printk("Warning! Not a NUMA-Q system!\n"); +- else +- found_numaq = 1; +-} +- +-static __init void early_check_numaq(void) +-{ +- /* +- * Find possible boot-time SMP configuration: +- */ +- early_find_smp_config(); +- /* +- * get boot-time SMP configuration: +- */ +- if (smp_found_config) +- early_get_smp_config(); +- +- if (found_numaq) +- x86_quirks = &numaq_x86_quirks; +-} +- +-int __init get_memcfg_numaq(void) +-{ +- early_check_numaq(); +- if (!found_numaq) +- return 0; +- smp_dump_qct(); +- return 1; +-} +Index: linux-2.6-tip/arch/x86/kernel/paravirt-spinlocks.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/paravirt-spinlocks.c ++++ linux-2.6-tip/arch/x86/kernel/paravirt-spinlocks.c +@@ -8,7 +8,7 @@ + #include + + static inline void +-default_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) ++default_spin_lock_flags(__raw_spinlock_t *lock, unsigned long flags) + { + __raw_spin_lock(lock); + } +@@ -26,13 +26,3 @@ struct pv_lock_ops pv_lock_ops = { + }; + EXPORT_SYMBOL(pv_lock_ops); + +-void __init paravirt_use_bytelocks(void) +-{ +-#ifdef CONFIG_SMP +- pv_lock_ops.spin_is_locked = __byte_spin_is_locked; +- pv_lock_ops.spin_is_contended = __byte_spin_is_contended; +- pv_lock_ops.spin_lock = __byte_spin_lock; +- pv_lock_ops.spin_trylock = __byte_spin_trylock; +- pv_lock_ops.spin_unlock = __byte_spin_unlock; +-#endif +-} +Index: linux-2.6-tip/arch/x86/kernel/paravirt.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/paravirt.c ++++ linux-2.6-tip/arch/x86/kernel/paravirt.c +@@ -28,7 +28,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -44,6 +43,17 @@ void _paravirt_nop(void) + { + } + ++/* identity function, which can be inlined */ ++u32 _paravirt_ident_32(u32 x) ++{ ++ return x; ++} ++ ++u64 _paravirt_ident_64(u64 x) ++{ ++ return x; ++} ++ + static void __init default_banner(void) + { + printk(KERN_INFO "Booting paravirtualized kernel on %s\n", +@@ -138,9 +148,16 @@ unsigned paravirt_patch_default(u8 type, + if (opfunc == NULL) + /* If there's no function, patch it with a ud2a (BUG) */ + ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); +- else if (opfunc == paravirt_nop) ++ else if (opfunc == _paravirt_nop) + /* If the operation is a nop, then nop the callsite */ + ret = paravirt_patch_nop(); ++ ++ /* identity functions just return their single argument */ ++ else if (opfunc == _paravirt_ident_32) ++ ret = 
paravirt_patch_ident_32(insnbuf, len); ++ else if (opfunc == _paravirt_ident_64) ++ ret = paravirt_patch_ident_64(insnbuf, len); ++ + else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || + type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || + type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || +@@ -318,10 +335,10 @@ struct pv_time_ops pv_time_ops = { + + struct pv_irq_ops pv_irq_ops = { + .init_IRQ = native_init_IRQ, +- .save_fl = native_save_fl, +- .restore_fl = native_restore_fl, +- .irq_disable = native_irq_disable, +- .irq_enable = native_irq_enable, ++ .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), ++ .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), ++ .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), ++ .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable), + .safe_halt = native_safe_halt, + .halt = native_halt, + #ifdef CONFIG_X86_64 +@@ -399,6 +416,28 @@ struct pv_apic_ops pv_apic_ops = { + #endif + }; + ++#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE) ++/* 32-bit pagetable entries */ ++#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32) ++#else ++/* 64-bit pagetable entries */ ++#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64) ++#endif ++ ++#ifdef CONFIG_HIGHPTE ++/* ++ * kmap_atomic() might be an inline or a macro: ++ */ ++static void *kmap_atomic_func(struct page *page, enum km_type idx) ++{ ++ return kmap_atomic(page, idx); ++} ++static void *kmap_atomic_direct_func(struct page *page, enum km_type idx) ++{ ++ return kmap_atomic_direct(page, idx); ++} ++#endif ++ + struct pv_mmu_ops pv_mmu_ops = { + #ifndef CONFIG_X86_64 + .pagetable_setup_start = native_pagetable_setup_start, +@@ -439,33 +478,34 @@ struct pv_mmu_ops pv_mmu_ops = { + .ptep_modify_prot_commit = __ptep_modify_prot_commit, + + #ifdef CONFIG_HIGHPTE +- .kmap_atomic_pte = kmap_atomic, ++ .kmap_atomic_pte = kmap_atomic_func, ++ .kmap_atomic_pte_direct = kmap_atomic_direct_func, + #endif + + #if PAGETABLE_LEVELS >= 3 + #ifdef CONFIG_X86_PAE + .set_pte_atomic = native_set_pte_atomic, +- .set_pte_present = native_set_pte_present, + .pte_clear = native_pte_clear, + .pmd_clear = native_pmd_clear, + #endif + .set_pud = native_set_pud, +- .pmd_val = native_pmd_val, +- .make_pmd = native_make_pmd, ++ ++ .pmd_val = PTE_IDENT, ++ .make_pmd = PTE_IDENT, + + #if PAGETABLE_LEVELS == 4 +- .pud_val = native_pud_val, +- .make_pud = native_make_pud, ++ .pud_val = PTE_IDENT, ++ .make_pud = PTE_IDENT, ++ + .set_pgd = native_set_pgd, + #endif + #endif /* PAGETABLE_LEVELS >= 3 */ + +- .pte_val = native_pte_val, +- .pte_flags = native_pte_flags, +- .pgd_val = native_pgd_val, ++ .pte_val = PTE_IDENT, ++ .pgd_val = PTE_IDENT, + +- .make_pte = native_make_pte, +- .make_pgd = native_make_pgd, ++ .make_pte = PTE_IDENT, ++ .make_pgd = PTE_IDENT, + + .dup_mmap = paravirt_nop, + .exit_mmap = paravirt_nop, +Index: linux-2.6-tip/arch/x86/kernel/paravirt_patch_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/paravirt_patch_32.c ++++ linux-2.6-tip/arch/x86/kernel/paravirt_patch_32.c +@@ -12,6 +12,18 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %c + DEF_NATIVE(pv_cpu_ops, clts, "clts"); + DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); + ++unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) ++{ ++ /* arg in %eax, return in %eax */ ++ return 0; ++} ++ ++unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) ++{ ++ /* arg in %edx:%eax, return in %edx:%eax */ ++ return 0; ++} ++ + unsigned native_patch(u8 type, u16 clobbers, void *ibuf, + 
unsigned long addr, unsigned len) + { +Index: linux-2.6-tip/arch/x86/kernel/paravirt_patch_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/paravirt_patch_64.c ++++ linux-2.6-tip/arch/x86/kernel/paravirt_patch_64.c +@@ -19,6 +19,21 @@ DEF_NATIVE(pv_cpu_ops, usergs_sysret64, + DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); + DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); + ++DEF_NATIVE(, mov32, "mov %edi, %eax"); ++DEF_NATIVE(, mov64, "mov %rdi, %rax"); ++ ++unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len) ++{ ++ return paravirt_patch_insns(insnbuf, len, ++ start__mov32, end__mov32); ++} ++ ++unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len) ++{ ++ return paravirt_patch_insns(insnbuf, len, ++ start__mov64, end__mov64); ++} ++ + unsigned native_patch(u8 type, u16 clobbers, void *ibuf, + unsigned long addr, unsigned len) + { +Index: linux-2.6-tip/arch/x86/kernel/pci-calgary_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/pci-calgary_64.c ++++ linux-2.6-tip/arch/x86/kernel/pci-calgary_64.c +@@ -380,8 +380,9 @@ static inline struct iommu_table *find_i + return tbl; + } + +-static void calgary_unmap_sg(struct device *dev, +- struct scatterlist *sglist, int nelems, int direction) ++static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist, ++ int nelems,enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + struct iommu_table *tbl = find_iommu_table(dev); + struct scatterlist *s; +@@ -404,7 +405,8 @@ static void calgary_unmap_sg(struct devi + } + + static int calgary_map_sg(struct device *dev, struct scatterlist *sg, +- int nelems, int direction) ++ int nelems, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + struct iommu_table *tbl = find_iommu_table(dev); + struct scatterlist *s; +@@ -429,15 +431,14 @@ static int calgary_map_sg(struct device + s->dma_address = (entry << PAGE_SHIFT) | s->offset; + + /* insert into HW table */ +- tce_build(tbl, entry, npages, vaddr & PAGE_MASK, +- direction); ++ tce_build(tbl, entry, npages, vaddr & PAGE_MASK, dir); + + s->dma_length = s->length; + } + + return nelems; + error: +- calgary_unmap_sg(dev, sg, nelems, direction); ++ calgary_unmap_sg(dev, sg, nelems, dir, NULL); + for_each_sg(sg, s, nelems, i) { + sg->dma_address = bad_dma_address; + sg->dma_length = 0; +@@ -445,10 +446,12 @@ error: + return 0; + } + +-static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, +- size_t size, int direction) ++static dma_addr_t calgary_map_page(struct device *dev, struct page *page, ++ unsigned long offset, size_t size, ++ enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { +- void *vaddr = phys_to_virt(paddr); ++ void *vaddr = page_address(page) + offset; + unsigned long uaddr; + unsigned int npages; + struct iommu_table *tbl = find_iommu_table(dev); +@@ -456,17 +459,18 @@ static dma_addr_t calgary_map_single(str + uaddr = (unsigned long)vaddr; + npages = iommu_num_pages(uaddr, size, PAGE_SIZE); + +- return iommu_alloc(dev, tbl, vaddr, npages, direction); ++ return iommu_alloc(dev, tbl, vaddr, npages, dir); + } + +-static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, +- size_t size, int direction) ++static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr, ++ size_t size, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + struct iommu_table *tbl = find_iommu_table(dev); + unsigned int npages; + 
+- npages = iommu_num_pages(dma_handle, size, PAGE_SIZE); +- iommu_free(tbl, dma_handle, npages); ++ npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); ++ iommu_free(tbl, dma_addr, npages); + } + + static void* calgary_alloc_coherent(struct device *dev, size_t size, +@@ -515,13 +519,13 @@ static void calgary_free_coherent(struct + free_pages((unsigned long)vaddr, get_order(size)); + } + +-static struct dma_mapping_ops calgary_dma_ops = { ++static struct dma_map_ops calgary_dma_ops = { + .alloc_coherent = calgary_alloc_coherent, + .free_coherent = calgary_free_coherent, +- .map_single = calgary_map_single, +- .unmap_single = calgary_unmap_single, + .map_sg = calgary_map_sg, + .unmap_sg = calgary_unmap_sg, ++ .map_page = calgary_map_page, ++ .unmap_page = calgary_unmap_page, + }; + + static inline void __iomem * busno_to_bbar(unsigned char num) +Index: linux-2.6-tip/arch/x86/kernel/pci-dma.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/pci-dma.c ++++ linux-2.6-tip/arch/x86/kernel/pci-dma.c +@@ -1,4 +1,5 @@ + #include ++#include + #include + #include + #include +@@ -12,7 +13,7 @@ + + static int forbid_dac __read_mostly; + +-struct dma_mapping_ops *dma_ops; ++struct dma_map_ops *dma_ops; + EXPORT_SYMBOL(dma_ops); + + static int iommu_sac_force __read_mostly; +@@ -44,6 +45,9 @@ struct device x86_dma_fallback_dev = { + }; + EXPORT_SYMBOL(x86_dma_fallback_dev); + ++/* Number of entries preallocated for DMA-API debugging */ ++#define PREALLOC_DMA_DEBUG_ENTRIES 32768 ++ + int dma_set_mask(struct device *dev, u64 mask) + { + if (!dev->dma_mask || !dma_supported(dev, mask)) +@@ -224,7 +228,7 @@ early_param("iommu", iommu_setup); + + int dma_supported(struct device *dev, u64 mask) + { +- struct dma_mapping_ops *ops = get_dma_ops(dev); ++ struct dma_map_ops *ops = get_dma_ops(dev); + + #ifdef CONFIG_PCI + if (mask > 0xffffffff && forbid_dac > 0) { +@@ -265,6 +269,12 @@ EXPORT_SYMBOL(dma_supported); + + static int __init pci_iommu_init(void) + { ++ dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); ++ ++#ifdef CONFIG_PCI ++ dma_debug_add_bus(&pci_bus_type); ++#endif ++ + calgary_iommu_init(); + + intel_iommu_init(); +Index: linux-2.6-tip/arch/x86/kernel/pci-gart_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/pci-gart_64.c ++++ linux-2.6-tip/arch/x86/kernel/pci-gart_64.c +@@ -255,10 +255,13 @@ static dma_addr_t dma_map_area(struct de + } + + /* Map a single area into the IOMMU */ +-static dma_addr_t +-gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) ++static dma_addr_t gart_map_page(struct device *dev, struct page *page, ++ unsigned long offset, size_t size, ++ enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + unsigned long bus; ++ phys_addr_t paddr = page_to_phys(page) + offset; + + if (!dev) + dev = &x86_dma_fallback_dev; +@@ -275,8 +278,9 @@ gart_map_single(struct device *dev, phys + /* + * Free a DMA mapping. + */ +-static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, +- size_t size, int direction) ++static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr, ++ size_t size, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + unsigned long iommu_page; + int npages; +@@ -298,8 +302,8 @@ static void gart_unmap_single(struct dev + /* + * Wrapper for pci_unmap_single working with scatterlists. 
+ */ +-static void +-gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) ++static void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, ++ enum dma_data_direction dir, struct dma_attrs *attrs) + { + struct scatterlist *s; + int i; +@@ -307,7 +311,7 @@ gart_unmap_sg(struct device *dev, struct + for_each_sg(sg, s, nents, i) { + if (!s->dma_length || !s->length) + break; +- gart_unmap_single(dev, s->dma_address, s->dma_length, dir); ++ gart_unmap_page(dev, s->dma_address, s->dma_length, dir, NULL); + } + } + +@@ -329,7 +333,7 @@ static int dma_map_sg_nonforce(struct de + addr = dma_map_area(dev, addr, s->length, dir, 0); + if (addr == bad_dma_address) { + if (i > 0) +- gart_unmap_sg(dev, sg, i, dir); ++ gart_unmap_sg(dev, sg, i, dir, NULL); + nents = 0; + sg[0].dma_length = 0; + break; +@@ -400,8 +404,8 @@ dma_map_cont(struct device *dev, struct + * DMA map all entries in a scatterlist. + * Merge chunks that have page aligned sizes into a continuous mapping. + */ +-static int +-gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) ++static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, ++ enum dma_data_direction dir, struct dma_attrs *attrs) + { + struct scatterlist *s, *ps, *start_sg, *sgmap; + int need = 0, nextneed, i, out, start; +@@ -468,7 +472,7 @@ gart_map_sg(struct device *dev, struct s + + error: + flush_gart(); +- gart_unmap_sg(dev, sg, out, dir); ++ gart_unmap_sg(dev, sg, out, dir, NULL); + + /* When it was forced or merged try again in a dumb way */ + if (force_iommu || iommu_merge) { +@@ -521,7 +525,7 @@ static void + gart_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_addr) + { +- gart_unmap_single(dev, dma_addr, size, DMA_BIDIRECTIONAL); ++ gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, NULL); + free_pages((unsigned long)vaddr, get_order(size)); + } + +@@ -707,11 +711,11 @@ static __init int init_k8_gatt(struct ag + return -1; + } + +-static struct dma_mapping_ops gart_dma_ops = { +- .map_single = gart_map_single, +- .unmap_single = gart_unmap_single, ++static struct dma_map_ops gart_dma_ops = { + .map_sg = gart_map_sg, + .unmap_sg = gart_unmap_sg, ++ .map_page = gart_map_page, ++ .unmap_page = gart_unmap_page, + .alloc_coherent = gart_alloc_coherent, + .free_coherent = gart_free_coherent, + }; +Index: linux-2.6-tip/arch/x86/kernel/pci-nommu.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/pci-nommu.c ++++ linux-2.6-tip/arch/x86/kernel/pci-nommu.c +@@ -1,14 +1,14 @@ + /* Fallback functions when the main IOMMU code is not compiled in. This + code is roughly equivalent to i386. 
*/ +-#include +-#include +-#include +-#include + #include + #include ++#include ++#include ++#include ++#include + +-#include + #include ++#include + #include + + static int +@@ -25,19 +25,19 @@ check_addr(char *name, struct device *hw + return 1; + } + +-static dma_addr_t +-nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, +- int direction) ++static dma_addr_t nommu_map_page(struct device *dev, struct page *page, ++ unsigned long offset, size_t size, ++ enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { +- dma_addr_t bus = paddr; ++ dma_addr_t bus = page_to_phys(page) + offset; + WARN_ON(size == 0); +- if (!check_addr("map_single", hwdev, bus, size)) +- return bad_dma_address; ++ if (!check_addr("map_single", dev, bus, size)) ++ return bad_dma_address; + flush_write_buffers(); + return bus; + } + +- + /* Map a set of buffers described by scatterlist in streaming + * mode for DMA. This is the scatter-gather version of the + * above pci_map_single interface. Here the scatter gather list +@@ -54,7 +54,8 @@ nommu_map_single(struct device *hwdev, p + * the same here. + */ + static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, +- int nents, int direction) ++ int nents, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + struct scatterlist *s; + int i; +@@ -78,12 +79,12 @@ static void nommu_free_coherent(struct d + free_pages((unsigned long)vaddr, get_order(size)); + } + +-struct dma_mapping_ops nommu_dma_ops = { +- .alloc_coherent = dma_generic_alloc_coherent, +- .free_coherent = nommu_free_coherent, +- .map_single = nommu_map_single, +- .map_sg = nommu_map_sg, +- .is_phys = 1, ++struct dma_map_ops nommu_dma_ops = { ++ .alloc_coherent = dma_generic_alloc_coherent, ++ .free_coherent = nommu_free_coherent, ++ .map_sg = nommu_map_sg, ++ .map_page = nommu_map_page, ++ .is_phys = 1, + }; + + void __init no_iommu_init(void) +Index: linux-2.6-tip/arch/x86/kernel/pci-swiotlb.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/pci-swiotlb.c +@@ -0,0 +1,84 @@ ++/* Glue code to lib/swiotlb.c */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++int swiotlb __read_mostly; ++ ++void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) ++{ ++ return alloc_bootmem_low_pages(size); ++} ++ ++void *swiotlb_alloc(unsigned order, unsigned long nslabs) ++{ ++ return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); ++} ++ ++dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) ++{ ++ return paddr; ++} ++ ++phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) ++{ ++ return baddr; ++} ++ ++int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) ++{ ++ return 0; ++} ++ ++static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, ++ dma_addr_t *dma_handle, gfp_t flags) ++{ ++ void *vaddr; ++ ++ vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags); ++ if (vaddr) ++ return vaddr; ++ ++ return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); ++} ++ ++struct dma_map_ops swiotlb_dma_ops = { ++ .mapping_error = swiotlb_dma_mapping_error, ++ .alloc_coherent = x86_swiotlb_alloc_coherent, ++ .free_coherent = swiotlb_free_coherent, ++ .sync_single_for_cpu = swiotlb_sync_single_for_cpu, ++ .sync_single_for_device = swiotlb_sync_single_for_device, ++ .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, ++ .sync_single_range_for_device = 
swiotlb_sync_single_range_for_device, ++ .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, ++ .sync_sg_for_device = swiotlb_sync_sg_for_device, ++ .map_sg = swiotlb_map_sg_attrs, ++ .unmap_sg = swiotlb_unmap_sg_attrs, ++ .map_page = swiotlb_map_page, ++ .unmap_page = swiotlb_unmap_page, ++ .dma_supported = NULL, ++}; ++ ++void __init pci_swiotlb_init(void) ++{ ++ /* don't initialize swiotlb if iommu=off (no_iommu=1) */ ++#ifdef CONFIG_X86_64 ++ if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) ++ swiotlb = 1; ++#endif ++ if (swiotlb_force) ++ swiotlb = 1; ++ if (swiotlb) { ++ printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); ++ swiotlb_init(); ++ dma_ops = &swiotlb_dma_ops; ++ } ++} +Index: linux-2.6-tip/arch/x86/kernel/pci-swiotlb_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/pci-swiotlb_64.c ++++ /dev/null +@@ -1,91 +0,0 @@ +-/* Glue code to lib/swiotlb.c */ +- +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +- +-int swiotlb __read_mostly; +- +-void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) +-{ +- return alloc_bootmem_low_pages(size); +-} +- +-void *swiotlb_alloc(unsigned order, unsigned long nslabs) +-{ +- return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); +-} +- +-dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) +-{ +- return paddr; +-} +- +-phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) +-{ +- return baddr; +-} +- +-int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) +-{ +- return 0; +-} +- +-static dma_addr_t +-swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, +- int direction) +-{ +- return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); +-} +- +-static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, +- dma_addr_t *dma_handle, gfp_t flags) +-{ +- void *vaddr; +- +- vaddr = dma_generic_alloc_coherent(hwdev, size, dma_handle, flags); +- if (vaddr) +- return vaddr; +- +- return swiotlb_alloc_coherent(hwdev, size, dma_handle, flags); +-} +- +-struct dma_mapping_ops swiotlb_dma_ops = { +- .mapping_error = swiotlb_dma_mapping_error, +- .alloc_coherent = x86_swiotlb_alloc_coherent, +- .free_coherent = swiotlb_free_coherent, +- .map_single = swiotlb_map_single_phys, +- .unmap_single = swiotlb_unmap_single, +- .sync_single_for_cpu = swiotlb_sync_single_for_cpu, +- .sync_single_for_device = swiotlb_sync_single_for_device, +- .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, +- .sync_single_range_for_device = swiotlb_sync_single_range_for_device, +- .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, +- .sync_sg_for_device = swiotlb_sync_sg_for_device, +- .map_sg = swiotlb_map_sg, +- .unmap_sg = swiotlb_unmap_sg, +- .dma_supported = NULL, +-}; +- +-void __init pci_swiotlb_init(void) +-{ +- /* don't initialize swiotlb if iommu=off (no_iommu=1) */ +-#ifdef CONFIG_X86_64 +- if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) +- swiotlb = 1; +-#endif +- if (swiotlb_force) +- swiotlb = 1; +- if (swiotlb) { +- printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); +- swiotlb_init(); +- dma_ops = &swiotlb_dma_ops; +- } +-} +Index: linux-2.6-tip/arch/x86/kernel/probe_roms_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/probe_roms_32.c ++++ linux-2.6-tip/arch/x86/kernel/probe_roms_32.c +@@ -18,7 +18,7 @@ 
+ #include + #include + #include +-#include ++#include + + static struct resource system_rom_resource = { + .name = "System ROM", +Index: linux-2.6-tip/arch/x86/kernel/process.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/process.c ++++ linux-2.6-tip/arch/x86/kernel/process.c +@@ -1,16 +1,19 @@ + #include + #include + #include +-#include + #include ++#include + #include + #include + #include + #include + #include +-#include ++#include + #include + #include ++#include ++#include ++#include + + unsigned long idle_halt; + EXPORT_SYMBOL(idle_halt); +@@ -19,6 +22,9 @@ EXPORT_SYMBOL(idle_nomwait); + + struct kmem_cache *task_xstate_cachep; + ++DEFINE_TRACE(power_start); ++DEFINE_TRACE(power_end); ++ + int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) + { + *dst = *src; +@@ -52,10 +58,197 @@ void arch_task_cache_init(void) + task_xstate_cachep = + kmem_cache_create("task_xstate", xstate_size, + __alignof__(union thread_xstate), +- SLAB_PANIC, NULL); ++ SLAB_PANIC | SLAB_NOTRACK, NULL); + } + + /* ++ * Free current thread data structures etc.. ++ */ ++void exit_thread(void) ++{ ++ struct task_struct *me = current; ++ struct thread_struct *t = &me->thread; ++ unsigned long *bp = t->io_bitmap_ptr; ++ ++ if (bp) { ++ struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); ++ ++ t->io_bitmap_ptr = NULL; ++ clear_thread_flag(TIF_IO_BITMAP); ++ /* ++ * Careful, clear this in the TSS too: ++ */ ++ memset(tss->io_bitmap, 0xff, t->io_bitmap_max); ++ t->io_bitmap_max = 0; ++ put_cpu(); ++ kfree(bp); ++ } ++ ++ ds_exit_thread(current); ++} ++ ++void flush_thread(void) ++{ ++ struct task_struct *tsk = current; ++ ++#ifdef CONFIG_X86_64 ++ if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { ++ clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); ++ if (test_tsk_thread_flag(tsk, TIF_IA32)) { ++ clear_tsk_thread_flag(tsk, TIF_IA32); ++ } else { ++ set_tsk_thread_flag(tsk, TIF_IA32); ++ current_thread_info()->status |= TS_COMPAT; ++ } ++ } ++#endif ++ ++ clear_tsk_thread_flag(tsk, TIF_DEBUG); ++ ++ tsk->thread.debugreg0 = 0; ++ tsk->thread.debugreg1 = 0; ++ tsk->thread.debugreg2 = 0; ++ tsk->thread.debugreg3 = 0; ++ tsk->thread.debugreg6 = 0; ++ tsk->thread.debugreg7 = 0; ++ memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); ++ /* ++ * Forget coprocessor state.. ++ */ ++ tsk->fpu_counter = 0; ++ clear_fpu(tsk); ++ clear_used_math(); ++} ++ ++static void hard_disable_TSC(void) ++{ ++ write_cr4(read_cr4() | X86_CR4_TSD); ++} ++ ++void disable_TSC(void) ++{ ++ preempt_disable(); ++ if (!test_and_set_thread_flag(TIF_NOTSC)) ++ /* ++ * Must flip the CPU state synchronously with ++ * TIF_NOTSC in the current running context. ++ */ ++ hard_disable_TSC(); ++ preempt_enable(); ++} ++ ++static void hard_enable_TSC(void) ++{ ++ write_cr4(read_cr4() & ~X86_CR4_TSD); ++} ++ ++static void enable_TSC(void) ++{ ++ preempt_disable(); ++ if (test_and_clear_thread_flag(TIF_NOTSC)) ++ /* ++ * Must flip the CPU state synchronously with ++ * TIF_NOTSC in the current running context. 
++ */ ++ hard_enable_TSC(); ++ preempt_enable(); ++} ++ ++int get_tsc_mode(unsigned long adr) ++{ ++ unsigned int val; ++ ++ if (test_thread_flag(TIF_NOTSC)) ++ val = PR_TSC_SIGSEGV; ++ else ++ val = PR_TSC_ENABLE; ++ ++ return put_user(val, (unsigned int __user *)adr); ++} ++ ++int set_tsc_mode(unsigned int val) ++{ ++ if (val == PR_TSC_SIGSEGV) ++ disable_TSC(); ++ else if (val == PR_TSC_ENABLE) ++ enable_TSC(); ++ else ++ return -EINVAL; ++ ++ return 0; ++} ++ ++void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, ++ struct tss_struct *tss) ++{ ++ struct thread_struct *prev, *next; ++ ++ prev = &prev_p->thread; ++ next = &next_p->thread; ++ ++ if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || ++ test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) ++ ds_switch_to(prev_p, next_p); ++ else if (next->debugctlmsr != prev->debugctlmsr) ++ update_debugctlmsr(next->debugctlmsr); ++ ++ if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { ++ set_debugreg(next->debugreg0, 0); ++ set_debugreg(next->debugreg1, 1); ++ set_debugreg(next->debugreg2, 2); ++ set_debugreg(next->debugreg3, 3); ++ /* no 4 and 5 */ ++ set_debugreg(next->debugreg6, 6); ++ set_debugreg(next->debugreg7, 7); ++ } ++ ++ if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ ++ test_tsk_thread_flag(next_p, TIF_NOTSC)) { ++ /* prev and next are different */ ++ if (test_tsk_thread_flag(next_p, TIF_NOTSC)) ++ hard_disable_TSC(); ++ else ++ hard_enable_TSC(); ++ } ++ ++ if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { ++ /* ++ * Copy the relevant range of the IO bitmap. ++ * Normally this is 128 bytes or less: ++ */ ++ memcpy(tss->io_bitmap, next->io_bitmap_ptr, ++ max(prev->io_bitmap_max, next->io_bitmap_max)); ++ } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { ++ /* ++ * Clear any possible leftover bits: ++ */ ++ memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); ++ } ++} ++ ++int sys_fork(struct pt_regs *regs) ++{ ++ return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); ++} ++ ++/* ++ * This is trivial, and on the face of it looks like it ++ * could equally well be done in user mode. ++ * ++ * Not so, for quite unobvious reasons - register pressure. ++ * In user mode vfork() cannot have a stack frame, and if ++ * done by calling the "clone()" system call directly, you ++ * do not have enough call-clobbered registers to hold all ++ * the information you need. ++ */ ++int sys_vfork(struct pt_regs *regs) ++{ ++ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, ++ NULL, NULL); ++} ++ ++ ++/* + * Idle related variables and functions + */ + unsigned long boot_option_idle_override = 0; +@@ -135,7 +328,7 @@ void stop_this_cpu(void *dummy) + /* + * Remove this CPU: + */ +- cpu_clear(smp_processor_id(), cpu_online_map); ++ set_cpu_online(smp_processor_id(), false); + disable_local_APIC(); + + for (;;) { +@@ -285,12 +478,13 @@ static int __cpuinit check_c1e_idle(cons + return 1; + } + +-static cpumask_t c1e_mask = CPU_MASK_NONE; ++static cpumask_var_t c1e_mask; + static int c1e_detected; + + void c1e_remove_cpu(int cpu) + { +- cpu_clear(cpu, c1e_mask); ++ if (c1e_mask != NULL) ++ cpumask_clear_cpu(cpu, c1e_mask); + } + + /* +@@ -319,8 +513,8 @@ static void c1e_idle(void) + if (c1e_detected) { + int cpu = smp_processor_id(); + +- if (!cpu_isset(cpu, c1e_mask)) { +- cpu_set(cpu, c1e_mask); ++ if (!cpumask_test_cpu(cpu, c1e_mask)) { ++ cpumask_set_cpu(cpu, c1e_mask); + /* + * Force broadcast so ACPI can not interfere. 
Needs + * to run with interrupts enabled as it uses +@@ -350,7 +544,7 @@ static void c1e_idle(void) + + void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) + { +-#ifdef CONFIG_X86_SMP ++#ifdef CONFIG_SMP + if (pm_idle == poll_idle && smp_num_siblings > 1) { + printk(KERN_WARNING "WARNING: polling idle and HT enabled," + " performance may degrade.\n"); +@@ -372,6 +566,15 @@ void __cpuinit select_idle_routine(const + pm_idle = default_idle; + } + ++void __init init_c1e_mask(void) ++{ ++ /* If we're using c1e_idle, we need to allocate c1e_mask. */ ++ if (pm_idle == c1e_idle) { ++ alloc_cpumask_var(&c1e_mask, GFP_KERNEL); ++ cpumask_clear(c1e_mask); ++ } ++} ++ + static int __init idle_setup(char *str) + { + if (!str) +Index: linux-2.6-tip/arch/x86/kernel/process_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/process_32.c ++++ linux-2.6-tip/arch/x86/kernel/process_32.c +@@ -11,6 +11,7 @@ + + #include + ++#include + #include + #include + #include +@@ -66,9 +67,6 @@ asmlinkage void ret_from_fork(void) __as + DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; + EXPORT_PER_CPU_SYMBOL(current_task); + +-DEFINE_PER_CPU(int, cpu_number); +-EXPORT_PER_CPU_SYMBOL(cpu_number); +- + /* + * Return saved PC of a blocked thread. + */ +@@ -94,6 +92,15 @@ void cpu_idle(void) + { + int cpu = smp_processor_id(); + ++ /* ++ * If we're the non-boot CPU, nothing set the stack canary up ++ * for us. CPU0 already has it initialized but no harm in ++ * doing it again. This is a good place for updating it, as ++ * we wont ever return from this function (so the invalid ++ * canaries already on the stack wont ever trigger). ++ */ ++ boot_init_stack_canary(); ++ + current_thread_info()->status |= TS_POLLING; + + /* endless idle loop with no priority at all */ +@@ -101,23 +108,23 @@ void cpu_idle(void) + tick_nohz_stop_sched_tick(1); + while (!need_resched()) { + +- check_pgt_cache(); + rmb(); + + if (cpu_is_offline(cpu)) + play_dead(); + + local_irq_disable(); +- __get_cpu_var(irq_stat).idle_timestamp = jiffies; + /* Don't trace irqs off for idle */ + stop_critical_timings(); + pm_idle(); + start_critical_timings(); + } ++ local_irq_disable(); + tick_nohz_restart_sched_tick(); +- preempt_enable_no_resched(); +- schedule(); ++ __preempt_enable_no_resched(); ++ __schedule(); + preempt_disable(); ++ local_irq_enable(); + } + } + +@@ -132,7 +139,7 @@ void __show_regs(struct pt_regs *regs, i + if (user_mode_vm(regs)) { + sp = regs->sp; + ss = regs->ss & 0xffff; +- savesegment(gs, gs); ++ gs = get_user_gs(regs); + } else { + sp = (unsigned long) (®s->sp); + savesegment(ss, ss); +@@ -159,8 +166,10 @@ void __show_regs(struct pt_regs *regs, i + regs->ax, regs->bx, regs->cx, regs->dx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", + regs->si, regs->di, regs->bp, sp); +- printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", +- (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); ++ printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x" ++ " preempt:%08x\n", ++ (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, ++ preempt_count()); + + if (!all) + return; +@@ -213,6 +222,7 @@ int kernel_thread(int (*fn)(void *), voi + regs.ds = __USER_DS; + regs.es = __USER_DS; + regs.fs = __KERNEL_PERCPU; ++ regs.gs = __KERNEL_STACK_CANARY; + regs.orig_ax = -1; + regs.ip = (unsigned long) kernel_thread_helper; + regs.cs = __KERNEL_CS | get_kernel_rpl(); +@@ -223,55 +233,6 @@ int kernel_thread(int (*fn)(void *), voi + } + 
EXPORT_SYMBOL(kernel_thread); + +-/* +- * Free current thread data structures etc.. +- */ +-void exit_thread(void) +-{ +- /* The process may have allocated an io port bitmap... nuke it. */ +- if (unlikely(test_thread_flag(TIF_IO_BITMAP))) { +- struct task_struct *tsk = current; +- struct thread_struct *t = &tsk->thread; +- int cpu = get_cpu(); +- struct tss_struct *tss = &per_cpu(init_tss, cpu); +- +- kfree(t->io_bitmap_ptr); +- t->io_bitmap_ptr = NULL; +- clear_thread_flag(TIF_IO_BITMAP); +- /* +- * Careful, clear this in the TSS too: +- */ +- memset(tss->io_bitmap, 0xff, tss->io_bitmap_max); +- t->io_bitmap_max = 0; +- tss->io_bitmap_owner = NULL; +- tss->io_bitmap_max = 0; +- tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; +- put_cpu(); +- } +- +- ds_exit_thread(current); +-} +- +-void flush_thread(void) +-{ +- struct task_struct *tsk = current; +- +- tsk->thread.debugreg0 = 0; +- tsk->thread.debugreg1 = 0; +- tsk->thread.debugreg2 = 0; +- tsk->thread.debugreg3 = 0; +- tsk->thread.debugreg6 = 0; +- tsk->thread.debugreg7 = 0; +- memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); +- clear_tsk_thread_flag(tsk, TIF_DEBUG); +- /* +- * Forget coprocessor state.. +- */ +- tsk->fpu_counter = 0; +- clear_fpu(tsk); +- clear_used_math(); +-} +- + void release_thread(struct task_struct *dead_task) + { + BUG_ON(dead_task->mm); +@@ -305,7 +266,7 @@ int copy_thread(int nr, unsigned long cl + + p->thread.ip = (unsigned long) ret_from_fork; + +- savesegment(gs, p->thread.gs); ++ task_user_gs(p) = get_user_gs(regs); + + tsk = current; + if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { +@@ -343,7 +304,7 @@ int copy_thread(int nr, unsigned long cl + void + start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) + { +- __asm__("movl %0, %%gs" : : "r"(0)); ++ set_user_gs(regs, 0); + regs->fs = 0; + set_fs(USER_DS); + regs->ds = __USER_DS; +@@ -359,127 +320,6 @@ start_thread(struct pt_regs *regs, unsig + } + EXPORT_SYMBOL_GPL(start_thread); + +-static void hard_disable_TSC(void) +-{ +- write_cr4(read_cr4() | X86_CR4_TSD); +-} +- +-void disable_TSC(void) +-{ +- preempt_disable(); +- if (!test_and_set_thread_flag(TIF_NOTSC)) +- /* +- * Must flip the CPU state synchronously with +- * TIF_NOTSC in the current running context. +- */ +- hard_disable_TSC(); +- preempt_enable(); +-} +- +-static void hard_enable_TSC(void) +-{ +- write_cr4(read_cr4() & ~X86_CR4_TSD); +-} +- +-static void enable_TSC(void) +-{ +- preempt_disable(); +- if (test_and_clear_thread_flag(TIF_NOTSC)) +- /* +- * Must flip the CPU state synchronously with +- * TIF_NOTSC in the current running context. 
+- */ +- hard_enable_TSC(); +- preempt_enable(); +-} +- +-int get_tsc_mode(unsigned long adr) +-{ +- unsigned int val; +- +- if (test_thread_flag(TIF_NOTSC)) +- val = PR_TSC_SIGSEGV; +- else +- val = PR_TSC_ENABLE; +- +- return put_user(val, (unsigned int __user *)adr); +-} +- +-int set_tsc_mode(unsigned int val) +-{ +- if (val == PR_TSC_SIGSEGV) +- disable_TSC(); +- else if (val == PR_TSC_ENABLE) +- enable_TSC(); +- else +- return -EINVAL; +- +- return 0; +-} +- +-static noinline void +-__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, +- struct tss_struct *tss) +-{ +- struct thread_struct *prev, *next; +- +- prev = &prev_p->thread; +- next = &next_p->thread; +- +- if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || +- test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) +- ds_switch_to(prev_p, next_p); +- else if (next->debugctlmsr != prev->debugctlmsr) +- update_debugctlmsr(next->debugctlmsr); +- +- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { +- set_debugreg(next->debugreg0, 0); +- set_debugreg(next->debugreg1, 1); +- set_debugreg(next->debugreg2, 2); +- set_debugreg(next->debugreg3, 3); +- /* no 4 and 5 */ +- set_debugreg(next->debugreg6, 6); +- set_debugreg(next->debugreg7, 7); +- } +- +- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ +- test_tsk_thread_flag(next_p, TIF_NOTSC)) { +- /* prev and next are different */ +- if (test_tsk_thread_flag(next_p, TIF_NOTSC)) +- hard_disable_TSC(); +- else +- hard_enable_TSC(); +- } +- +- if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { +- /* +- * Disable the bitmap via an invalid offset. We still cache +- * the previous bitmap owner and the IO bitmap contents: +- */ +- tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; +- return; +- } +- +- if (likely(next == tss->io_bitmap_owner)) { +- /* +- * Previous owner of the bitmap (hence the bitmap content) +- * matches the next task, we dont have to do anything but +- * to set a valid offset in the TSS: +- */ +- tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; +- return; +- } +- /* +- * Lazy TSS's I/O bitmap copy. We set an invalid offset here +- * and we let the task to get a GPF in case an I/O instruction +- * is performed. The handler of the GPF will verify that the +- * faulting task has a valid I/O bitmap and, it true, does the +- * real copy and restart the instruction. This will save us +- * redundant copies when the currently switched task does not +- * perform any I/O during its timeslice. +- */ +- tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; +-} + + /* + * switch_to(x,yn) should switch tasks from x to y. +@@ -540,7 +380,7 @@ __switch_to(struct task_struct *prev_p, + * used %fs or %gs (it does not today), or if the kernel is + * running inside of a hypervisor layer. + */ +- savesegment(gs, prev->gs); ++ lazy_save_gs(prev->gs); + + /* + * Load the per-thread Thread-Local Storage descriptor. 
+@@ -586,64 +426,44 @@ __switch_to(struct task_struct *prev_p, + * Restore %gs if needed (which is common) + */ + if (prev->gs | next->gs) +- loadsegment(gs, next->gs); ++ lazy_load_gs(next->gs); + +- x86_write_percpu(current_task, next_p); ++ percpu_write(current_task, next_p); + + return prev_p; + } + +-asmlinkage int sys_fork(struct pt_regs regs) +-{ +- return do_fork(SIGCHLD, regs.sp, ®s, 0, NULL, NULL); +-} +- +-asmlinkage int sys_clone(struct pt_regs regs) ++int sys_clone(struct pt_regs *regs) + { + unsigned long clone_flags; + unsigned long newsp; + int __user *parent_tidptr, *child_tidptr; + +- clone_flags = regs.bx; +- newsp = regs.cx; +- parent_tidptr = (int __user *)regs.dx; +- child_tidptr = (int __user *)regs.di; ++ clone_flags = regs->bx; ++ newsp = regs->cx; ++ parent_tidptr = (int __user *)regs->dx; ++ child_tidptr = (int __user *)regs->di; + if (!newsp) +- newsp = regs.sp; +- return do_fork(clone_flags, newsp, ®s, 0, parent_tidptr, child_tidptr); +-} +- +-/* +- * This is trivial, and on the face of it looks like it +- * could equally well be done in user mode. +- * +- * Not so, for quite unobvious reasons - register pressure. +- * In user mode vfork() cannot have a stack frame, and if +- * done by calling the "clone()" system call directly, you +- * do not have enough call-clobbered registers to hold all +- * the information you need. +- */ +-asmlinkage int sys_vfork(struct pt_regs regs) +-{ +- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, ®s, 0, NULL, NULL); ++ newsp = regs->sp; ++ return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); + } + + /* + * sys_execve() executes a new program. + */ +-asmlinkage int sys_execve(struct pt_regs regs) ++int sys_execve(struct pt_regs *regs) + { + int error; + char *filename; + +- filename = getname((char __user *) regs.bx); ++ filename = getname((char __user *) regs->bx); + error = PTR_ERR(filename); + if (IS_ERR(filename)) + goto out; + error = do_execve(filename, +- (char __user * __user *) regs.cx, +- (char __user * __user *) regs.dx, +- ®s); ++ (char __user * __user *) regs->cx, ++ (char __user * __user *) regs->dx, ++ regs); + if (error == 0) { + /* Make sure we don't return using sysenter.. 
*/ + set_thread_flag(TIF_IRET); +Index: linux-2.6-tip/arch/x86/kernel/process_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/process_64.c ++++ linux-2.6-tip/arch/x86/kernel/process_64.c +@@ -16,6 +16,7 @@ + + #include + ++#include + #include + #include + #include +@@ -47,7 +48,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -58,6 +58,12 @@ + + asmlinkage extern void ret_from_fork(void); + ++DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; ++EXPORT_PER_CPU_SYMBOL(current_task); ++ ++DEFINE_PER_CPU(unsigned long, old_rsp); ++static DEFINE_PER_CPU(unsigned char, is_idle); ++ + unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; + + static ATOMIC_NOTIFIER_HEAD(idle_notifier); +@@ -76,13 +82,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregist + + void enter_idle(void) + { +- write_pda(isidle, 1); ++ percpu_write(is_idle, 1); + atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); + } + + static void __exit_idle(void) + { +- if (test_and_clear_bit_pda(0, isidle) == 0) ++ if (x86_test_and_clear_bit_percpu(0, is_idle) == 0) + return; + atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); + } +@@ -112,6 +118,16 @@ static inline void play_dead(void) + void cpu_idle(void) + { + current_thread_info()->status |= TS_POLLING; ++ ++ /* ++ * If we're the non-boot CPU, nothing set the stack canary up ++ * for us. CPU0 already has it initialized but no harm in ++ * doing it again. This is a good place for updating it, as ++ * we wont ever return from this function (so the invalid ++ * canaries already on the stack wont ever trigger). ++ */ ++ boot_init_stack_canary(); ++ + /* endless idle loop with no priority at all */ + while (1) { + tick_nohz_stop_sched_tick(1); +@@ -139,9 +155,11 @@ void cpu_idle(void) + } + + tick_nohz_restart_sched_tick(); +- preempt_enable_no_resched(); +- schedule(); ++ local_irq_disable(); ++ __preempt_enable_no_resched(); ++ __schedule(); + preempt_disable(); ++ local_irq_enable(); + } + } + +@@ -221,61 +239,6 @@ void show_regs(struct pt_regs *regs) + show_trace(NULL, regs, (void *)(regs + 1), regs->bp); + } + +-/* +- * Free current thread data structures etc.. +- */ +-void exit_thread(void) +-{ +- struct task_struct *me = current; +- struct thread_struct *t = &me->thread; +- +- if (me->thread.io_bitmap_ptr) { +- struct tss_struct *tss = &per_cpu(init_tss, get_cpu()); +- +- kfree(t->io_bitmap_ptr); +- t->io_bitmap_ptr = NULL; +- clear_thread_flag(TIF_IO_BITMAP); +- /* +- * Careful, clear this in the TSS too: +- */ +- memset(tss->io_bitmap, 0xff, t->io_bitmap_max); +- t->io_bitmap_max = 0; +- put_cpu(); +- } +- +- ds_exit_thread(current); +-} +- +-void flush_thread(void) +-{ +- struct task_struct *tsk = current; +- +- if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) { +- clear_tsk_thread_flag(tsk, TIF_ABI_PENDING); +- if (test_tsk_thread_flag(tsk, TIF_IA32)) { +- clear_tsk_thread_flag(tsk, TIF_IA32); +- } else { +- set_tsk_thread_flag(tsk, TIF_IA32); +- current_thread_info()->status |= TS_COMPAT; +- } +- } +- clear_tsk_thread_flag(tsk, TIF_DEBUG); +- +- tsk->thread.debugreg0 = 0; +- tsk->thread.debugreg1 = 0; +- tsk->thread.debugreg2 = 0; +- tsk->thread.debugreg3 = 0; +- tsk->thread.debugreg6 = 0; +- tsk->thread.debugreg7 = 0; +- memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); +- /* +- * Forget coprocessor state.. 
+- */ +- tsk->fpu_counter = 0; +- clear_fpu(tsk); +- clear_used_math(); +-} +- + void release_thread(struct task_struct *dead_task) + { + if (dead_task->mm) { +@@ -397,7 +360,7 @@ start_thread(struct pt_regs *regs, unsig + load_gs_index(0); + regs->ip = new_ip; + regs->sp = new_sp; +- write_pda(oldrsp, new_sp); ++ percpu_write(old_rsp, new_sp); + regs->cs = __USER_CS; + regs->ss = __USER_DS; + regs->flags = 0x200; +@@ -409,118 +372,6 @@ start_thread(struct pt_regs *regs, unsig + } + EXPORT_SYMBOL_GPL(start_thread); + +-static void hard_disable_TSC(void) +-{ +- write_cr4(read_cr4() | X86_CR4_TSD); +-} +- +-void disable_TSC(void) +-{ +- preempt_disable(); +- if (!test_and_set_thread_flag(TIF_NOTSC)) +- /* +- * Must flip the CPU state synchronously with +- * TIF_NOTSC in the current running context. +- */ +- hard_disable_TSC(); +- preempt_enable(); +-} +- +-static void hard_enable_TSC(void) +-{ +- write_cr4(read_cr4() & ~X86_CR4_TSD); +-} +- +-static void enable_TSC(void) +-{ +- preempt_disable(); +- if (test_and_clear_thread_flag(TIF_NOTSC)) +- /* +- * Must flip the CPU state synchronously with +- * TIF_NOTSC in the current running context. +- */ +- hard_enable_TSC(); +- preempt_enable(); +-} +- +-int get_tsc_mode(unsigned long adr) +-{ +- unsigned int val; +- +- if (test_thread_flag(TIF_NOTSC)) +- val = PR_TSC_SIGSEGV; +- else +- val = PR_TSC_ENABLE; +- +- return put_user(val, (unsigned int __user *)adr); +-} +- +-int set_tsc_mode(unsigned int val) +-{ +- if (val == PR_TSC_SIGSEGV) +- disable_TSC(); +- else if (val == PR_TSC_ENABLE) +- enable_TSC(); +- else +- return -EINVAL; +- +- return 0; +-} +- +-/* +- * This special macro can be used to load a debugging register +- */ +-#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r) +- +-static inline void __switch_to_xtra(struct task_struct *prev_p, +- struct task_struct *next_p, +- struct tss_struct *tss) +-{ +- struct thread_struct *prev, *next; +- +- prev = &prev_p->thread, +- next = &next_p->thread; +- +- if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || +- test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) +- ds_switch_to(prev_p, next_p); +- else if (next->debugctlmsr != prev->debugctlmsr) +- update_debugctlmsr(next->debugctlmsr); +- +- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { +- loaddebug(next, 0); +- loaddebug(next, 1); +- loaddebug(next, 2); +- loaddebug(next, 3); +- /* no 4 and 5 */ +- loaddebug(next, 6); +- loaddebug(next, 7); +- } +- +- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ +- test_tsk_thread_flag(next_p, TIF_NOTSC)) { +- /* prev and next are different */ +- if (test_tsk_thread_flag(next_p, TIF_NOTSC)) +- hard_disable_TSC(); +- else +- hard_enable_TSC(); +- } +- +- if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { +- /* +- * Copy the relevant range of the IO bitmap. +- * Normally this is 128 bytes or less: +- */ +- memcpy(tss->io_bitmap, next->io_bitmap_ptr, +- max(prev->io_bitmap_max, next->io_bitmap_max)); +- } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { +- /* +- * Clear any possible leftover bits: +- */ +- memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); +- } +-} +- + /* + * switch_to(x,y) should switch tasks from x to y. + * +@@ -618,21 +469,13 @@ __switch_to(struct task_struct *prev_p, + /* + * Switch the PDA and FPU contexts. 
+ */ +- prev->usersp = read_pda(oldrsp); +- write_pda(oldrsp, next->usersp); +- write_pda(pcurrent, next_p); ++ prev->usersp = percpu_read(old_rsp); ++ percpu_write(old_rsp, next->usersp); ++ percpu_write(current_task, next_p); + +- write_pda(kernelstack, ++ percpu_write(kernel_stack, + (unsigned long)task_stack_page(next_p) + +- THREAD_SIZE - PDA_STACKOFFSET); +-#ifdef CONFIG_CC_STACKPROTECTOR +- write_pda(stack_canary, next_p->stack_canary); +- /* +- * Build time only check to make sure the stack_canary is at +- * offset 40 in the pda; this is a gcc ABI requirement +- */ +- BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40); +-#endif ++ THREAD_SIZE - KERNEL_STACK_OFFSET); + + /* + * Now maybe reload the debug registers and handle I/O bitmaps +@@ -686,11 +529,6 @@ void set_personality_64bit(void) + current->personality &= ~READ_IMPLIES_EXEC; + } + +-asmlinkage long sys_fork(struct pt_regs *regs) +-{ +- return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL); +-} +- + asmlinkage long + sys_clone(unsigned long clone_flags, unsigned long newsp, + void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) +@@ -700,22 +538,6 @@ sys_clone(unsigned long clone_flags, uns + return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid); + } + +-/* +- * This is trivial, and on the face of it looks like it +- * could equally well be done in user mode. +- * +- * Not so, for quite unobvious reasons - register pressure. +- * In user mode vfork() cannot have a stack frame, and if +- * done by calling the "clone()" system call directly, you +- * do not have enough call-clobbered registers to hold all +- * the information you need. +- */ +-asmlinkage long sys_vfork(struct pt_regs *regs) +-{ +- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, +- NULL, NULL); +-} +- + unsigned long get_wchan(struct task_struct *p) + { + unsigned long stack; +Index: linux-2.6-tip/arch/x86/kernel/ptrace.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/ptrace.c ++++ linux-2.6-tip/arch/x86/kernel/ptrace.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -75,10 +76,7 @@ static inline bool invalid_selector(u16 + static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) + { + BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); +- regno >>= 2; +- if (regno > FS) +- --regno; +- return ®s->bx + regno; ++ return ®s->bx + (regno >> 2); + } + + static u16 get_segment_reg(struct task_struct *task, unsigned long offset) +@@ -90,9 +88,10 @@ static u16 get_segment_reg(struct task_s + if (offset != offsetof(struct user_regs_struct, gs)) + retval = *pt_regs_access(task_pt_regs(task), offset); + else { +- retval = task->thread.gs; + if (task == current) +- savesegment(gs, retval); ++ retval = get_user_gs(task_pt_regs(task)); ++ else ++ retval = task_user_gs(task); + } + return retval; + } +@@ -126,13 +125,10 @@ static int set_segment_reg(struct task_s + break; + + case offsetof(struct user_regs_struct, gs): +- task->thread.gs = value; + if (task == current) +- /* +- * The user-mode %gs is not affected by +- * kernel entry, so we must update the CPU. 
+- */ +- loadsegment(gs, value); ++ set_user_gs(task_pt_regs(task), value); ++ else ++ task_user_gs(task) = value; + } + + return 0; +@@ -273,7 +269,7 @@ static unsigned long debugreg_addr_limit + if (test_tsk_thread_flag(task, TIF_IA32)) + return IA32_PAGE_OFFSET - 3; + #endif +- return TASK_SIZE64 - 7; ++ return TASK_SIZE_MAX - 7; + } + + #endif /* CONFIG_X86_32 */ +@@ -1420,6 +1416,9 @@ asmregparm long syscall_trace_enter(stru + tracehook_report_syscall_entry(regs)) + ret = -1L; + ++ if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) ++ ftrace_syscall_enter(regs); ++ + if (unlikely(current->audit_context)) { + if (IS_IA32) + audit_syscall_entry(AUDIT_ARCH_I386, +@@ -1443,6 +1442,9 @@ asmregparm void syscall_trace_leave(stru + if (unlikely(current->audit_context)) + audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); + ++ if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) ++ ftrace_syscall_exit(regs); ++ + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall_exit(regs, 0); + +Index: linux-2.6-tip/arch/x86/kernel/quirks.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/quirks.c ++++ linux-2.6-tip/arch/x86/kernel/quirks.c +@@ -74,8 +74,7 @@ static void ich_force_hpet_resume(void) + if (!force_hpet_address) + return; + +- if (rcba_base == NULL) +- BUG(); ++ BUG_ON(rcba_base == NULL); + + /* read the Function Disable register, dword mode only */ + val = readl(rcba_base + 0x3404); +@@ -172,7 +171,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_I + ich_force_enable_hpet); + DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, + ich_force_enable_hpet); +- ++DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3a16, /* ICH10 */ ++ ich_force_enable_hpet); + + static struct pci_dev *cached_dev; + +Index: linux-2.6-tip/arch/x86/kernel/reboot.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/reboot.c ++++ linux-2.6-tip/arch/x86/kernel/reboot.c +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_X86_32 + # include +@@ -23,8 +24,6 @@ + # include + #endif + +-#include +- + /* + * Power off function, if any + */ +@@ -658,7 +657,7 @@ static int crash_nmi_callback(struct not + + static void smp_send_nmi_allbutself(void) + { +- send_IPI_allbutself(NMI_VECTOR); ++ apic->send_IPI_allbutself(NMI_VECTOR); + } + + static struct notifier_block crash_nmi_nb = { +Index: linux-2.6-tip/arch/x86/kernel/relocate_kernel_32.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/relocate_kernel_32.S ++++ linux-2.6-tip/arch/x86/kernel/relocate_kernel_32.S +@@ -7,7 +7,7 @@ + */ + + #include +-#include ++#include + #include + #include + +@@ -17,7 +17,8 @@ + + #define PTR(x) (x << 2) + +-/* control_page + KEXEC_CONTROL_CODE_MAX_SIZE ++/* ++ * control_page + KEXEC_CONTROL_CODE_MAX_SIZE + * ~ control_page + PAGE_SIZE are used as data storage and stack for + * jumping back + */ +@@ -76,8 +77,10 @@ relocate_kernel: + movl %eax, CP_PA_SWAP_PAGE(%edi) + movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) + +- /* get physical address of control page now */ +- /* this is impossible after page table switch */ ++ /* ++ * get physical address of control page now ++ * this is impossible after page table switch ++ */ + movl PTR(PA_CONTROL_PAGE)(%ebp), %edi + + /* switch to new set of page tables */ +@@ -97,7 +100,8 @@ identity_mapped: + /* store the start address on the stack */ + pushl %edx + +- /* Set cr0 to a 
known state: ++ /* ++ * Set cr0 to a known state: + * - Paging disabled + * - Alignment check disabled + * - Write protect disabled +@@ -113,7 +117,8 @@ identity_mapped: + /* clear cr4 if applicable */ + testl %ecx, %ecx + jz 1f +- /* Set cr4 to a known state: ++ /* ++ * Set cr4 to a known state: + * Setting everything to zero seems safe. + */ + xorl %eax, %eax +@@ -132,15 +137,18 @@ identity_mapped: + call swap_pages + addl $8, %esp + +- /* To be certain of avoiding problems with self-modifying code ++ /* ++ * To be certain of avoiding problems with self-modifying code + * I need to execute a serializing instruction here. + * So I flush the TLB, it's handy, and not processor dependent. + */ + xorl %eax, %eax + movl %eax, %cr3 + +- /* set all of the registers to known values */ +- /* leave %esp alone */ ++ /* ++ * set all of the registers to known values ++ * leave %esp alone ++ */ + + testl %esi, %esi + jnz 1f +Index: linux-2.6-tip/arch/x86/kernel/relocate_kernel_64.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/relocate_kernel_64.S ++++ linux-2.6-tip/arch/x86/kernel/relocate_kernel_64.S +@@ -7,10 +7,10 @@ + */ + + #include +-#include ++#include + #include + #include +-#include ++#include + + /* + * Must be relocatable PIC code callable as a C function +@@ -19,145 +19,76 @@ + #define PTR(x) (x << 3) + #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) + ++/* ++ * control_page + KEXEC_CONTROL_CODE_MAX_SIZE ++ * ~ control_page + PAGE_SIZE are used as data storage and stack for ++ * jumping back ++ */ ++#define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) ++ ++/* Minimal CPU state */ ++#define RSP DATA(0x0) ++#define CR0 DATA(0x8) ++#define CR3 DATA(0x10) ++#define CR4 DATA(0x18) ++ ++/* other data */ ++#define CP_PA_TABLE_PAGE DATA(0x20) ++#define CP_PA_SWAP_PAGE DATA(0x28) ++#define CP_PA_BACKUP_PAGES_MAP DATA(0x30) ++ + .text + .align PAGE_SIZE + .code64 + .globl relocate_kernel + relocate_kernel: +- /* %rdi indirection_page ++ /* ++ * %rdi indirection_page + * %rsi page_list + * %rdx start address ++ * %rcx preserve_context + */ + +- /* map the control page at its virtual address */ +- +- movq $0x0000ff8000000000, %r10 /* mask */ +- mov $(39 - 3), %cl /* bits to shift */ +- movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ +- +- movq %r11, %r9 +- andq %r10, %r9 +- shrq %cl, %r9 +- +- movq PTR(VA_PGD)(%rsi), %r8 +- addq %r8, %r9 +- movq PTR(PA_PUD_0)(%rsi), %r8 +- orq $PAGE_ATTR, %r8 +- movq %r8, (%r9) +- +- shrq $9, %r10 +- sub $9, %cl +- +- movq %r11, %r9 +- andq %r10, %r9 +- shrq %cl, %r9 +- +- movq PTR(VA_PUD_0)(%rsi), %r8 +- addq %r8, %r9 +- movq PTR(PA_PMD_0)(%rsi), %r8 +- orq $PAGE_ATTR, %r8 +- movq %r8, (%r9) +- +- shrq $9, %r10 +- sub $9, %cl +- +- movq %r11, %r9 +- andq %r10, %r9 +- shrq %cl, %r9 +- +- movq PTR(VA_PMD_0)(%rsi), %r8 +- addq %r8, %r9 +- movq PTR(PA_PTE_0)(%rsi), %r8 +- orq $PAGE_ATTR, %r8 +- movq %r8, (%r9) +- +- shrq $9, %r10 +- sub $9, %cl +- +- movq %r11, %r9 +- andq %r10, %r9 +- shrq %cl, %r9 +- +- movq PTR(VA_PTE_0)(%rsi), %r8 +- addq %r8, %r9 +- movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 +- orq $PAGE_ATTR, %r8 +- movq %r8, (%r9) +- +- /* identity map the control page at its physical address */ +- +- movq $0x0000ff8000000000, %r10 /* mask */ +- mov $(39 - 3), %cl /* bits to shift */ +- movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */ +- +- movq %r11, %r9 +- andq %r10, %r9 +- shrq %cl, %r9 +- +- movq PTR(VA_PGD)(%rsi), %r8 +- addq %r8, %r9 +- movq 
PTR(PA_PUD_1)(%rsi), %r8 +- orq $PAGE_ATTR, %r8 +- movq %r8, (%r9) +- +- shrq $9, %r10 +- sub $9, %cl +- +- movq %r11, %r9 +- andq %r10, %r9 +- shrq %cl, %r9 +- +- movq PTR(VA_PUD_1)(%rsi), %r8 +- addq %r8, %r9 +- movq PTR(PA_PMD_1)(%rsi), %r8 +- orq $PAGE_ATTR, %r8 +- movq %r8, (%r9) +- +- shrq $9, %r10 +- sub $9, %cl +- +- movq %r11, %r9 +- andq %r10, %r9 +- shrq %cl, %r9 +- +- movq PTR(VA_PMD_1)(%rsi), %r8 +- addq %r8, %r9 +- movq PTR(PA_PTE_1)(%rsi), %r8 +- orq $PAGE_ATTR, %r8 +- movq %r8, (%r9) +- +- shrq $9, %r10 +- sub $9, %cl +- +- movq %r11, %r9 +- andq %r10, %r9 +- shrq %cl, %r9 ++ /* Save the CPU context, used for jumping back */ ++ pushq %rbx ++ pushq %rbp ++ pushq %r12 ++ pushq %r13 ++ pushq %r14 ++ pushq %r15 ++ pushf + +- movq PTR(VA_PTE_1)(%rsi), %r8 +- addq %r8, %r9 +- movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 +- orq $PAGE_ATTR, %r8 +- movq %r8, (%r9) +- +-relocate_new_kernel: +- /* %rdi indirection_page +- * %rsi page_list +- * %rdx start address +- */ ++ movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 ++ movq %rsp, RSP(%r11) ++ movq %cr0, %rax ++ movq %rax, CR0(%r11) ++ movq %cr3, %rax ++ movq %rax, CR3(%r11) ++ movq %cr4, %rax ++ movq %rax, CR4(%r11) + + /* zero out flags, and disable interrupts */ + pushq $0 + popfq + +- /* get physical address of control page now */ +- /* this is impossible after page table switch */ ++ /* ++ * get physical address of control page now ++ * this is impossible after page table switch ++ */ + movq PTR(PA_CONTROL_PAGE)(%rsi), %r8 + + /* get physical address of page table now too */ +- movq PTR(PA_TABLE_PAGE)(%rsi), %rcx ++ movq PTR(PA_TABLE_PAGE)(%rsi), %r9 ++ ++ /* get physical address of swap page now */ ++ movq PTR(PA_SWAP_PAGE)(%rsi), %r10 + +- /* switch to new set of page tables */ +- movq PTR(PA_PGD)(%rsi), %r9 ++ /* save some information for jumping back */ ++ movq %r9, CP_PA_TABLE_PAGE(%r11) ++ movq %r10, CP_PA_SWAP_PAGE(%r11) ++ movq %rdi, CP_PA_BACKUP_PAGES_MAP(%r11) ++ ++ /* Switch to the identity mapped page tables */ + movq %r9, %cr3 + + /* setup a new stack at the end of the physical control page */ +@@ -172,7 +103,8 @@ identity_mapped: + /* store the start address on the stack */ + pushq %rdx + +- /* Set cr0 to a known state: ++ /* ++ * Set cr0 to a known state: + * - Paging enabled + * - Alignment check disabled + * - Write protect disabled +@@ -185,7 +117,8 @@ identity_mapped: + orl $(X86_CR0_PG | X86_CR0_PE), %eax + movq %rax, %cr0 + +- /* Set cr4 to a known state: ++ /* ++ * Set cr4 to a known state: + * - physical address extension enabled + */ + movq $X86_CR4_PAE, %rax +@@ -194,12 +127,88 @@ identity_mapped: + jmp 1f + 1: + +- /* Switch to the identity mapped page tables, +- * and flush the TLB. +- */ +- movq %rcx, %cr3 ++ /* Flush the TLB (needed?) */ ++ movq %r9, %cr3 ++ ++ movq %rcx, %r11 ++ call swap_pages ++ ++ /* ++ * To be certain of avoiding problems with self-modifying code ++ * I need to execute a serializing instruction here. ++ * So I flush the TLB by reloading %cr3 here, it's handy, ++ * and not processor dependent. 
++ */ ++ movq %cr3, %rax ++ movq %rax, %cr3 ++ ++ /* ++ * set all of the registers to known values ++ * leave %rsp alone ++ */ ++ ++ testq %r11, %r11 ++ jnz 1f ++ xorq %rax, %rax ++ xorq %rbx, %rbx ++ xorq %rcx, %rcx ++ xorq %rdx, %rdx ++ xorq %rsi, %rsi ++ xorq %rdi, %rdi ++ xorq %rbp, %rbp ++ xorq %r8, %r8 ++ xorq %r9, %r9 ++ xorq %r10, %r9 ++ xorq %r11, %r11 ++ xorq %r12, %r12 ++ xorq %r13, %r13 ++ xorq %r14, %r14 ++ xorq %r15, %r15 ++ ++ ret ++ ++1: ++ popq %rdx ++ leaq PAGE_SIZE(%r10), %rsp ++ call *%rdx ++ ++ /* get the re-entry point of the peer system */ ++ movq 0(%rsp), %rbp ++ call 1f ++1: ++ popq %r8 ++ subq $(1b - relocate_kernel), %r8 ++ movq CP_PA_SWAP_PAGE(%r8), %r10 ++ movq CP_PA_BACKUP_PAGES_MAP(%r8), %rdi ++ movq CP_PA_TABLE_PAGE(%r8), %rax ++ movq %rax, %cr3 ++ lea PAGE_SIZE(%r8), %rsp ++ call swap_pages ++ movq $virtual_mapped, %rax ++ pushq %rax ++ ret ++ ++virtual_mapped: ++ movq RSP(%r8), %rsp ++ movq CR4(%r8), %rax ++ movq %rax, %cr4 ++ movq CR3(%r8), %rax ++ movq CR0(%r8), %r8 ++ movq %rax, %cr3 ++ movq %r8, %cr0 ++ movq %rbp, %rax ++ ++ popf ++ popq %r15 ++ popq %r14 ++ popq %r13 ++ popq %r12 ++ popq %rbp ++ popq %rbx ++ ret + + /* Do the copies */ ++swap_pages: + movq %rdi, %rcx /* Put the page_list in %rcx */ + xorq %rdi, %rdi + xorq %rsi, %rsi +@@ -231,36 +240,27 @@ identity_mapped: + movq %rcx, %rsi /* For ever source page do a copy */ + andq $0xfffffffffffff000, %rsi + ++ movq %rdi, %rdx ++ movq %rsi, %rax ++ ++ movq %r10, %rdi + movq $512, %rcx + rep ; movsq +- jmp 0b +-3: +- +- /* To be certain of avoiding problems with self-modifying code +- * I need to execute a serializing instruction here. +- * So I flush the TLB by reloading %cr3 here, it's handy, +- * and not processor dependent. +- */ +- movq %cr3, %rax +- movq %rax, %cr3 + +- /* set all of the registers to known values */ +- /* leave %rsp alone */ ++ movq %rax, %rdi ++ movq %rdx, %rsi ++ movq $512, %rcx ++ rep ; movsq + +- xorq %rax, %rax +- xorq %rbx, %rbx +- xorq %rcx, %rcx +- xorq %rdx, %rdx +- xorq %rsi, %rsi +- xorq %rdi, %rdi +- xorq %rbp, %rbp +- xorq %r8, %r8 +- xorq %r9, %r9 +- xorq %r10, %r9 +- xorq %r11, %r11 +- xorq %r12, %r12 +- xorq %r13, %r13 +- xorq %r14, %r14 +- xorq %r15, %r15 ++ movq %rdx, %rdi ++ movq %r10, %rsi ++ movq $512, %rcx ++ rep ; movsq + ++ lea PAGE_SIZE(%rax), %rsi ++ jmp 0b ++3: + ret ++ ++ .globl kexec_control_code_size ++.set kexec_control_code_size, . - relocate_kernel +Index: linux-2.6-tip/arch/x86/kernel/rtc.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/rtc.c ++++ linux-2.6-tip/arch/x86/kernel/rtc.c +@@ -1,14 +1,14 @@ + /* + * RTC related functions + */ ++#include ++#include + #include + #include +-#include +-#include + #include + +-#include + #include ++#include + + #ifdef CONFIG_X86_32 + /* +@@ -16,9 +16,9 @@ + * register we are working with. It is required for NMI access to the + * CMOS/RTC registers. See include/asm-i386/mc146818rtc.h for details. 
+ */ +-volatile unsigned long cmos_lock = 0; ++volatile unsigned long cmos_lock; + EXPORT_SYMBOL(cmos_lock); +-#endif ++#endif /* CONFIG_X86_32 */ + + /* For two digit years assume time is always after that */ + #define CMOS_YEARS_OFFS 2000 +@@ -38,9 +38,9 @@ EXPORT_SYMBOL(rtc_lock); + */ + int mach_set_rtc_mmss(unsigned long nowtime) + { +- int retval = 0; + int real_seconds, real_minutes, cmos_minutes; + unsigned char save_control, save_freq_select; ++ int retval = 0; + + /* tell the clock it's being set */ + save_control = CMOS_READ(RTC_CONTROL); +@@ -72,8 +72,8 @@ int mach_set_rtc_mmss(unsigned long nowt + real_seconds = bin2bcd(real_seconds); + real_minutes = bin2bcd(real_minutes); + } +- CMOS_WRITE(real_seconds,RTC_SECONDS); +- CMOS_WRITE(real_minutes,RTC_MINUTES); ++ CMOS_WRITE(real_seconds, RTC_SECONDS); ++ CMOS_WRITE(real_minutes, RTC_MINUTES); + } else { + printk(KERN_WARNING + "set_rtc_mmss: can't update from %d to %d\n", +@@ -151,6 +151,7 @@ unsigned char rtc_cmos_read(unsigned cha + outb(addr, RTC_PORT(0)); + val = inb(RTC_PORT(1)); + lock_cmos_suffix(addr); ++ + return val; + } + EXPORT_SYMBOL(rtc_cmos_read); +@@ -166,8 +167,8 @@ EXPORT_SYMBOL(rtc_cmos_write); + + static int set_rtc_mmss(unsigned long nowtime) + { +- int retval; + unsigned long flags; ++ int retval; + + spin_lock_irqsave(&rtc_lock, flags); + retval = set_wallclock(nowtime); +@@ -242,6 +243,7 @@ static __init int add_rtc_cmos(void) + platform_device_register(&rtc_device); + dev_info(&rtc_device.dev, + "registered platform RTC device (no PNP device found)\n"); ++ + return 0; + } + device_initcall(add_rtc_cmos); +Index: linux-2.6-tip/arch/x86/kernel/scx200_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/scx200_32.c ++++ linux-2.6-tip/arch/x86/kernel/scx200_32.c +@@ -78,8 +78,10 @@ static int __devinit scx200_probe(struct + if (scx200_cb_probe(SCx200_CB_BASE_FIXED)) { + scx200_cb_base = SCx200_CB_BASE_FIXED; + } else { +- pci_read_config_dword(pdev, SCx200_CBA_SCRATCH, &base); +- if (scx200_cb_probe(base)) { ++ int err; ++ ++ err = pci_read_config_dword(pdev, SCx200_CBA_SCRATCH, &base); ++ if (!err && scx200_cb_probe(base)) { + scx200_cb_base = base; + } else { + printk(KERN_WARNING NAME ": Configuration Block not found\n"); +Index: linux-2.6-tip/arch/x86/kernel/setup.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/setup.c ++++ linux-2.6-tip/arch/x86/kernel/setup.c +@@ -74,14 +74,15 @@ + #include + #include + #include +-#include + #include ++#include ++#include + #include + #include + #include + #include + #include +-#include ++#include + #include + #include + #include +@@ -89,7 +90,7 @@ + + #include + #include +-#include ++#include + #include + #include + #include +@@ -97,7 +98,6 @@ + #include + #include + +-#include + #include + #include + +@@ -112,6 +112,25 @@ + #define ARCH_SETUP + #endif + ++RESERVE_BRK(dmi_alloc, 65536); ++ ++unsigned int boot_cpu_id __read_mostly; ++ ++static __initdata unsigned long _brk_start = (unsigned long)__brk_base; ++unsigned long _brk_end = (unsigned long)__brk_base; ++ ++#ifdef CONFIG_X86_64 ++int default_cpu_present_to_apicid(int mps_cpu) ++{ ++ return __default_cpu_present_to_apicid(mps_cpu); ++} ++ ++int default_check_phys_apicid_present(int boot_cpu_physical_apicid) ++{ ++ return __default_check_phys_apicid_present(boot_cpu_physical_apicid); ++} ++#endif ++ + #ifndef CONFIG_DEBUG_BOOT_PARAMS + struct boot_params __initdata boot_params; + #else +@@ 
-144,12 +163,6 @@ static struct resource bss_resource = { + + + #ifdef CONFIG_X86_32 +-/* This value is set up by the early boot code to point to the value +- immediately after the boot time page tables. It contains a *physical* +- address, and must not be in the .bss segment! */ +-unsigned long init_pg_tables_start __initdata = ~0UL; +-unsigned long init_pg_tables_end __initdata = ~0UL; +- + static struct resource video_ram_resource = { + .name = "Video RAM area", + .start = 0xa0000, +@@ -188,7 +201,9 @@ struct ist_info ist_info; + #endif + + #else +-struct cpuinfo_x86 boot_cpu_data __read_mostly; ++struct cpuinfo_x86 boot_cpu_data __read_mostly = { ++ .x86_phys_bits = MAX_PHYSMEM_BITS, ++}; + EXPORT_SYMBOL(boot_cpu_data); + #endif + +@@ -203,12 +218,6 @@ unsigned long mmu_cr4_features = X86_CR4 + int bootloader_type; + + /* +- * Early DMI memory +- */ +-int dmi_alloc_index; +-char dmi_alloc_data[DMI_MAX_DATA]; +- +-/* + * Setup options + */ + struct screen_info screen_info; +@@ -253,6 +262,35 @@ static inline void copy_edd(void) + } + #endif + ++void * __init extend_brk(size_t size, size_t align) ++{ ++ size_t mask = align - 1; ++ void *ret; ++ ++ BUG_ON(_brk_start == 0); ++ BUG_ON(align & mask); ++ ++ _brk_end = (_brk_end + mask) & ~mask; ++ BUG_ON((char *)(_brk_end + size) > __brk_limit); ++ ++ ret = (void *)_brk_end; ++ _brk_end += size; ++ ++ memset(ret, 0, size); ++ ++ return ret; ++} ++ ++static void __init reserve_brk(void) ++{ ++ if (_brk_end > _brk_start) ++ reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK"); ++ ++ /* Mark brk area as locked down and no longer taking any ++ new allocations */ ++ _brk_start = 0; ++} ++ + #ifdef CONFIG_BLK_DEV_INITRD + + #ifdef CONFIG_X86_32 +@@ -586,20 +624,7 @@ static int __init setup_elfcorehdr(char + early_param("elfcorehdr", setup_elfcorehdr); + #endif + +-static int __init default_update_genapic(void) +-{ +-#ifdef CONFIG_X86_SMP +-# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) +- genapic->wakeup_cpu = wakeup_secondary_cpu_via_init; +-# endif +-#endif +- +- return 0; +-} +- +-static struct x86_quirks default_x86_quirks __initdata = { +- .update_genapic = default_update_genapic, +-}; ++static struct x86_quirks default_x86_quirks __initdata; + + struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; + +@@ -656,7 +681,6 @@ void __init setup_arch(char **cmdline_p) + #ifdef CONFIG_X86_32 + memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); + visws_early_detect(); +- pre_setup_arch_hook(); + #else + printk(KERN_INFO "Command line: %s\n", boot_command_line); + #endif +@@ -715,11 +739,7 @@ void __init setup_arch(char **cmdline_p) + init_mm.start_code = (unsigned long) _text; + init_mm.end_code = (unsigned long) _etext; + init_mm.end_data = (unsigned long) _edata; +-#ifdef CONFIG_X86_32 +- init_mm.brk = init_pg_tables_end + PAGE_OFFSET; +-#else +- init_mm.brk = (unsigned long) &_end; +-#endif ++ init_mm.brk = _brk_end; + + code_resource.start = virt_to_phys(_text); + code_resource.end = virt_to_phys(_etext)-1; +@@ -824,8 +844,7 @@ void __init setup_arch(char **cmdline_p) + #else + num_physpages = max_pfn; + +- if (cpu_has_x2apic) +- check_x2apic(); ++ check_x2apic(); + + /* How many end-of-memory variables you have, grandma! 
*/ + /* need this before calling reserve_initrd */ +@@ -841,6 +860,8 @@ void __init setup_arch(char **cmdline_p) + setup_bios_corruption_check(); + #endif + ++ reserve_brk(); ++ + /* max_pfn_mapped is updated here */ + max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<arch_pre_intr_init) { ++ if (x86_quirks->arch_pre_intr_init()) ++ return; ++ } ++ init_ISA_irqs(); ++} + ++/** ++ * x86_quirk_intr_init - post gate setup interrupt initialisation ++ * ++ * Description: ++ * Fill in any interrupts that may have been left out by the general ++ * init_IRQ() routine. interrupts having to do with the machine rather ++ * than the devices on the I/O bus (like APIC interrupts in intel MP ++ * systems) are started here. ++ **/ ++void __init x86_quirk_intr_init(void) ++{ ++ if (x86_quirks->arch_intr_init) { ++ if (x86_quirks->arch_intr_init()) ++ return; ++ } ++} ++ ++/** ++ * x86_quirk_trap_init - initialise system specific traps ++ * ++ * Description: ++ * Called as the final act of trap_init(). Used in VISWS to initialise ++ * the various board specific APIC traps. ++ **/ ++void __init x86_quirk_trap_init(void) ++{ ++ if (x86_quirks->arch_trap_init) { ++ if (x86_quirks->arch_trap_init()) ++ return; ++ } ++} ++ ++static struct irqaction irq0 = { ++ .handler = timer_interrupt, ++ .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, ++ .mask = CPU_MASK_NONE, ++ .name = "timer" ++}; ++ ++/** ++ * x86_quirk_pre_time_init - do any specific initialisations before. ++ * ++ **/ ++void __init x86_quirk_pre_time_init(void) ++{ ++ if (x86_quirks->arch_pre_time_init) ++ x86_quirks->arch_pre_time_init(); ++} ++ ++/** ++ * x86_quirk_time_init - do any specific initialisations for the system timer. ++ * ++ * Description: ++ * Must plug the system timer interrupt source at HZ into the IRQ listed ++ * in irq_vectors.h:TIMER_IRQ ++ **/ ++void __init x86_quirk_time_init(void) ++{ ++ if (x86_quirks->arch_time_init) { ++ /* ++ * A nonzero return code does not mean failure, it means ++ * that the architecture quirk does not want any ++ * generic (timer) setup to be performed after this: ++ */ ++ if (x86_quirks->arch_time_init()) ++ return; ++ } ++ ++ irq0.mask = cpumask_of_cpu(0); ++ setup_irq(0, &irq0); ++} ++#endif /* CONFIG_X86_32 */ +Index: linux-2.6-tip/arch/x86/kernel/setup_percpu.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/setup_percpu.c ++++ linux-2.6-tip/arch/x86/kernel/setup_percpu.c +@@ -7,402 +7,439 @@ + #include + #include + #include ++#include + #include + #include + #include + #include + #include + #include ++#include ++#include ++#include ++#include + +-#ifdef CONFIG_X86_LOCAL_APIC +-unsigned int num_processors; +-unsigned disabled_cpus __cpuinitdata; +-/* Processor that is doing the boot up */ +-unsigned int boot_cpu_physical_apicid = -1U; +-EXPORT_SYMBOL(boot_cpu_physical_apicid); +-unsigned int max_physical_apicid; +- +-/* Bitmask of physically existing CPUs */ +-physid_mask_t phys_cpu_present_map; +-#endif +- +-/* map cpu index to physical APIC ID */ +-DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); +-DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); +-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); +-EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); +- +-#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) +-#define X86_64_NUMA 1 +- +-/* map cpu index to node index */ +-DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); +-EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); +- +-/* 
which logical CPUs are on which nodes */ +-cpumask_t *node_to_cpumask_map; +-EXPORT_SYMBOL(node_to_cpumask_map); ++#ifdef CONFIG_DEBUG_PER_CPU_MAPS ++# define DBG(x...) printk(KERN_DEBUG x) ++#else ++# define DBG(x...) ++#endif + +-/* setup node_to_cpumask_map */ +-static void __init setup_node_to_cpumask_map(void); ++DEFINE_PER_CPU(int, cpu_number); ++EXPORT_PER_CPU_SYMBOL(cpu_number); + ++#ifdef CONFIG_X86_64 ++#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) + #else +-static inline void setup_node_to_cpumask_map(void) { } ++#define BOOT_PERCPU_OFFSET 0 + #endif + +-#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) ++DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET; ++EXPORT_PER_CPU_SYMBOL(this_cpu_off); ++ ++unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = { ++ [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET, ++}; ++EXPORT_SYMBOL(__per_cpu_offset); ++ + /* +- * Copy data used in early init routines from the initial arrays to the +- * per cpu data areas. These arrays then become expendable and the +- * *_early_ptr's are zeroed indicating that the static arrays are gone. ++ * On x86_64 symbols referenced from code should be reachable using ++ * 32bit relocations. Reserve space for static percpu variables in ++ * modules so that they are always served from the first chunk which ++ * is located at the percpu segment base. On x86_32, anything can ++ * address anywhere. No need to reserve space in the first chunk. + */ +-static void __init setup_per_cpu_maps(void) ++#ifdef CONFIG_X86_64 ++#define PERCPU_FIRST_CHUNK_RESERVE PERCPU_MODULE_RESERVE ++#else ++#define PERCPU_FIRST_CHUNK_RESERVE 0 ++#endif ++ ++/** ++ * pcpu_need_numa - determine percpu allocation needs to consider NUMA ++ * ++ * If NUMA is not configured or there is only one NUMA node available, ++ * there is no reason to consider NUMA. This function determines ++ * whether percpu allocation should consider NUMA or not. ++ * ++ * RETURNS: ++ * true if NUMA should be considered; otherwise, false. ++ */ ++static bool __init pcpu_need_numa(void) + { +- int cpu; ++#ifdef CONFIG_NEED_MULTIPLE_NODES ++ pg_data_t *last = NULL; ++ unsigned int cpu; + + for_each_possible_cpu(cpu) { +- per_cpu(x86_cpu_to_apicid, cpu) = +- early_per_cpu_map(x86_cpu_to_apicid, cpu); +- per_cpu(x86_bios_cpu_apicid, cpu) = +- early_per_cpu_map(x86_bios_cpu_apicid, cpu); +-#ifdef X86_64_NUMA +- per_cpu(x86_cpu_to_node_map, cpu) = +- early_per_cpu_map(x86_cpu_to_node_map, cpu); +-#endif +- } ++ int node = early_cpu_to_node(cpu); + +- /* indicate the early static arrays will soon be gone */ +- early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; +- early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; +-#ifdef X86_64_NUMA +- early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; ++ if (node_online(node) && NODE_DATA(node) && ++ last && last != NODE_DATA(node)) ++ return true; ++ ++ last = NODE_DATA(node); ++ } + #endif ++ return false; + } + +-#ifdef CONFIG_X86_32 +-/* +- * Great future not-so-futuristic plan: make i386 and x86_64 do it +- * the same way +- */ +-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; +-EXPORT_SYMBOL(__per_cpu_offset); +-static inline void setup_cpu_pda_map(void) { } +- +-#elif !defined(CONFIG_SMP) +-static inline void setup_cpu_pda_map(void) { } +- +-#else /* CONFIG_SMP && CONFIG_X86_64 */ +- +-/* +- * Allocate cpu_pda pointer table and array via alloc_bootmem. 
++/** ++ * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu ++ * @cpu: cpu to allocate for ++ * @size: size allocation in bytes ++ * @align: alignment ++ * ++ * Allocate @size bytes aligned at @align for cpu @cpu. This wrapper ++ * does the right thing for NUMA regardless of the current ++ * configuration. ++ * ++ * RETURNS: ++ * Pointer to the allocated area on success, NULL on failure. + */ +-static void __init setup_cpu_pda_map(void) ++static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, ++ unsigned long align) + { +- char *pda; +- struct x8664_pda **new_cpu_pda; +- unsigned long size; +- int cpu; +- +- size = roundup(sizeof(struct x8664_pda), cache_line_size()); +- +- /* allocate cpu_pda array and pointer table */ +- { +- unsigned long tsize = nr_cpu_ids * sizeof(void *); +- unsigned long asize = size * (nr_cpu_ids - 1); +- +- tsize = roundup(tsize, cache_line_size()); +- new_cpu_pda = alloc_bootmem(tsize + asize); +- pda = (char *)new_cpu_pda + tsize; ++ const unsigned long goal = __pa(MAX_DMA_ADDRESS); ++#ifdef CONFIG_NEED_MULTIPLE_NODES ++ int node = early_cpu_to_node(cpu); ++ void *ptr; ++ ++ if (!node_online(node) || !NODE_DATA(node)) { ++ ptr = __alloc_bootmem_nopanic(size, align, goal); ++ pr_info("cpu %d has no node %d or node-local memory\n", ++ cpu, node); ++ pr_debug("per cpu data for cpu%d %lu bytes at %016lx\n", ++ cpu, size, __pa(ptr)); ++ } else { ++ ptr = __alloc_bootmem_node_nopanic(NODE_DATA(node), ++ size, align, goal); ++ pr_debug("per cpu data for cpu%d %lu bytes on node%d at " ++ "%016lx\n", cpu, size, node, __pa(ptr)); + } +- +- /* initialize pointer table to static pda's */ +- for_each_possible_cpu(cpu) { +- if (cpu == 0) { +- /* leave boot cpu pda in place */ +- new_cpu_pda[0] = cpu_pda(0); +- continue; +- } +- new_cpu_pda[cpu] = (struct x8664_pda *)pda; +- new_cpu_pda[cpu]->in_bootmem = 1; +- pda += size; +- } +- +- /* point to new pointer table */ +- _cpu_pda = new_cpu_pda; ++ return ptr; ++#else ++ return __alloc_bootmem_nopanic(size, align, goal); ++#endif + } + +-#endif /* CONFIG_SMP && CONFIG_X86_64 */ +- +-#ifdef CONFIG_X86_64 ++/* ++ * Remap allocator ++ * ++ * This allocator uses PMD page as unit. A PMD page is allocated for ++ * each cpu and each is remapped into vmalloc area using PMD mapping. ++ * As PMD page is quite large, only part of it is used for the first ++ * chunk. Unused part is returned to the bootmem allocator. ++ * ++ * So, the PMD pages are mapped twice - once to the physical mapping ++ * and to the vmalloc area for the first percpu chunk. The double ++ * mapping does add one more PMD TLB entry pressure but still is much ++ * better than only using 4k mappings while still being NUMA friendly. 
++ */ ++#ifdef CONFIG_NEED_MULTIPLE_NODES ++static size_t pcpur_size __initdata; ++static void **pcpur_ptrs __initdata; + +-/* correctly size the local cpu masks */ +-static void __init setup_cpu_local_masks(void) ++static struct page * __init pcpur_get_page(unsigned int cpu, int pageno) + { +- alloc_bootmem_cpumask_var(&cpu_initialized_mask); +- alloc_bootmem_cpumask_var(&cpu_callin_mask); +- alloc_bootmem_cpumask_var(&cpu_callout_mask); +- alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask); +-} ++ size_t off = (size_t)pageno << PAGE_SHIFT; + +-#else /* CONFIG_X86_32 */ ++ if (off >= pcpur_size) ++ return NULL; + +-static inline void setup_cpu_local_masks(void) +-{ ++ return virt_to_page(pcpur_ptrs[cpu] + off); + } + +-#endif /* CONFIG_X86_32 */ +- +-/* +- * Great future plan: +- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. +- * Always point %gs to its beginning +- */ +-void __init setup_per_cpu_areas(void) ++static ssize_t __init setup_pcpu_remap(size_t static_size) + { +- ssize_t size, old_size; +- char *ptr; +- int cpu; +- unsigned long align = 1; +- +- /* Setup cpu_pda map */ +- setup_cpu_pda_map(); +- +- /* Copy section for each CPU (we discard the original) */ +- old_size = PERCPU_ENOUGH_ROOM; +- align = max_t(unsigned long, PAGE_SIZE, align); +- size = roundup(old_size, align); ++ static struct vm_struct vm; ++ pg_data_t *last; ++ size_t ptrs_size, dyn_size; ++ unsigned int cpu; ++ ssize_t ret; + +- pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", +- NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); +- +- pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size); ++ /* ++ * If large page isn't supported, there's no benefit in doing ++ * this. Also, on non-NUMA, embedding is better. ++ */ ++ if (!cpu_has_pse || pcpu_need_numa()) ++ return -EINVAL; + ++ last = NULL; + for_each_possible_cpu(cpu) { +-#ifndef CONFIG_NEED_MULTIPLE_NODES +- ptr = __alloc_bootmem(size, align, +- __pa(MAX_DMA_ADDRESS)); +-#else + int node = early_cpu_to_node(cpu); +- if (!node_online(node) || !NODE_DATA(node)) { +- ptr = __alloc_bootmem(size, align, +- __pa(MAX_DMA_ADDRESS)); +- pr_info("cpu %d has no node %d or node-local memory\n", +- cpu, node); +- pr_debug("per cpu data for cpu%d at %016lx\n", +- cpu, __pa(ptr)); +- } else { +- ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, +- __pa(MAX_DMA_ADDRESS)); +- pr_debug("per cpu data for cpu%d on node%d at %016lx\n", +- cpu, node, __pa(ptr)); +- } +-#endif +- per_cpu_offset(cpu) = ptr - __per_cpu_start; +- memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); +- } + +- /* Setup percpu data maps */ +- setup_per_cpu_maps(); ++ if (node_online(node) && NODE_DATA(node) && ++ last && last != NODE_DATA(node)) ++ goto proceed; ++ ++ last = NODE_DATA(node); ++ } ++ return -EINVAL; ++ ++proceed: ++ /* ++ * Currently supports only single page. Supporting multiple ++ * pages won't be too difficult if it ever becomes necessary. 
++ */ ++ pcpur_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + ++ PERCPU_DYNAMIC_RESERVE); ++ if (pcpur_size > PMD_SIZE) { ++ pr_warning("PERCPU: static data is larger than large page, " ++ "can't use large page\n"); ++ return -EINVAL; ++ } ++ dyn_size = pcpur_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; ++ ++ /* allocate pointer array and alloc large pages */ ++ ptrs_size = PFN_ALIGN(num_possible_cpus() * sizeof(pcpur_ptrs[0])); ++ pcpur_ptrs = alloc_bootmem(ptrs_size); + +- /* Setup node to cpumask map */ +- setup_node_to_cpumask_map(); ++ for_each_possible_cpu(cpu) { ++ pcpur_ptrs[cpu] = pcpu_alloc_bootmem(cpu, PMD_SIZE, PMD_SIZE); ++ if (!pcpur_ptrs[cpu]) ++ goto enomem; ++ ++ /* ++ * Only use pcpur_size bytes and give back the rest. ++ * ++ * Ingo: The 2MB up-rounding bootmem is needed to make ++ * sure the partial 2MB page is still fully RAM - it's ++ * not well-specified to have a PAT-incompatible area ++ * (unmapped RAM, device memory, etc.) in that hole. ++ */ ++ free_bootmem(__pa(pcpur_ptrs[cpu] + pcpur_size), ++ PMD_SIZE - pcpur_size); ++ ++ memcpy(pcpur_ptrs[cpu], __per_cpu_load, static_size); ++ } ++ ++ /* allocate address and map */ ++ vm.flags = VM_ALLOC; ++ vm.size = num_possible_cpus() * PMD_SIZE; ++ vm_area_register_early(&vm, PMD_SIZE); + +- /* Setup cpu initialized, callin, callout masks */ +- setup_cpu_local_masks(); +-} ++ for_each_possible_cpu(cpu) { ++ pmd_t *pmd; + ++ pmd = populate_extra_pmd((unsigned long)vm.addr ++ + cpu * PMD_SIZE); ++ set_pmd(pmd, pfn_pmd(page_to_pfn(virt_to_page(pcpur_ptrs[cpu])), ++ PAGE_KERNEL_LARGE)); ++ } ++ ++ /* we're ready, commit */ ++ pr_info("PERCPU: Remapped at %p with large pages, static data " ++ "%zu bytes\n", vm.addr, static_size); ++ ++ ret = pcpu_setup_first_chunk(pcpur_get_page, static_size, ++ PERCPU_FIRST_CHUNK_RESERVE, dyn_size, ++ PMD_SIZE, vm.addr, NULL); ++ goto out_free_ar; ++ ++enomem: ++ for_each_possible_cpu(cpu) ++ if (pcpur_ptrs[cpu]) ++ free_bootmem(__pa(pcpur_ptrs[cpu]), PMD_SIZE); ++ ret = -ENOMEM; ++out_free_ar: ++ free_bootmem(__pa(pcpur_ptrs), ptrs_size); ++ return ret; ++} ++#else ++static ssize_t __init setup_pcpu_remap(size_t static_size) ++{ ++ return -EINVAL; ++} + #endif + +-#ifdef X86_64_NUMA +- + /* +- * Allocate node_to_cpumask_map based on number of available nodes +- * Requires node_possible_map to be valid. ++ * Embedding allocator + * +- * Note: node_to_cpumask() is not valid until after this is done. ++ * The first chunk is sized to just contain the static area plus ++ * module and dynamic reserves and embedded into linear physical ++ * mapping so that it can use PMD mapping without additional TLB ++ * pressure. + */ +-static void __init setup_node_to_cpumask_map(void) ++static ssize_t __init setup_pcpu_embed(size_t static_size) + { +- unsigned int node, num = 0; +- cpumask_t *map; +- +- /* setup nr_node_ids if not done yet */ +- if (nr_node_ids == MAX_NUMNODES) { +- for_each_node_mask(node, node_possible_map) +- num = node; +- nr_node_ids = num + 1; +- } +- +- /* allocate the map */ +- map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); ++ size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; + +- pr_debug("Node to cpumask map at %p for %d nodes\n", +- map, nr_node_ids); ++ /* ++ * If large page isn't supported, there's no benefit in doing ++ * this. Also, embedding allocation doesn't play well with ++ * NUMA. 
++ */ ++ if (!cpu_has_pse || pcpu_need_numa()) ++ return -EINVAL; + +- /* node_to_cpumask() will now work */ +- node_to_cpumask_map = map; ++ return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, ++ reserve - PERCPU_FIRST_CHUNK_RESERVE, -1); + } + +-void __cpuinit numa_set_node(int cpu, int node) +-{ +- int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); +- +- if (cpu_pda(cpu) && node != NUMA_NO_NODE) +- cpu_pda(cpu)->nodenumber = node; +- +- if (cpu_to_node_map) +- cpu_to_node_map[cpu] = node; +- +- else if (per_cpu_offset(cpu)) +- per_cpu(x86_cpu_to_node_map, cpu) = node; +- +- else +- pr_debug("Setting node for non-present cpu %d\n", cpu); +-} ++/* ++ * 4k page allocator ++ * ++ * This is the basic allocator. Static percpu area is allocated ++ * page-by-page and most of initialization is done by the generic ++ * setup function. ++ */ ++static struct page **pcpu4k_pages __initdata; ++static int pcpu4k_nr_static_pages __initdata; + +-void __cpuinit numa_clear_node(int cpu) ++static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) + { +- numa_set_node(cpu, NUMA_NO_NODE); ++ if (pageno < pcpu4k_nr_static_pages) ++ return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; ++ return NULL; + } + +-#ifndef CONFIG_DEBUG_PER_CPU_MAPS +- +-void __cpuinit numa_add_cpu(int cpu) ++static void __init pcpu4k_populate_pte(unsigned long addr) + { +- cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); ++ populate_extra_pte(addr); + } + +-void __cpuinit numa_remove_cpu(int cpu) ++static ssize_t __init setup_pcpu_4k(size_t static_size) + { +- cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); +-} ++ size_t pages_size; ++ unsigned int cpu; ++ int i, j; ++ ssize_t ret; + +-#else /* CONFIG_DEBUG_PER_CPU_MAPS */ ++ pcpu4k_nr_static_pages = PFN_UP(static_size); + +-/* +- * --------- debug versions of the numa functions --------- +- */ +-static void __cpuinit numa_set_cpumask(int cpu, int enable) +-{ +- int node = cpu_to_node(cpu); +- cpumask_t *mask; +- char buf[64]; +- +- if (node_to_cpumask_map == NULL) { +- printk(KERN_ERR "node_to_cpumask_map NULL\n"); +- dump_stack(); +- return; +- } ++ /* unaligned allocations can't be freed, round up to page size */ ++ pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * num_possible_cpus() ++ * sizeof(pcpu4k_pages[0])); ++ pcpu4k_pages = alloc_bootmem(pages_size); + +- mask = &node_to_cpumask_map[node]; +- if (enable) +- cpu_set(cpu, *mask); +- else +- cpu_clear(cpu, *mask); ++ /* allocate and copy */ ++ j = 0; ++ for_each_possible_cpu(cpu) ++ for (i = 0; i < pcpu4k_nr_static_pages; i++) { ++ void *ptr; + +- cpulist_scnprintf(buf, sizeof(buf), mask); +- printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", +- enable ? 
"numa_add_cpu" : "numa_remove_cpu", cpu, node, buf); +-} ++ ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); ++ if (!ptr) ++ goto enomem; + +-void __cpuinit numa_add_cpu(int cpu) +-{ +- numa_set_cpumask(cpu, 1); +-} ++ memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); ++ pcpu4k_pages[j++] = virt_to_page(ptr); ++ } + +-void __cpuinit numa_remove_cpu(int cpu) +-{ +- numa_set_cpumask(cpu, 0); ++ /* we're ready, commit */ ++ pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", ++ pcpu4k_nr_static_pages, static_size); ++ ++ ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, ++ PERCPU_FIRST_CHUNK_RESERVE, -1, ++ -1, NULL, pcpu4k_populate_pte); ++ goto out_free_ar; ++ ++enomem: ++ while (--j >= 0) ++ free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE); ++ ret = -ENOMEM; ++out_free_ar: ++ free_bootmem(__pa(pcpu4k_pages), pages_size); ++ return ret; + } + +-int cpu_to_node(int cpu) ++static inline void setup_percpu_segment(int cpu) + { +- if (early_per_cpu_ptr(x86_cpu_to_node_map)) { +- printk(KERN_WARNING +- "cpu_to_node(%d): usage too early!\n", cpu); +- dump_stack(); +- return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; +- } +- return per_cpu(x86_cpu_to_node_map, cpu); ++#ifdef CONFIG_X86_32 ++ struct desc_struct gdt; ++ ++ pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF, ++ 0x2 | DESCTYPE_S, 0x8); ++ gdt.s = 1; ++ write_gdt_entry(get_cpu_gdt_table(cpu), ++ GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); ++#endif + } +-EXPORT_SYMBOL(cpu_to_node); + + /* +- * Same function as cpu_to_node() but used if called before the +- * per_cpu areas are setup. ++ * Great future plan: ++ * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. ++ * Always point %gs to its beginning + */ +-int early_cpu_to_node(int cpu) ++void __init setup_per_cpu_areas(void) + { +- if (early_per_cpu_ptr(x86_cpu_to_node_map)) +- return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; +- +- if (!per_cpu_offset(cpu)) { +- printk(KERN_WARNING +- "early_cpu_to_node(%d): no per_cpu area!\n", cpu); +- dump_stack(); +- return NUMA_NO_NODE; +- } +- return per_cpu(x86_cpu_to_node_map, cpu); +-} ++ size_t static_size = __per_cpu_end - __per_cpu_start; ++ unsigned int cpu; ++ unsigned long delta; ++ size_t pcpu_unit_size; ++ ssize_t ret; + ++ pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", ++ NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); + +-/* empty cpumask */ +-static const cpumask_t cpu_mask_none; ++ /* ++ * Allocate percpu area. If PSE is supported, try to make use ++ * of large page mappings. Please read comments on top of ++ * each allocator for details. ++ */ ++ ret = setup_pcpu_remap(static_size); ++ if (ret < 0) ++ ret = setup_pcpu_embed(static_size); ++ if (ret < 0) ++ ret = setup_pcpu_4k(static_size); ++ if (ret < 0) ++ panic("cannot allocate static percpu area (%zu bytes, err=%zd)", ++ static_size, ret); + +-/* +- * Returns a pointer to the bitmask of CPUs on Node 'node'. +- */ +-const cpumask_t *cpumask_of_node(int node) +-{ +- if (node_to_cpumask_map == NULL) { +- printk(KERN_WARNING +- "cpumask_of_node(%d): no node_to_cpumask_map!\n", +- node); +- dump_stack(); +- return (const cpumask_t *)&cpu_online_map; +- } +- if (node >= nr_node_ids) { +- printk(KERN_WARNING +- "cpumask_of_node(%d): node > nr_node_ids(%d)\n", +- node, nr_node_ids); +- dump_stack(); +- return &cpu_mask_none; +- } +- return &node_to_cpumask_map[node]; +-} +-EXPORT_SYMBOL(cpumask_of_node); ++ pcpu_unit_size = ret; + +-/* +- * Returns a bitmask of CPUs on Node 'node'. 
+- * +- * Side note: this function creates the returned cpumask on the stack +- * so with a high NR_CPUS count, excessive stack space is used. The +- * node_to_cpumask_ptr function should be used whenever possible. +- */ +-cpumask_t node_to_cpumask(int node) +-{ +- if (node_to_cpumask_map == NULL) { +- printk(KERN_WARNING +- "node_to_cpumask(%d): no node_to_cpumask_map!\n", node); +- dump_stack(); +- return cpu_online_map; +- } +- if (node >= nr_node_ids) { +- printk(KERN_WARNING +- "node_to_cpumask(%d): node > nr_node_ids(%d)\n", +- node, nr_node_ids); +- dump_stack(); +- return cpu_mask_none; ++ /* alrighty, percpu areas up and running */ ++ delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; ++ for_each_possible_cpu(cpu) { ++ per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; ++ per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); ++ per_cpu(cpu_number, cpu) = cpu; ++ setup_percpu_segment(cpu); ++ setup_stack_canary_segment(cpu); ++ /* ++ * Copy data used in early init routines from the ++ * initial arrays to the per cpu data areas. These ++ * arrays then become expendable and the *_early_ptr's ++ * are zeroed indicating that the static arrays are ++ * gone. ++ */ ++#ifdef CONFIG_X86_LOCAL_APIC ++ per_cpu(x86_cpu_to_apicid, cpu) = ++ early_per_cpu_map(x86_cpu_to_apicid, cpu); ++ per_cpu(x86_bios_cpu_apicid, cpu) = ++ early_per_cpu_map(x86_bios_cpu_apicid, cpu); ++#endif ++#ifdef CONFIG_X86_64 ++ per_cpu(irq_stack_ptr, cpu) = ++ per_cpu(irq_stack_union.irq_stack, cpu) + ++ IRQ_STACK_SIZE - 64; ++#ifdef CONFIG_NUMA ++ per_cpu(x86_cpu_to_node_map, cpu) = ++ early_per_cpu_map(x86_cpu_to_node_map, cpu); ++#endif ++#endif ++ /* ++ * Up to this point, the boot CPU has been using .data.init ++ * area. Reload any changed state for the boot CPU. 
++ */ ++ if (cpu == boot_cpu_id) ++ switch_to_new_gdt(cpu); + } +- return node_to_cpumask_map[node]; +-} +-EXPORT_SYMBOL(node_to_cpumask); + +-/* +- * --------- end of debug versions of the numa functions --------- +- */ +- +-#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ ++ /* indicate the early static arrays will soon be gone */ ++#ifdef CONFIG_X86_LOCAL_APIC ++ early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; ++ early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; ++#endif ++#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) ++ early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; ++#endif + +-#endif /* X86_64_NUMA */ ++ /* Setup node to cpumask map */ ++ setup_node_to_cpumask_map(); + ++ /* Setup cpu initialized, callin, callout masks */ ++ setup_cpu_local_masks(); ++} +Index: linux-2.6-tip/arch/x86/kernel/signal.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/signal.c ++++ linux-2.6-tip/arch/x86/kernel/signal.c +@@ -6,7 +6,7 @@ + * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes + * 2000-2002 x86-64 support by Andi Kleen + */ +- ++#include + #include + #include + #include +@@ -50,27 +50,23 @@ + # define FIX_EFLAGS __FIX_EFLAGS + #endif + +-#define COPY(x) { \ +- err |= __get_user(regs->x, &sc->x); \ +-} +- +-#define COPY_SEG(seg) { \ +- unsigned short tmp; \ +- err |= __get_user(tmp, &sc->seg); \ +- regs->seg = tmp; \ +-} +- +-#define COPY_SEG_CPL3(seg) { \ +- unsigned short tmp; \ +- err |= __get_user(tmp, &sc->seg); \ +- regs->seg = tmp | 3; \ +-} +- +-#define GET_SEG(seg) { \ +- unsigned short tmp; \ +- err |= __get_user(tmp, &sc->seg); \ +- loadsegment(seg, tmp); \ +-} ++#define COPY(x) do { \ ++ get_user_ex(regs->x, &sc->x); \ ++} while (0) ++ ++#define GET_SEG(seg) ({ \ ++ unsigned short tmp; \ ++ get_user_ex(tmp, &sc->seg); \ ++ tmp; \ ++}) ++ ++#define COPY_SEG(seg) do { \ ++ regs->seg = GET_SEG(seg); \ ++} while (0) ++ ++#define COPY_SEG_CPL3(seg) do { \ ++ regs->seg = GET_SEG(seg) | 3; \ ++} while (0) + + static int + restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, +@@ -83,45 +79,49 @@ restore_sigcontext(struct pt_regs *regs, + /* Always make any pending restarted system calls return -EINTR */ + current_thread_info()->restart_block.fn = do_no_restart_syscall; + ++ get_user_try { ++ + #ifdef CONFIG_X86_32 +- GET_SEG(gs); +- COPY_SEG(fs); +- COPY_SEG(es); +- COPY_SEG(ds); ++ set_user_gs(regs, GET_SEG(gs)); ++ COPY_SEG(fs); ++ COPY_SEG(es); ++ COPY_SEG(ds); + #endif /* CONFIG_X86_32 */ + +- COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); +- COPY(dx); COPY(cx); COPY(ip); ++ COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); ++ COPY(dx); COPY(cx); COPY(ip); + + #ifdef CONFIG_X86_64 +- COPY(r8); +- COPY(r9); +- COPY(r10); +- COPY(r11); +- COPY(r12); +- COPY(r13); +- COPY(r14); +- COPY(r15); ++ COPY(r8); ++ COPY(r9); ++ COPY(r10); ++ COPY(r11); ++ COPY(r12); ++ COPY(r13); ++ COPY(r14); ++ COPY(r15); + #endif /* CONFIG_X86_64 */ + + #ifdef CONFIG_X86_32 +- COPY_SEG_CPL3(cs); +- COPY_SEG_CPL3(ss); ++ COPY_SEG_CPL3(cs); ++ COPY_SEG_CPL3(ss); + #else /* !CONFIG_X86_32 */ +- /* Kernel saves and restores only the CS segment register on signals, +- * which is the bare minimum needed to allow mixed 32/64-bit code. +- * App's signal handler can save/restore other segments if needed. */ +- COPY_SEG_CPL3(cs); ++ /* Kernel saves and restores only the CS segment register on signals, ++ * which is the bare minimum needed to allow mixed 32/64-bit code. ++ * App's signal handler can save/restore other segments if needed. 
*/ ++ COPY_SEG_CPL3(cs); + #endif /* CONFIG_X86_32 */ + +- err |= __get_user(tmpflags, &sc->flags); +- regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); +- regs->orig_ax = -1; /* disable syscall checks */ ++ get_user_ex(tmpflags, &sc->flags); ++ regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); ++ regs->orig_ax = -1; /* disable syscall checks */ ++ ++ get_user_ex(buf, &sc->fpstate); ++ err |= restore_i387_xstate(buf); + +- err |= __get_user(buf, &sc->fpstate); +- err |= restore_i387_xstate(buf); ++ get_user_ex(*pax, &sc->ax); ++ } get_user_catch(err); + +- err |= __get_user(*pax, &sc->ax); + return err; + } + +@@ -131,57 +131,55 @@ setup_sigcontext(struct sigcontext __use + { + int err = 0; + +-#ifdef CONFIG_X86_32 +- { +- unsigned int tmp; ++ put_user_try { + +- savesegment(gs, tmp); +- err |= __put_user(tmp, (unsigned int __user *)&sc->gs); +- } +- err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); +- err |= __put_user(regs->es, (unsigned int __user *)&sc->es); +- err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds); +-#endif /* CONFIG_X86_32 */ +- +- err |= __put_user(regs->di, &sc->di); +- err |= __put_user(regs->si, &sc->si); +- err |= __put_user(regs->bp, &sc->bp); +- err |= __put_user(regs->sp, &sc->sp); +- err |= __put_user(regs->bx, &sc->bx); +- err |= __put_user(regs->dx, &sc->dx); +- err |= __put_user(regs->cx, &sc->cx); +- err |= __put_user(regs->ax, &sc->ax); ++#ifdef CONFIG_X86_32 ++ put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs); ++ put_user_ex(regs->fs, (unsigned int __user *)&sc->fs); ++ put_user_ex(regs->es, (unsigned int __user *)&sc->es); ++ put_user_ex(regs->ds, (unsigned int __user *)&sc->ds); ++#endif /* CONFIG_X86_32 */ ++ ++ put_user_ex(regs->di, &sc->di); ++ put_user_ex(regs->si, &sc->si); ++ put_user_ex(regs->bp, &sc->bp); ++ put_user_ex(regs->sp, &sc->sp); ++ put_user_ex(regs->bx, &sc->bx); ++ put_user_ex(regs->dx, &sc->dx); ++ put_user_ex(regs->cx, &sc->cx); ++ put_user_ex(regs->ax, &sc->ax); + #ifdef CONFIG_X86_64 +- err |= __put_user(regs->r8, &sc->r8); +- err |= __put_user(regs->r9, &sc->r9); +- err |= __put_user(regs->r10, &sc->r10); +- err |= __put_user(regs->r11, &sc->r11); +- err |= __put_user(regs->r12, &sc->r12); +- err |= __put_user(regs->r13, &sc->r13); +- err |= __put_user(regs->r14, &sc->r14); +- err |= __put_user(regs->r15, &sc->r15); ++ put_user_ex(regs->r8, &sc->r8); ++ put_user_ex(regs->r9, &sc->r9); ++ put_user_ex(regs->r10, &sc->r10); ++ put_user_ex(regs->r11, &sc->r11); ++ put_user_ex(regs->r12, &sc->r12); ++ put_user_ex(regs->r13, &sc->r13); ++ put_user_ex(regs->r14, &sc->r14); ++ put_user_ex(regs->r15, &sc->r15); + #endif /* CONFIG_X86_64 */ + +- err |= __put_user(current->thread.trap_no, &sc->trapno); +- err |= __put_user(current->thread.error_code, &sc->err); +- err |= __put_user(regs->ip, &sc->ip); +-#ifdef CONFIG_X86_32 +- err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); +- err |= __put_user(regs->flags, &sc->flags); +- err |= __put_user(regs->sp, &sc->sp_at_signal); +- err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); ++ put_user_ex(current->thread.trap_no, &sc->trapno); ++ put_user_ex(current->thread.error_code, &sc->err); ++ put_user_ex(regs->ip, &sc->ip); ++#ifdef CONFIG_X86_32 ++ put_user_ex(regs->cs, (unsigned int __user *)&sc->cs); ++ put_user_ex(regs->flags, &sc->flags); ++ put_user_ex(regs->sp, &sc->sp_at_signal); ++ put_user_ex(regs->ss, (unsigned int __user *)&sc->ss); + #else /* !CONFIG_X86_32 */ +- err |= __put_user(regs->flags, 
&sc->flags); +- err |= __put_user(regs->cs, &sc->cs); +- err |= __put_user(0, &sc->gs); +- err |= __put_user(0, &sc->fs); ++ put_user_ex(regs->flags, &sc->flags); ++ put_user_ex(regs->cs, &sc->cs); ++ put_user_ex(0, &sc->gs); ++ put_user_ex(0, &sc->fs); + #endif /* CONFIG_X86_32 */ + +- err |= __put_user(fpstate, &sc->fpstate); +- +- /* non-iBCS2 extensions.. */ +- err |= __put_user(mask, &sc->oldmask); +- err |= __put_user(current->thread.cr2, &sc->cr2); ++ put_user_ex(fpstate, &sc->fpstate); ++ ++ /* non-iBCS2 extensions.. */ ++ put_user_ex(mask, &sc->oldmask); ++ put_user_ex(current->thread.cr2, &sc->cr2); ++ } put_user_catch(err); + + return err; + } +@@ -189,6 +187,77 @@ setup_sigcontext(struct sigcontext __use + /* + * Set up a signal frame. + */ ++ ++/* ++ * Determine which stack to use.. ++ */ ++static unsigned long align_sigframe(unsigned long sp) ++{ ++#ifdef CONFIG_X86_32 ++ /* ++ * Align the stack pointer according to the i386 ABI, ++ * i.e. so that on function entry ((sp + 4) & 15) == 0. ++ */ ++ sp = ((sp + 4) & -16ul) - 4; ++#else /* !CONFIG_X86_32 */ ++ sp = round_down(sp, 16) - 8; ++#endif ++ return sp; ++} ++ ++static inline void __user * ++get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, ++ void __user **fpstate) ++{ ++ /* Default to using normal stack */ ++ unsigned long sp = regs->sp; ++ int onsigstack = on_sig_stack(sp); ++ ++#ifdef CONFIG_X86_64 ++ /* redzone */ ++ sp -= 128; ++#endif /* CONFIG_X86_64 */ ++ ++ if (!onsigstack) { ++ /* This is the X/Open sanctioned signal stack switching. */ ++ if (ka->sa.sa_flags & SA_ONSTACK) { ++ if (sas_ss_flags(sp) == 0) ++ sp = current->sas_ss_sp + current->sas_ss_size; ++ } else { ++#ifdef CONFIG_X86_32 ++ /* This is the legacy signal stack switching. */ ++ if ((regs->ss & 0xffff) != __USER_DS && ++ !(ka->sa.sa_flags & SA_RESTORER) && ++ ka->sa.sa_restorer) ++ sp = (unsigned long) ka->sa.sa_restorer; ++#endif /* CONFIG_X86_32 */ ++ } ++ } ++ ++ if (used_math()) { ++ sp -= sig_xstate_size; ++#ifdef CONFIG_X86_64 ++ sp = round_down(sp, 64); ++#endif /* CONFIG_X86_64 */ ++ *fpstate = (void __user *)sp; ++ } ++ ++ sp = align_sigframe(sp - frame_size); ++ ++ /* ++ * If we are on the alternate signal stack and would overflow it, don't. ++ * Return an always-bogus address instead so we will die with SIGSEGV. ++ */ ++ if (onsigstack && !likely(on_sig_stack(sp))) ++ return (void __user *)-1L; ++ ++ /* save i387 state */ ++ if (used_math() && save_i387_xstate(*fpstate) < 0) ++ return (void __user *)-1L; ++ ++ return (void __user *)sp; ++} ++ + #ifdef CONFIG_X86_32 + static const struct { + u16 poplmovl; +@@ -212,54 +281,6 @@ static const struct { + 0 + }; + +-/* +- * Determine which stack to use.. +- */ +-static inline void __user * +-get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, +- void **fpstate) +-{ +- unsigned long sp; +- +- /* Default to using normal stack */ +- sp = regs->sp; +- +- /* +- * If we are on the alternate signal stack and would overflow it, don't. +- * Return an always-bogus address instead so we will die with SIGSEGV. +- */ +- if (on_sig_stack(sp) && !likely(on_sig_stack(sp - frame_size))) +- return (void __user *) -1L; +- +- /* This is the X/Open sanctioned signal stack switching. */ +- if (ka->sa.sa_flags & SA_ONSTACK) { +- if (sas_ss_flags(sp) == 0) +- sp = current->sas_ss_sp + current->sas_ss_size; +- } else { +- /* This is the legacy signal stack switching. 
*/ +- if ((regs->ss & 0xffff) != __USER_DS && +- !(ka->sa.sa_flags & SA_RESTORER) && +- ka->sa.sa_restorer) +- sp = (unsigned long) ka->sa.sa_restorer; +- } +- +- if (used_math()) { +- sp = sp - sig_xstate_size; +- *fpstate = (struct _fpstate *) sp; +- if (save_i387_xstate(*fpstate) < 0) +- return (void __user *)-1L; +- } +- +- sp -= frame_size; +- /* +- * Align the stack pointer according to the i386 ABI, +- * i.e. so that on function entry ((sp + 4) & 15) == 0. +- */ +- sp = ((sp + 4) & -16ul) - 4; +- +- return (void __user *) sp; +-} +- + static int + __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, + struct pt_regs *regs) +@@ -336,43 +357,41 @@ static int __setup_rt_frame(int sig, str + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return -EFAULT; + +- err |= __put_user(sig, &frame->sig); +- err |= __put_user(&frame->info, &frame->pinfo); +- err |= __put_user(&frame->uc, &frame->puc); +- err |= copy_siginfo_to_user(&frame->info, info); +- if (err) +- return -EFAULT; +- +- /* Create the ucontext. */ +- if (cpu_has_xsave) +- err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); +- else +- err |= __put_user(0, &frame->uc.uc_flags); +- err |= __put_user(0, &frame->uc.uc_link); +- err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); +- err |= __put_user(sas_ss_flags(regs->sp), +- &frame->uc.uc_stack.ss_flags); +- err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); +- err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, +- regs, set->sig[0]); +- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); +- if (err) +- return -EFAULT; +- +- /* Set up to return from userspace. */ +- restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); +- if (ka->sa.sa_flags & SA_RESTORER) +- restorer = ka->sa.sa_restorer; +- err |= __put_user(restorer, &frame->pretcode); ++ put_user_try { ++ put_user_ex(sig, &frame->sig); ++ put_user_ex(&frame->info, &frame->pinfo); ++ put_user_ex(&frame->uc, &frame->puc); ++ err |= copy_siginfo_to_user(&frame->info, info); ++ ++ /* Create the ucontext. */ ++ if (cpu_has_xsave) ++ put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); ++ else ++ put_user_ex(0, &frame->uc.uc_flags); ++ put_user_ex(0, &frame->uc.uc_link); ++ put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); ++ put_user_ex(sas_ss_flags(regs->sp), ++ &frame->uc.uc_stack.ss_flags); ++ put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size); ++ err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate, ++ regs, set->sig[0]); ++ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); ++ ++ /* Set up to return from userspace. */ ++ restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); ++ if (ka->sa.sa_flags & SA_RESTORER) ++ restorer = ka->sa.sa_restorer; ++ put_user_ex(restorer, &frame->pretcode); + +- /* +- * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 +- * +- * WE DO NOT USE IT ANY MORE! It's only left here for historical +- * reasons and because gdb uses it as a signature to notice +- * signal handler stack frames. +- */ +- err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode); ++ /* ++ * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 ++ * ++ * WE DO NOT USE IT ANY MORE! It's only left here for historical ++ * reasons and because gdb uses it as a signature to notice ++ * signal handler stack frames. 
++ */ ++ put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode); ++ } put_user_catch(err); + + if (err) + return -EFAULT; +@@ -392,24 +411,6 @@ static int __setup_rt_frame(int sig, str + return 0; + } + #else /* !CONFIG_X86_32 */ +-/* +- * Determine which stack to use.. +- */ +-static void __user * +-get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size) +-{ +- /* Default to using normal stack - redzone*/ +- sp -= 128; +- +- /* This is the X/Open sanctioned signal stack switching. */ +- if (ka->sa.sa_flags & SA_ONSTACK) { +- if (sas_ss_flags(sp) == 0) +- sp = current->sas_ss_sp + current->sas_ss_size; +- } +- +- return (void __user *)round_down(sp - size, 64); +-} +- + static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, + sigset_t *set, struct pt_regs *regs) + { +@@ -418,15 +419,7 @@ static int __setup_rt_frame(int sig, str + int err = 0; + struct task_struct *me = current; + +- if (used_math()) { +- fp = get_stack(ka, regs->sp, sig_xstate_size); +- frame = (void __user *)round_down( +- (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; +- +- if (save_i387_xstate(fp) < 0) +- return -EFAULT; +- } else +- frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8; ++ frame = get_sigframe(ka, regs, sizeof(struct rt_sigframe), &fp); + + if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) + return -EFAULT; +@@ -436,28 +429,30 @@ static int __setup_rt_frame(int sig, str + return -EFAULT; + } + +- /* Create the ucontext. */ +- if (cpu_has_xsave) +- err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); +- else +- err |= __put_user(0, &frame->uc.uc_flags); +- err |= __put_user(0, &frame->uc.uc_link); +- err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); +- err |= __put_user(sas_ss_flags(regs->sp), +- &frame->uc.uc_stack.ss_flags); +- err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); +- err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); +- err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); +- +- /* Set up to return from userspace. If provided, use a stub +- already in userspace. */ +- /* x86-64 should always use SA_RESTORER. */ +- if (ka->sa.sa_flags & SA_RESTORER) { +- err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); +- } else { +- /* could use a vstub here */ +- return -EFAULT; +- } ++ put_user_try { ++ /* Create the ucontext. */ ++ if (cpu_has_xsave) ++ put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); ++ else ++ put_user_ex(0, &frame->uc.uc_flags); ++ put_user_ex(0, &frame->uc.uc_link); ++ put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); ++ put_user_ex(sas_ss_flags(regs->sp), ++ &frame->uc.uc_stack.ss_flags); ++ put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size); ++ err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); ++ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); ++ ++ /* Set up to return from userspace. If provided, use a stub ++ already in userspace. */ ++ /* x86-64 should always use SA_RESTORER. 
*/ ++ if (ka->sa.sa_flags & SA_RESTORER) { ++ put_user_ex(ka->sa.sa_restorer, &frame->pretcode); ++ } else { ++ /* could use a vstub here */ ++ err |= -EFAULT; ++ } ++ } put_user_catch(err); + + if (err) + return -EFAULT; +@@ -509,31 +504,41 @@ sys_sigaction(int sig, const struct old_ + struct old_sigaction __user *oact) + { + struct k_sigaction new_ka, old_ka; +- int ret; ++ int ret = 0; + + if (act) { + old_sigset_t mask; + +- if (!access_ok(VERIFY_READ, act, sizeof(*act)) || +- __get_user(new_ka.sa.sa_handler, &act->sa_handler) || +- __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) ++ if (!access_ok(VERIFY_READ, act, sizeof(*act))) + return -EFAULT; + +- __get_user(new_ka.sa.sa_flags, &act->sa_flags); +- __get_user(mask, &act->sa_mask); ++ get_user_try { ++ get_user_ex(new_ka.sa.sa_handler, &act->sa_handler); ++ get_user_ex(new_ka.sa.sa_flags, &act->sa_flags); ++ get_user_ex(mask, &act->sa_mask); ++ get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer); ++ } get_user_catch(ret); ++ ++ if (ret) ++ return -EFAULT; + siginitset(&new_ka.sa.sa_mask, mask); + } + + ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); + + if (!ret && oact) { +- if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || +- __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || +- __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) ++ if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact))) + return -EFAULT; + +- __put_user(old_ka.sa.sa_flags, &oact->sa_flags); +- __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); ++ put_user_try { ++ put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler); ++ put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags); ++ put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); ++ put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer); ++ } put_user_catch(ret); ++ ++ if (ret) ++ return -EFAULT; + } + + return ret; +@@ -541,14 +546,9 @@ sys_sigaction(int sig, const struct old_ + #endif /* CONFIG_X86_32 */ + + #ifdef CONFIG_X86_32 +-asmlinkage int sys_sigaltstack(unsigned long bx) ++int sys_sigaltstack(struct pt_regs *regs) + { +- /* +- * This is needed to make gcc realize it doesn't own the +- * "struct pt_regs" +- */ +- struct pt_regs *regs = (struct pt_regs *)&bx; +- const stack_t __user *uss = (const stack_t __user *)bx; ++ const stack_t __user *uss = (const stack_t __user *)regs->bx; + stack_t __user *uoss = (stack_t __user *)regs->cx; + + return do_sigaltstack(uss, uoss, regs->sp); +@@ -566,14 +566,12 @@ sys_sigaltstack(const stack_t __user *us + * Do a signal return; undo the signal stack. 
+ */ + #ifdef CONFIG_X86_32 +-asmlinkage unsigned long sys_sigreturn(unsigned long __unused) ++unsigned long sys_sigreturn(struct pt_regs *regs) + { + struct sigframe __user *frame; +- struct pt_regs *regs; + unsigned long ax; + sigset_t set; + +- regs = (struct pt_regs *) &__unused; + frame = (struct sigframe __user *)(regs->sp - 8); + + if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) +@@ -600,7 +598,7 @@ badframe: + } + #endif /* CONFIG_X86_32 */ + +-static long do_rt_sigreturn(struct pt_regs *regs) ++long sys_rt_sigreturn(struct pt_regs *regs) + { + struct rt_sigframe __user *frame; + unsigned long ax; +@@ -631,25 +629,6 @@ badframe: + return 0; + } + +-#ifdef CONFIG_X86_32 +-/* +- * Note: do not pass in pt_regs directly as with tail-call optimization +- * GCC will incorrectly stomp on the caller's frame and corrupt user-space +- * register state: +- */ +-asmlinkage int sys_rt_sigreturn(unsigned long __unused) +-{ +- struct pt_regs *regs = (struct pt_regs *)&__unused; +- +- return do_rt_sigreturn(regs); +-} +-#else /* !CONFIG_X86_32 */ +-asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) +-{ +- return do_rt_sigreturn(regs); +-} +-#endif /* CONFIG_X86_32 */ +- + /* + * OK, we're invoking a handler: + */ +@@ -804,6 +783,13 @@ static void do_signal(struct pt_regs *re + int signr; + sigset_t *oldset; + ++#ifdef CONFIG_PREEMPT_RT ++ /* ++ * Fully-preemptible kernel does not need interrupts disabled: ++ */ ++ local_irq_enable(); ++ preempt_check_resched(); ++#endif + /* + * We want the common case to go fast, which is why we may in certain + * cases get here from kernel mode. Just return without doing anything +@@ -893,6 +879,11 @@ do_notify_resume(struct pt_regs *regs, v + tracehook_notify_resume(regs); + } + ++ if (thread_info_flags & _TIF_PERF_COUNTERS) { ++ clear_thread_flag(TIF_PERF_COUNTERS); ++ perf_counter_notify(regs); ++ } ++ + #ifdef CONFIG_X86_32 + clear_thread_flag(TIF_IRET); + #endif /* CONFIG_X86_32 */ +Index: linux-2.6-tip/arch/x86/kernel/smp.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/smp.c ++++ linux-2.6-tip/arch/x86/kernel/smp.c +@@ -2,7 +2,7 @@ + * Intel SMP support routines. + * + * (c) 1995 Alan Cox, Building #3 +- * (c) 1998-99, 2000 Ingo Molnar ++ * (c) 1998-99, 2000, 2009 Ingo Molnar + * (c) 2002,2003 Andi Kleen, SuSE Labs. + * + * i386 and x86_64 integration by Glauber Costa +@@ -26,8 +26,7 @@ + #include + #include + #include +-#include +-#include ++#include + /* + * Some notes on x86 processor bugs affecting SMP operation: + * +@@ -118,12 +117,22 @@ static void native_smp_send_reschedule(i + WARN_ON(1); + return; + } +- send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); ++ apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); ++} ++ ++/* ++ * this function sends a 'reschedule' IPI to all other CPUs. 
++ * This is used when RT tasks are starving and other CPUs ++ * might be able to run them: ++ */ ++void smp_send_reschedule_allbutself(void) ++{ ++ apic->send_IPI_allbutself(RESCHEDULE_VECTOR); + } + + void native_send_call_func_single_ipi(int cpu) + { +- send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); ++ apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); + } + + void native_send_call_func_ipi(const struct cpumask *mask) +@@ -131,7 +140,7 @@ void native_send_call_func_ipi(const str + cpumask_var_t allbutself; + + if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) { +- send_IPI_mask(mask, CALL_FUNCTION_VECTOR); ++ apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); + return; + } + +@@ -140,9 +149,9 @@ void native_send_call_func_ipi(const str + + if (cpumask_equal(mask, allbutself) && + cpumask_equal(cpu_online_mask, cpu_callout_mask)) +- send_IPI_allbutself(CALL_FUNCTION_VECTOR); ++ apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR); + else +- send_IPI_mask(mask, CALL_FUNCTION_VECTOR); ++ apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR); + + free_cpumask_var(allbutself); + } +Index: linux-2.6-tip/arch/x86/kernel/smpboot.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/smpboot.c ++++ linux-2.6-tip/arch/x86/kernel/smpboot.c +@@ -2,7 +2,7 @@ + * x86 SMP booting functions + * + * (c) 1995 Alan Cox, Building #3 +- * (c) 1998, 1999, 2000 Ingo Molnar ++ * (c) 1998, 1999, 2000, 2009 Ingo Molnar + * Copyright 2001 Andi Kleen, SuSE Labs. + * + * Much of the core SMP work is based on previous work by Thomas Radke, to +@@ -53,7 +53,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -61,13 +60,12 @@ + #include + #include + #include +-#include ++#include + #include ++#include + #include + +-#include +-#include +-#include ++#include + + #ifdef CONFIG_X86_32 + u8 apicid_2_node[MAX_APICID]; +@@ -103,29 +101,20 @@ EXPORT_SYMBOL(smp_num_siblings); + DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; + + /* representing HT siblings of each logical CPU */ +-DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); ++DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map); + EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); + + /* representing HT and core siblings of each logical CPU */ +-DEFINE_PER_CPU(cpumask_t, cpu_core_map); ++DEFINE_PER_CPU(cpumask_var_t, cpu_core_map); + EXPORT_PER_CPU_SYMBOL(cpu_core_map); + + /* Per CPU bogomips and other parameters */ + DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); + EXPORT_PER_CPU_SYMBOL(cpu_info); + +-static atomic_t init_deasserted; +- +- +-/* Set if we find a B stepping CPU */ +-static int __cpuinitdata smp_b_stepping; ++atomic_t init_deasserted; + + #if defined(CONFIG_NUMA) && defined(CONFIG_X86_32) +- +-/* which logical CPUs are on which nodes */ +-cpumask_t node_to_cpumask_map[MAX_NUMNODES] __read_mostly = +- { [0 ... MAX_NUMNODES-1] = CPU_MASK_NONE }; +-EXPORT_SYMBOL(node_to_cpumask_map); + /* which node each logical CPU is on */ + int cpu_to_node_map[NR_CPUS] __read_mostly = { [0 ... 
NR_CPUS-1] = 0 }; + EXPORT_SYMBOL(cpu_to_node_map); +@@ -134,7 +123,7 @@ EXPORT_SYMBOL(cpu_to_node_map); + static void map_cpu_to_node(int cpu, int node) + { + printk(KERN_INFO "Mapping cpu %d to node %d\n", cpu, node); +- cpumask_set_cpu(cpu, &node_to_cpumask_map[node]); ++ cpumask_set_cpu(cpu, node_to_cpumask_map[node]); + cpu_to_node_map[cpu] = node; + } + +@@ -145,7 +134,7 @@ static void unmap_cpu_to_node(int cpu) + + printk(KERN_INFO "Unmapping cpu %d from all nodes\n", cpu); + for (node = 0; node < MAX_NUMNODES; node++) +- cpumask_clear_cpu(cpu, &node_to_cpumask_map[node]); ++ cpumask_clear_cpu(cpu, node_to_cpumask_map[node]); + cpu_to_node_map[cpu] = 0; + } + #else /* !(CONFIG_NUMA && CONFIG_X86_32) */ +@@ -163,7 +152,7 @@ static void map_cpu_to_logical_apicid(vo + { + int cpu = smp_processor_id(); + int apicid = logical_smp_processor_id(); +- int node = apicid_to_node(apicid); ++ int node = apic->apicid_to_node(apicid); + + if (!node_online(node)) + node = first_online_node; +@@ -196,7 +185,8 @@ static void __cpuinit smp_callin(void) + * our local APIC. We have to wait for the IPI or we'll + * lock up on an APIC access. + */ +- wait_for_init_deassert(&init_deasserted); ++ if (apic->wait_for_init_deassert) ++ apic->wait_for_init_deassert(&init_deasserted); + + /* + * (This works even if the APIC is not enabled.) +@@ -243,7 +233,8 @@ static void __cpuinit smp_callin(void) + */ + + pr_debug("CALLIN, before setup_local_APIC().\n"); +- smp_callin_clear_local_apic(); ++ if (apic->smp_callin_clear_local_apic) ++ apic->smp_callin_clear_local_apic(); + setup_local_APIC(); + end_local_APIC_setup(); + map_cpu_to_logical_apicid(); +@@ -271,8 +262,6 @@ static void __cpuinit smp_callin(void) + cpumask_set_cpu(cpuid, cpu_callin_mask); + } + +-static int __cpuinitdata unsafe_smp; +- + /* + * Activate a secondary processor. + */ +@@ -307,7 +296,7 @@ notrace static void __cpuinit start_seco + __flush_tlb_all(); + #endif + +- /* This must be done before setting cpu_online_map */ ++ /* This must be done before setting cpu_online_mask */ + set_cpu_sibling_map(raw_smp_processor_id()); + wmb(); + +@@ -340,75 +329,22 @@ notrace static void __cpuinit start_seco + cpu_idle(); + } + +-static void __cpuinit smp_apply_quirks(struct cpuinfo_x86 *c) +-{ +- /* +- * Mask B, Pentium, but not Pentium MMX +- */ +- if (c->x86_vendor == X86_VENDOR_INTEL && +- c->x86 == 5 && +- c->x86_mask >= 1 && c->x86_mask <= 4 && +- c->x86_model <= 3) +- /* +- * Remember we have B step Pentia with bugs +- */ +- smp_b_stepping = 1; +- +- /* +- * Certain Athlons might work (for various values of 'work') in SMP +- * but they are not certified as MP capable. +- */ +- if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { +- +- if (num_possible_cpus() == 1) +- goto valid_k7; +- +- /* Athlon 660/661 is valid. */ +- if ((c->x86_model == 6) && ((c->x86_mask == 0) || +- (c->x86_mask == 1))) +- goto valid_k7; +- +- /* Duron 670 is valid */ +- if ((c->x86_model == 7) && (c->x86_mask == 0)) +- goto valid_k7; +- +- /* +- * Athlon 662, Duron 671, and Athlon >model 7 have capability +- * bit. It's worth noting that the A5 stepping (662) of some +- * Athlon XP's have the MP bit set. +- * See http://www.heise.de/newsticker/data/jow-18.10.01-000 for +- * more. +- */ +- if (((c->x86_model == 6) && (c->x86_mask >= 2)) || +- ((c->x86_model == 7) && (c->x86_mask >= 1)) || +- (c->x86_model > 7)) +- if (cpu_has_mp) +- goto valid_k7; +- +- /* If we get here, not a certified SMP capable AMD system. 
*/ +- unsafe_smp = 1; +- } +- +-valid_k7: +- ; ++#ifdef CONFIG_CPUMASK_OFFSTACK ++/* In this case, llc_shared_map is a pointer to a cpumask. */ ++static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst, ++ const struct cpuinfo_x86 *src) ++{ ++ struct cpumask *llc = dst->llc_shared_map; ++ *dst = *src; ++ dst->llc_shared_map = llc; + } +- +-static void __cpuinit smp_checks(void) ++#else ++static inline void copy_cpuinfo_x86(struct cpuinfo_x86 *dst, ++ const struct cpuinfo_x86 *src) + { +- if (smp_b_stepping) +- printk(KERN_WARNING "WARNING: SMP operation may be unreliable" +- "with B stepping processors.\n"); +- +- /* +- * Don't taint if we are running SMP kernel on a single non-MP +- * approved Athlon +- */ +- if (unsafe_smp && num_online_cpus() > 1) { +- printk(KERN_INFO "WARNING: This combination of AMD" +- "processors is not suitable for SMP.\n"); +- add_taint(TAINT_UNSAFE_SMP); +- } ++ *dst = *src; + } ++#endif /* CONFIG_CPUMASK_OFFSTACK */ + + /* + * The bootstrap kernel entry code has set these up. Save them for +@@ -419,11 +355,10 @@ void __cpuinit smp_store_cpu_info(int id + { + struct cpuinfo_x86 *c = &cpu_data(id); + +- *c = boot_cpu_data; ++ copy_cpuinfo_x86(c, &boot_cpu_data); + c->cpu_index = id; + if (id != 0) + identify_secondary_cpu(c); +- smp_apply_quirks(c); + } + + +@@ -444,15 +379,15 @@ void __cpuinit set_cpu_sibling_map(int c + cpumask_set_cpu(cpu, cpu_sibling_mask(i)); + cpumask_set_cpu(i, cpu_core_mask(cpu)); + cpumask_set_cpu(cpu, cpu_core_mask(i)); +- cpumask_set_cpu(i, &c->llc_shared_map); +- cpumask_set_cpu(cpu, &o->llc_shared_map); ++ cpumask_set_cpu(i, c->llc_shared_map); ++ cpumask_set_cpu(cpu, o->llc_shared_map); + } + } + } else { + cpumask_set_cpu(cpu, cpu_sibling_mask(cpu)); + } + +- cpumask_set_cpu(cpu, &c->llc_shared_map); ++ cpumask_set_cpu(cpu, c->llc_shared_map); + + if (current_cpu_data.x86_max_cores == 1) { + cpumask_copy(cpu_core_mask(cpu), cpu_sibling_mask(cpu)); +@@ -463,8 +398,8 @@ void __cpuinit set_cpu_sibling_map(int c + for_each_cpu(i, cpu_sibling_setup_mask) { + if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && + per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { +- cpumask_set_cpu(i, &c->llc_shared_map); +- cpumask_set_cpu(cpu, &cpu_data(i).llc_shared_map); ++ cpumask_set_cpu(i, c->llc_shared_map); ++ cpumask_set_cpu(cpu, cpu_data(i).llc_shared_map); + } + if (c->phys_proc_id == cpu_data(i).phys_proc_id) { + cpumask_set_cpu(i, cpu_core_mask(cpu)); +@@ -502,12 +437,7 @@ const struct cpumask *cpu_coregroup_mask + if (sched_mc_power_savings || sched_smt_power_savings) + return cpu_core_mask(cpu); + else +- return &c->llc_shared_map; +-} +- +-cpumask_t cpu_coregroup_map(int cpu) +-{ +- return *cpu_coregroup_mask(cpu); ++ return c->llc_shared_map; + } + + static void impress_friends(void) +@@ -583,7 +513,7 @@ wakeup_secondary_cpu_via_nmi(int logical + /* Target chip */ + /* Boot on the stack */ + /* Kick the second */ +- apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); ++ apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid); + + pr_debug("Waiting for send to finish...\n"); + send_status = safe_apic_wait_icr_idle(); +@@ -614,12 +544,6 @@ wakeup_secondary_cpu_via_init(int phys_a + unsigned long send_status, accept_status = 0; + int maxlvt, num_starts, j; + +- if (get_uv_system_type() == UV_NON_UNIQUE_APIC) { +- send_status = uv_wakeup_secondary(phys_apicid, start_eip); +- atomic_set(&init_deasserted, 1); +- return send_status; +- } +- + maxlvt = lapic_get_maxlvt(); + + /* +@@ -745,78 +669,23 @@ static void __cpuinit 
do_fork_idle(struc + complete(&c_idle->done); + } + +-#ifdef CONFIG_X86_64 +- +-/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ +-static void __ref free_bootmem_pda(struct x8664_pda *oldpda) +-{ +- if (!after_bootmem) +- free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); +-} +- +-/* +- * Allocate node local memory for the AP pda. +- * +- * Must be called after the _cpu_pda pointer table is initialized. +- */ +-int __cpuinit get_local_pda(int cpu) +-{ +- struct x8664_pda *oldpda, *newpda; +- unsigned long size = sizeof(struct x8664_pda); +- int node = cpu_to_node(cpu); +- +- if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem) +- return 0; +- +- oldpda = cpu_pda(cpu); +- newpda = kmalloc_node(size, GFP_ATOMIC, node); +- if (!newpda) { +- printk(KERN_ERR "Could not allocate node local PDA " +- "for CPU %d on node %d\n", cpu, node); +- +- if (oldpda) +- return 0; /* have a usable pda */ +- else +- return -1; +- } +- +- if (oldpda) { +- memcpy(newpda, oldpda, size); +- free_bootmem_pda(oldpda); +- } +- +- newpda->in_bootmem = 0; +- cpu_pda(cpu) = newpda; +- return 0; +-} +-#endif /* CONFIG_X86_64 */ +- +-static int __cpuinit do_boot_cpu(int apicid, int cpu) + /* + * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad + * (ie clustered apic addressing mode), this is a LOGICAL apic ID. +- * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. ++ * Returns zero if CPU booted OK, else error code from ++ * ->wakeup_secondary_cpu. + */ ++static int __cpuinit do_boot_cpu(int apicid, int cpu) + { + unsigned long boot_error = 0; +- int timeout; + unsigned long start_ip; +- unsigned short nmi_high = 0, nmi_low = 0; ++ int timeout; + struct create_idle c_idle = { +- .cpu = cpu, +- .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), ++ .cpu = cpu, ++ .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), + }; +- INIT_WORK(&c_idle.work, do_fork_idle); + +-#ifdef CONFIG_X86_64 +- /* Allocate node local memory for AP pdas */ +- if (cpu > 0) { +- boot_error = get_local_pda(cpu); +- if (boot_error) +- goto restore_state; +- /* if can't get pda memory, can't start cpu */ +- } +-#endif ++ INIT_WORK(&c_idle.work, do_fork_idle); + + alternatives_smp_switch(1); + +@@ -847,14 +716,16 @@ static int __cpuinit do_boot_cpu(int api + + set_idle_for_cpu(cpu, c_idle.idle); + do_rest: +-#ifdef CONFIG_X86_32 + per_cpu(current_task, cpu) = c_idle.idle; +- init_gdt(cpu); ++#ifdef CONFIG_X86_32 + /* Stack for startup_32 can be just as for start_secondary onwards */ + irq_ctx_init(cpu); + #else +- cpu_pda(cpu)->pcurrent = c_idle.idle; + clear_tsk_thread_flag(c_idle.idle, TIF_FORK); ++ initial_gs = per_cpu_offset(cpu); ++ per_cpu(kernel_stack, cpu) = ++ (unsigned long)task_stack_page(c_idle.idle) - ++ KERNEL_STACK_OFFSET + THREAD_SIZE; + #endif + early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); + initial_code = (unsigned long)start_secondary; +@@ -878,8 +749,6 @@ do_rest: + + pr_debug("Setting warm reset code and vector.\n"); + +- store_NMI_vector(&nmi_high, &nmi_low); +- + smpboot_setup_warm_reset_vector(start_ip); + /* + * Be paranoid about clearing APIC errors. +@@ -891,9 +760,13 @@ do_rest: + } + + /* +- * Starting actual IPI sequence... ++ * Kick the secondary CPU. 
Use the method in the APIC driver ++ * if it's defined - or use an INIT boot APIC message otherwise: + */ +- boot_error = wakeup_secondary_cpu(apicid, start_ip); ++ if (apic->wakeup_secondary_cpu) ++ boot_error = apic->wakeup_secondary_cpu(apicid, start_ip); ++ else ++ boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); + + if (!boot_error) { + /* +@@ -927,13 +800,11 @@ do_rest: + else + /* trampoline code not run */ + printk(KERN_ERR "Not responding.\n"); +- if (get_uv_system_type() != UV_NON_UNIQUE_APIC) +- inquire_remote_apic(apicid); ++ if (apic->inquire_remote_apic) ++ apic->inquire_remote_apic(apicid); + } + } +-#ifdef CONFIG_X86_64 +-restore_state: +-#endif ++ + if (boot_error) { + /* Try to put things back the way they were before ... */ + numa_remove_cpu(cpu); /* was set by numa_add_cpu */ +@@ -961,7 +832,7 @@ restore_state: + + int __cpuinit native_cpu_up(unsigned int cpu) + { +- int apicid = cpu_present_to_apicid(cpu); ++ int apicid = apic->cpu_present_to_apicid(cpu); + unsigned long flags; + int err; + +@@ -1033,9 +904,8 @@ int __cpuinit native_cpu_up(unsigned int + */ + static __init void disable_smp(void) + { +- /* use the read/write pointers to the present and possible maps */ +- cpumask_copy(&cpu_present_map, cpumask_of(0)); +- cpumask_copy(&cpu_possible_map, cpumask_of(0)); ++ init_cpu_present(cpumask_of(0)); ++ init_cpu_possible(cpumask_of(0)); + smpboot_clear_io_apic_irqs(); + + if (smp_found_config) +@@ -1054,14 +924,14 @@ static int __init smp_sanity_check(unsig + { + preempt_disable(); + +-#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) ++#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32) + if (def_to_bigsmp && nr_cpu_ids > 8) { + unsigned int cpu; + unsigned nr; + + printk(KERN_WARNING + "More than 8 CPUs detected - skipping them.\n" +- "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); ++ "Use CONFIG_X86_BIGSMP.\n"); + + nr = 0; + for_each_present_cpu(cpu) { +@@ -1107,7 +977,7 @@ static int __init smp_sanity_check(unsig + * Should not be necessary because the MP table should list the boot + * CPU too, but we do it for the sake of robustness anyway. + */ +- if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { ++ if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) { + printk(KERN_NOTICE + "weird, boot CPU (#%d) not listed by the BIOS.\n", + boot_cpu_physical_apicid); +@@ -1125,6 +995,7 @@ static int __init smp_sanity_check(unsig + printk(KERN_ERR "... forcing use of dummy APIC emulation." + "(tell your hw vendor)\n"); + smpboot_clear_io_apic(); ++ arch_disable_smp_support(); + return -1; + } + +@@ -1166,6 +1037,8 @@ static void __init smp_cpu_index_default + */ + void __init native_smp_prepare_cpus(unsigned int max_cpus) + { ++ unsigned int i; ++ + preempt_disable(); + smp_cpu_index_default(); + current_cpu_data = boot_cpu_data; +@@ -1179,11 +1052,19 @@ void __init native_smp_prepare_cpus(unsi + boot_cpu_logical_apicid = logical_smp_processor_id(); + #endif + current_thread_info()->cpu = 0; /* needed? 
*/ ++ for_each_possible_cpu(i) { ++ alloc_cpumask_var(&per_cpu(cpu_sibling_map, i), GFP_KERNEL); ++ alloc_cpumask_var(&per_cpu(cpu_core_map, i), GFP_KERNEL); ++ alloc_cpumask_var(&cpu_data(i).llc_shared_map, GFP_KERNEL); ++ cpumask_clear(per_cpu(cpu_core_map, i)); ++ cpumask_clear(per_cpu(cpu_sibling_map, i)); ++ cpumask_clear(cpu_data(i).llc_shared_map); ++ } + set_cpu_sibling_map(0); + +-#ifdef CONFIG_X86_64 + enable_IR_x2apic(); +- setup_apic_routing(); ++#ifdef CONFIG_X86_64 ++ default_setup_apic_routing(); + #endif + + if (smp_sanity_check(max_cpus) < 0) { +@@ -1207,18 +1088,18 @@ void __init native_smp_prepare_cpus(unsi + */ + setup_local_APIC(); + +-#ifdef CONFIG_X86_64 + /* + * Enable IO APIC before setting up error vector + */ + if (!skip_ioapic_setup && nr_ioapics) + enable_IO_APIC(); +-#endif ++ + end_local_APIC_setup(); + + map_cpu_to_logical_apicid(); + +- setup_portio_remap(); ++ if (apic->setup_portio_remap) ++ apic->setup_portio_remap(); + + smpboot_setup_io_apic(); + /* +@@ -1240,10 +1121,7 @@ out: + void __init native_smp_prepare_boot_cpu(void) + { + int me = smp_processor_id(); +-#ifdef CONFIG_X86_32 +- init_gdt(me); +-#endif +- switch_to_new_gdt(); ++ switch_to_new_gdt(me); + /* already set me in cpu_online_mask in boot_cpu_init() */ + cpumask_set_cpu(me, cpu_callout_mask); + per_cpu(cpu_state, me) = CPU_ONLINE; +@@ -1254,7 +1132,6 @@ void __init native_smp_cpus_done(unsigne + pr_debug("Boot done.\n"); + + impress_friends(); +- smp_checks(); + #ifdef CONFIG_X86_IO_APIC + setup_ioapic_dest(); + #endif +@@ -1271,11 +1148,11 @@ early_param("possible_cpus", _setup_poss + + + /* +- * cpu_possible_map should be static, it cannot change as cpu's ++ * cpu_possible_mask should be static, it cannot change as cpu's + * are onlined, or offlined. The reason is per-cpu data-structures + * are allocated by some modules at init time, and dont expect to + * do this dynamically on cpu arrival/departure. +- * cpu_present_map on the other hand can change dynamically. ++ * cpu_present_mask on the other hand can change dynamically. + * In case when cpu_hotplug is not compiled, then we resort to current + * behaviour, which is cpu_possible == cpu_present. + * - Ashok Raj +Index: linux-2.6-tip/arch/x86/kernel/smpcommon.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/smpcommon.c ++++ /dev/null +@@ -1,30 +0,0 @@ +-/* +- * SMP stuff which is common to all sub-architectures. +- */ +-#include +-#include +- +-#ifdef CONFIG_X86_32 +-DEFINE_PER_CPU(unsigned long, this_cpu_off); +-EXPORT_PER_CPU_SYMBOL(this_cpu_off); +- +-/* +- * Initialize the CPU's GDT. This is either the boot CPU doing itself +- * (still using the master per-cpu area), or a CPU doing it for a +- * secondary which will soon come up. 
+- */ +-__cpuinit void init_gdt(int cpu) +-{ +- struct desc_struct gdt; +- +- pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, +- 0x2 | DESCTYPE_S, 0x8); +- gdt.s = 1; +- +- write_gdt_entry(get_cpu_gdt_table(cpu), +- GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); +- +- per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; +- per_cpu(cpu_number, cpu) = cpu; +-} +-#endif +Index: linux-2.6-tip/arch/x86/kernel/stacktrace.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/stacktrace.c ++++ linux-2.6-tip/arch/x86/kernel/stacktrace.c +@@ -1,7 +1,7 @@ + /* + * Stack trace management functions + * +- * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar ++ * Copyright (C) 2006-2009 Red Hat, Inc., Ingo Molnar + */ + #include + #include +@@ -77,6 +77,13 @@ void save_stack_trace(struct stack_trace + } + EXPORT_SYMBOL_GPL(save_stack_trace); + ++void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) ++{ ++ dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); ++ if (trace->nr_entries < trace->max_entries) ++ trace->entries[trace->nr_entries++] = ULONG_MAX; ++} ++ + void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) + { + dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); +Index: linux-2.6-tip/arch/x86/kernel/summit_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/summit_32.c ++++ /dev/null +@@ -1,188 +0,0 @@ +-/* +- * IBM Summit-Specific Code +- * +- * Written By: Matthew Dobson, IBM Corporation +- * +- * Copyright (c) 2003 IBM Corp. +- * +- * All rights reserved. +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or (at +- * your option) any later version. +- * +- * This program is distributed in the hope that it will be useful, but +- * WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or +- * NON INFRINGEMENT. See the GNU General Public License for more +- * details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the Free Software +- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+- * +- * Send feedback to +- * +- */ +- +-#include +-#include +-#include +-#include +-#include +- +-static struct rio_table_hdr *rio_table_hdr __initdata; +-static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; +-static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; +- +-#ifndef CONFIG_X86_NUMAQ +-static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata; +-#endif +- +-static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) +-{ +- int twister = 0, node = 0; +- int i, bus, num_buses; +- +- for (i = 0; i < rio_table_hdr->num_rio_dev; i++) { +- if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) { +- twister = rio_devs[i]->owner_id; +- break; +- } +- } +- if (i == rio_table_hdr->num_rio_dev) { +- printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__); +- return last_bus; +- } +- +- for (i = 0; i < rio_table_hdr->num_scal_dev; i++) { +- if (scal_devs[i]->node_id == twister) { +- node = scal_devs[i]->node_id; +- break; +- } +- } +- if (i == rio_table_hdr->num_scal_dev) { +- printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__); +- return last_bus; +- } +- +- switch (rio_devs[wpeg_num]->type) { +- case CompatWPEG: +- /* +- * The Compatibility Winnipeg controls the 2 legacy buses, +- * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case +- * a PCI-PCI bridge card is used in either slot: total 5 buses. +- */ +- num_buses = 5; +- break; +- case AltWPEG: +- /* +- * The Alternate Winnipeg controls the 2 133MHz buses [1 slot +- * each], their 2 "extra" buses, the 100MHz bus [2 slots] and +- * the "extra" buses for each of those slots: total 7 buses. +- */ +- num_buses = 7; +- break; +- case LookOutAWPEG: +- case LookOutBWPEG: +- /* +- * A Lookout Winnipeg controls 3 100MHz buses [2 slots each] +- * & the "extra" buses for each of those slots: total 9 buses. +- */ +- num_buses = 9; +- break; +- default: +- printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__); +- return last_bus; +- } +- +- for (bus = last_bus; bus < last_bus + num_buses; bus++) +- mp_bus_id_to_node[bus] = node; +- return bus; +-} +- +-static int __init build_detail_arrays(void) +-{ +- unsigned long ptr; +- int i, scal_detail_size, rio_detail_size; +- +- if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { +- printk(KERN_WARNING "%s: MAX_NUMNODES too low! 
Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev); +- return 0; +- } +- +- switch (rio_table_hdr->version) { +- default: +- printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version); +- return 0; +- case 2: +- scal_detail_size = 11; +- rio_detail_size = 13; +- break; +- case 3: +- scal_detail_size = 12; +- rio_detail_size = 15; +- break; +- } +- +- ptr = (unsigned long)rio_table_hdr + 3; +- for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size) +- scal_devs[i] = (struct scal_detail *)ptr; +- +- for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size) +- rio_devs[i] = (struct rio_detail *)ptr; +- +- return 1; +-} +- +-void __init setup_summit(void) +-{ +- unsigned long ptr; +- unsigned short offset; +- int i, next_wpeg, next_bus = 0; +- +- /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */ +- ptr = get_bios_ebda(); +- ptr = (unsigned long)phys_to_virt(ptr); +- +- rio_table_hdr = NULL; +- offset = 0x180; +- while (offset) { +- /* The block id is stored in the 2nd word */ +- if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) { +- /* set the pointer past the offset & block id */ +- rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4); +- break; +- } +- /* The next offset is stored in the 1st word. 0 means no more */ +- offset = *((unsigned short *)(ptr + offset)); +- } +- if (!rio_table_hdr) { +- printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__); +- return; +- } +- +- if (!build_detail_arrays()) +- return; +- +- /* The first Winnipeg we're looking for has an index of 0 */ +- next_wpeg = 0; +- do { +- for (i = 0; i < rio_table_hdr->num_rio_dev; i++) { +- if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg) { +- /* It's the Winnipeg we're looking for! */ +- next_bus = setup_pci_node_map_for_wpeg(i, next_bus); +- next_wpeg++; +- break; +- } +- } +- /* +- * If we go through all Rio devices and don't find one with +- * the next index, it means we've found all the Winnipegs, +- * and thus all the PCI buses. 
+- */ +- if (i == rio_table_hdr->num_rio_dev) +- next_wpeg = 0; +- } while (next_wpeg != 0); +-} +Index: linux-2.6-tip/arch/x86/kernel/syscall_table_32.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/syscall_table_32.S ++++ linux-2.6-tip/arch/x86/kernel/syscall_table_32.S +@@ -1,7 +1,7 @@ + ENTRY(sys_call_table) + .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ + .long sys_exit +- .long sys_fork ++ .long ptregs_fork + .long sys_read + .long sys_write + .long sys_open /* 5 */ +@@ -10,7 +10,7 @@ ENTRY(sys_call_table) + .long sys_creat + .long sys_link + .long sys_unlink /* 10 */ +- .long sys_execve ++ .long ptregs_execve + .long sys_chdir + .long sys_time + .long sys_mknod +@@ -109,17 +109,17 @@ ENTRY(sys_call_table) + .long sys_newlstat + .long sys_newfstat + .long sys_uname +- .long sys_iopl /* 110 */ ++ .long ptregs_iopl /* 110 */ + .long sys_vhangup + .long sys_ni_syscall /* old "idle" system call */ +- .long sys_vm86old ++ .long ptregs_vm86old + .long sys_wait4 + .long sys_swapoff /* 115 */ + .long sys_sysinfo + .long sys_ipc + .long sys_fsync +- .long sys_sigreturn +- .long sys_clone /* 120 */ ++ .long ptregs_sigreturn ++ .long ptregs_clone /* 120 */ + .long sys_setdomainname + .long sys_newuname + .long sys_modify_ldt +@@ -165,14 +165,14 @@ ENTRY(sys_call_table) + .long sys_mremap + .long sys_setresuid16 + .long sys_getresuid16 /* 165 */ +- .long sys_vm86 ++ .long ptregs_vm86 + .long sys_ni_syscall /* Old sys_query_module */ + .long sys_poll + .long sys_nfsservctl + .long sys_setresgid16 /* 170 */ + .long sys_getresgid16 + .long sys_prctl +- .long sys_rt_sigreturn ++ .long ptregs_rt_sigreturn + .long sys_rt_sigaction + .long sys_rt_sigprocmask /* 175 */ + .long sys_rt_sigpending +@@ -185,11 +185,11 @@ ENTRY(sys_call_table) + .long sys_getcwd + .long sys_capget + .long sys_capset /* 185 */ +- .long sys_sigaltstack ++ .long ptregs_sigaltstack + .long sys_sendfile + .long sys_ni_syscall /* reserved for streams1 */ + .long sys_ni_syscall /* reserved for streams2 */ +- .long sys_vfork /* 190 */ ++ .long ptregs_vfork /* 190 */ + .long sys_getrlimit + .long sys_mmap2 + .long sys_truncate64 +@@ -332,3 +332,8 @@ ENTRY(sys_call_table) + .long sys_dup3 /* 330 */ + .long sys_pipe2 + .long sys_inotify_init1 ++ .long sys_ni_syscall /* preadv */ ++ .long sys_ni_syscall /* pwritev */ ++ .long sys_rt_tgsigqueueinfo /* 335 */ ++ .long sys_perf_counter_open ++ +Index: linux-2.6-tip/arch/x86/kernel/time_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/time_32.c ++++ linux-2.6-tip/arch/x86/kernel/time_32.c +@@ -33,12 +33,12 @@ + #include + #include + +-#include ++#include + #include + #include + #include + +-#include "do_timer.h" ++#include + + int timer_ack; + +@@ -118,7 +118,7 @@ void __init hpet_time_init(void) + { + if (!hpet_enable()) + setup_pit_timer(); +- time_init_hook(); ++ x86_quirk_time_init(); + } + + /* +@@ -131,7 +131,7 @@ void __init hpet_time_init(void) + */ + void __init time_init(void) + { +- pre_time_init_hook(); ++ x86_quirk_pre_time_init(); + tsc_init(); + late_time_init = choose_time_init(); + } +Index: linux-2.6-tip/arch/x86/kernel/tlb_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/tlb_32.c ++++ /dev/null +@@ -1,256 +0,0 @@ +-#include +-#include +-#include +- +-#include +- +-DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) +- ____cacheline_aligned = { 
&init_mm, 0, }; +- +-/* must come after the send_IPI functions above for inlining */ +-#include +- +-/* +- * Smarter SMP flushing macros. +- * c/o Linus Torvalds. +- * +- * These mean you can really definitely utterly forget about +- * writing to user space from interrupts. (Its not allowed anyway). +- * +- * Optimizations Manfred Spraul +- */ +- +-static cpumask_t flush_cpumask; +-static struct mm_struct *flush_mm; +-static unsigned long flush_va; +-static DEFINE_SPINLOCK(tlbstate_lock); +- +-/* +- * We cannot call mmdrop() because we are in interrupt context, +- * instead update mm->cpu_vm_mask. +- * +- * We need to reload %cr3 since the page tables may be going +- * away from under us.. +- */ +-void leave_mm(int cpu) +-{ +- BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK); +- cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask); +- load_cr3(swapper_pg_dir); +-} +-EXPORT_SYMBOL_GPL(leave_mm); +- +-/* +- * +- * The flush IPI assumes that a thread switch happens in this order: +- * [cpu0: the cpu that switches] +- * 1) switch_mm() either 1a) or 1b) +- * 1a) thread switch to a different mm +- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); +- * Stop ipi delivery for the old mm. This is not synchronized with +- * the other cpus, but smp_invalidate_interrupt ignore flush ipis +- * for the wrong mm, and in the worst case we perform a superfluous +- * tlb flush. +- * 1a2) set cpu_tlbstate to TLBSTATE_OK +- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 +- * was in lazy tlb mode. +- * 1a3) update cpu_tlbstate[].active_mm +- * Now cpu0 accepts tlb flushes for the new mm. +- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); +- * Now the other cpus will send tlb flush ipis. +- * 1a4) change cr3. +- * 1b) thread switch without mm change +- * cpu_tlbstate[].active_mm is correct, cpu0 already handles +- * flush ipis. +- * 1b1) set cpu_tlbstate to TLBSTATE_OK +- * 1b2) test_and_set the cpu bit in cpu_vm_mask. +- * Atomically set the bit [other cpus will start sending flush ipis], +- * and test the bit. +- * 1b3) if the bit was 0: leave_mm was called, flush the tlb. +- * 2) switch %%esp, ie current +- * +- * The interrupt must handle 2 special cases: +- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. +- * - the cpu performs speculative tlb reads, i.e. even if the cpu only +- * runs in kernel space, the cpu could load tlb entries for user space +- * pages. +- * +- * The good news is that cpu_tlbstate is local to each cpu, no +- * write/read ordering problems. +- */ +- +-/* +- * TLB flush IPI: +- * +- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. +- * 2) Leave the mm if we are in the lazy tlb mode. 
+- */ +- +-void smp_invalidate_interrupt(struct pt_regs *regs) +-{ +- unsigned long cpu; +- +- cpu = get_cpu(); +- +- if (!cpu_isset(cpu, flush_cpumask)) +- goto out; +- /* +- * This was a BUG() but until someone can quote me the +- * line from the intel manual that guarantees an IPI to +- * multiple CPUs is retried _only_ on the erroring CPUs +- * its staying as a return +- * +- * BUG(); +- */ +- +- if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) { +- if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) { +- if (flush_va == TLB_FLUSH_ALL) +- local_flush_tlb(); +- else +- __flush_tlb_one(flush_va); +- } else +- leave_mm(cpu); +- } +- ack_APIC_irq(); +- smp_mb__before_clear_bit(); +- cpu_clear(cpu, flush_cpumask); +- smp_mb__after_clear_bit(); +-out: +- put_cpu_no_resched(); +- inc_irq_stat(irq_tlb_count); +-} +- +-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, +- unsigned long va) +-{ +- cpumask_t cpumask = *cpumaskp; +- +- /* +- * A couple of (to be removed) sanity checks: +- * +- * - current CPU must not be in mask +- * - mask must exist :) +- */ +- BUG_ON(cpus_empty(cpumask)); +- BUG_ON(cpu_isset(smp_processor_id(), cpumask)); +- BUG_ON(!mm); +- +-#ifdef CONFIG_HOTPLUG_CPU +- /* If a CPU which we ran on has gone down, OK. */ +- cpus_and(cpumask, cpumask, cpu_online_map); +- if (unlikely(cpus_empty(cpumask))) +- return; +-#endif +- +- /* +- * i'm not happy about this global shared spinlock in the +- * MM hot path, but we'll see how contended it is. +- * AK: x86-64 has a faster method that could be ported. +- */ +- spin_lock(&tlbstate_lock); +- +- flush_mm = mm; +- flush_va = va; +- cpus_or(flush_cpumask, cpumask, flush_cpumask); +- +- /* +- * Make the above memory operations globally visible before +- * sending the IPI. +- */ +- smp_mb(); +- /* +- * We have to send the IPI only to +- * CPUs affected. +- */ +- send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); +- +- while (!cpus_empty(flush_cpumask)) +- /* nothing. 
lockup detection does not belong here */ +- cpu_relax(); +- +- flush_mm = NULL; +- flush_va = 0; +- spin_unlock(&tlbstate_lock); +-} +- +-void flush_tlb_current_task(void) +-{ +- struct mm_struct *mm = current->mm; +- cpumask_t cpu_mask; +- +- preempt_disable(); +- cpu_mask = mm->cpu_vm_mask; +- cpu_clear(smp_processor_id(), cpu_mask); +- +- local_flush_tlb(); +- if (!cpus_empty(cpu_mask)) +- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); +- preempt_enable(); +-} +- +-void flush_tlb_mm(struct mm_struct *mm) +-{ +- cpumask_t cpu_mask; +- +- preempt_disable(); +- cpu_mask = mm->cpu_vm_mask; +- cpu_clear(smp_processor_id(), cpu_mask); +- +- if (current->active_mm == mm) { +- if (current->mm) +- local_flush_tlb(); +- else +- leave_mm(smp_processor_id()); +- } +- if (!cpus_empty(cpu_mask)) +- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); +- +- preempt_enable(); +-} +- +-void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) +-{ +- struct mm_struct *mm = vma->vm_mm; +- cpumask_t cpu_mask; +- +- preempt_disable(); +- cpu_mask = mm->cpu_vm_mask; +- cpu_clear(smp_processor_id(), cpu_mask); +- +- if (current->active_mm == mm) { +- if (current->mm) +- __flush_tlb_one(va); +- else +- leave_mm(smp_processor_id()); +- } +- +- if (!cpus_empty(cpu_mask)) +- flush_tlb_others(cpu_mask, mm, va); +- +- preempt_enable(); +-} +-EXPORT_SYMBOL(flush_tlb_page); +- +-static void do_flush_tlb_all(void *info) +-{ +- unsigned long cpu = smp_processor_id(); +- +- __flush_tlb_all(); +- if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY) +- leave_mm(cpu); +-} +- +-void flush_tlb_all(void) +-{ +- on_each_cpu(do_flush_tlb_all, NULL, 1); +-} +- +-void reset_lazy_tlbstate(void) +-{ +- int cpu = raw_smp_processor_id(); +- +- per_cpu(cpu_tlbstate, cpu).state = 0; +- per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; +-} +- +Index: linux-2.6-tip/arch/x86/kernel/tlb_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/tlb_64.c ++++ /dev/null +@@ -1,284 +0,0 @@ +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +-/* +- * Smarter SMP flushing macros. +- * c/o Linus Torvalds. +- * +- * These mean you can really definitely utterly forget about +- * writing to user space from interrupts. (Its not allowed anyway). +- * +- * Optimizations Manfred Spraul +- * +- * More scalable flush, from Andi Kleen +- * +- * To avoid global state use 8 different call vectors. +- * Each CPU uses a specific vector to trigger flushes on other +- * CPUs. Depending on the received vector the target CPUs look into +- * the right per cpu variable for the flush data. +- * +- * With more than 8 CPUs they are hashed to the 8 available +- * vectors. The limited global vector space forces us to this right now. +- * In future when interrupts are split into per CPU domains this could be +- * fixed, at the cost of triggering multiple IPIs in some cases. +- */ +- +-union smp_flush_state { +- struct { +- cpumask_t flush_cpumask; +- struct mm_struct *flush_mm; +- unsigned long flush_va; +- spinlock_t tlbstate_lock; +- }; +- char pad[SMP_CACHE_BYTES]; +-} ____cacheline_aligned; +- +-/* State is put into the per CPU data section, but padded +- to a full cache line because other CPUs can access it and we don't +- want false sharing in the per cpu data segment. 
*/ +-static DEFINE_PER_CPU(union smp_flush_state, flush_state); +- +-/* +- * We cannot call mmdrop() because we are in interrupt context, +- * instead update mm->cpu_vm_mask. +- */ +-void leave_mm(int cpu) +-{ +- if (read_pda(mmu_state) == TLBSTATE_OK) +- BUG(); +- cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); +- load_cr3(swapper_pg_dir); +-} +-EXPORT_SYMBOL_GPL(leave_mm); +- +-/* +- * +- * The flush IPI assumes that a thread switch happens in this order: +- * [cpu0: the cpu that switches] +- * 1) switch_mm() either 1a) or 1b) +- * 1a) thread switch to a different mm +- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); +- * Stop ipi delivery for the old mm. This is not synchronized with +- * the other cpus, but smp_invalidate_interrupt ignore flush ipis +- * for the wrong mm, and in the worst case we perform a superfluous +- * tlb flush. +- * 1a2) set cpu mmu_state to TLBSTATE_OK +- * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 +- * was in lazy tlb mode. +- * 1a3) update cpu active_mm +- * Now cpu0 accepts tlb flushes for the new mm. +- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); +- * Now the other cpus will send tlb flush ipis. +- * 1a4) change cr3. +- * 1b) thread switch without mm change +- * cpu active_mm is correct, cpu0 already handles +- * flush ipis. +- * 1b1) set cpu mmu_state to TLBSTATE_OK +- * 1b2) test_and_set the cpu bit in cpu_vm_mask. +- * Atomically set the bit [other cpus will start sending flush ipis], +- * and test the bit. +- * 1b3) if the bit was 0: leave_mm was called, flush the tlb. +- * 2) switch %%esp, ie current +- * +- * The interrupt must handle 2 special cases: +- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. +- * - the cpu performs speculative tlb reads, i.e. even if the cpu only +- * runs in kernel space, the cpu could load tlb entries for user space +- * pages. +- * +- * The good news is that cpu mmu_state is local to each cpu, no +- * write/read ordering problems. +- */ +- +-/* +- * TLB flush IPI: +- * +- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. +- * 2) Leave the mm if we are in the lazy tlb mode. +- * +- * Interrupts are disabled. +- */ +- +-asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) +-{ +- int cpu; +- int sender; +- union smp_flush_state *f; +- +- cpu = smp_processor_id(); +- /* +- * orig_rax contains the negated interrupt vector. +- * Use that to determine where the sender put the data. 
+- */ +- sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; +- f = &per_cpu(flush_state, sender); +- +- if (!cpu_isset(cpu, f->flush_cpumask)) +- goto out; +- /* +- * This was a BUG() but until someone can quote me the +- * line from the intel manual that guarantees an IPI to +- * multiple CPUs is retried _only_ on the erroring CPUs +- * its staying as a return +- * +- * BUG(); +- */ +- +- if (f->flush_mm == read_pda(active_mm)) { +- if (read_pda(mmu_state) == TLBSTATE_OK) { +- if (f->flush_va == TLB_FLUSH_ALL) +- local_flush_tlb(); +- else +- __flush_tlb_one(f->flush_va); +- } else +- leave_mm(cpu); +- } +-out: +- ack_APIC_irq(); +- cpu_clear(cpu, f->flush_cpumask); +- inc_irq_stat(irq_tlb_count); +-} +- +-void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, +- unsigned long va) +-{ +- int sender; +- union smp_flush_state *f; +- cpumask_t cpumask = *cpumaskp; +- +- if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va)) +- return; +- +- /* Caller has disabled preemption */ +- sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; +- f = &per_cpu(flush_state, sender); +- +- /* +- * Could avoid this lock when +- * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is +- * probably not worth checking this for a cache-hot lock. +- */ +- spin_lock(&f->tlbstate_lock); +- +- f->flush_mm = mm; +- f->flush_va = va; +- cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); +- +- /* +- * Make the above memory operations globally visible before +- * sending the IPI. +- */ +- smp_mb(); +- /* +- * We have to send the IPI only to +- * CPUs affected. +- */ +- send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); +- +- while (!cpus_empty(f->flush_cpumask)) +- cpu_relax(); +- +- f->flush_mm = NULL; +- f->flush_va = 0; +- spin_unlock(&f->tlbstate_lock); +-} +- +-static int __cpuinit init_smp_flush(void) +-{ +- int i; +- +- for_each_possible_cpu(i) +- spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); +- +- return 0; +-} +-core_initcall(init_smp_flush); +- +-void flush_tlb_current_task(void) +-{ +- struct mm_struct *mm = current->mm; +- cpumask_t cpu_mask; +- +- preempt_disable(); +- cpu_mask = mm->cpu_vm_mask; +- cpu_clear(smp_processor_id(), cpu_mask); +- +- local_flush_tlb(); +- if (!cpus_empty(cpu_mask)) +- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); +- preempt_enable(); +-} +- +-void flush_tlb_mm(struct mm_struct *mm) +-{ +- cpumask_t cpu_mask; +- +- preempt_disable(); +- cpu_mask = mm->cpu_vm_mask; +- cpu_clear(smp_processor_id(), cpu_mask); +- +- if (current->active_mm == mm) { +- if (current->mm) +- local_flush_tlb(); +- else +- leave_mm(smp_processor_id()); +- } +- if (!cpus_empty(cpu_mask)) +- flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); +- +- preempt_enable(); +-} +- +-void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) +-{ +- struct mm_struct *mm = vma->vm_mm; +- cpumask_t cpu_mask; +- +- preempt_disable(); +- cpu_mask = mm->cpu_vm_mask; +- cpu_clear(smp_processor_id(), cpu_mask); +- +- if (current->active_mm == mm) { +- if (current->mm) +- __flush_tlb_one(va); +- else +- leave_mm(smp_processor_id()); +- } +- +- if (!cpus_empty(cpu_mask)) +- flush_tlb_others(cpu_mask, mm, va); +- +- preempt_enable(); +-} +- +-static void do_flush_tlb_all(void *info) +-{ +- unsigned long cpu = smp_processor_id(); +- +- __flush_tlb_all(); +- if (read_pda(mmu_state) == TLBSTATE_LAZY) +- leave_mm(cpu); +-} +- +-void flush_tlb_all(void) +-{ +- on_each_cpu(do_flush_tlb_all, NULL, 1); +-} +Index: linux-2.6-tip/arch/x86/kernel/tlb_uv.c 
+=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/tlb_uv.c ++++ linux-2.6-tip/arch/x86/kernel/tlb_uv.c +@@ -11,16 +11,15 @@ + #include + + #include ++#include + #include + #include + #include +-#include ++#include + #include + #include + #include + +-#include +- + static struct bau_control **uv_bau_table_bases __read_mostly; + static int uv_bau_retry_limit __read_mostly; + +@@ -210,14 +209,15 @@ static int uv_wait_completion(struct bau + * + * Send a broadcast and wait for a broadcast message to complete. + * +- * The cpumaskp mask contains the cpus the broadcast was sent to. ++ * The flush_mask contains the cpus the broadcast was sent to. + * +- * Returns 1 if all remote flushing was done. The mask is zeroed. +- * Returns 0 if some remote flushing remains to be done. The mask is left +- * unchanged. +- */ +-int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, +- cpumask_t *cpumaskp) ++ * Returns NULL if all remote flushing was done. The mask is zeroed. ++ * Returns @flush_mask if some remote flushing remains to be done. The ++ * mask will have some bits still set. ++ */ ++const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade, ++ struct bau_desc *bau_desc, ++ struct cpumask *flush_mask) + { + int completion_status = 0; + int right_shift; +@@ -257,66 +257,75 @@ int uv_flush_send_and_wait(int cpu, int + * the cpu's, all of which are still in the mask. + */ + __get_cpu_var(ptcstats).ptc_i++; +- return 0; ++ return flush_mask; + } + + /* + * Success, so clear the remote cpu's from the mask so we don't + * use the IPI method of shootdown on them. + */ +- for_each_cpu_mask(bit, *cpumaskp) { ++ for_each_cpu(bit, flush_mask) { + blade = uv_cpu_to_blade_id(bit); + if (blade == this_blade) + continue; +- cpu_clear(bit, *cpumaskp); ++ cpumask_clear_cpu(bit, flush_mask); + } +- if (!cpus_empty(*cpumaskp)) +- return 0; +- return 1; ++ if (!cpumask_empty(flush_mask)) ++ return flush_mask; ++ return NULL; + } + ++static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); ++ + /** + * uv_flush_tlb_others - globally purge translation cache of a virtual + * address or all TLB's +- * @cpumaskp: mask of all cpu's in which the address is to be removed ++ * @cpumask: mask of all cpu's in which the address is to be removed + * @mm: mm_struct containing virtual address range + * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) ++ * @cpu: the current cpu + * + * This is the entry point for initiating any UV global TLB shootdown. + * + * Purges the translation caches of all specified processors of the given + * virtual address, or purges all TLB's on specified processors. + * +- * The caller has derived the cpumaskp from the mm_struct and has subtracted +- * the local cpu from the mask. This function is called only if there +- * are bits set in the mask. (e.g. flush_tlb_page()) ++ * The caller has derived the cpumask from the mm_struct. This function ++ * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) + * +- * The cpumaskp is converted into a nodemask of the nodes containing ++ * The cpumask is converted into a nodemask of the nodes containing + * the cpus. + * +- * Returns 1 if all remote flushing was done. +- * Returns 0 if some remote flushing remains to be done. ++ * Note that this function should be called with preemption disabled. ++ * ++ * Returns NULL if all remote flushing was done. 
++ * Returns pointer to cpumask if some remote flushing remains to be ++ * done. The returned pointer is valid till preemption is re-enabled. + */ +-int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, +- unsigned long va) ++const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, ++ struct mm_struct *mm, ++ unsigned long va, unsigned int cpu) + { ++ struct cpumask *flush_mask = __get_cpu_var(uv_flush_tlb_mask); + int i; + int bit; + int blade; +- int cpu; ++ int uv_cpu; + int this_blade; + int locals = 0; + struct bau_desc *bau_desc; + +- cpu = uv_blade_processor_id(); ++ cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); ++ ++ uv_cpu = uv_blade_processor_id(); + this_blade = uv_numa_blade_id(); + bau_desc = __get_cpu_var(bau_control).descriptor_base; +- bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu; ++ bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu; + + bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); + + i = 0; +- for_each_cpu_mask(bit, *cpumaskp) { ++ for_each_cpu(bit, flush_mask) { + blade = uv_cpu_to_blade_id(bit); + BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); + if (blade == this_blade) { +@@ -331,17 +340,17 @@ int uv_flush_tlb_others(cpumask_t *cpuma + * no off_node flushing; return status for local node + */ + if (locals) +- return 0; ++ return flush_mask; + else +- return 1; ++ return NULL; + } + __get_cpu_var(ptcstats).requestor++; + __get_cpu_var(ptcstats).ntargeted += i; + + bau_desc->payload.address = va; +- bau_desc->payload.sending_cpu = smp_processor_id(); ++ bau_desc->payload.sending_cpu = cpu; + +- return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp); ++ return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask); + } + + /* +@@ -747,6 +756,10 @@ static int __init uv_bau_init(void) + if (!is_uv_system()) + return 0; + ++ for_each_possible_cpu(cur_cpu) ++ alloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), ++ GFP_KERNEL, cpu_to_node(cur_cpu)); ++ + uv_bau_retry_limit = 1; + uv_nshift = uv_hub_info->n_val; + uv_mmask = (1UL << uv_hub_info->n_val) - 1; +Index: linux-2.6-tip/arch/x86/kernel/topology.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/topology.c ++++ linux-2.6-tip/arch/x86/kernel/topology.c +@@ -25,10 +25,10 @@ + * + * Send feedback to + */ +-#include +-#include + #include + #include ++#include ++#include + #include + + static DEFINE_PER_CPU(struct x86_cpu, cpu_devices); +@@ -47,6 +47,7 @@ int __ref arch_register_cpu(int num) + */ + if (num) + per_cpu(cpu_devices, num).cpu.hotpluggable = 1; ++ + return register_cpu(&per_cpu(cpu_devices, num).cpu, num); + } + EXPORT_SYMBOL(arch_register_cpu); +@@ -56,12 +57,13 @@ void arch_unregister_cpu(int num) + unregister_cpu(&per_cpu(cpu_devices, num).cpu); + } + EXPORT_SYMBOL(arch_unregister_cpu); +-#else ++#else /* CONFIG_HOTPLUG_CPU */ ++ + static int __init arch_register_cpu(int num) + { + return register_cpu(&per_cpu(cpu_devices, num).cpu, num); + } +-#endif /*CONFIG_HOTPLUG_CPU*/ ++#endif /* CONFIG_HOTPLUG_CPU */ + + static int __init topology_init(void) + { +@@ -70,11 +72,11 @@ static int __init topology_init(void) + #ifdef CONFIG_NUMA + for_each_online_node(i) + register_one_node(i); +-#endif /* CONFIG_NUMA */ ++#endif + + for_each_present_cpu(i) + arch_register_cpu(i); ++ + return 0; + } +- + subsys_initcall(topology_init); +Index: linux-2.6-tip/arch/x86/kernel/trampoline_32.S +=================================================================== +--- 
linux-2.6-tip.orig/arch/x86/kernel/trampoline_32.S ++++ linux-2.6-tip/arch/x86/kernel/trampoline_32.S +@@ -29,7 +29,7 @@ + + #include + #include +-#include ++#include + + /* We can free up trampoline after bootup if cpu hotplug is not supported. */ + #ifndef CONFIG_HOTPLUG_CPU +Index: linux-2.6-tip/arch/x86/kernel/trampoline_64.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/trampoline_64.S ++++ linux-2.6-tip/arch/x86/kernel/trampoline_64.S +@@ -25,10 +25,11 @@ + */ + + #include +-#include +-#include ++#include ++#include + #include + #include ++#include + + .section .rodata, "a", @progbits + +@@ -37,7 +38,7 @@ + ENTRY(trampoline_data) + r_base = . + cli # We should be safe anyway +- wbinvd ++ wbinvd + mov %cs, %ax # Code and data in the same place + mov %ax, %ds + mov %ax, %es +@@ -73,9 +74,8 @@ r_base = . + lidtl tidt - r_base # load idt with 0, 0 + lgdtl tgdt - r_base # load gdt with whatever is appropriate + +- xor %ax, %ax +- inc %ax # protected mode (PE) bit +- lmsw %ax # into protected mode ++ mov $X86_CR0_PE, %ax # protected mode (PE) bit ++ lmsw %ax # into protected mode + + # flush prefetch and jump to startup_32 + ljmpl *(startup_32_vector - r_base) +@@ -86,9 +86,8 @@ startup_32: + movl $__KERNEL_DS, %eax # Initialize the %ds segment register + movl %eax, %ds + +- xorl %eax, %eax +- btsl $5, %eax # Enable PAE mode +- movl %eax, %cr4 ++ movl $X86_CR4_PAE, %eax ++ movl %eax, %cr4 # Enable PAE mode + + # Setup trampoline 4 level pagetables + leal (trampoline_level4_pgt - r_base)(%esi), %eax +@@ -99,9 +98,9 @@ startup_32: + xorl %edx, %edx + wrmsr + +- xorl %eax, %eax +- btsl $31, %eax # Enable paging and in turn activate Long Mode +- btsl $0, %eax # Enable protected mode ++ # Enable paging and in turn activate Long Mode ++ # Enable protected mode ++ movl $(X86_CR0_PG | X86_CR0_PE), %eax + movl %eax, %cr0 + + /* +Index: linux-2.6-tip/arch/x86/kernel/traps.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/traps.c ++++ linux-2.6-tip/arch/x86/kernel/traps.c +@@ -46,6 +46,7 @@ + #endif + + #include ++#include + #include + #include + #include +@@ -54,15 +55,14 @@ + #include + #include + +-#include ++#include + + #ifdef CONFIG_X86_64 + #include + #include +-#include + #else + #include +-#include ++#include + #include + + #include "cpu/mcheck/mce.h" +@@ -92,9 +92,10 @@ static inline void conditional_sti(struc + local_irq_enable(); + } + +-static inline void preempt_conditional_sti(struct pt_regs *regs) ++static inline void preempt_conditional_sti(struct pt_regs *regs, int stack) + { +- inc_preempt_count(); ++ if (stack) ++ inc_preempt_count(); + if (regs->flags & X86_EFLAGS_IF) + local_irq_enable(); + } +@@ -105,11 +106,12 @@ static inline void conditional_cli(struc + local_irq_disable(); + } + +-static inline void preempt_conditional_cli(struct pt_regs *regs) ++static inline void preempt_conditional_cli(struct pt_regs *regs, int stack) + { + if (regs->flags & X86_EFLAGS_IF) + local_irq_disable(); +- dec_preempt_count(); ++ if (stack) ++ dec_preempt_count(); + } + + #ifdef CONFIG_X86_32 +@@ -119,47 +121,6 @@ die_if_kernel(const char *str, struct pt + if (!user_mode_vm(regs)) + die(str, regs, err); + } +- +-/* +- * Perform the lazy TSS's I/O bitmap copy. If the TSS has an +- * invalid offset set (the LAZY one) and the faulting thread has +- * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS, +- * we set the offset field correctly and return 1. 
+- */ +-static int lazy_iobitmap_copy(void) +-{ +- struct thread_struct *thread; +- struct tss_struct *tss; +- int cpu; +- +- cpu = get_cpu(); +- tss = &per_cpu(init_tss, cpu); +- thread = ¤t->thread; +- +- if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && +- thread->io_bitmap_ptr) { +- memcpy(tss->io_bitmap, thread->io_bitmap_ptr, +- thread->io_bitmap_max); +- /* +- * If the previously set map was extending to higher ports +- * than the current one, pad extra space with 0xff (no access). +- */ +- if (thread->io_bitmap_max < tss->io_bitmap_max) { +- memset((char *) tss->io_bitmap + +- thread->io_bitmap_max, 0xff, +- tss->io_bitmap_max - thread->io_bitmap_max); +- } +- tss->io_bitmap_max = thread->io_bitmap_max; +- tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; +- tss->io_bitmap_owner = thread; +- put_cpu(); +- +- return 1; +- } +- put_cpu(); +- +- return 0; +-} + #endif + + static void __kprobes +@@ -277,9 +238,9 @@ dotraplinkage void do_stack_segment(stru + if (notify_die(DIE_TRAP, "stack segment", regs, error_code, + 12, SIGBUS) == NOTIFY_STOP) + return; +- preempt_conditional_sti(regs); ++ preempt_conditional_sti(regs, STACKFAULT_STACK); + do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL); +- preempt_conditional_cli(regs); ++ preempt_conditional_cli(regs, STACKFAULT_STACK); + } + + dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) +@@ -310,11 +271,6 @@ do_general_protection(struct pt_regs *re + conditional_sti(regs); + + #ifdef CONFIG_X86_32 +- if (lazy_iobitmap_copy()) { +- /* restart the faulting instruction */ +- return; +- } +- + if (regs->flags & X86_VM_MASK) + goto gp_in_vm86; + #endif +@@ -517,9 +473,9 @@ dotraplinkage void __kprobes do_int3(str + return; + #endif + +- preempt_conditional_sti(regs); ++ preempt_conditional_sti(regs, DEBUG_STACK); + do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); +- preempt_conditional_cli(regs); ++ preempt_conditional_cli(regs, DEBUG_STACK); + } + + #ifdef CONFIG_X86_64 +@@ -581,6 +537,10 @@ dotraplinkage void __kprobes do_debug(st + + get_debugreg(condition, 6); + ++ /* Catch kmemcheck conditions first of all! */ ++ if (condition & DR_STEP && kmemcheck_trap(regs)) ++ return; ++ + /* + * The processor cleared BTF, so don't mark that we need it set. 
+ */ +@@ -592,7 +552,7 @@ dotraplinkage void __kprobes do_debug(st + return; + + /* It's safe to allow irq's after DR6 has been saved */ +- preempt_conditional_sti(regs); ++ preempt_conditional_sti(regs, DEBUG_STACK); + + /* Mask out spurious debug traps due to lazy DR7 setting */ + if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { +@@ -627,7 +587,7 @@ dotraplinkage void __kprobes do_debug(st + */ + clear_dr7: + set_debugreg(0, 7); +- preempt_conditional_cli(regs); ++ preempt_conditional_cli(regs, DEBUG_STACK); + return; + + #ifdef CONFIG_X86_32 +@@ -642,7 +602,7 @@ debug_vm86: + clear_TF_reenable: + set_tsk_thread_flag(tsk, TIF_SINGLESTEP); + regs->flags &= ~X86_EFLAGS_TF; +- preempt_conditional_cli(regs); ++ preempt_conditional_cli(regs, DEBUG_STACK); + return; + } + +@@ -914,19 +874,20 @@ void math_emulate(struct math_emu_info * + } + #endif /* CONFIG_MATH_EMULATION */ + +-dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) ++dotraplinkage void __kprobes ++do_device_not_available(struct pt_regs *regs, long error_code) + { + #ifdef CONFIG_X86_32 + if (read_cr0() & X86_CR0_EM) { + struct math_emu_info info = { }; + +- conditional_sti(®s); ++ conditional_sti(regs); + +- info.regs = ®s; ++ info.regs = regs; + math_emulate(&info); + } else { + math_state_restore(); /* interrupts still off */ +- conditional_sti(®s); ++ conditional_sti(regs); + } + #else + math_state_restore(); +@@ -942,7 +903,7 @@ dotraplinkage void do_iret_error(struct + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_BADSTK; +- info.si_addr = 0; ++ info.si_addr = NULL; + if (notify_die(DIE_TRAP, "iret exception", + regs, error_code, 32, SIGILL) == NOTIFY_STOP) + return; +@@ -991,8 +952,13 @@ void __init trap_init(void) + #endif + set_intr_gate(19, &simd_coprocessor_error); + ++ /* Reserve all the builtin and the syscall vector: */ ++ for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) ++ set_bit(i, used_vectors); ++ + #ifdef CONFIG_IA32_EMULATION + set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); ++ set_bit(IA32_SYSCALL_VECTOR, used_vectors); + #endif + + #ifdef CONFIG_X86_32 +@@ -1009,23 +975,15 @@ void __init trap_init(void) + } + + set_system_trap_gate(SYSCALL_VECTOR, &system_call); +-#endif +- +- /* Reserve all the builtin and the syscall vector: */ +- for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) +- set_bit(i, used_vectors); +- +-#ifdef CONFIG_X86_64 +- set_bit(IA32_SYSCALL_VECTOR, used_vectors); +-#else + set_bit(SYSCALL_VECTOR, used_vectors); + #endif ++ + /* + * Should be a barrier for any external CPU state: + */ + cpu_init(); + + #ifdef CONFIG_X86_32 +- trap_init_hook(); ++ x86_quirk_trap_init(); + #endif + } +Index: linux-2.6-tip/arch/x86/kernel/tsc.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/tsc.c ++++ linux-2.6-tip/arch/x86/kernel/tsc.c +@@ -17,20 +17,21 @@ + #include + #include + +-unsigned int cpu_khz; /* TSC clocks / usec, not used here */ ++unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ + EXPORT_SYMBOL(cpu_khz); +-unsigned int tsc_khz; ++ ++unsigned int __read_mostly tsc_khz; + EXPORT_SYMBOL(tsc_khz); + + /* + * TSC can be unstable due to cpufreq or due to unsynced TSCs + */ +-static int tsc_unstable; ++static int __read_mostly tsc_unstable; + + /* native_sched_clock() is called before tsc_init(), so + we must start with the TSC soft disabled to prevent + erroneous rdtsc usage on !cpu_has_tsc processors */ +-static int tsc_disabled = -1; ++static int __read_mostly 
tsc_disabled = -1; + + static int tsc_clocksource_reliable; + /* +@@ -793,7 +794,7 @@ __cpuinit int unsynchronized_tsc(void) + if (!cpu_has_tsc || tsc_unstable) + return 1; + +-#ifdef CONFIG_X86_SMP ++#ifdef CONFIG_SMP + if (apic_is_clustered_box()) + return 1; + #endif +Index: linux-2.6-tip/arch/x86/kernel/uv_time.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/kernel/uv_time.c +@@ -0,0 +1,393 @@ ++/* ++ * SGI RTC clock/timer routines. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ * ++ * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved. ++ * Copyright (c) Dimitri Sivanich ++ */ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define RTC_NAME "sgi_rtc" ++ ++static cycle_t uv_read_rtc(void); ++static int uv_rtc_next_event(unsigned long, struct clock_event_device *); ++static void uv_rtc_timer_setup(enum clock_event_mode, ++ struct clock_event_device *); ++ ++static struct clocksource clocksource_uv = { ++ .name = RTC_NAME, ++ .rating = 400, ++ .read = uv_read_rtc, ++ .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, ++ .shift = 10, ++ .flags = CLOCK_SOURCE_IS_CONTINUOUS, ++}; ++ ++static struct clock_event_device clock_event_device_uv = { ++ .name = RTC_NAME, ++ .features = CLOCK_EVT_FEAT_ONESHOT, ++ .shift = 20, ++ .rating = 400, ++ .irq = -1, ++ .set_next_event = uv_rtc_next_event, ++ .set_mode = uv_rtc_timer_setup, ++ .event_handler = NULL, ++}; ++ ++static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); ++ ++/* There is one of these allocated per node */ ++struct uv_rtc_timer_head { ++ spinlock_t lock; ++ /* next cpu waiting for timer, local node relative: */ ++ int next_cpu; ++ /* number of cpus on this node: */ ++ int ncpus; ++ struct { ++ int lcpu; /* systemwide logical cpu number */ ++ u64 expires; /* next timer expiration for this cpu */ ++ } cpu[1]; ++}; ++ ++/* ++ * Access to uv_rtc_timer_head via blade id. ++ */ ++static struct uv_rtc_timer_head **blade_info __read_mostly; ++ ++static int uv_rtc_enable; ++ ++/* ++ * Hardware interface routines ++ */ ++ ++/* Send IPIs to another node */ ++static void uv_rtc_send_IPI(int cpu) ++{ ++ unsigned long apicid, val; ++ int pnode; ++ ++ apicid = cpu_physical_id(cpu); ++ pnode = uv_apicid_to_pnode(apicid); ++ val = (1UL << UVH_IPI_INT_SEND_SHFT) | ++ (apicid << UVH_IPI_INT_APIC_ID_SHFT) | ++ (GENERIC_INTERRUPT_VECTOR << UVH_IPI_INT_VECTOR_SHFT); ++ ++ uv_write_global_mmr64(pnode, UVH_IPI_INT, val); ++} ++ ++/* Check for an RTC interrupt pending */ ++static int uv_intr_pending(int pnode) ++{ ++ return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) & ++ UVH_EVENT_OCCURRED0_RTC1_MASK; ++} ++ ++/* Setup interrupt and return non-zero if early expiration occurred. 
*/ ++static int uv_setup_intr(int cpu, u64 expires) ++{ ++ u64 val; ++ int pnode = uv_cpu_to_pnode(cpu); ++ ++ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, ++ UVH_RTC1_INT_CONFIG_M_MASK); ++ uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L); ++ ++ uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, ++ UVH_EVENT_OCCURRED0_RTC1_MASK); ++ ++ val = (GENERIC_INTERRUPT_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | ++ ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); ++ ++ /* Set configuration */ ++ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val); ++ /* Initialize comparator value */ ++ uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires); ++ ++ return (expires < uv_read_rtc() && !uv_intr_pending(pnode)); ++} ++ ++/* ++ * Per-cpu timer tracking routines ++ */ ++ ++static __init void uv_rtc_deallocate_timers(void) ++{ ++ int bid; ++ ++ for_each_possible_blade(bid) { ++ kfree(blade_info[bid]); ++ } ++ kfree(blade_info); ++} ++ ++/* Allocate per-node list of cpu timer expiration times. */ ++static __init int uv_rtc_allocate_timers(void) ++{ ++ int cpu; ++ ++ blade_info = kmalloc(uv_possible_blades * sizeof(void *), GFP_KERNEL); ++ if (!blade_info) ++ return -ENOMEM; ++ memset(blade_info, 0, uv_possible_blades * sizeof(void *)); ++ ++ for_each_present_cpu(cpu) { ++ int nid = cpu_to_node(cpu); ++ int bid = uv_cpu_to_blade_id(cpu); ++ int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; ++ struct uv_rtc_timer_head *head = blade_info[bid]; ++ ++ if (!head) { ++ head = kmalloc_node(sizeof(struct uv_rtc_timer_head) + ++ (uv_blade_nr_possible_cpus(bid) * ++ 2 * sizeof(u64)), ++ GFP_KERNEL, nid); ++ if (!head) { ++ uv_rtc_deallocate_timers(); ++ return -ENOMEM; ++ } ++ spin_lock_init(&head->lock); ++ head->ncpus = uv_blade_nr_possible_cpus(bid); ++ head->next_cpu = -1; ++ blade_info[bid] = head; ++ } ++ ++ head->cpu[bcpu].lcpu = cpu; ++ head->cpu[bcpu].expires = ULLONG_MAX; ++ } ++ ++ return 0; ++} ++ ++/* Find and set the next expiring timer. */ ++static void uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode) ++{ ++ u64 lowest = ULLONG_MAX; ++ int c, bcpu = -1; ++ ++ head->next_cpu = -1; ++ for (c = 0; c < head->ncpus; c++) { ++ u64 exp = head->cpu[c].expires; ++ if (exp < lowest) { ++ bcpu = c; ++ lowest = exp; ++ } ++ } ++ if (bcpu >= 0) { ++ head->next_cpu = bcpu; ++ c = head->cpu[bcpu].lcpu; ++ if (uv_setup_intr(c, lowest)) ++ /* If we didn't set it up in time, trigger */ ++ uv_rtc_send_IPI(c); ++ } else { ++ uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, ++ UVH_RTC1_INT_CONFIG_M_MASK); ++ } ++} ++ ++/* ++ * Set expiration time for current cpu. ++ * ++ * Returns 1 if we missed the expiration time. ++ */ ++static int uv_rtc_set_timer(int cpu, u64 expires) ++{ ++ int pnode = uv_cpu_to_pnode(cpu); ++ int bid = uv_cpu_to_blade_id(cpu); ++ struct uv_rtc_timer_head *head = blade_info[bid]; ++ int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; ++ u64 *t = &head->cpu[bcpu].expires; ++ unsigned long flags; ++ int next_cpu; ++ ++ spin_lock_irqsave(&head->lock, flags); ++ ++ next_cpu = head->next_cpu; ++ *t = expires; ++ /* Will this one be next to go off? */ ++ if (next_cpu < 0 || bcpu == next_cpu || ++ expires < head->cpu[next_cpu].expires) { ++ head->next_cpu = bcpu; ++ if (uv_setup_intr(cpu, expires)) { ++ *t = ULLONG_MAX; ++ uv_rtc_find_next_timer(head, pnode); ++ spin_unlock_irqrestore(&head->lock, flags); ++ return 1; ++ } ++ } ++ ++ spin_unlock_irqrestore(&head->lock, flags); ++ return 0; ++} ++ ++/* ++ * Unset expiration time for current cpu. 
++ * ++ * Returns 1 if this timer was pending. ++ */ ++static int uv_rtc_unset_timer(int cpu) ++{ ++ int pnode = uv_cpu_to_pnode(cpu); ++ int bid = uv_cpu_to_blade_id(cpu); ++ struct uv_rtc_timer_head *head = blade_info[bid]; ++ int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; ++ u64 *t = &head->cpu[bcpu].expires; ++ unsigned long flags; ++ int rc = 0; ++ ++ spin_lock_irqsave(&head->lock, flags); ++ ++ if (head->next_cpu == bcpu && uv_read_rtc() >= *t) ++ rc = 1; ++ ++ *t = ULLONG_MAX; ++ ++ /* Was the hardware setup for this timer? */ ++ if (head->next_cpu == bcpu) ++ uv_rtc_find_next_timer(head, pnode); ++ ++ spin_unlock_irqrestore(&head->lock, flags); ++ ++ return rc; ++} ++ ++ ++/* ++ * Kernel interface routines. ++ */ ++ ++/* ++ * Read the RTC. ++ */ ++static cycle_t uv_read_rtc(void) ++{ ++ return (cycle_t)uv_read_local_mmr(UVH_RTC); ++} ++ ++/* ++ * Program the next event, relative to now ++ */ ++static int uv_rtc_next_event(unsigned long delta, ++ struct clock_event_device *ced) ++{ ++ int ced_cpu = cpumask_first(ced->cpumask); ++ ++ return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc()); ++} ++ ++/* ++ * Setup the RTC timer in oneshot mode ++ */ ++static void uv_rtc_timer_setup(enum clock_event_mode mode, ++ struct clock_event_device *evt) ++{ ++ int ced_cpu = cpumask_first(evt->cpumask); ++ ++ switch (mode) { ++ case CLOCK_EVT_MODE_PERIODIC: ++ case CLOCK_EVT_MODE_ONESHOT: ++ case CLOCK_EVT_MODE_RESUME: ++ /* Nothing to do here yet */ ++ break; ++ case CLOCK_EVT_MODE_UNUSED: ++ case CLOCK_EVT_MODE_SHUTDOWN: ++ uv_rtc_unset_timer(ced_cpu); ++ break; ++ } ++} ++ ++static void uv_rtc_interrupt(void) ++{ ++ struct clock_event_device *ced = &__get_cpu_var(cpu_ced); ++ int cpu = smp_processor_id(); ++ ++ if (!ced || !ced->event_handler) ++ return; ++ ++ if (uv_rtc_unset_timer(cpu) != 1) ++ return; ++ ++ ced->event_handler(ced); ++} ++ ++static int __init uv_enable_rtc(char *str) ++{ ++ uv_rtc_enable = 1; ++ ++ return 1; ++} ++__setup("uvrtc", uv_enable_rtc); ++ ++static __init void uv_rtc_register_clockevents(struct work_struct *dummy) ++{ ++ struct clock_event_device *ced = &__get_cpu_var(cpu_ced); ++ ++ *ced = clock_event_device_uv; ++ ced->cpumask = cpumask_of(smp_processor_id()); ++ clockevents_register_device(ced); ++} ++ ++static __init int uv_rtc_setup_clock(void) ++{ ++ int rc; ++ ++ if (!uv_rtc_enable || !is_uv_system() || generic_interrupt_extension) ++ return -ENODEV; ++ ++ generic_interrupt_extension = uv_rtc_interrupt; ++ ++ clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, ++ clocksource_uv.shift); ++ ++ rc = clocksource_register(&clocksource_uv); ++ if (rc) { ++ generic_interrupt_extension = NULL; ++ return rc; ++ } ++ ++ /* Setup and register clockevents */ ++ rc = uv_rtc_allocate_timers(); ++ if (rc) { ++ clocksource_unregister(&clocksource_uv); ++ generic_interrupt_extension = NULL; ++ return rc; ++ } ++ ++ clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second, ++ NSEC_PER_SEC, clock_event_device_uv.shift); ++ ++ clock_event_device_uv.min_delta_ns = NSEC_PER_SEC / ++ sn_rtc_cycles_per_second; ++ ++ clock_event_device_uv.max_delta_ns = clocksource_uv.mask * ++ (NSEC_PER_SEC / sn_rtc_cycles_per_second); ++ ++ rc = schedule_on_each_cpu(uv_rtc_register_clockevents); ++ if (rc) { ++ clocksource_unregister(&clocksource_uv); ++ generic_interrupt_extension = NULL; ++ uv_rtc_deallocate_timers(); ++ } ++ ++ return rc; ++} ++arch_initcall(uv_rtc_setup_clock); +Index: linux-2.6-tip/arch/x86/kernel/visws_quirks.c 
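The uv_time.c file added above registers the SGI RTC as a clocksource and converts its frequency with clocksource_hz2mult(sn_rtc_cycles_per_second, clocksource_uv.shift), i.e. it picks mult so that (cycles * mult) >> shift yields nanoseconds. A standalone sketch of that fixed-point conversion; the 5 MHz figure is only an assumed stand-in for sn_rtc_cycles_per_second, and hz2mult() below is a local imitation, not the kernel helper:

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

/* Roughly what clocksource_hz2mult() computes:
 * mult = (NSEC_PER_SEC << shift) / hz, rounded to nearest. */
static uint32_t hz2mult(uint32_t hz, uint32_t shift)
{
    uint64_t tmp = NSEC_PER_SEC << shift;

    tmp += hz / 2;
    return (uint32_t)(tmp / hz);
}

int main(void)
{
    uint32_t hz    = 5000000;   /* assumed RTC rate, stand-in for sn_rtc_cycles_per_second */
    uint32_t shift = 10;        /* matches clocksource_uv.shift in the patch */
    uint32_t mult  = hz2mult(hz, shift);
    uint64_t one_second_of_cycles = hz;

    printf("mult=%u, 1s of cycles -> %llu ns\n", mult,
           (unsigned long long)((one_second_of_cycles * mult) >> shift));
    return 0;
}

A smaller shift keeps mult small and so leaves more headroom before cycles * mult can overflow 64 bits, at the cost of a little conversion precision, which is presumably why a modest shift of 10 is used for a 64-bit-wide RTC.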
+=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/visws_quirks.c ++++ linux-2.6-tip/arch/x86/kernel/visws_quirks.c +@@ -24,18 +24,14 @@ + + #include + #include +-#include + #include + #include + #include + #include ++#include + #include + #include + +-#include +- +-#include "mach_apic.h" +- + #include + + #include +@@ -49,8 +45,6 @@ + + extern int no_broadcast; + +-#include +- + char visws_board_type = -1; + char visws_board_rev = -1; + +@@ -200,7 +194,7 @@ static void __init MP_processor_info(str + return; + } + +- apic_cpus = apicid_to_cpu_present(m->apicid); ++ apic_cpus = apic->apicid_to_cpu_present(m->apicid); + physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); + /* + * Validate version +@@ -584,7 +578,7 @@ static struct irq_chip piix4_virtual_irq + static irqreturn_t piix4_master_intr(int irq, void *dev_id) + { + int realirq; +- irq_desc_t *desc; ++ struct irq_desc *desc; + unsigned long flags; + + spin_lock_irqsave(&i8259A_lock, flags); +@@ -649,11 +643,13 @@ out_unlock: + static struct irqaction master_action = { + .handler = piix4_master_intr, + .name = "PIIX4-8259", ++ .flags = IRQF_NODELAY, + }; + + static struct irqaction cascade_action = { + .handler = no_action, + .name = "cascade", ++ .flags = IRQF_NODELAY, + }; + + +Index: linux-2.6-tip/arch/x86/kernel/vm86_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/vm86_32.c ++++ linux-2.6-tip/arch/x86/kernel/vm86_32.c +@@ -137,6 +137,7 @@ struct pt_regs *save_v86_state(struct ke + local_irq_enable(); + + if (!current->thread.vm86_info) { ++ local_irq_disable(); + printk("no vm86_info: BAD\n"); + do_exit(SIGSEGV); + } +@@ -158,7 +159,7 @@ struct pt_regs *save_v86_state(struct ke + ret = KVM86->regs32; + + ret->fs = current->thread.saved_fs; +- loadsegment(gs, current->thread.saved_gs); ++ set_user_gs(ret, current->thread.saved_gs); + + return ret; + } +@@ -197,9 +198,9 @@ out: + static int do_vm86_irq_handling(int subfunction, int irqnumber); + static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); + +-asmlinkage int sys_vm86old(struct pt_regs regs) ++int sys_vm86old(struct pt_regs *regs) + { +- struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx; ++ struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx; + struct kernel_vm86_struct info; /* declare this _on top_, + * this avoids wasting of stack space. + * This remains on the stack until we +@@ -218,7 +219,7 @@ asmlinkage int sys_vm86old(struct pt_reg + if (tmp) + goto out; + memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); +- info.regs32 = ®s; ++ info.regs32 = regs; + tsk->thread.vm86_info = v86; + do_sys_vm86(&info, tsk); + ret = 0; /* we never return here */ +@@ -227,7 +228,7 @@ out: + } + + +-asmlinkage int sys_vm86(struct pt_regs regs) ++int sys_vm86(struct pt_regs *regs) + { + struct kernel_vm86_struct info; /* declare this _on top_, + * this avoids wasting of stack space. 
+@@ -239,12 +240,12 @@ asmlinkage int sys_vm86(struct pt_regs r + struct vm86plus_struct __user *v86; + + tsk = current; +- switch (regs.bx) { ++ switch (regs->bx) { + case VM86_REQUEST_IRQ: + case VM86_FREE_IRQ: + case VM86_GET_IRQ_BITS: + case VM86_GET_AND_RESET_IRQ: +- ret = do_vm86_irq_handling(regs.bx, (int)regs.cx); ++ ret = do_vm86_irq_handling(regs->bx, (int)regs->cx); + goto out; + case VM86_PLUS_INSTALL_CHECK: + /* +@@ -261,14 +262,14 @@ asmlinkage int sys_vm86(struct pt_regs r + ret = -EPERM; + if (tsk->thread.saved_sp0) + goto out; +- v86 = (struct vm86plus_struct __user *)regs.cx; ++ v86 = (struct vm86plus_struct __user *)regs->cx; + tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, + offsetof(struct kernel_vm86_struct, regs32) - + sizeof(info.regs)); + ret = -EFAULT; + if (tmp) + goto out; +- info.regs32 = ®s; ++ info.regs32 = regs; + info.vm86plus.is_vm86pus = 1; + tsk->thread.vm86_info = (struct vm86_struct __user *)v86; + do_sys_vm86(&info, tsk); +@@ -323,7 +324,7 @@ static void do_sys_vm86(struct kernel_vm + info->regs32->ax = 0; + tsk->thread.saved_sp0 = tsk->thread.sp0; + tsk->thread.saved_fs = info->regs32->fs; +- savesegment(gs, tsk->thread.saved_gs); ++ tsk->thread.saved_gs = get_user_gs(info->regs32); + + tss = &per_cpu(init_tss, get_cpu()); + tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; +Index: linux-2.6-tip/arch/x86/kernel/vmi_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/vmi_32.c ++++ linux-2.6-tip/arch/x86/kernel/vmi_32.c +@@ -395,11 +395,6 @@ static void vmi_set_pte_atomic(pte_t *pt + vmi_ops.update_pte(ptep, VMI_PAGE_PT); + } + +-static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) +-{ +- vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1)); +-} +- + static void vmi_set_pud(pud_t *pudp, pud_t pudval) + { + /* Um, eww */ +@@ -680,10 +675,11 @@ static inline int __init activate_vmi(vo + para_fill(pv_mmu_ops.write_cr2, SetCR2); + para_fill(pv_mmu_ops.write_cr3, SetCR3); + para_fill(pv_cpu_ops.write_cr4, SetCR4); +- para_fill(pv_irq_ops.save_fl, GetInterruptMask); +- para_fill(pv_irq_ops.restore_fl, SetInterruptMask); +- para_fill(pv_irq_ops.irq_disable, DisableInterrupts); +- para_fill(pv_irq_ops.irq_enable, EnableInterrupts); ++ ++ para_fill(pv_irq_ops.save_fl.func, GetInterruptMask); ++ para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask); ++ para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts); ++ para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts); + + para_fill(pv_cpu_ops.wbinvd, WBINVD); + para_fill(pv_cpu_ops.read_tsc, RDTSC); +@@ -749,7 +745,6 @@ static inline int __init activate_vmi(vo + pv_mmu_ops.set_pmd = vmi_set_pmd; + #ifdef CONFIG_X86_PAE + pv_mmu_ops.set_pte_atomic = vmi_set_pte_atomic; +- pv_mmu_ops.set_pte_present = vmi_set_pte_present; + pv_mmu_ops.set_pud = vmi_set_pud; + pv_mmu_ops.pte_clear = vmi_pte_clear; + pv_mmu_ops.pmd_clear = vmi_pmd_clear; +@@ -797,8 +792,8 @@ static inline int __init activate_vmi(vo + #endif + + #ifdef CONFIG_X86_LOCAL_APIC +- para_fill(apic_ops->read, APICRead); +- para_fill(apic_ops->write, APICWrite); ++ para_fill(apic->read, APICRead); ++ para_fill(apic->write, APICWrite); + #endif + + /* +Index: linux-2.6-tip/arch/x86/kernel/vmiclock_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/vmiclock_32.c ++++ linux-2.6-tip/arch/x86/kernel/vmiclock_32.c +@@ -28,7 +28,6 @@ + + #include + #include 
+-#include + #include + #include + #include +@@ -256,7 +255,7 @@ void __devinit vmi_time_bsp_init(void) + */ + clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); + local_irq_disable(); +-#ifdef CONFIG_X86_SMP ++#ifdef CONFIG_SMP + /* + * XXX handle_percpu_irq only defined for SMP; we need to switch over + * to using it, since this is a local interrupt, which each CPU must +@@ -288,8 +287,7 @@ static struct clocksource clocksource_vm + static cycle_t read_real_cycles(void) + { + cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); +- return ret >= clocksource_vmi.cycle_last ? +- ret : clocksource_vmi.cycle_last; ++ return max(ret, clocksource_vmi.cycle_last); + } + + static struct clocksource clocksource_vmi = { +Index: linux-2.6-tip/arch/x86/kernel/vmlinux_32.lds.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/vmlinux_32.lds.S ++++ linux-2.6-tip/arch/x86/kernel/vmlinux_32.lds.S +@@ -12,7 +12,7 @@ + + #include + #include +-#include ++#include + #include + #include + +@@ -178,14 +178,7 @@ SECTIONS + __initramfs_end = .; + } + #endif +- . = ALIGN(PAGE_SIZE); +- .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { +- __per_cpu_start = .; +- *(.data.percpu.page_aligned) +- *(.data.percpu) +- *(.data.percpu.shared_aligned) +- __per_cpu_end = .; +- } ++ PERCPU(PAGE_SIZE) + . = ALIGN(PAGE_SIZE); + /* freed after init ends here */ + +@@ -196,15 +189,24 @@ SECTIONS + *(.bss) + . = ALIGN(4); + __bss_stop = .; +- _end = . ; +- /* This is where the kernel creates the early boot page tables */ ++ } ++ ++ .brk : AT(ADDR(.brk) - LOAD_OFFSET) { + . = ALIGN(PAGE_SIZE); +- pg0 = . ; ++ __brk_base = . ; ++ . += 64 * 1024 ; /* 64k alignment slop space */ ++ *(.brk_reservation) /* areas brk users have reserved */ ++ __brk_limit = . ; ++ } ++ ++ .end : AT(ADDR(.end) - LOAD_OFFSET) { ++ _end = . ; + } + + /* Sections to be discarded */ + /DISCARD/ : { + *(.exitcall.exit) ++ *(.discard) + } + + STABS_DEBUG +@@ -212,6 +214,12 @@ SECTIONS + DWARF_DEBUG + } + ++/* ++ * Build-time check on the image size: ++ */ ++ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), ++ "kernel image bigger than KERNEL_IMAGE_SIZE") ++ + #ifdef CONFIG_KEXEC + /* Link time checks */ + #include +Index: linux-2.6-tip/arch/x86/kernel/vmlinux_64.lds.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/vmlinux_64.lds.S ++++ linux-2.6-tip/arch/x86/kernel/vmlinux_64.lds.S +@@ -5,7 +5,8 @@ + #define LOAD_OFFSET __START_KERNEL_map + + #include +-#include ++#include ++#include + + #undef i386 /* in case the preprocessor is a 32bit one */ + +@@ -13,20 +14,23 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86 + OUTPUT_ARCH(i386:x86-64) + ENTRY(phys_startup_64) + jiffies_64 = jiffies; +-_proxy_pda = 1; + PHDRS { + text PT_LOAD FLAGS(5); /* R_E */ + data PT_LOAD FLAGS(7); /* RWE */ + user PT_LOAD FLAGS(7); /* RWE */ + data.init PT_LOAD FLAGS(7); /* RWE */ ++#ifdef CONFIG_SMP ++ percpu PT_LOAD FLAGS(7); /* RWE */ ++#endif ++ data.init2 PT_LOAD FLAGS(7); /* RWE */ + note PT_NOTE FLAGS(0); /* ___ */ + } + SECTIONS + { + . 
= __START_KERNEL; + phys_startup_64 = startup_64 - LOAD_OFFSET; +- _text = .; /* Text and read-only data */ + .text : AT(ADDR(.text) - LOAD_OFFSET) { ++ _text = .; /* Text and read-only data */ + /* First the code that has to be first for bootstrapping */ + *(.text.head) + _stext = .; +@@ -57,13 +61,13 @@ SECTIONS + .data : AT(ADDR(.data) - LOAD_OFFSET) { + DATA_DATA + CONSTRUCTORS ++ _edata = .; /* End of data section */ + } :data + +- _edata = .; /* End of data section */ + + . = ALIGN(PAGE_SIZE); +- . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { ++ . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); + *(.data.cacheline_aligned) + } + . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); +@@ -121,29 +125,29 @@ SECTIONS + #undef VVIRT_OFFSET + #undef VVIRT + +- . = ALIGN(THREAD_SIZE); /* init_task */ + .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { ++ . = ALIGN(THREAD_SIZE); /* init_task */ + *(.data.init_task) + }:data.init + +- . = ALIGN(PAGE_SIZE); + .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { ++ . = ALIGN(PAGE_SIZE); + *(.data.page_aligned) + } + +- /* might get freed after init */ +- . = ALIGN(PAGE_SIZE); +- __smp_alt_begin = .; +- __smp_locks = .; + .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { ++ /* might get freed after init */ ++ . = ALIGN(PAGE_SIZE); ++ __smp_alt_begin = .; ++ __smp_locks = .; + *(.smp_locks) ++ __smp_locks_end = .; ++ . = ALIGN(PAGE_SIZE); ++ __smp_alt_end = .; + } +- __smp_locks_end = .; +- . = ALIGN(PAGE_SIZE); +- __smp_alt_end = .; + + . = ALIGN(PAGE_SIZE); /* Init code and data */ +- __init_begin = .; ++ __init_begin = .; /* paired with __init_end */ + .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { + _sinittext = .; + INIT_TEXT +@@ -155,40 +159,42 @@ SECTIONS + __initdata_end = .; + } + +- . = ALIGN(16); +- __setup_start = .; +- .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { *(.init.setup) } +- __setup_end = .; +- __initcall_start = .; ++ .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { ++ . = ALIGN(16); ++ __setup_start = .; ++ *(.init.setup) ++ __setup_end = .; ++ } + .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { ++ __initcall_start = .; + INITCALLS ++ __initcall_end = .; + } +- __initcall_end = .; +- __con_initcall_start = .; + .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { ++ __con_initcall_start = .; + *(.con_initcall.init) ++ __con_initcall_end = .; + } +- __con_initcall_end = .; +- __x86_cpu_dev_start = .; + .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { ++ __x86_cpu_dev_start = .; + *(.x86_cpu_dev.init) ++ __x86_cpu_dev_end = .; + } +- __x86_cpu_dev_end = .; + SECURITY_INIT + + . = ALIGN(8); + .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { +- __parainstructions = .; ++ __parainstructions = .; + *(.parainstructions) +- __parainstructions_end = .; ++ __parainstructions_end = .; + } + +- . = ALIGN(8); +- __alt_instructions = .; + .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { ++ . = ALIGN(8); ++ __alt_instructions = .; + *(.altinstructions) ++ __alt_instructions_end = .; + } +- __alt_instructions_end = .; + .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { + *(.altinstr_replacement) + } +@@ -203,28 +209,53 @@ SECTIONS + + #ifdef CONFIG_BLK_DEV_INITRD + . 
= ALIGN(PAGE_SIZE); +- __initramfs_start = .; +- .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } +- __initramfs_end = .; ++ .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { ++ __initramfs_start = .; ++ *(.init.ramfs) ++ __initramfs_end = .; ++ } + #endif + ++#ifdef CONFIG_SMP ++ /* ++ * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the ++ * output PHDR, so the next output section - __data_nosave - should ++ * start another section data.init2. Also, pda should be at the head of ++ * percpu area. Preallocate it and define the percpu offset symbol ++ * so that it can be accessed as a percpu variable. ++ */ ++ . = ALIGN(PAGE_SIZE); ++ PERCPU_VADDR(0, :percpu) ++#else + PERCPU(PAGE_SIZE) ++#endif + + . = ALIGN(PAGE_SIZE); + __init_end = .; + +- . = ALIGN(PAGE_SIZE); +- __nosave_begin = .; +- .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } +- . = ALIGN(PAGE_SIZE); +- __nosave_end = .; ++ .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { ++ . = ALIGN(PAGE_SIZE); ++ __nosave_begin = .; ++ *(.data.nosave) ++ . = ALIGN(PAGE_SIZE); ++ __nosave_end = .; ++ } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */ + +- __bss_start = .; /* BSS */ + .bss : AT(ADDR(.bss) - LOAD_OFFSET) { ++ . = ALIGN(PAGE_SIZE); ++ __bss_start = .; /* BSS */ + *(.bss.page_aligned) + *(.bss) +- } +- __bss_stop = .; ++ __bss_stop = .; ++ } ++ ++ .brk : AT(ADDR(.brk) - LOAD_OFFSET) { ++ . = ALIGN(PAGE_SIZE); ++ __brk_base = . ; ++ . += 64 * 1024 ; /* 64k alignment slop space */ ++ *(.brk_reservation) /* areas brk users have reserved */ ++ __brk_limit = . ; ++ } + + _end = . ; + +@@ -232,6 +263,7 @@ SECTIONS + /DISCARD/ : { + *(.exitcall.exit) + *(.eh_frame) ++ *(.discard) + } + + STABS_DEBUG +@@ -239,8 +271,28 @@ SECTIONS + DWARF_DEBUG + } + ++ /* ++ * Per-cpu symbols which need to be offset from __per_cpu_load ++ * for the boot processor. ++ */ ++#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load ++INIT_PER_CPU(gdt_page); ++INIT_PER_CPU(irq_stack_union); ++ + /* + * Build-time check on the image size: + */ + ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), + "kernel image bigger than KERNEL_IMAGE_SIZE") ++ ++#ifdef CONFIG_SMP ++ASSERT((per_cpu__irq_stack_union == 0), ++ "irq_stack_union is not at start of per-cpu area"); ++#endif ++ ++#ifdef CONFIG_KEXEC ++#include ++ ++ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, ++ "kexec control code size is too big") ++#endif +Index: linux-2.6-tip/arch/x86/kernel/vsmp_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/vsmp_64.c ++++ linux-2.6-tip/arch/x86/kernel/vsmp_64.c +@@ -22,7 +22,7 @@ + #include + #include + +-#if defined CONFIG_PCI && defined CONFIG_PARAVIRT ++#ifdef CONFIG_PARAVIRT + /* + * Interrupt control on vSMPowered systems: + * ~AC is a shadow of IF. 
If IF is 'on' AC should be 'off' +@@ -37,6 +37,7 @@ static unsigned long vsmp_save_fl(void) + flags &= ~X86_EFLAGS_IF; + return flags; + } ++PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl); + + static void vsmp_restore_fl(unsigned long flags) + { +@@ -46,6 +47,7 @@ static void vsmp_restore_fl(unsigned lon + flags |= X86_EFLAGS_AC; + native_restore_fl(flags); + } ++PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); + + static void vsmp_irq_disable(void) + { +@@ -53,6 +55,7 @@ static void vsmp_irq_disable(void) + + native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); + } ++PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); + + static void vsmp_irq_enable(void) + { +@@ -60,6 +63,7 @@ static void vsmp_irq_enable(void) + + native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); + } ++PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable); + + static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, + unsigned long addr, unsigned len) +@@ -90,10 +94,10 @@ static void __init set_vsmp_pv_ops(void) + cap, ctl); + if (cap & ctl & (1 << 4)) { + /* Setup irq ops and turn on vSMP IRQ fastpath handling */ +- pv_irq_ops.irq_disable = vsmp_irq_disable; +- pv_irq_ops.irq_enable = vsmp_irq_enable; +- pv_irq_ops.save_fl = vsmp_save_fl; +- pv_irq_ops.restore_fl = vsmp_restore_fl; ++ pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable); ++ pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable); ++ pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl); ++ pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl); + pv_init_ops.patch = vsmp_patch; + + ctl &= ~(1 << 4); +@@ -110,7 +114,6 @@ static void __init set_vsmp_pv_ops(void) + } + #endif + +-#ifdef CONFIG_PCI + static int is_vsmp = -1; + + static void __init detect_vsmp_box(void) +@@ -135,15 +138,6 @@ int is_vsmp_box(void) + return 0; + } + } +-#else +-static void __init detect_vsmp_box(void) +-{ +-} +-int is_vsmp_box(void) +-{ +- return 0; +-} +-#endif + + void __init vsmp_init(void) + { +Index: linux-2.6-tip/arch/x86/kernel/x8664_ksyms_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/x8664_ksyms_64.c ++++ linux-2.6-tip/arch/x86/kernel/x8664_ksyms_64.c +@@ -58,5 +58,3 @@ EXPORT_SYMBOL(__memcpy); + EXPORT_SYMBOL(empty_zero_page); + EXPORT_SYMBOL(init_level4_pgt); + EXPORT_SYMBOL(load_gs_index); +- +-EXPORT_SYMBOL(_proxy_pda); +Index: linux-2.6-tip/arch/x86/kvm/Kconfig +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kvm/Kconfig ++++ linux-2.6-tip/arch/x86/kvm/Kconfig +@@ -59,7 +59,8 @@ config KVM_AMD + + config KVM_TRACE + bool "KVM trace support" +- depends on KVM && MARKERS && SYSFS ++ depends on KVM && SYSFS ++ select MARKERS + select RELAY + select DEBUG_FS + default n +Index: linux-2.6-tip/arch/x86/lguest/Kconfig +=================================================================== +--- linux-2.6-tip.orig/arch/x86/lguest/Kconfig ++++ linux-2.6-tip/arch/x86/lguest/Kconfig +@@ -3,7 +3,6 @@ config LGUEST_GUEST + select PARAVIRT + depends on X86_32 + depends on !X86_PAE +- depends on !X86_VOYAGER + select VIRTIO + select VIRTIO_RING + select VIRTIO_CONSOLE +Index: linux-2.6-tip/arch/x86/lguest/boot.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/lguest/boot.c ++++ linux-2.6-tip/arch/x86/lguest/boot.c +@@ -173,24 +173,29 @@ static unsigned long save_fl(void) + { + return lguest_data.irq_enabled; + } ++PV_CALLEE_SAVE_REGS_THUNK(save_fl); + + /* restore_flags() just sets the flags back 
to the value given. */ + static void restore_fl(unsigned long flags) + { + lguest_data.irq_enabled = flags; + } ++PV_CALLEE_SAVE_REGS_THUNK(restore_fl); + + /* Interrupts go off... */ + static void irq_disable(void) + { + lguest_data.irq_enabled = 0; + } ++PV_CALLEE_SAVE_REGS_THUNK(irq_disable); + + /* Interrupts go on... */ + static void irq_enable(void) + { + lguest_data.irq_enabled = X86_EFLAGS_IF; + } ++PV_CALLEE_SAVE_REGS_THUNK(irq_enable); ++ + /*:*/ + /*M:003 Note that we don't check for outstanding interrupts when we re-enable + * them (or when we unmask an interrupt). This seems to work for the moment, +@@ -278,7 +283,7 @@ static void lguest_load_tls(struct threa + /* There's one problem which normal hardware doesn't have: the Host + * can't handle us removing entries we're currently using. So we clear + * the GS register here: if it's needed it'll be reloaded anyway. */ +- loadsegment(gs, 0); ++ lazy_load_gs(0); + lazy_hcall(LHCALL_LOAD_TLS, __pa(&t->tls_array), cpu, 0); + } + +@@ -836,13 +841,14 @@ static u32 lguest_apic_safe_wait_icr_idl + return 0; + } + +-static struct apic_ops lguest_basic_apic_ops = { +- .read = lguest_apic_read, +- .write = lguest_apic_write, +- .icr_read = lguest_apic_icr_read, +- .icr_write = lguest_apic_icr_write, +- .wait_icr_idle = lguest_apic_wait_icr_idle, +- .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle, ++static void set_lguest_basic_apic_ops(void) ++{ ++ apic->read = lguest_apic_read; ++ apic->write = lguest_apic_write; ++ apic->icr_read = lguest_apic_icr_read; ++ apic->icr_write = lguest_apic_icr_write; ++ apic->wait_icr_idle = lguest_apic_wait_icr_idle; ++ apic->safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle; + }; + #endif + +@@ -997,10 +1003,10 @@ __init void lguest_init(void) + + /* interrupt-related operations */ + pv_irq_ops.init_IRQ = lguest_init_IRQ; +- pv_irq_ops.save_fl = save_fl; +- pv_irq_ops.restore_fl = restore_fl; +- pv_irq_ops.irq_disable = irq_disable; +- pv_irq_ops.irq_enable = irq_enable; ++ pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); ++ pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl); ++ pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); ++ pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable); + pv_irq_ops.safe_halt = lguest_safe_halt; + + /* init-time operations */ +@@ -1045,7 +1051,7 @@ __init void lguest_init(void) + + #ifdef CONFIG_X86_LOCAL_APIC + /* apic read/write intercepts */ +- apic_ops = &lguest_basic_apic_ops; ++ set_lguest_basic_apic_ops(); + #endif + + /* time operations */ +@@ -1060,14 +1066,6 @@ __init void lguest_init(void) + * lguest_init() where the rest of the fairly chaotic boot setup + * occurs. */ + +- /* The native boot code sets up initial page tables immediately after +- * the kernel itself, and sets init_pg_tables_end so they're not +- * clobbered. The Launcher places our initial pagetables somewhere at +- * the top of our physical memory, so we don't need extra space: set +- * init_pg_tables_end to the end of the kernel. */ +- init_pg_tables_start = __pa(pg0); +- init_pg_tables_end = __pa(pg0); +- + /* As described in head_32.S, we map the first 128M of memory. 
*/ + max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; + +Index: linux-2.6-tip/arch/x86/lib/getuser.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/lib/getuser.S ++++ linux-2.6-tip/arch/x86/lib/getuser.S +@@ -28,7 +28,7 @@ + + #include + #include +-#include ++#include + #include + #include + #include +Index: linux-2.6-tip/arch/x86/lib/memcpy_64.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/lib/memcpy_64.S ++++ linux-2.6-tip/arch/x86/lib/memcpy_64.S +@@ -1,30 +1,38 @@ + /* Copyright 2002 Andi Kleen */ + + #include +-#include ++ + #include ++#include + + /* + * memcpy - Copy a memory block. + * +- * Input: +- * rdi destination +- * rsi source +- * rdx count +- * ++ * Input: ++ * rdi destination ++ * rsi source ++ * rdx count ++ * + * Output: + * rax original destination +- */ ++ */ + ++/* ++ * memcpy_c() - fast string ops (REP MOVSQ) based variant. ++ * ++ * Calls to this get patched into the kernel image via the ++ * alternative instructions framework: ++ */ + ALIGN + memcpy_c: + CFI_STARTPROC +- movq %rdi,%rax +- movl %edx,%ecx +- shrl $3,%ecx +- andl $7,%edx ++ movq %rdi, %rax ++ ++ movl %edx, %ecx ++ shrl $3, %ecx ++ andl $7, %edx + rep movsq +- movl %edx,%ecx ++ movl %edx, %ecx + rep movsb + ret + CFI_ENDPROC +@@ -33,99 +41,110 @@ ENDPROC(memcpy_c) + ENTRY(__memcpy) + ENTRY(memcpy) + CFI_STARTPROC +- pushq %rbx +- CFI_ADJUST_CFA_OFFSET 8 +- CFI_REL_OFFSET rbx, 0 +- movq %rdi,%rax + +- movl %edx,%ecx +- shrl $6,%ecx ++ /* ++ * Put the number of full 64-byte blocks into %ecx. ++ * Tail portion is handled at the end: ++ */ ++ movq %rdi, %rax ++ movl %edx, %ecx ++ shrl $6, %ecx + jz .Lhandle_tail + + .p2align 4 + .Lloop_64: ++ /* ++ * We decrement the loop index here - and the zero-flag is ++ * checked at the end of the loop (instructions inbetween do ++ * not change the zero flag): ++ */ + decl %ecx + +- movq (%rsi),%r11 +- movq 8(%rsi),%r8 +- +- movq %r11,(%rdi) +- movq %r8,1*8(%rdi) +- +- movq 2*8(%rsi),%r9 +- movq 3*8(%rsi),%r10 ++ /* ++ * Move in blocks of 4x16 bytes: ++ */ ++ movq 0*8(%rsi), %r11 ++ movq 1*8(%rsi), %r8 ++ movq %r11, 0*8(%rdi) ++ movq %r8, 1*8(%rdi) ++ ++ movq 2*8(%rsi), %r9 ++ movq 3*8(%rsi), %r10 ++ movq %r9, 2*8(%rdi) ++ movq %r10, 3*8(%rdi) ++ ++ movq 4*8(%rsi), %r11 ++ movq 5*8(%rsi), %r8 ++ movq %r11, 4*8(%rdi) ++ movq %r8, 5*8(%rdi) ++ ++ movq 6*8(%rsi), %r9 ++ movq 7*8(%rsi), %r10 ++ movq %r9, 6*8(%rdi) ++ movq %r10, 7*8(%rdi) + +- movq %r9,2*8(%rdi) +- movq %r10,3*8(%rdi) ++ leaq 64(%rsi), %rsi ++ leaq 64(%rdi), %rdi + +- movq 4*8(%rsi),%r11 +- movq 5*8(%rsi),%r8 +- +- movq %r11,4*8(%rdi) +- movq %r8,5*8(%rdi) +- +- movq 6*8(%rsi),%r9 +- movq 7*8(%rsi),%r10 +- +- movq %r9,6*8(%rdi) +- movq %r10,7*8(%rdi) +- +- leaq 64(%rsi),%rsi +- leaq 64(%rdi),%rdi + jnz .Lloop_64 + + .Lhandle_tail: +- movl %edx,%ecx +- andl $63,%ecx +- shrl $3,%ecx ++ movl %edx, %ecx ++ andl $63, %ecx ++ shrl $3, %ecx + jz .Lhandle_7 ++ + .p2align 4 + .Lloop_8: + decl %ecx +- movq (%rsi),%r8 +- movq %r8,(%rdi) +- leaq 8(%rdi),%rdi +- leaq 8(%rsi),%rsi ++ movq (%rsi), %r8 ++ movq %r8, (%rdi) ++ leaq 8(%rdi), %rdi ++ leaq 8(%rsi), %rsi + jnz .Lloop_8 + + .Lhandle_7: +- movl %edx,%ecx +- andl $7,%ecx +- jz .Lende ++ movl %edx, %ecx ++ andl $7, %ecx ++ jz .Lend ++ + .p2align 4 + .Lloop_1: +- movb (%rsi),%r8b +- movb %r8b,(%rdi) ++ movb (%rsi), %r8b ++ movb %r8b, (%rdi) + incq %rdi + incq %rsi + decl %ecx + jnz .Lloop_1 + +-.Lende: +- popq %rbx +- CFI_ADJUST_CFA_OFFSET -8 +- 
CFI_RESTORE rbx ++.Lend: + ret +-.Lfinal: + CFI_ENDPROC + ENDPROC(memcpy) + ENDPROC(__memcpy) + +- /* Some CPUs run faster using the string copy instructions. +- It is also a lot simpler. Use this when possible */ ++ /* ++ * Some CPUs run faster using the string copy instructions. ++ * It is also a lot simpler. Use this when possible: ++ */ + +- .section .altinstr_replacement,"ax" ++ .section .altinstr_replacement, "ax" + 1: .byte 0xeb /* jmp */ + .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */ + 2: + .previous +- .section .altinstructions,"a" ++ ++ .section .altinstructions, "a" + .align 8 + .quad memcpy + .quad 1b + .byte X86_FEATURE_REP_GOOD +- /* Replace only beginning, memcpy is used to apply alternatives, so it +- * is silly to overwrite itself with nops - reboot is only outcome... */ ++ ++ /* ++ * Replace only beginning, memcpy is used to apply alternatives, ++ * so it is silly to overwrite itself with nops - reboot is the ++ * only outcome... ++ */ + .byte 2b - 1b + .byte 2b - 1b + .previous +Index: linux-2.6-tip/arch/x86/mach-default/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-default/Makefile ++++ /dev/null +@@ -1,5 +0,0 @@ +-# +-# Makefile for the linux kernel. +-# +- +-obj-y := setup.o +Index: linux-2.6-tip/arch/x86/mach-default/setup.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-default/setup.c ++++ /dev/null +@@ -1,174 +0,0 @@ +-/* +- * Machine specific setup for generic +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +- +-#ifdef CONFIG_HOTPLUG_CPU +-#define DEFAULT_SEND_IPI (1) +-#else +-#define DEFAULT_SEND_IPI (0) +-#endif +- +-int no_broadcast = DEFAULT_SEND_IPI; +- +-/** +- * pre_intr_init_hook - initialisation prior to setting up interrupt vectors +- * +- * Description: +- * Perform any necessary interrupt initialisation prior to setting up +- * the "ordinary" interrupt call gates. For legacy reasons, the ISA +- * interrupts should be initialised here if the machine emulates a PC +- * in any way. +- **/ +-void __init pre_intr_init_hook(void) +-{ +- if (x86_quirks->arch_pre_intr_init) { +- if (x86_quirks->arch_pre_intr_init()) +- return; +- } +- init_ISA_irqs(); +-} +- +-/* +- * IRQ2 is cascade interrupt to second interrupt controller +- */ +-static struct irqaction irq2 = { +- .handler = no_action, +- .mask = CPU_MASK_NONE, +- .name = "cascade", +-}; +- +-/** +- * intr_init_hook - post gate setup interrupt initialisation +- * +- * Description: +- * Fill in any interrupts that may have been left out by the general +- * init_IRQ() routine. interrupts having to do with the machine rather +- * than the devices on the I/O bus (like APIC interrupts in intel MP +- * systems) are started here. +- **/ +-void __init intr_init_hook(void) +-{ +- if (x86_quirks->arch_intr_init) { +- if (x86_quirks->arch_intr_init()) +- return; +- } +- if (!acpi_ioapic) +- setup_irq(2, &irq2); +- +-} +- +-/** +- * pre_setup_arch_hook - hook called prior to any setup_arch() execution +- * +- * Description: +- * generally used to activate any machine specific identification +- * routines that may be needed before setup_arch() runs. On Voyager +- * this is used to get the board revision and type. +- **/ +-void __init pre_setup_arch_hook(void) +-{ +-} +- +-/** +- * trap_init_hook - initialise system specific traps +- * +- * Description: +- * Called as the final act of trap_init(). 
Used in VISWS to initialise +- * the various board specific APIC traps. +- **/ +-void __init trap_init_hook(void) +-{ +- if (x86_quirks->arch_trap_init) { +- if (x86_quirks->arch_trap_init()) +- return; +- } +-} +- +-static struct irqaction irq0 = { +- .handler = timer_interrupt, +- .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, +- .mask = CPU_MASK_NONE, +- .name = "timer" +-}; +- +-/** +- * pre_time_init_hook - do any specific initialisations before. +- * +- **/ +-void __init pre_time_init_hook(void) +-{ +- if (x86_quirks->arch_pre_time_init) +- x86_quirks->arch_pre_time_init(); +-} +- +-/** +- * time_init_hook - do any specific initialisations for the system timer. +- * +- * Description: +- * Must plug the system timer interrupt source at HZ into the IRQ listed +- * in irq_vectors.h:TIMER_IRQ +- **/ +-void __init time_init_hook(void) +-{ +- if (x86_quirks->arch_time_init) { +- /* +- * A nonzero return code does not mean failure, it means +- * that the architecture quirk does not want any +- * generic (timer) setup to be performed after this: +- */ +- if (x86_quirks->arch_time_init()) +- return; +- } +- +- irq0.mask = cpumask_of_cpu(0); +- setup_irq(0, &irq0); +-} +- +-#ifdef CONFIG_MCA +-/** +- * mca_nmi_hook - hook into MCA specific NMI chain +- * +- * Description: +- * The MCA (Microchannel Architecture) has an NMI chain for NMI sources +- * along the MCA bus. Use this to hook into that chain if you will need +- * it. +- **/ +-void mca_nmi_hook(void) +-{ +- /* +- * If I recall correctly, there's a whole bunch of other things that +- * we can do to check for NMI problems, but that's all I know about +- * at the moment. +- */ +- pr_warning("NMI generated from unknown source!\n"); +-} +-#endif +- +-static __init int no_ipi_broadcast(char *str) +-{ +- get_option(&str, &no_broadcast); +- pr_info("Using %s mode\n", +- no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); +- return 1; +-} +-__setup("no_ipi_broadcast=", no_ipi_broadcast); +- +-static int __init print_ipi_mode(void) +-{ +- pr_info("Using IPI %s mode\n", +- no_broadcast ? "No-Shortcut" : "Shortcut"); +- return 0; +-} +- +-late_initcall(print_ipi_mode); +- +Index: linux-2.6-tip/arch/x86/mach-generic/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-generic/Makefile ++++ /dev/null +@@ -1,11 +0,0 @@ +-# +-# Makefile for the generic architecture +-# +- +-EXTRA_CFLAGS := -Iarch/x86/kernel +- +-obj-y := probe.o default.o +-obj-$(CONFIG_X86_NUMAQ) += numaq.o +-obj-$(CONFIG_X86_SUMMIT) += summit.o +-obj-$(CONFIG_X86_BIGSMP) += bigsmp.o +-obj-$(CONFIG_X86_ES7000) += es7000.o +Index: linux-2.6-tip/arch/x86/mach-generic/bigsmp.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-generic/bigsmp.c ++++ /dev/null +@@ -1,60 +0,0 @@ +-/* +- * APIC driver for "bigsmp" XAPIC machines with more than 8 virtual CPUs. +- * Drives the local APIC in "clustered mode". 
+- */ +-#define APIC_DEFINITION 1 +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-static int dmi_bigsmp; /* can be set by dmi scanners */ +- +-static int hp_ht_bigsmp(const struct dmi_system_id *d) +-{ +- printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident); +- dmi_bigsmp = 1; +- return 0; +-} +- +- +-static const struct dmi_system_id bigsmp_dmi_table[] = { +- { hp_ht_bigsmp, "HP ProLiant DL760 G2", +- { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), +- DMI_MATCH(DMI_BIOS_VERSION, "P44-"),} +- }, +- +- { hp_ht_bigsmp, "HP ProLiant DL740", +- { DMI_MATCH(DMI_BIOS_VENDOR, "HP"), +- DMI_MATCH(DMI_BIOS_VERSION, "P47-"),} +- }, +- { } +-}; +- +-static void vector_allocation_domain(int cpu, cpumask_t *retmask) +-{ +- cpus_clear(*retmask); +- cpu_set(cpu, *retmask); +-} +- +-static int probe_bigsmp(void) +-{ +- if (def_to_bigsmp) +- dmi_bigsmp = 1; +- else +- dmi_check_system(bigsmp_dmi_table); +- return dmi_bigsmp; +-} +- +-struct genapic apic_bigsmp = APIC_INIT("bigsmp", probe_bigsmp); +Index: linux-2.6-tip/arch/x86/mach-generic/default.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-generic/default.c ++++ /dev/null +@@ -1,27 +0,0 @@ +-/* +- * Default generic APIC driver. This handles up to 8 CPUs. +- */ +-#define APIC_DEFINITION 1 +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-/* should be called last. */ +-static int probe_default(void) +-{ +- return 1; +-} +- +-struct genapic apic_default = APIC_INIT("default", probe_default); +Index: linux-2.6-tip/arch/x86/mach-generic/es7000.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-generic/es7000.c ++++ /dev/null +@@ -1,103 +0,0 @@ +-/* +- * APIC driver for the Unisys ES7000 chipset. 
+- */ +-#define APIC_DEFINITION 1 +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-void __init es7000_update_genapic_to_cluster(void) +-{ +- genapic->target_cpus = target_cpus_cluster; +- genapic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER; +- genapic->int_dest_mode = INT_DEST_MODE_CLUSTER; +- genapic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER; +- +- genapic->init_apic_ldr = init_apic_ldr_cluster; +- +- genapic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster; +-} +- +-static int probe_es7000(void) +-{ +- /* probed later in mptable/ACPI hooks */ +- return 0; +-} +- +-extern void es7000_sw_apic(void); +-static void __init enable_apic_mode(void) +-{ +- es7000_sw_apic(); +- return; +-} +- +-static __init int +-mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +-{ +- if (mpc->oemptr) { +- struct mpc_oemtable *oem_table = +- (struct mpc_oemtable *)mpc->oemptr; +- if (!strncmp(oem, "UNISYS", 6)) +- return parse_unisys_oem((char *)oem_table); +- } +- return 0; +-} +- +-#ifdef CONFIG_ACPI +-/* Hook from generic ACPI tables.c */ +-static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +-{ +- unsigned long oem_addr = 0; +- int check_dsdt; +- int ret = 0; +- +- /* check dsdt at first to avoid clear fix_map for oem_addr */ +- check_dsdt = es7000_check_dsdt(); +- +- if (!find_unisys_acpi_oem_table(&oem_addr)) { +- if (check_dsdt) +- ret = parse_unisys_oem((char *)oem_addr); +- else { +- setup_unisys(); +- ret = 1; +- } +- /* +- * we need to unmap it +- */ +- unmap_unisys_acpi_oem_table(oem_addr); +- } +- return ret; +-} +-#else +-static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +-{ +- return 0; +-} +-#endif +- +-static void vector_allocation_domain(int cpu, cpumask_t *retmask) +-{ +- /* Careful. Some cpus do not strictly honor the set of cpus +- * specified in the interrupt destination when using lowest +- * priority interrupt delivery mode. +- * +- * In particular there was a hyperthreading cpu observed to +- * deliver interrupts to the wrong hyperthread when only one +- * hyperthread was specified in the interrupt desitination. +- */ +- *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; +-} +- +-struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); +Index: linux-2.6-tip/arch/x86/mach-generic/numaq.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-generic/numaq.c ++++ /dev/null +@@ -1,53 +0,0 @@ +-/* +- * APIC driver for the IBM NUMAQ chipset. +- */ +-#define APIC_DEFINITION 1 +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-static int mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +-{ +- numaq_mps_oem_check(mpc, oem, productid); +- return found_numaq; +-} +- +-static int probe_numaq(void) +-{ +- /* already know from get_memcfg_numaq() */ +- return found_numaq; +-} +- +-/* Hook from generic ACPI tables.c */ +-static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) +-{ +- return 0; +-} +- +-static void vector_allocation_domain(int cpu, cpumask_t *retmask) +-{ +- /* Careful. Some cpus do not strictly honor the set of cpus +- * specified in the interrupt destination when using lowest +- * priority interrupt delivery mode. 
+- * +- * In particular there was a hyperthreading cpu observed to +- * deliver interrupts to the wrong hyperthread when only one +- * hyperthread was specified in the interrupt desitination. +- */ +- *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; +-} +- +-struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); +Index: linux-2.6-tip/arch/x86/mach-generic/probe.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-generic/probe.c ++++ /dev/null +@@ -1,152 +0,0 @@ +-/* +- * Copyright 2003 Andi Kleen, SuSE Labs. +- * Subject to the GNU Public License, v.2 +- * +- * Generic x86 APIC driver probe layer. +- */ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-extern struct genapic apic_numaq; +-extern struct genapic apic_summit; +-extern struct genapic apic_bigsmp; +-extern struct genapic apic_es7000; +-extern struct genapic apic_default; +- +-struct genapic *genapic = &apic_default; +- +-static struct genapic *apic_probe[] __initdata = { +-#ifdef CONFIG_X86_NUMAQ +- &apic_numaq, +-#endif +-#ifdef CONFIG_X86_SUMMIT +- &apic_summit, +-#endif +-#ifdef CONFIG_X86_BIGSMP +- &apic_bigsmp, +-#endif +-#ifdef CONFIG_X86_ES7000 +- &apic_es7000, +-#endif +- &apic_default, /* must be last */ +- NULL, +-}; +- +-static int cmdline_apic __initdata; +-static int __init parse_apic(char *arg) +-{ +- int i; +- +- if (!arg) +- return -EINVAL; +- +- for (i = 0; apic_probe[i]; i++) { +- if (!strcmp(apic_probe[i]->name, arg)) { +- genapic = apic_probe[i]; +- cmdline_apic = 1; +- return 0; +- } +- } +- +- if (x86_quirks->update_genapic) +- x86_quirks->update_genapic(); +- +- /* Parsed again by __setup for debug/verbose */ +- return 0; +-} +-early_param("apic", parse_apic); +- +-void __init generic_bigsmp_probe(void) +-{ +-#ifdef CONFIG_X86_BIGSMP +- /* +- * This routine is used to switch to bigsmp mode when +- * - There is no apic= option specified by the user +- * - generic_apic_probe() has chosen apic_default as the sub_arch +- * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support +- */ +- +- if (!cmdline_apic && genapic == &apic_default) { +- if (apic_bigsmp.probe()) { +- genapic = &apic_bigsmp; +- if (x86_quirks->update_genapic) +- x86_quirks->update_genapic(); +- printk(KERN_INFO "Overriding APIC driver with %s\n", +- genapic->name); +- } +- } +-#endif +-} +- +-void __init generic_apic_probe(void) +-{ +- if (!cmdline_apic) { +- int i; +- for (i = 0; apic_probe[i]; i++) { +- if (apic_probe[i]->probe()) { +- genapic = apic_probe[i]; +- break; +- } +- } +- /* Not visible without early console */ +- if (!apic_probe[i]) +- panic("Didn't find an APIC driver"); +- +- if (x86_quirks->update_genapic) +- x86_quirks->update_genapic(); +- } +- printk(KERN_INFO "Using APIC driver %s\n", genapic->name); +-} +- +-/* These functions can switch the APIC even after the initial ->probe() */ +- +-int __init mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) +-{ +- int i; +- for (i = 0; apic_probe[i]; ++i) { +- if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) { +- if (!cmdline_apic) { +- genapic = apic_probe[i]; +- if (x86_quirks->update_genapic) +- x86_quirks->update_genapic(); +- printk(KERN_INFO "Switched to APIC driver `%s'.\n", +- genapic->name); +- } +- return 1; +- } +- } +- return 0; +-} +- +-int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +-{ +- int i; +- for (i = 0; apic_probe[i]; ++i) { +- if 
(apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { +- if (!cmdline_apic) { +- genapic = apic_probe[i]; +- if (x86_quirks->update_genapic) +- x86_quirks->update_genapic(); +- printk(KERN_INFO "Switched to APIC driver `%s'.\n", +- genapic->name); +- } +- return 1; +- } +- } +- return 0; +-} +- +-int hard_smp_processor_id(void) +-{ +- return genapic->get_apic_id(*(unsigned long *)(APIC_BASE+APIC_ID)); +-} +Index: linux-2.6-tip/arch/x86/mach-generic/summit.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-generic/summit.c ++++ /dev/null +@@ -1,40 +0,0 @@ +-/* +- * APIC driver for the IBM "Summit" chipset. +- */ +-#define APIC_DEFINITION 1 +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-static int probe_summit(void) +-{ +- /* probed later in mptable/ACPI hooks */ +- return 0; +-} +- +-static void vector_allocation_domain(int cpu, cpumask_t *retmask) +-{ +- /* Careful. Some cpus do not strictly honor the set of cpus +- * specified in the interrupt destination when using lowest +- * priority interrupt delivery mode. +- * +- * In particular there was a hyperthreading cpu observed to +- * deliver interrupts to the wrong hyperthread when only one +- * hyperthread was specified in the interrupt desitination. +- */ +- *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; +-} +- +-struct genapic apic_summit = APIC_INIT("summit", probe_summit); +Index: linux-2.6-tip/arch/x86/mach-rdc321x/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-rdc321x/Makefile ++++ /dev/null +@@ -1,5 +0,0 @@ +-# +-# Makefile for the RDC321x specific parts of the kernel +-# +-obj-$(CONFIG_X86_RDC321X) := gpio.o platform.o +- +Index: linux-2.6-tip/arch/x86/mach-rdc321x/gpio.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-rdc321x/gpio.c ++++ /dev/null +@@ -1,194 +0,0 @@ +-/* +- * GPIO support for RDC SoC R3210/R8610 +- * +- * Copyright (C) 2007, Florian Fainelli +- * Copyright (C) 2008, Volker Weiss +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License as published by +- * the Free Software Foundation; either version 2 of the License, or +- * (at your option) any later version. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the Free Software +- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +- * +- */ +- +- +-#include +-#include +-#include +-#include +- +-#include +-#include +- +- +-/* spin lock to protect our private copy of GPIO data register plus +- the access to PCI conf registers. 
*/ +-static DEFINE_SPINLOCK(gpio_lock); +- +-/* copy of GPIO data registers */ +-static u32 gpio_data_reg1; +-static u32 gpio_data_reg2; +- +-static u32 gpio_request_data[2]; +- +- +-static inline void rdc321x_conf_write(unsigned addr, u32 value) +-{ +- outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR); +- outl(value, RDC3210_CFGREG_DATA); +-} +- +-static inline void rdc321x_conf_or(unsigned addr, u32 value) +-{ +- outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR); +- value |= inl(RDC3210_CFGREG_DATA); +- outl(value, RDC3210_CFGREG_DATA); +-} +- +-static inline u32 rdc321x_conf_read(unsigned addr) +-{ +- outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR); +- +- return inl(RDC3210_CFGREG_DATA); +-} +- +-/* configure pin as GPIO */ +-static void rdc321x_configure_gpio(unsigned gpio) +-{ +- unsigned long flags; +- +- spin_lock_irqsave(&gpio_lock, flags); +- rdc321x_conf_or(gpio < 32 +- ? RDC321X_GPIO_CTRL_REG1 : RDC321X_GPIO_CTRL_REG2, +- 1 << (gpio & 0x1f)); +- spin_unlock_irqrestore(&gpio_lock, flags); +-} +- +-/* initially setup the 2 copies of the gpio data registers. +- This function must be called by the platform setup code. */ +-void __init rdc321x_gpio_setup() +-{ +- /* this might not be, what others (BIOS, bootloader, etc.) +- wrote to these registers before, but it's a good guess. Still +- better than just using 0xffffffff. */ +- +- gpio_data_reg1 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG1); +- gpio_data_reg2 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG2); +-} +- +-/* determine, if gpio number is valid */ +-static inline int rdc321x_is_gpio(unsigned gpio) +-{ +- return gpio <= RDC321X_MAX_GPIO; +-} +- +-/* request GPIO */ +-int rdc_gpio_request(unsigned gpio, const char *label) +-{ +- unsigned long flags; +- +- if (!rdc321x_is_gpio(gpio)) +- return -EINVAL; +- +- spin_lock_irqsave(&gpio_lock, flags); +- if (gpio_request_data[(gpio & 0x20) ? 1 : 0] & (1 << (gpio & 0x1f))) +- goto inuse; +- gpio_request_data[(gpio & 0x20) ? 1 : 0] |= (1 << (gpio & 0x1f)); +- spin_unlock_irqrestore(&gpio_lock, flags); +- +- return 0; +-inuse: +- spin_unlock_irqrestore(&gpio_lock, flags); +- return -EINVAL; +-} +-EXPORT_SYMBOL(rdc_gpio_request); +- +-/* release previously-claimed GPIO */ +-void rdc_gpio_free(unsigned gpio) +-{ +- unsigned long flags; +- +- if (!rdc321x_is_gpio(gpio)) +- return; +- +- spin_lock_irqsave(&gpio_lock, flags); +- gpio_request_data[(gpio & 0x20) ? 1 : 0] &= ~(1 << (gpio & 0x1f)); +- spin_unlock_irqrestore(&gpio_lock, flags); +-} +-EXPORT_SYMBOL(rdc_gpio_free); +- +-/* read GPIO pin */ +-int rdc_gpio_get_value(unsigned gpio) +-{ +- u32 reg; +- unsigned long flags; +- +- spin_lock_irqsave(&gpio_lock, flags); +- reg = rdc321x_conf_read(gpio < 32 +- ? RDC321X_GPIO_DATA_REG1 : RDC321X_GPIO_DATA_REG2); +- spin_unlock_irqrestore(&gpio_lock, flags); +- +- return (1 << (gpio & 0x1f)) & reg ? 
1 : 0; +-} +-EXPORT_SYMBOL(rdc_gpio_get_value); +- +-/* set GPIO pin to value */ +-void rdc_gpio_set_value(unsigned gpio, int value) +-{ +- unsigned long flags; +- u32 reg; +- +- reg = 1 << (gpio & 0x1f); +- if (gpio < 32) { +- spin_lock_irqsave(&gpio_lock, flags); +- if (value) +- gpio_data_reg1 |= reg; +- else +- gpio_data_reg1 &= ~reg; +- rdc321x_conf_write(RDC321X_GPIO_DATA_REG1, gpio_data_reg1); +- spin_unlock_irqrestore(&gpio_lock, flags); +- } else { +- spin_lock_irqsave(&gpio_lock, flags); +- if (value) +- gpio_data_reg2 |= reg; +- else +- gpio_data_reg2 &= ~reg; +- rdc321x_conf_write(RDC321X_GPIO_DATA_REG2, gpio_data_reg2); +- spin_unlock_irqrestore(&gpio_lock, flags); +- } +-} +-EXPORT_SYMBOL(rdc_gpio_set_value); +- +-/* configure GPIO pin as input */ +-int rdc_gpio_direction_input(unsigned gpio) +-{ +- if (!rdc321x_is_gpio(gpio)) +- return -EINVAL; +- +- rdc321x_configure_gpio(gpio); +- +- return 0; +-} +-EXPORT_SYMBOL(rdc_gpio_direction_input); +- +-/* configure GPIO pin as output and set value */ +-int rdc_gpio_direction_output(unsigned gpio, int value) +-{ +- if (!rdc321x_is_gpio(gpio)) +- return -EINVAL; +- +- gpio_set_value(gpio, value); +- rdc321x_configure_gpio(gpio); +- +- return 0; +-} +-EXPORT_SYMBOL(rdc_gpio_direction_output); +Index: linux-2.6-tip/arch/x86/mach-rdc321x/platform.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-rdc321x/platform.c ++++ /dev/null +@@ -1,69 +0,0 @@ +-/* +- * Generic RDC321x platform devices +- * +- * Copyright (C) 2007 Florian Fainelli +- * +- * This program is free software; you can redistribute it and/or +- * modify it under the terms of the GNU General Public License +- * as published by the Free Software Foundation; either version 2 +- * of the License, or (at your option) any later version. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the +- * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +- * Boston, MA 02110-1301, USA. +- * +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include +- +-/* LEDS */ +-static struct gpio_led default_leds[] = { +- { .name = "rdc:dmz", .gpio = 1, }, +-}; +- +-static struct gpio_led_platform_data rdc321x_led_data = { +- .num_leds = ARRAY_SIZE(default_leds), +- .leds = default_leds, +-}; +- +-static struct platform_device rdc321x_leds = { +- .name = "leds-gpio", +- .id = -1, +- .dev = { +- .platform_data = &rdc321x_led_data, +- } +-}; +- +-/* Watchdog */ +-static struct platform_device rdc321x_wdt = { +- .name = "rdc321x-wdt", +- .id = -1, +- .num_resources = 0, +-}; +- +-static struct platform_device *rdc321x_devs[] = { +- &rdc321x_leds, +- &rdc321x_wdt +-}; +- +-static int __init rdc_board_setup(void) +-{ +- rdc321x_gpio_setup(); +- +- return platform_add_devices(rdc321x_devs, ARRAY_SIZE(rdc321x_devs)); +-} +- +-arch_initcall(rdc_board_setup); +Index: linux-2.6-tip/arch/x86/mach-voyager/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-voyager/Makefile ++++ /dev/null +@@ -1,8 +0,0 @@ +-# +-# Makefile for the linux kernel. 
+-# +- +-EXTRA_CFLAGS := -Iarch/x86/kernel +-obj-y := setup.o voyager_basic.o voyager_thread.o +- +-obj-$(CONFIG_SMP) += voyager_smp.o voyager_cat.o +Index: linux-2.6-tip/arch/x86/mach-voyager/setup.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-voyager/setup.c ++++ /dev/null +@@ -1,118 +0,0 @@ +-/* +- * Machine specific setup for generic +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-void __init pre_intr_init_hook(void) +-{ +- init_ISA_irqs(); +-} +- +-/* +- * IRQ2 is cascade interrupt to second interrupt controller +- */ +-static struct irqaction irq2 = { +- .handler = no_action, +- .mask = CPU_MASK_NONE, +- .name = "cascade", +-}; +- +-void __init intr_init_hook(void) +-{ +-#ifdef CONFIG_SMP +- voyager_smp_intr_init(); +-#endif +- +- setup_irq(2, &irq2); +-} +- +-static void voyager_disable_tsc(void) +-{ +- /* Voyagers run their CPUs from independent clocks, so disable +- * the TSC code because we can't sync them */ +- setup_clear_cpu_cap(X86_FEATURE_TSC); +-} +- +-void __init pre_setup_arch_hook(void) +-{ +- voyager_disable_tsc(); +-} +- +-void __init pre_time_init_hook(void) +-{ +- voyager_disable_tsc(); +-} +- +-void __init trap_init_hook(void) +-{ +-} +- +-static struct irqaction irq0 = { +- .handler = timer_interrupt, +- .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, +- .mask = CPU_MASK_NONE, +- .name = "timer" +-}; +- +-void __init time_init_hook(void) +-{ +- irq0.mask = cpumask_of_cpu(safe_smp_processor_id()); +- setup_irq(0, &irq0); +-} +- +-/* Hook for machine specific memory setup. */ +- +-char *__init machine_specific_memory_setup(void) +-{ +- char *who; +- int new_nr; +- +- who = "NOT VOYAGER"; +- +- if (voyager_level == 5) { +- __u32 addr, length; +- int i; +- +- who = "Voyager-SUS"; +- +- e820.nr_map = 0; +- for (i = 0; voyager_memory_detect(i, &addr, &length); i++) { +- e820_add_region(addr, length, E820_RAM); +- } +- return who; +- } else if (voyager_level == 4) { +- __u32 tom; +- __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT) << 8; +- /* select the DINO config space */ +- outb(VOYAGER_DINO, VOYAGER_CAT_CONFIG_PORT); +- /* Read DINO top of memory register */ +- tom = ((inb(catbase + 0x4) & 0xf0) << 16) +- + ((inb(catbase + 0x5) & 0x7f) << 24); +- +- if (inb(catbase) != VOYAGER_DINO) { +- printk(KERN_ERR +- "Voyager: Failed to get DINO for L4, setting tom to EXT_MEM_K\n"); +- tom = (boot_params.screen_info.ext_mem_k) << 10; +- } +- who = "Voyager-TOM"; +- e820_add_region(0, 0x9f000, E820_RAM); +- /* map from 1M to top of memory */ +- e820_add_region(1 * 1024 * 1024, tom - 1 * 1024 * 1024, +- E820_RAM); +- /* FIXME: Should check the ASICs to see if I need to +- * take out the 8M window. Just do it at the moment +- * */ +- e820_add_region(8 * 1024 * 1024, 8 * 1024 * 1024, +- E820_RESERVED); +- return who; +- } +- +- return default_machine_specific_memory_setup(); +-} +Index: linux-2.6-tip/arch/x86/mach-voyager/voyager_basic.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-voyager/voyager_basic.c ++++ /dev/null +@@ -1,317 +0,0 @@ +-/* Copyright (C) 1999,2001 +- * +- * Author: J.E.J.Bottomley@HansenPartnership.com +- * +- * This file contains all the voyager specific routines for getting +- * initialisation of the architecture to function. 
For additional +- * features see: +- * +- * voyager_cat.c - Voyager CAT bus interface +- * voyager_smp.c - Voyager SMP hal (emulates linux smp.c) +- */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-/* +- * Power off function, if any +- */ +-void (*pm_power_off) (void); +-EXPORT_SYMBOL(pm_power_off); +- +-int voyager_level = 0; +- +-struct voyager_SUS *voyager_SUS = NULL; +- +-#ifdef CONFIG_SMP +-static void voyager_dump(int dummy1, struct tty_struct *dummy3) +-{ +- /* get here via a sysrq */ +- voyager_smp_dump(); +-} +- +-static struct sysrq_key_op sysrq_voyager_dump_op = { +- .handler = voyager_dump, +- .help_msg = "Voyager", +- .action_msg = "Dump Voyager Status", +-}; +-#endif +- +-void voyager_detect(struct voyager_bios_info *bios) +-{ +- if (bios->len != 0xff) { +- int class = (bios->class_1 << 8) +- | (bios->class_2 & 0xff); +- +- printk("Voyager System detected.\n" +- " Class %x, Revision %d.%d\n", +- class, bios->major, bios->minor); +- if (class == VOYAGER_LEVEL4) +- voyager_level = 4; +- else if (class < VOYAGER_LEVEL5_AND_ABOVE) +- voyager_level = 3; +- else +- voyager_level = 5; +- printk(" Architecture Level %d\n", voyager_level); +- if (voyager_level < 4) +- printk +- ("\n**WARNING**: Voyager HAL only supports Levels 4 and 5 Architectures at the moment\n\n"); +- /* install the power off handler */ +- pm_power_off = voyager_power_off; +-#ifdef CONFIG_SMP +- register_sysrq_key('v', &sysrq_voyager_dump_op); +-#endif +- } else { +- printk("\n\n**WARNING**: No Voyager Subsystem Found\n"); +- } +-} +- +-void voyager_system_interrupt(int cpl, void *dev_id) +-{ +- printk("Voyager: detected system interrupt\n"); +-} +- +-/* Routine to read information from the extended CMOS area */ +-__u8 voyager_extended_cmos_read(__u16 addr) +-{ +- outb(addr & 0xff, 0x74); +- outb((addr >> 8) & 0xff, 0x75); +- return inb(0x76); +-} +- +-/* internal definitions for the SUS Click Map of memory */ +- +-#define CLICK_ENTRIES 16 +-#define CLICK_SIZE 4096 /* click to byte conversion for Length */ +- +-typedef struct ClickMap { +- struct Entry { +- __u32 Address; +- __u32 Length; +- } Entry[CLICK_ENTRIES]; +-} ClickMap_t; +- +-/* This routine is pretty much an awful hack to read the bios clickmap by +- * mapping it into page 0. There are usually three regions in the map: +- * Base Memory +- * Extended Memory +- * zero length marker for end of map +- * +- * Returns are 0 for failure and 1 for success on extracting region. 
+- */ +-int __init voyager_memory_detect(int region, __u32 * start, __u32 * length) +-{ +- int i; +- int retval = 0; +- __u8 cmos[4]; +- ClickMap_t *map; +- unsigned long map_addr; +- unsigned long old; +- +- if (region >= CLICK_ENTRIES) { +- printk("Voyager: Illegal ClickMap region %d\n", region); +- return 0; +- } +- +- for (i = 0; i < sizeof(cmos); i++) +- cmos[i] = +- voyager_extended_cmos_read(VOYAGER_MEMORY_CLICKMAP + i); +- +- map_addr = *(unsigned long *)cmos; +- +- /* steal page 0 for this */ +- old = pg0[0]; +- pg0[0] = ((map_addr & PAGE_MASK) | _PAGE_RW | _PAGE_PRESENT); +- local_flush_tlb(); +- /* now clear everything out but page 0 */ +- map = (ClickMap_t *) (map_addr & (~PAGE_MASK)); +- +- /* zero length is the end of the clickmap */ +- if (map->Entry[region].Length != 0) { +- *length = map->Entry[region].Length * CLICK_SIZE; +- *start = map->Entry[region].Address; +- retval = 1; +- } +- +- /* replace the mapping */ +- pg0[0] = old; +- local_flush_tlb(); +- return retval; +-} +- +-/* voyager specific handling code for timer interrupts. Used to hand +- * off the timer tick to the SMP code, since the VIC doesn't have an +- * internal timer (The QIC does, but that's another story). */ +-void voyager_timer_interrupt(void) +-{ +- if ((jiffies & 0x3ff) == 0) { +- +- /* There seems to be something flaky in either +- * hardware or software that is resetting the timer 0 +- * count to something much higher than it should be +- * This seems to occur in the boot sequence, just +- * before root is mounted. Therefore, every 10 +- * seconds or so, we sanity check the timer zero count +- * and kick it back to where it should be. +- * +- * FIXME: This is the most awful hack yet seen. I +- * should work out exactly what is interfering with +- * the timer count settings early in the boot sequence +- * and swiftly introduce it to something sharp and +- * pointy. */ +- __u16 val; +- +- spin_lock(&i8253_lock); +- +- outb_p(0x00, 0x43); +- val = inb_p(0x40); +- val |= inb(0x40) << 8; +- spin_unlock(&i8253_lock); +- +- if (val > LATCH) { +- printk +- ("\nVOYAGER: countdown timer value too high (%d), resetting\n\n", +- val); +- spin_lock(&i8253_lock); +- outb(0x34, 0x43); +- outb_p(LATCH & 0xff, 0x40); /* LSB */ +- outb(LATCH >> 8, 0x40); /* MSB */ +- spin_unlock(&i8253_lock); +- } +- } +-#ifdef CONFIG_SMP +- smp_vic_timer_interrupt(); +-#endif +-} +- +-void voyager_power_off(void) +-{ +- printk("VOYAGER Power Off\n"); +- +- if (voyager_level == 5) { +- voyager_cat_power_off(); +- } else if (voyager_level == 4) { +- /* This doesn't apparently work on most L4 machines, +- * but the specs say to do this to get automatic power +- * off. 
Unfortunately, if it doesn't power off the +- * machine, it ends up doing a cold restart, which +- * isn't really intended, so comment out the code */ +-#if 0 +- int port; +- +- /* enable the voyager Configuration Space */ +- outb((inb(VOYAGER_MC_SETUP) & 0xf0) | 0x8, VOYAGER_MC_SETUP); +- /* the port for the power off flag is an offset from the +- floating base */ +- port = (inb(VOYAGER_SSPB_RELOCATION_PORT) << 8) + 0x21; +- /* set the power off flag */ +- outb(inb(port) | 0x1, port); +-#endif +- } +- /* and wait for it to happen */ +- local_irq_disable(); +- for (;;) +- halt(); +-} +- +-/* copied from process.c */ +-static inline void kb_wait(void) +-{ +- int i; +- +- for (i = 0; i < 0x10000; i++) +- if ((inb_p(0x64) & 0x02) == 0) +- break; +-} +- +-void machine_shutdown(void) +-{ +- /* Architecture specific shutdown needed before a kexec */ +-} +- +-void machine_restart(char *cmd) +-{ +- printk("Voyager Warm Restart\n"); +- kb_wait(); +- +- if (voyager_level == 5) { +- /* write magic values to the RTC to inform system that +- * shutdown is beginning */ +- outb(0x8f, 0x70); +- outb(0x5, 0x71); +- +- udelay(50); +- outb(0xfe, 0x64); /* pull reset low */ +- } else if (voyager_level == 4) { +- __u16 catbase = inb(VOYAGER_SSPB_RELOCATION_PORT) << 8; +- __u8 basebd = inb(VOYAGER_MC_SETUP); +- +- outb(basebd | 0x08, VOYAGER_MC_SETUP); +- outb(0x02, catbase + 0x21); +- } +- local_irq_disable(); +- for (;;) +- halt(); +-} +- +-void machine_emergency_restart(void) +-{ +- /*for now, just hook this to a warm restart */ +- machine_restart(NULL); +-} +- +-void mca_nmi_hook(void) +-{ +- __u8 dumpval __maybe_unused = inb(0xf823); +- __u8 swnmi __maybe_unused = inb(0xf813); +- +- /* FIXME: assume dump switch pressed */ +- /* check to see if the dump switch was pressed */ +- VDEBUG(("VOYAGER: dumpval = 0x%x, swnmi = 0x%x\n", dumpval, swnmi)); +- /* clear swnmi */ +- outb(0xff, 0xf813); +- /* tell SUS to ignore dump */ +- if (voyager_level == 5 && voyager_SUS != NULL) { +- if (voyager_SUS->SUS_mbox == VOYAGER_DUMP_BUTTON_NMI) { +- voyager_SUS->kernel_mbox = VOYAGER_NO_COMMAND; +- voyager_SUS->kernel_flags |= VOYAGER_OS_IN_PROGRESS; +- udelay(1000); +- voyager_SUS->kernel_mbox = VOYAGER_IGNORE_DUMP; +- voyager_SUS->kernel_flags &= ~VOYAGER_OS_IN_PROGRESS; +- } +- } +- printk(KERN_ERR +- "VOYAGER: Dump switch pressed, printing CPU%d tracebacks\n", +- smp_processor_id()); +- show_stack(NULL, NULL); +- show_state(); +-} +- +-void machine_halt(void) +-{ +- /* treat a halt like a power off */ +- machine_power_off(); +-} +- +-void machine_power_off(void) +-{ +- if (pm_power_off) +- pm_power_off(); +-} +Index: linux-2.6-tip/arch/x86/mach-voyager/voyager_cat.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-voyager/voyager_cat.c ++++ /dev/null +@@ -1,1197 +0,0 @@ +-/* -*- mode: c; c-basic-offset: 8 -*- */ +- +-/* Copyright (C) 1999,2001 +- * +- * Author: J.E.J.Bottomley@HansenPartnership.com +- * +- * This file contains all the logic for manipulating the CAT bus +- * in a level 5 machine. +- * +- * The CAT bus is a serial configuration and test bus. Its primary +- * uses are to probe the initial configuration of the system and to +- * diagnose error conditions when a system interrupt occurs. 
The low +- * level interface is fairly primitive, so most of this file consists +- * of bit shift manipulations to send and receive packets on the +- * serial bus */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#ifdef VOYAGER_CAT_DEBUG +-#define CDEBUG(x) printk x +-#else +-#define CDEBUG(x) +-#endif +- +-/* the CAT command port */ +-#define CAT_CMD (sspb + 0xe) +-/* the CAT data port */ +-#define CAT_DATA (sspb + 0xd) +- +-/* the internal cat functions */ +-static void cat_pack(__u8 * msg, __u16 start_bit, __u8 * data, __u16 num_bits); +-static void cat_unpack(__u8 * msg, __u16 start_bit, __u8 * data, +- __u16 num_bits); +-static void cat_build_header(__u8 * header, const __u16 len, +- const __u16 smallest_reg_bits, +- const __u16 longest_reg_bits); +-static int cat_sendinst(voyager_module_t * modp, voyager_asic_t * asicp, +- __u8 reg, __u8 op); +-static int cat_getdata(voyager_module_t * modp, voyager_asic_t * asicp, +- __u8 reg, __u8 * value); +-static int cat_shiftout(__u8 * data, __u16 data_bytes, __u16 header_bytes, +- __u8 pad_bits); +-static int cat_write(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, +- __u8 value); +-static int cat_read(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, +- __u8 * value); +-static int cat_subread(voyager_module_t * modp, voyager_asic_t * asicp, +- __u16 offset, __u16 len, void *buf); +-static int cat_senddata(voyager_module_t * modp, voyager_asic_t * asicp, +- __u8 reg, __u8 value); +-static int cat_disconnect(voyager_module_t * modp, voyager_asic_t * asicp); +-static int cat_connect(voyager_module_t * modp, voyager_asic_t * asicp); +- +-static inline const char *cat_module_name(int module_id) +-{ +- switch (module_id) { +- case 0x10: +- return "Processor Slot 0"; +- case 0x11: +- return "Processor Slot 1"; +- case 0x12: +- return "Processor Slot 2"; +- case 0x13: +- return "Processor Slot 4"; +- case 0x14: +- return "Memory Slot 0"; +- case 0x15: +- return "Memory Slot 1"; +- case 0x18: +- return "Primary Microchannel"; +- case 0x19: +- return "Secondary Microchannel"; +- case 0x1a: +- return "Power Supply Interface"; +- case 0x1c: +- return "Processor Slot 5"; +- case 0x1d: +- return "Processor Slot 6"; +- case 0x1e: +- return "Processor Slot 7"; +- case 0x1f: +- return "Processor Slot 8"; +- default: +- return "Unknown Module"; +- } +-} +- +-static int sspb = 0; /* stores the super port location */ +-int voyager_8slot = 0; /* set to true if a 51xx monster */ +- +-voyager_module_t *voyager_cat_list; +- +-/* the I/O port assignments for the VIC and QIC */ +-static struct resource vic_res = { +- .name = "Voyager Interrupt Controller", +- .start = 0xFC00, +- .end = 0xFC6F +-}; +-static struct resource qic_res = { +- .name = "Quad Interrupt Controller", +- .start = 0xFC70, +- .end = 0xFCFF +-}; +- +-/* This function is used to pack a data bit stream inside a message. +- * It writes num_bits of the data buffer in msg starting at start_bit. 
+- * Note: This function assumes that any unused bit in the data stream +- * is set to zero so that the ors will work correctly */ +-static void +-cat_pack(__u8 * msg, const __u16 start_bit, __u8 * data, const __u16 num_bits) +-{ +- /* compute initial shift needed */ +- const __u16 offset = start_bit % BITS_PER_BYTE; +- __u16 len = num_bits / BITS_PER_BYTE; +- __u16 byte = start_bit / BITS_PER_BYTE; +- __u16 residue = (num_bits % BITS_PER_BYTE) + offset; +- int i; +- +- /* adjust if we have more than a byte of residue */ +- if (residue >= BITS_PER_BYTE) { +- residue -= BITS_PER_BYTE; +- len++; +- } +- +- /* clear out the bits. We assume here that if len==0 then +- * residue >= offset. This is always true for the catbus +- * operations */ +- msg[byte] &= 0xff << (BITS_PER_BYTE - offset); +- msg[byte++] |= data[0] >> offset; +- if (len == 0) +- return; +- for (i = 1; i < len; i++) +- msg[byte++] = (data[i - 1] << (BITS_PER_BYTE - offset)) +- | (data[i] >> offset); +- if (residue != 0) { +- __u8 mask = 0xff >> residue; +- __u8 last_byte = data[i - 1] << (BITS_PER_BYTE - offset) +- | (data[i] >> offset); +- +- last_byte &= ~mask; +- msg[byte] &= mask; +- msg[byte] |= last_byte; +- } +- return; +-} +- +-/* unpack the data again (same arguments as cat_pack()). data buffer +- * must be zero populated. +- * +- * Function: given a message string move to start_bit and copy num_bits into +- * data (starting at bit 0 in data). +- */ +-static void +-cat_unpack(__u8 * msg, const __u16 start_bit, __u8 * data, const __u16 num_bits) +-{ +- /* compute initial shift needed */ +- const __u16 offset = start_bit % BITS_PER_BYTE; +- __u16 len = num_bits / BITS_PER_BYTE; +- const __u8 last_bits = num_bits % BITS_PER_BYTE; +- __u16 byte = start_bit / BITS_PER_BYTE; +- int i; +- +- if (last_bits != 0) +- len++; +- +- /* special case: want < 8 bits from msg and we can get it from +- * a single byte of the msg */ +- if (len == 0 && BITS_PER_BYTE - offset >= num_bits) { +- data[0] = msg[byte] << offset; +- data[0] &= 0xff >> (BITS_PER_BYTE - num_bits); +- return; +- } +- for (i = 0; i < len; i++) { +- /* this annoying if has to be done just in case a read of +- * msg one beyond the array causes a panic */ +- if (offset != 0) { +- data[i] = msg[byte++] << offset; +- data[i] |= msg[byte] >> (BITS_PER_BYTE - offset); +- } else { +- data[i] = msg[byte++]; +- } +- } +- /* do we need to truncate the final byte */ +- if (last_bits != 0) { +- data[i - 1] &= 0xff << (BITS_PER_BYTE - last_bits); +- } +- return; +-} +- +-static void +-cat_build_header(__u8 * header, const __u16 len, const __u16 smallest_reg_bits, +- const __u16 longest_reg_bits) +-{ +- int i; +- __u16 start_bit = (smallest_reg_bits - 1) % BITS_PER_BYTE; +- __u8 *last_byte = &header[len - 1]; +- +- if (start_bit == 0) +- start_bit = 1; /* must have at least one bit in the hdr */ +- +- for (i = 0; i < len; i++) +- header[i] = 0; +- +- for (i = start_bit; i > 0; i--) +- *last_byte = ((*last_byte) << 1) + 1; +- +-} +- +-static int +-cat_sendinst(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, __u8 op) +-{ +- __u8 parity, inst, inst_buf[4] = { 0 }; +- __u8 iseq[VOYAGER_MAX_SCAN_PATH], hseq[VOYAGER_MAX_REG_SIZE]; +- __u16 ibytes, hbytes, padbits; +- int i; +- +- /* +- * Parity is the parity of the register number + 1 (READ_REGISTER +- * and WRITE_REGISTER always add '1' to the number of bits == 1) +- */ +- parity = (__u8) (1 + (reg & 0x01) + +- ((__u8) (reg & 0x02) >> 1) + +- ((__u8) (reg & 0x04) >> 2) + +- ((__u8) (reg & 0x08) >> 3)) % 2; +- +- inst = 
((parity << 7) | (reg << 2) | op); +- +- outb(VOYAGER_CAT_IRCYC, CAT_CMD); +- if (!modp->scan_path_connected) { +- if (asicp->asic_id != VOYAGER_CAT_ID) { +- printk +- ("**WARNING***: cat_sendinst has disconnected scan path not to CAT asic\n"); +- return 1; +- } +- outb(VOYAGER_CAT_HEADER, CAT_DATA); +- outb(inst, CAT_DATA); +- if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) { +- CDEBUG(("VOYAGER CAT: cat_sendinst failed to get CAT_HEADER\n")); +- return 1; +- } +- return 0; +- } +- ibytes = modp->inst_bits / BITS_PER_BYTE; +- if ((padbits = modp->inst_bits % BITS_PER_BYTE) != 0) { +- padbits = BITS_PER_BYTE - padbits; +- ibytes++; +- } +- hbytes = modp->largest_reg / BITS_PER_BYTE; +- if (modp->largest_reg % BITS_PER_BYTE) +- hbytes++; +- CDEBUG(("cat_sendinst: ibytes=%d, hbytes=%d\n", ibytes, hbytes)); +- /* initialise the instruction sequence to 0xff */ +- for (i = 0; i < ibytes + hbytes; i++) +- iseq[i] = 0xff; +- cat_build_header(hseq, hbytes, modp->smallest_reg, modp->largest_reg); +- cat_pack(iseq, modp->inst_bits, hseq, hbytes * BITS_PER_BYTE); +- inst_buf[0] = inst; +- inst_buf[1] = 0xFF >> (modp->largest_reg % BITS_PER_BYTE); +- cat_pack(iseq, asicp->bit_location, inst_buf, asicp->ireg_length); +-#ifdef VOYAGER_CAT_DEBUG +- printk("ins = 0x%x, iseq: ", inst); +- for (i = 0; i < ibytes + hbytes; i++) +- printk("0x%x ", iseq[i]); +- printk("\n"); +-#endif +- if (cat_shiftout(iseq, ibytes, hbytes, padbits)) { +- CDEBUG(("VOYAGER CAT: cat_sendinst: cat_shiftout failed\n")); +- return 1; +- } +- CDEBUG(("CAT SHIFTOUT DONE\n")); +- return 0; +-} +- +-static int +-cat_getdata(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, +- __u8 * value) +-{ +- if (!modp->scan_path_connected) { +- if (asicp->asic_id != VOYAGER_CAT_ID) { +- CDEBUG(("VOYAGER CAT: ERROR: cat_getdata to CAT asic with scan path connected\n")); +- return 1; +- } +- if (reg > VOYAGER_SUBADDRHI) +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- outb(VOYAGER_CAT_DRCYC, CAT_CMD); +- outb(VOYAGER_CAT_HEADER, CAT_DATA); +- *value = inb(CAT_DATA); +- outb(0xAA, CAT_DATA); +- if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) { +- CDEBUG(("cat_getdata: failed to get VOYAGER_CAT_HEADER\n")); +- return 1; +- } +- return 0; +- } else { +- __u16 sbits = modp->num_asics - 1 + asicp->ireg_length; +- __u16 sbytes = sbits / BITS_PER_BYTE; +- __u16 tbytes; +- __u8 string[VOYAGER_MAX_SCAN_PATH], +- trailer[VOYAGER_MAX_REG_SIZE]; +- __u8 padbits; +- int i; +- +- outb(VOYAGER_CAT_DRCYC, CAT_CMD); +- +- if ((padbits = sbits % BITS_PER_BYTE) != 0) { +- padbits = BITS_PER_BYTE - padbits; +- sbytes++; +- } +- tbytes = asicp->ireg_length / BITS_PER_BYTE; +- if (asicp->ireg_length % BITS_PER_BYTE) +- tbytes++; +- CDEBUG(("cat_getdata: tbytes = %d, sbytes = %d, padbits = %d\n", +- tbytes, sbytes, padbits)); +- cat_build_header(trailer, tbytes, 1, asicp->ireg_length); +- +- for (i = tbytes - 1; i >= 0; i--) { +- outb(trailer[i], CAT_DATA); +- string[sbytes + i] = inb(CAT_DATA); +- } +- +- for (i = sbytes - 1; i >= 0; i--) { +- outb(0xaa, CAT_DATA); +- string[i] = inb(CAT_DATA); +- } +- *value = 0; +- cat_unpack(string, +- padbits + (tbytes * BITS_PER_BYTE) + +- asicp->asic_location, value, asicp->ireg_length); +-#ifdef VOYAGER_CAT_DEBUG +- printk("value=0x%x, string: ", *value); +- for (i = 0; i < tbytes + sbytes; i++) +- printk("0x%x ", string[i]); +- printk("\n"); +-#endif +- +- /* sanity check the rest of the return */ +- for (i = 0; i < tbytes; i++) { +- __u8 input = 0; +- +- cat_unpack(string, padbits + (i * BITS_PER_BYTE), +- &input, BITS_PER_BYTE); +- if 
(trailer[i] != input) { +- CDEBUG(("cat_getdata: failed to sanity check rest of ret(%d) 0x%x != 0x%x\n", i, input, trailer[i])); +- return 1; +- } +- } +- CDEBUG(("cat_getdata DONE\n")); +- return 0; +- } +-} +- +-static int +-cat_shiftout(__u8 * data, __u16 data_bytes, __u16 header_bytes, __u8 pad_bits) +-{ +- int i; +- +- for (i = data_bytes + header_bytes - 1; i >= header_bytes; i--) +- outb(data[i], CAT_DATA); +- +- for (i = header_bytes - 1; i >= 0; i--) { +- __u8 header = 0; +- __u8 input; +- +- outb(data[i], CAT_DATA); +- input = inb(CAT_DATA); +- CDEBUG(("cat_shiftout: returned 0x%x\n", input)); +- cat_unpack(data, ((data_bytes + i) * BITS_PER_BYTE) - pad_bits, +- &header, BITS_PER_BYTE); +- if (input != header) { +- CDEBUG(("VOYAGER CAT: cat_shiftout failed to return header 0x%x != 0x%x\n", input, header)); +- return 1; +- } +- } +- return 0; +-} +- +-static int +-cat_senddata(voyager_module_t * modp, voyager_asic_t * asicp, +- __u8 reg, __u8 value) +-{ +- outb(VOYAGER_CAT_DRCYC, CAT_CMD); +- if (!modp->scan_path_connected) { +- if (asicp->asic_id != VOYAGER_CAT_ID) { +- CDEBUG(("VOYAGER CAT: ERROR: scan path disconnected when asic != CAT\n")); +- return 1; +- } +- outb(VOYAGER_CAT_HEADER, CAT_DATA); +- outb(value, CAT_DATA); +- if (inb(CAT_DATA) != VOYAGER_CAT_HEADER) { +- CDEBUG(("cat_senddata: failed to get correct header response to sent data\n")); +- return 1; +- } +- if (reg > VOYAGER_SUBADDRHI) { +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- outb(VOYAGER_CAT_END, CAT_CMD); +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- } +- +- return 0; +- } else { +- __u16 hbytes = asicp->ireg_length / BITS_PER_BYTE; +- __u16 dbytes = +- (modp->num_asics - 1 + asicp->ireg_length) / BITS_PER_BYTE; +- __u8 padbits, dseq[VOYAGER_MAX_SCAN_PATH], +- hseq[VOYAGER_MAX_REG_SIZE]; +- int i; +- +- if ((padbits = (modp->num_asics - 1 +- + asicp->ireg_length) % BITS_PER_BYTE) != 0) { +- padbits = BITS_PER_BYTE - padbits; +- dbytes++; +- } +- if (asicp->ireg_length % BITS_PER_BYTE) +- hbytes++; +- +- cat_build_header(hseq, hbytes, 1, asicp->ireg_length); +- +- for (i = 0; i < dbytes + hbytes; i++) +- dseq[i] = 0xff; +- CDEBUG(("cat_senddata: dbytes=%d, hbytes=%d, padbits=%d\n", +- dbytes, hbytes, padbits)); +- cat_pack(dseq, modp->num_asics - 1 + asicp->ireg_length, +- hseq, hbytes * BITS_PER_BYTE); +- cat_pack(dseq, asicp->asic_location, &value, +- asicp->ireg_length); +-#ifdef VOYAGER_CAT_DEBUG +- printk("dseq "); +- for (i = 0; i < hbytes + dbytes; i++) { +- printk("0x%x ", dseq[i]); +- } +- printk("\n"); +-#endif +- return cat_shiftout(dseq, dbytes, hbytes, padbits); +- } +-} +- +-static int +-cat_write(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, __u8 value) +-{ +- if (cat_sendinst(modp, asicp, reg, VOYAGER_WRITE_CONFIG)) +- return 1; +- return cat_senddata(modp, asicp, reg, value); +-} +- +-static int +-cat_read(voyager_module_t * modp, voyager_asic_t * asicp, __u8 reg, +- __u8 * value) +-{ +- if (cat_sendinst(modp, asicp, reg, VOYAGER_READ_CONFIG)) +- return 1; +- return cat_getdata(modp, asicp, reg, value); +-} +- +-static int +-cat_subaddrsetup(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset, +- __u16 len) +-{ +- __u8 val; +- +- if (len > 1) { +- /* set auto increment */ +- __u8 newval; +- +- if (cat_read(modp, asicp, VOYAGER_AUTO_INC_REG, &val)) { +- CDEBUG(("cat_subaddrsetup: read of VOYAGER_AUTO_INC_REG failed\n")); +- return 1; +- } +- CDEBUG(("cat_subaddrsetup: VOYAGER_AUTO_INC_REG = 0x%x\n", +- val)); +- newval = val | VOYAGER_AUTO_INC; +- if (newval != val) { +- if 
(cat_write(modp, asicp, VOYAGER_AUTO_INC_REG, val)) { +- CDEBUG(("cat_subaddrsetup: write to VOYAGER_AUTO_INC_REG failed\n")); +- return 1; +- } +- } +- } +- if (cat_write(modp, asicp, VOYAGER_SUBADDRLO, (__u8) (offset & 0xff))) { +- CDEBUG(("cat_subaddrsetup: write to SUBADDRLO failed\n")); +- return 1; +- } +- if (asicp->subaddr > VOYAGER_SUBADDR_LO) { +- if (cat_write +- (modp, asicp, VOYAGER_SUBADDRHI, (__u8) (offset >> 8))) { +- CDEBUG(("cat_subaddrsetup: write to SUBADDRHI failed\n")); +- return 1; +- } +- cat_read(modp, asicp, VOYAGER_SUBADDRHI, &val); +- CDEBUG(("cat_subaddrsetup: offset = %d, hi = %d\n", offset, +- val)); +- } +- cat_read(modp, asicp, VOYAGER_SUBADDRLO, &val); +- CDEBUG(("cat_subaddrsetup: offset = %d, lo = %d\n", offset, val)); +- return 0; +-} +- +-static int +-cat_subwrite(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset, +- __u16 len, void *buf) +-{ +- int i, retval; +- +- /* FIXME: need special actions for VOYAGER_CAT_ID here */ +- if (asicp->asic_id == VOYAGER_CAT_ID) { +- CDEBUG(("cat_subwrite: ATTEMPT TO WRITE TO CAT ASIC\n")); +- /* FIXME -- This is supposed to be handled better +- * There is a problem writing to the cat asic in the +- * PSI. The 30us delay seems to work, though */ +- udelay(30); +- } +- +- if ((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) { +- printk("cat_subwrite: cat_subaddrsetup FAILED\n"); +- return retval; +- } +- +- if (cat_sendinst +- (modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_WRITE_CONFIG)) { +- printk("cat_subwrite: cat_sendinst FAILED\n"); +- return 1; +- } +- for (i = 0; i < len; i++) { +- if (cat_senddata(modp, asicp, 0xFF, ((__u8 *) buf)[i])) { +- printk +- ("cat_subwrite: cat_sendata element at %d FAILED\n", +- i); +- return 1; +- } +- } +- return 0; +-} +-static int +-cat_subread(voyager_module_t * modp, voyager_asic_t * asicp, __u16 offset, +- __u16 len, void *buf) +-{ +- int i, retval; +- +- if ((retval = cat_subaddrsetup(modp, asicp, offset, len)) != 0) { +- CDEBUG(("cat_subread: cat_subaddrsetup FAILED\n")); +- return retval; +- } +- +- if (cat_sendinst(modp, asicp, VOYAGER_SUBADDRDATA, VOYAGER_READ_CONFIG)) { +- CDEBUG(("cat_subread: cat_sendinst failed\n")); +- return 1; +- } +- for (i = 0; i < len; i++) { +- if (cat_getdata(modp, asicp, 0xFF, &((__u8 *) buf)[i])) { +- CDEBUG(("cat_subread: cat_getdata element %d failed\n", +- i)); +- return 1; +- } +- } +- return 0; +-} +- +-/* buffer for storing EPROM data read in during initialisation */ +-static __initdata __u8 eprom_buf[0xFFFF]; +-static voyager_module_t *voyager_initial_module; +- +-/* Initialise the cat bus components. We assume this is called by the +- * boot cpu *after* all memory initialisation has been done (so we can +- * use kmalloc) but before smp initialisation, so we can probe the SMP +- * configuration and pick up necessary information. 
*/ +-void __init voyager_cat_init(void) +-{ +- voyager_module_t **modpp = &voyager_initial_module; +- voyager_asic_t **asicpp; +- voyager_asic_t *qabc_asic = NULL; +- int i, j; +- unsigned long qic_addr = 0; +- __u8 qabc_data[0x20]; +- __u8 num_submodules, val; +- voyager_eprom_hdr_t *eprom_hdr = (voyager_eprom_hdr_t *) & eprom_buf[0]; +- +- __u8 cmos[4]; +- unsigned long addr; +- +- /* initiallise the SUS mailbox */ +- for (i = 0; i < sizeof(cmos); i++) +- cmos[i] = voyager_extended_cmos_read(VOYAGER_DUMP_LOCATION + i); +- addr = *(unsigned long *)cmos; +- if ((addr & 0xff000000) != 0xff000000) { +- printk(KERN_ERR +- "Voyager failed to get SUS mailbox (addr = 0x%lx\n", +- addr); +- } else { +- static struct resource res; +- +- res.name = "voyager SUS"; +- res.start = addr; +- res.end = addr + 0x3ff; +- +- request_resource(&iomem_resource, &res); +- voyager_SUS = (struct voyager_SUS *) +- ioremap(addr, 0x400); +- printk(KERN_NOTICE "Voyager SUS mailbox version 0x%x\n", +- voyager_SUS->SUS_version); +- voyager_SUS->kernel_version = VOYAGER_MAILBOX_VERSION; +- voyager_SUS->kernel_flags = VOYAGER_OS_HAS_SYSINT; +- } +- +- /* clear the processor counts */ +- voyager_extended_vic_processors = 0; +- voyager_quad_processors = 0; +- +- printk("VOYAGER: beginning CAT bus probe\n"); +- /* set up the SuperSet Port Block which tells us where the +- * CAT communication port is */ +- sspb = inb(VOYAGER_SSPB_RELOCATION_PORT) * 0x100; +- VDEBUG(("VOYAGER DEBUG: sspb = 0x%x\n", sspb)); +- +- /* now find out if were 8 slot or normal */ +- if ((inb(VIC_PROC_WHO_AM_I) & EIGHT_SLOT_IDENTIFIER) +- == EIGHT_SLOT_IDENTIFIER) { +- voyager_8slot = 1; +- printk(KERN_NOTICE +- "Voyager: Eight slot 51xx configuration detected\n"); +- } +- +- for (i = VOYAGER_MIN_MODULE; i <= VOYAGER_MAX_MODULE; i++) { +- __u8 input; +- int asic; +- __u16 eprom_size; +- __u16 sp_offset; +- +- outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); +- outb(i, VOYAGER_CAT_CONFIG_PORT); +- +- /* check the presence of the module */ +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- outb(VOYAGER_CAT_IRCYC, CAT_CMD); +- outb(VOYAGER_CAT_HEADER, CAT_DATA); +- /* stream series of alternating 1's and 0's to stimulate +- * response */ +- outb(0xAA, CAT_DATA); +- input = inb(CAT_DATA); +- outb(VOYAGER_CAT_END, CAT_CMD); +- if (input != VOYAGER_CAT_HEADER) { +- continue; +- } +- CDEBUG(("VOYAGER DEBUG: found module id 0x%x, %s\n", i, +- cat_module_name(i))); +- *modpp = kmalloc(sizeof(voyager_module_t), GFP_KERNEL); /*&voyager_module_storage[cat_count++]; */ +- if (*modpp == NULL) { +- printk("**WARNING** kmalloc failure in cat_init\n"); +- continue; +- } +- memset(*modpp, 0, sizeof(voyager_module_t)); +- /* need temporary asic for cat_subread. 
It will be +- * filled in correctly later */ +- (*modpp)->asic = kmalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count]; */ +- if ((*modpp)->asic == NULL) { +- printk("**WARNING** kmalloc failure in cat_init\n"); +- continue; +- } +- memset((*modpp)->asic, 0, sizeof(voyager_asic_t)); +- (*modpp)->asic->asic_id = VOYAGER_CAT_ID; +- (*modpp)->asic->subaddr = VOYAGER_SUBADDR_HI; +- (*modpp)->module_addr = i; +- (*modpp)->scan_path_connected = 0; +- if (i == VOYAGER_PSI) { +- /* Exception leg for modules with no EEPROM */ +- printk("Module \"%s\"\n", cat_module_name(i)); +- continue; +- } +- +- CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET)); +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- cat_disconnect(*modpp, (*modpp)->asic); +- if (cat_subread(*modpp, (*modpp)->asic, +- VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size), +- &eprom_size)) { +- printk +- ("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", +- i); +- outb(VOYAGER_CAT_END, CAT_CMD); +- continue; +- } +- if (eprom_size > sizeof(eprom_buf)) { +- printk +- ("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. Need %d\n", +- i, eprom_size); +- outb(VOYAGER_CAT_END, CAT_CMD); +- continue; +- } +- outb(VOYAGER_CAT_END, CAT_CMD); +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, +- eprom_size)); +- if (cat_subread +- (*modpp, (*modpp)->asic, 0, eprom_size, eprom_buf)) { +- outb(VOYAGER_CAT_END, CAT_CMD); +- continue; +- } +- outb(VOYAGER_CAT_END, CAT_CMD); +- printk("Module \"%s\", version 0x%x, tracer 0x%x, asics %d\n", +- cat_module_name(i), eprom_hdr->version_id, +- *((__u32 *) eprom_hdr->tracer), eprom_hdr->num_asics); +- (*modpp)->ee_size = eprom_hdr->ee_size; +- (*modpp)->num_asics = eprom_hdr->num_asics; +- asicpp = &((*modpp)->asic); +- sp_offset = eprom_hdr->scan_path_offset; +- /* All we really care about are the Quad cards. 
We +- * identify them because they are in a processor slot +- * and have only four asics */ +- if ((i < 0x10 || (i >= 0x14 && i < 0x1c) || i > 0x1f)) { +- modpp = &((*modpp)->next); +- continue; +- } +- /* Now we know it's in a processor slot, does it have +- * a quad baseboard submodule */ +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODPRESENT, +- &num_submodules); +- /* lowest two bits, active low */ +- num_submodules = ~(0xfc | num_submodules); +- CDEBUG(("VOYAGER CAT: %d submodules present\n", +- num_submodules)); +- if (num_submodules == 0) { +- /* fill in the dyadic extended processors */ +- __u8 cpu = i & 0x07; +- +- printk("Module \"%s\": Dyadic Processor Card\n", +- cat_module_name(i)); +- voyager_extended_vic_processors |= (1 << cpu); +- cpu += 4; +- voyager_extended_vic_processors |= (1 << cpu); +- outb(VOYAGER_CAT_END, CAT_CMD); +- continue; +- } +- +- /* now we want to read the asics on the first submodule, +- * which should be the quad base board */ +- +- cat_read(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, &val); +- CDEBUG(("cat_init: SUBMODSELECT value = 0x%x\n", val)); +- val = (val & 0x7c) | VOYAGER_QUAD_BASEBOARD; +- cat_write(*modpp, (*modpp)->asic, VOYAGER_SUBMODSELECT, val); +- +- outb(VOYAGER_CAT_END, CAT_CMD); +- +- CDEBUG(("cat_init: Reading eeprom for module 0x%x at offset %d\n", i, VOYAGER_XSUM_END_OFFSET)); +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- cat_disconnect(*modpp, (*modpp)->asic); +- if (cat_subread(*modpp, (*modpp)->asic, +- VOYAGER_XSUM_END_OFFSET, sizeof(eprom_size), +- &eprom_size)) { +- printk +- ("**WARNING**: Voyager couldn't read EPROM size for module 0x%x\n", +- i); +- outb(VOYAGER_CAT_END, CAT_CMD); +- continue; +- } +- if (eprom_size > sizeof(eprom_buf)) { +- printk +- ("**WARNING**: Voyager insufficient size to read EPROM data, module 0x%x. 
Need %d\n", +- i, eprom_size); +- outb(VOYAGER_CAT_END, CAT_CMD); +- continue; +- } +- outb(VOYAGER_CAT_END, CAT_CMD); +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- CDEBUG(("cat_init: module 0x%x, eeprom_size %d\n", i, +- eprom_size)); +- if (cat_subread +- (*modpp, (*modpp)->asic, 0, eprom_size, eprom_buf)) { +- outb(VOYAGER_CAT_END, CAT_CMD); +- continue; +- } +- outb(VOYAGER_CAT_END, CAT_CMD); +- /* Now do everything for the QBB submodule 1 */ +- (*modpp)->ee_size = eprom_hdr->ee_size; +- (*modpp)->num_asics = eprom_hdr->num_asics; +- asicpp = &((*modpp)->asic); +- sp_offset = eprom_hdr->scan_path_offset; +- /* get rid of the dummy CAT asic and read the real one */ +- kfree((*modpp)->asic); +- for (asic = 0; asic < (*modpp)->num_asics; asic++) { +- int j; +- voyager_asic_t *asicp = *asicpp = kzalloc(sizeof(voyager_asic_t), GFP_KERNEL); /*&voyager_asic_storage[asic_count++]; */ +- voyager_sp_table_t *sp_table; +- voyager_at_t *asic_table; +- voyager_jtt_t *jtag_table; +- +- if (asicp == NULL) { +- printk +- ("**WARNING** kmalloc failure in cat_init\n"); +- continue; +- } +- asicpp = &(asicp->next); +- asicp->asic_location = asic; +- sp_table = +- (voyager_sp_table_t *) (eprom_buf + sp_offset); +- asicp->asic_id = sp_table->asic_id; +- asic_table = +- (voyager_at_t *) (eprom_buf + +- sp_table->asic_data_offset); +- for (j = 0; j < 4; j++) +- asicp->jtag_id[j] = asic_table->jtag_id[j]; +- jtag_table = +- (voyager_jtt_t *) (eprom_buf + +- asic_table->jtag_offset); +- asicp->ireg_length = jtag_table->ireg_len; +- asicp->bit_location = (*modpp)->inst_bits; +- (*modpp)->inst_bits += asicp->ireg_length; +- if (asicp->ireg_length > (*modpp)->largest_reg) +- (*modpp)->largest_reg = asicp->ireg_length; +- if (asicp->ireg_length < (*modpp)->smallest_reg || +- (*modpp)->smallest_reg == 0) +- (*modpp)->smallest_reg = asicp->ireg_length; +- CDEBUG(("asic 0x%x, ireg_length=%d, bit_location=%d\n", +- asicp->asic_id, asicp->ireg_length, +- asicp->bit_location)); +- if (asicp->asic_id == VOYAGER_QUAD_QABC) { +- CDEBUG(("VOYAGER CAT: QABC ASIC found\n")); +- qabc_asic = asicp; +- } +- sp_offset += sizeof(voyager_sp_table_t); +- } +- CDEBUG(("Module inst_bits = %d, largest_reg = %d, smallest_reg=%d\n", (*modpp)->inst_bits, (*modpp)->largest_reg, (*modpp)->smallest_reg)); +- /* OK, now we have the QUAD ASICs set up, use them. +- * we need to: +- * +- * 1. Find the Memory area for the Quad CPIs. +- * 2. Find the Extended VIC processor +- * 3. Configure a second extended VIC processor (This +- * cannot be done for the 51xx. 
+- * */ +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- cat_connect(*modpp, (*modpp)->asic); +- CDEBUG(("CAT CONNECTED!!\n")); +- cat_subread(*modpp, qabc_asic, 0, sizeof(qabc_data), qabc_data); +- qic_addr = qabc_data[5] << 8; +- qic_addr = (qic_addr | qabc_data[6]) << 8; +- qic_addr = (qic_addr | qabc_data[7]) << 8; +- printk +- ("Module \"%s\": Quad Processor Card; CPI 0x%lx, SET=0x%x\n", +- cat_module_name(i), qic_addr, qabc_data[8]); +-#if 0 /* plumbing fails---FIXME */ +- if ((qabc_data[8] & 0xf0) == 0) { +- /* FIXME: 32 way 8 CPU slot monster cannot be +- * plumbed this way---need to check for it */ +- +- printk("Plumbing second Extended Quad Processor\n"); +- /* second VIC line hardwired to Quad CPU 1 */ +- qabc_data[8] |= 0x20; +- cat_subwrite(*modpp, qabc_asic, 8, 1, &qabc_data[8]); +-#ifdef VOYAGER_CAT_DEBUG +- /* verify plumbing */ +- cat_subread(*modpp, qabc_asic, 8, 1, &qabc_data[8]); +- if ((qabc_data[8] & 0xf0) == 0) { +- CDEBUG(("PLUMBING FAILED: 0x%x\n", +- qabc_data[8])); +- } +-#endif +- } +-#endif +- +- { +- struct resource *res = +- kzalloc(sizeof(struct resource), GFP_KERNEL); +- res->name = kmalloc(128, GFP_KERNEL); +- sprintf((char *)res->name, "Voyager %s Quad CPI", +- cat_module_name(i)); +- res->start = qic_addr; +- res->end = qic_addr + 0x3ff; +- request_resource(&iomem_resource, res); +- } +- +- qic_addr = (unsigned long)ioremap_cache(qic_addr, 0x400); +- +- for (j = 0; j < 4; j++) { +- __u8 cpu; +- +- if (voyager_8slot) { +- /* 8 slot has a different mapping, +- * each slot has only one vic line, so +- * 1 cpu in each slot must be < 8 */ +- cpu = (i & 0x07) + j * 8; +- } else { +- cpu = (i & 0x03) + j * 4; +- } +- if ((qabc_data[8] & (1 << j))) { +- voyager_extended_vic_processors |= (1 << cpu); +- } +- if (qabc_data[8] & (1 << (j + 4))) { +- /* Second SET register plumbed: Quad +- * card has two VIC connected CPUs. 
+- * Secondary cannot be booted as a VIC +- * CPU */ +- voyager_extended_vic_processors |= (1 << cpu); +- voyager_allowed_boot_processors &= +- (~(1 << cpu)); +- } +- +- voyager_quad_processors |= (1 << cpu); +- voyager_quad_cpi_addr[cpu] = (struct voyager_qic_cpi *) +- (qic_addr + (j << 8)); +- CDEBUG(("CPU%d: CPI address 0x%lx\n", cpu, +- (unsigned long)voyager_quad_cpi_addr[cpu])); +- } +- outb(VOYAGER_CAT_END, CAT_CMD); +- +- *asicpp = NULL; +- modpp = &((*modpp)->next); +- } +- *modpp = NULL; +- printk +- ("CAT Bus Initialisation finished: extended procs 0x%x, quad procs 0x%x, allowed vic boot = 0x%x\n", +- voyager_extended_vic_processors, voyager_quad_processors, +- voyager_allowed_boot_processors); +- request_resource(&ioport_resource, &vic_res); +- if (voyager_quad_processors) +- request_resource(&ioport_resource, &qic_res); +- /* set up the front power switch */ +-} +- +-int voyager_cat_readb(__u8 module, __u8 asic, int reg) +-{ +- return 0; +-} +- +-static int cat_disconnect(voyager_module_t * modp, voyager_asic_t * asicp) +-{ +- __u8 val; +- int err = 0; +- +- if (!modp->scan_path_connected) +- return 0; +- if (asicp->asic_id != VOYAGER_CAT_ID) { +- CDEBUG(("cat_disconnect: ASIC is not CAT\n")); +- return 1; +- } +- err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val); +- if (err) { +- CDEBUG(("cat_disconnect: failed to read SCANPATH\n")); +- return err; +- } +- val &= VOYAGER_DISCONNECT_ASIC; +- err = cat_write(modp, asicp, VOYAGER_SCANPATH, val); +- if (err) { +- CDEBUG(("cat_disconnect: failed to write SCANPATH\n")); +- return err; +- } +- outb(VOYAGER_CAT_END, CAT_CMD); +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- modp->scan_path_connected = 0; +- +- return 0; +-} +- +-static int cat_connect(voyager_module_t * modp, voyager_asic_t * asicp) +-{ +- __u8 val; +- int err = 0; +- +- if (modp->scan_path_connected) +- return 0; +- if (asicp->asic_id != VOYAGER_CAT_ID) { +- CDEBUG(("cat_connect: ASIC is not CAT\n")); +- return 1; +- } +- +- err = cat_read(modp, asicp, VOYAGER_SCANPATH, &val); +- if (err) { +- CDEBUG(("cat_connect: failed to read SCANPATH\n")); +- return err; +- } +- val |= VOYAGER_CONNECT_ASIC; +- err = cat_write(modp, asicp, VOYAGER_SCANPATH, val); +- if (err) { +- CDEBUG(("cat_connect: failed to write SCANPATH\n")); +- return err; +- } +- outb(VOYAGER_CAT_END, CAT_CMD); +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- modp->scan_path_connected = 1; +- +- return 0; +-} +- +-void voyager_cat_power_off(void) +-{ +- /* Power the machine off by writing to the PSI over the CAT +- * bus */ +- __u8 data; +- voyager_module_t psi = { 0 }; +- voyager_asic_t psi_asic = { 0 }; +- +- psi.asic = &psi_asic; +- psi.asic->asic_id = VOYAGER_CAT_ID; +- psi.asic->subaddr = VOYAGER_SUBADDR_HI; +- psi.module_addr = VOYAGER_PSI; +- psi.scan_path_connected = 0; +- +- outb(VOYAGER_CAT_END, CAT_CMD); +- /* Connect the PSI to the CAT Bus */ +- outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); +- outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- cat_disconnect(&psi, &psi_asic); +- /* Read the status */ +- cat_subread(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); +- outb(VOYAGER_CAT_END, CAT_CMD); +- CDEBUG(("PSI STATUS 0x%x\n", data)); +- /* These two writes are power off prep and perform */ +- data = PSI_CLEAR; +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, &data); +- outb(VOYAGER_CAT_END, CAT_CMD); +- data = PSI_POWER_DOWN; +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, 1, 
&data); +- outb(VOYAGER_CAT_END, CAT_CMD); +-} +- +-struct voyager_status voyager_status = { 0 }; +- +-void voyager_cat_psi(__u8 cmd, __u16 reg, __u8 * data) +-{ +- voyager_module_t psi = { 0 }; +- voyager_asic_t psi_asic = { 0 }; +- +- psi.asic = &psi_asic; +- psi.asic->asic_id = VOYAGER_CAT_ID; +- psi.asic->subaddr = VOYAGER_SUBADDR_HI; +- psi.module_addr = VOYAGER_PSI; +- psi.scan_path_connected = 0; +- +- outb(VOYAGER_CAT_END, CAT_CMD); +- /* Connect the PSI to the CAT Bus */ +- outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); +- outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- cat_disconnect(&psi, &psi_asic); +- switch (cmd) { +- case VOYAGER_PSI_READ: +- cat_read(&psi, &psi_asic, reg, data); +- break; +- case VOYAGER_PSI_WRITE: +- cat_write(&psi, &psi_asic, reg, *data); +- break; +- case VOYAGER_PSI_SUBREAD: +- cat_subread(&psi, &psi_asic, reg, 1, data); +- break; +- case VOYAGER_PSI_SUBWRITE: +- cat_subwrite(&psi, &psi_asic, reg, 1, data); +- break; +- default: +- printk(KERN_ERR "Voyager PSI, unrecognised command %d\n", cmd); +- break; +- } +- outb(VOYAGER_CAT_END, CAT_CMD); +-} +- +-void voyager_cat_do_common_interrupt(void) +-{ +- /* This is caused either by a memory parity error or something +- * in the PSI */ +- __u8 data; +- voyager_module_t psi = { 0 }; +- voyager_asic_t psi_asic = { 0 }; +- struct voyager_psi psi_reg; +- int i; +- re_read: +- psi.asic = &psi_asic; +- psi.asic->asic_id = VOYAGER_CAT_ID; +- psi.asic->subaddr = VOYAGER_SUBADDR_HI; +- psi.module_addr = VOYAGER_PSI; +- psi.scan_path_connected = 0; +- +- outb(VOYAGER_CAT_END, CAT_CMD); +- /* Connect the PSI to the CAT Bus */ +- outb(VOYAGER_CAT_DESELECT, VOYAGER_CAT_CONFIG_PORT); +- outb(VOYAGER_PSI, VOYAGER_CAT_CONFIG_PORT); +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- cat_disconnect(&psi, &psi_asic); +- /* Read the status. 
NOTE: Need to read *all* the PSI regs here +- * otherwise the cmn int will be reasserted */ +- for (i = 0; i < sizeof(psi_reg.regs); i++) { +- cat_read(&psi, &psi_asic, i, &((__u8 *) & psi_reg.regs)[i]); +- } +- outb(VOYAGER_CAT_END, CAT_CMD); +- if ((psi_reg.regs.checkbit & 0x02) == 0) { +- psi_reg.regs.checkbit |= 0x02; +- cat_write(&psi, &psi_asic, 5, psi_reg.regs.checkbit); +- printk("VOYAGER RE-READ PSI\n"); +- goto re_read; +- } +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- for (i = 0; i < sizeof(psi_reg.subregs); i++) { +- /* This looks strange, but the PSI doesn't do auto increment +- * correctly */ +- cat_subread(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG + i, +- 1, &((__u8 *) & psi_reg.subregs)[i]); +- } +- outb(VOYAGER_CAT_END, CAT_CMD); +-#ifdef VOYAGER_CAT_DEBUG +- printk("VOYAGER PSI: "); +- for (i = 0; i < sizeof(psi_reg.regs); i++) +- printk("%02x ", ((__u8 *) & psi_reg.regs)[i]); +- printk("\n "); +- for (i = 0; i < sizeof(psi_reg.subregs); i++) +- printk("%02x ", ((__u8 *) & psi_reg.subregs)[i]); +- printk("\n"); +-#endif +- if (psi_reg.regs.intstatus & PSI_MON) { +- /* switch off or power fail */ +- +- if (psi_reg.subregs.supply & PSI_SWITCH_OFF) { +- if (voyager_status.switch_off) { +- printk(KERN_ERR +- "Voyager front panel switch turned off again---Immediate power off!\n"); +- voyager_cat_power_off(); +- /* not reached */ +- } else { +- printk(KERN_ERR +- "Voyager front panel switch turned off\n"); +- voyager_status.switch_off = 1; +- voyager_status.request_from_kernel = 1; +- wake_up_process(voyager_thread); +- } +- /* Tell the hardware we're taking care of the +- * shutdown, otherwise it will power the box off +- * within 3 seconds of the switch being pressed and, +- * which is much more important to us, continue to +- * assert the common interrupt */ +- data = PSI_CLR_SWITCH_OFF; +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_SUPPLY_REG, +- 1, &data); +- outb(VOYAGER_CAT_END, CAT_CMD); +- } else { +- +- VDEBUG(("Voyager ac fail reg 0x%x\n", +- psi_reg.subregs.ACfail)); +- if ((psi_reg.subregs.ACfail & AC_FAIL_STAT_CHANGE) == 0) { +- /* No further update */ +- return; +- } +-#if 0 +- /* Don't bother trying to find out who failed. +- * FIXME: This probably makes the code incorrect on +- * anything other than a 345x */ +- for (i = 0; i < 5; i++) { +- if (psi_reg.subregs.ACfail & (1 << i)) { +- break; +- } +- } +- printk(KERN_NOTICE "AC FAIL IN SUPPLY %d\n", i); +-#endif +- /* DON'T do this: it shuts down the AC PSI +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- data = PSI_MASK_MASK | i; +- cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_MASK, +- 1, &data); +- outb(VOYAGER_CAT_END, CAT_CMD); +- */ +- printk(KERN_ERR "Voyager AC power failure\n"); +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- data = PSI_COLD_START; +- cat_subwrite(&psi, &psi_asic, VOYAGER_PSI_GENERAL_REG, +- 1, &data); +- outb(VOYAGER_CAT_END, CAT_CMD); +- voyager_status.power_fail = 1; +- voyager_status.request_from_kernel = 1; +- wake_up_process(voyager_thread); +- } +- +- } else if (psi_reg.regs.intstatus & PSI_FAULT) { +- /* Major fault! 
*/ +- printk(KERN_ERR +- "Voyager PSI Detected major fault, immediate power off!\n"); +- voyager_cat_power_off(); +- /* not reached */ +- } else if (psi_reg.regs.intstatus & (PSI_DC_FAIL | PSI_ALARM +- | PSI_CURRENT | PSI_DVM +- | PSI_PSCFAULT | PSI_STAT_CHG)) { +- /* other psi fault */ +- +- printk(KERN_WARNING "Voyager PSI status 0x%x\n", data); +- /* clear the PSI fault */ +- outb(VOYAGER_CAT_RUN, CAT_CMD); +- cat_write(&psi, &psi_asic, VOYAGER_PSI_STATUS_REG, 0); +- outb(VOYAGER_CAT_END, CAT_CMD); +- } +-} +Index: linux-2.6-tip/arch/x86/mach-voyager/voyager_smp.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-voyager/voyager_smp.c ++++ /dev/null +@@ -1,1807 +0,0 @@ +-/* -*- mode: c; c-basic-offset: 8 -*- */ +- +-/* Copyright (C) 1999,2001 +- * +- * Author: J.E.J.Bottomley@HansenPartnership.com +- * +- * This file provides all the same external entries as smp.c but uses +- * the voyager hal to provide the functionality +- */ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-/* TLB state -- visible externally, indexed physically */ +-DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0 }; +- +-/* CPU IRQ affinity -- set to all ones initially */ +-static unsigned long cpu_irq_affinity[NR_CPUS] __cacheline_aligned = +- {[0 ... NR_CPUS-1] = ~0UL }; +- +-/* per CPU data structure (for /proc/cpuinfo et al), visible externally +- * indexed physically */ +-DEFINE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); +-EXPORT_PER_CPU_SYMBOL(cpu_info); +- +-/* physical ID of the CPU used to boot the system */ +-unsigned char boot_cpu_id; +- +-/* The memory line addresses for the Quad CPIs */ +-struct voyager_qic_cpi *voyager_quad_cpi_addr[NR_CPUS] __cacheline_aligned; +- +-/* The masks for the Extended VIC processors, filled in by cat_init */ +-__u32 voyager_extended_vic_processors = 0; +- +-/* Masks for the extended Quad processors which cannot be VIC booted */ +-__u32 voyager_allowed_boot_processors = 0; +- +-/* The mask for the Quad Processors (both extended and non-extended) */ +-__u32 voyager_quad_processors = 0; +- +-/* Total count of live CPUs, used in process.c to display +- * the CPU information and in irq.c for the per CPU irq +- * activity count. 
Finally exported by i386_ksyms.c */ +-static int voyager_extended_cpus = 1; +- +-/* Used for the invalidate map that's also checked in the spinlock */ +-static volatile unsigned long smp_invalidate_needed; +- +-/* Bitmask of CPUs present in the system - exported by i386_syms.c, used +- * by scheduler but indexed physically */ +-static cpumask_t voyager_phys_cpu_present_map = CPU_MASK_NONE; +- +-/* The internal functions */ +-static void send_CPI(__u32 cpuset, __u8 cpi); +-static void ack_CPI(__u8 cpi); +-static int ack_QIC_CPI(__u8 cpi); +-static void ack_special_QIC_CPI(__u8 cpi); +-static void ack_VIC_CPI(__u8 cpi); +-static void send_CPI_allbutself(__u8 cpi); +-static void mask_vic_irq(unsigned int irq); +-static void unmask_vic_irq(unsigned int irq); +-static unsigned int startup_vic_irq(unsigned int irq); +-static void enable_local_vic_irq(unsigned int irq); +-static void disable_local_vic_irq(unsigned int irq); +-static void before_handle_vic_irq(unsigned int irq); +-static void after_handle_vic_irq(unsigned int irq); +-static void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask); +-static void ack_vic_irq(unsigned int irq); +-static void vic_enable_cpi(void); +-static void do_boot_cpu(__u8 cpuid); +-static void do_quad_bootstrap(void); +-static void initialize_secondary(void); +- +-int hard_smp_processor_id(void); +-int safe_smp_processor_id(void); +- +-/* Inline functions */ +-static inline void send_one_QIC_CPI(__u8 cpu, __u8 cpi) +-{ +- voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi = +- (smp_processor_id() << 16) + cpi; +-} +- +-static inline void send_QIC_CPI(__u32 cpuset, __u8 cpi) +-{ +- int cpu; +- +- for_each_online_cpu(cpu) { +- if (cpuset & (1 << cpu)) { +-#ifdef VOYAGER_DEBUG +- if (!cpu_online(cpu)) +- VDEBUG(("CPU%d sending cpi %d to CPU%d not in " +- "cpu_online_map\n", +- hard_smp_processor_id(), cpi, cpu)); +-#endif +- send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET); +- } +- } +-} +- +-static inline void wrapper_smp_local_timer_interrupt(void) +-{ +- irq_enter(); +- smp_local_timer_interrupt(); +- irq_exit(); +-} +- +-static inline void send_one_CPI(__u8 cpu, __u8 cpi) +-{ +- if (voyager_quad_processors & (1 << cpu)) +- send_one_QIC_CPI(cpu, cpi - QIC_CPI_OFFSET); +- else +- send_CPI(1 << cpu, cpi); +-} +- +-static inline void send_CPI_allbutself(__u8 cpi) +-{ +- __u8 cpu = smp_processor_id(); +- __u32 mask = cpus_addr(cpu_online_map)[0] & ~(1 << cpu); +- send_CPI(mask, cpi); +-} +- +-static inline int is_cpu_quad(void) +-{ +- __u8 cpumask = inb(VIC_PROC_WHO_AM_I); +- return ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER); +-} +- +-static inline int is_cpu_extended(void) +-{ +- __u8 cpu = hard_smp_processor_id(); +- +- return (voyager_extended_vic_processors & (1 << cpu)); +-} +- +-static inline int is_cpu_vic_boot(void) +-{ +- __u8 cpu = hard_smp_processor_id(); +- +- return (voyager_extended_vic_processors +- & voyager_allowed_boot_processors & (1 << cpu)); +-} +- +-static inline void ack_CPI(__u8 cpi) +-{ +- switch (cpi) { +- case VIC_CPU_BOOT_CPI: +- if (is_cpu_quad() && !is_cpu_vic_boot()) +- ack_QIC_CPI(cpi); +- else +- ack_VIC_CPI(cpi); +- break; +- case VIC_SYS_INT: +- case VIC_CMN_INT: +- /* These are slightly strange. 
Even on the Quad card, +- * They are vectored as VIC CPIs */ +- if (is_cpu_quad()) +- ack_special_QIC_CPI(cpi); +- else +- ack_VIC_CPI(cpi); +- break; +- default: +- printk("VOYAGER ERROR: CPI%d is in common CPI code\n", cpi); +- break; +- } +-} +- +-/* local variables */ +- +-/* The VIC IRQ descriptors -- these look almost identical to the +- * 8259 IRQs except that masks and things must be kept per processor +- */ +-static struct irq_chip vic_chip = { +- .name = "VIC", +- .startup = startup_vic_irq, +- .mask = mask_vic_irq, +- .unmask = unmask_vic_irq, +- .set_affinity = set_vic_irq_affinity, +-}; +- +-/* used to count up as CPUs are brought on line (starts at 0) */ +-static int cpucount = 0; +- +-/* The per cpu profile stuff - used in smp_local_timer_interrupt */ +-static DEFINE_PER_CPU(int, prof_multiplier) = 1; +-static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; +-static DEFINE_PER_CPU(int, prof_counter) = 1; +- +-/* the map used to check if a CPU has booted */ +-static __u32 cpu_booted_map; +- +-/* the synchronize flag used to hold all secondary CPUs spinning in +- * a tight loop until the boot sequence is ready for them */ +-static cpumask_t smp_commenced_mask = CPU_MASK_NONE; +- +-/* This is for the new dynamic CPU boot code */ +- +-/* The per processor IRQ masks (these are usually kept in sync) */ +-static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned; +- +-/* the list of IRQs to be enabled by the VIC_ENABLE_IRQ_CPI */ +-static __u16 vic_irq_enable_mask[NR_CPUS] __cacheline_aligned = { 0 }; +- +-/* Lock for enable/disable of VIC interrupts */ +-static __cacheline_aligned DEFINE_SPINLOCK(vic_irq_lock); +- +-/* The boot processor is correctly set up in PC mode when it +- * comes up, but the secondaries need their master/slave 8259 +- * pairs initializing correctly */ +- +-/* Interrupt counters (per cpu) and total - used to try to +- * even up the interrupt handling routines */ +-static long vic_intr_total = 0; +-static long vic_intr_count[NR_CPUS] __cacheline_aligned = { 0 }; +-static unsigned long vic_tick[NR_CPUS] __cacheline_aligned = { 0 }; +- +-/* Since we can only use CPI0, we fake all the other CPIs */ +-static unsigned long vic_cpi_mailbox[NR_CPUS] __cacheline_aligned; +- +-/* debugging routine to read the isr of the cpu's pic */ +-static inline __u16 vic_read_isr(void) +-{ +- __u16 isr; +- +- outb(0x0b, 0xa0); +- isr = inb(0xa0) << 8; +- outb(0x0b, 0x20); +- isr |= inb(0x20); +- +- return isr; +-} +- +-static __init void qic_setup(void) +-{ +- if (!is_cpu_quad()) { +- /* not a quad, no setup */ +- return; +- } +- outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0); +- outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1); +- +- if (is_cpu_extended()) { +- /* the QIC duplicate of the VIC base register */ +- outb(VIC_DEFAULT_CPI_BASE, QIC_VIC_CPI_BASE_REGISTER); +- outb(QIC_DEFAULT_CPI_BASE, QIC_CPI_BASE_REGISTER); +- +- /* FIXME: should set up the QIC timer and memory parity +- * error vectors here */ +- } +-} +- +-static __init void vic_setup_pic(void) +-{ +- outb(1, VIC_REDIRECT_REGISTER_1); +- /* clear the claim registers for dynamic routing */ +- outb(0, VIC_CLAIM_REGISTER_0); +- outb(0, VIC_CLAIM_REGISTER_1); +- +- outb(0, VIC_PRIORITY_REGISTER); +- /* Set the Primary and Secondary Microchannel vector +- * bases to be the same as the ordinary interrupts +- * +- * FIXME: This would be more efficient using separate +- * vectors. 
*/ +- outb(FIRST_EXTERNAL_VECTOR, VIC_PRIMARY_MC_BASE); +- outb(FIRST_EXTERNAL_VECTOR, VIC_SECONDARY_MC_BASE); +- /* Now initiallise the master PIC belonging to this CPU by +- * sending the four ICWs */ +- +- /* ICW1: level triggered, ICW4 needed */ +- outb(0x19, 0x20); +- +- /* ICW2: vector base */ +- outb(FIRST_EXTERNAL_VECTOR, 0x21); +- +- /* ICW3: slave at line 2 */ +- outb(0x04, 0x21); +- +- /* ICW4: 8086 mode */ +- outb(0x01, 0x21); +- +- /* now the same for the slave PIC */ +- +- /* ICW1: level trigger, ICW4 needed */ +- outb(0x19, 0xA0); +- +- /* ICW2: slave vector base */ +- outb(FIRST_EXTERNAL_VECTOR + 8, 0xA1); +- +- /* ICW3: slave ID */ +- outb(0x02, 0xA1); +- +- /* ICW4: 8086 mode */ +- outb(0x01, 0xA1); +-} +- +-static void do_quad_bootstrap(void) +-{ +- if (is_cpu_quad() && is_cpu_vic_boot()) { +- int i; +- unsigned long flags; +- __u8 cpuid = hard_smp_processor_id(); +- +- local_irq_save(flags); +- +- for (i = 0; i < 4; i++) { +- /* FIXME: this would be >>3 &0x7 on the 32 way */ +- if (((cpuid >> 2) & 0x03) == i) +- /* don't lower our own mask! */ +- continue; +- +- /* masquerade as local Quad CPU */ +- outb(QIC_CPUID_ENABLE | i, QIC_PROCESSOR_ID); +- /* enable the startup CPI */ +- outb(QIC_BOOT_CPI_MASK, QIC_MASK_REGISTER1); +- /* restore cpu id */ +- outb(0, QIC_PROCESSOR_ID); +- } +- local_irq_restore(flags); +- } +-} +- +-void prefill_possible_map(void) +-{ +- /* This is empty on voyager because we need a much +- * earlier detection which is done in find_smp_config */ +-} +- +-/* Set up all the basic stuff: read the SMP config and make all the +- * SMP information reflect only the boot cpu. All others will be +- * brought on-line later. */ +-void __init find_smp_config(void) +-{ +- int i; +- +- boot_cpu_id = hard_smp_processor_id(); +- +- printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id); +- +- /* initialize the CPU structures (moved from smp_boot_cpus) */ +- for (i = 0; i < nr_cpu_ids; i++) +- cpu_irq_affinity[i] = ~0; +- cpu_online_map = cpumask_of_cpu(boot_cpu_id); +- +- /* The boot CPU must be extended */ +- voyager_extended_vic_processors = 1 << boot_cpu_id; +- /* initially, all of the first 8 CPUs can boot */ +- voyager_allowed_boot_processors = 0xff; +- /* set up everything for just this CPU, we can alter +- * this as we start the other CPUs later */ +- /* now get the CPU disposition from the extended CMOS */ +- cpus_addr(voyager_phys_cpu_present_map)[0] = +- voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK); +- cpus_addr(voyager_phys_cpu_present_map)[0] |= +- voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + 1) << 8; +- cpus_addr(voyager_phys_cpu_present_map)[0] |= +- voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + +- 2) << 16; +- cpus_addr(voyager_phys_cpu_present_map)[0] |= +- voyager_extended_cmos_read(VOYAGER_PROCESSOR_PRESENT_MASK + +- 3) << 24; +- init_cpu_possible(&voyager_phys_cpu_present_map); +- printk("VOYAGER SMP: voyager_phys_cpu_present_map = 0x%lx\n", +- cpus_addr(voyager_phys_cpu_present_map)[0]); +- /* Here we set up the VIC to enable SMP */ +- /* enable the CPIs by writing the base vector to their register */ +- outb(VIC_DEFAULT_CPI_BASE, VIC_CPI_BASE_REGISTER); +- outb(1, VIC_REDIRECT_REGISTER_1); +- /* set the claim registers for static routing --- Boot CPU gets +- * all interrupts untill all other CPUs started */ +- outb(0xff, VIC_CLAIM_REGISTER_0); +- outb(0xff, VIC_CLAIM_REGISTER_1); +- /* Set the Primary and Secondary Microchannel vector +- * bases to be the same as the ordinary interrupts +- * +- * 
FIXME: This would be more efficient using separate +- * vectors. */ +- outb(FIRST_EXTERNAL_VECTOR, VIC_PRIMARY_MC_BASE); +- outb(FIRST_EXTERNAL_VECTOR, VIC_SECONDARY_MC_BASE); +- +- /* Finally tell the firmware that we're driving */ +- outb(inb(VOYAGER_SUS_IN_CONTROL_PORT) | VOYAGER_IN_CONTROL_FLAG, +- VOYAGER_SUS_IN_CONTROL_PORT); +- +- current_thread_info()->cpu = boot_cpu_id; +- x86_write_percpu(cpu_number, boot_cpu_id); +-} +- +-/* +- * The bootstrap kernel entry code has set these up. Save them +- * for a given CPU, id is physical */ +-void __init smp_store_cpu_info(int id) +-{ +- struct cpuinfo_x86 *c = &cpu_data(id); +- +- *c = boot_cpu_data; +- c->cpu_index = id; +- +- identify_secondary_cpu(c); +-} +- +-/* Routine initially called when a non-boot CPU is brought online */ +-static void __init start_secondary(void *unused) +-{ +- __u8 cpuid = hard_smp_processor_id(); +- +- cpu_init(); +- +- /* OK, we're in the routine */ +- ack_CPI(VIC_CPU_BOOT_CPI); +- +- /* setup the 8259 master slave pair belonging to this CPU --- +- * we won't actually receive any until the boot CPU +- * relinquishes it's static routing mask */ +- vic_setup_pic(); +- +- qic_setup(); +- +- if (is_cpu_quad() && !is_cpu_vic_boot()) { +- /* clear the boot CPI */ +- __u8 dummy; +- +- dummy = +- voyager_quad_cpi_addr[cpuid]->qic_cpi[VIC_CPU_BOOT_CPI].cpi; +- printk("read dummy %d\n", dummy); +- } +- +- /* lower the mask to receive CPIs */ +- vic_enable_cpi(); +- +- VDEBUG(("VOYAGER SMP: CPU%d, stack at about %p\n", cpuid, &cpuid)); +- +- notify_cpu_starting(cpuid); +- +- /* enable interrupts */ +- local_irq_enable(); +- +- /* get our bogomips */ +- calibrate_delay(); +- +- /* save our processor parameters */ +- smp_store_cpu_info(cpuid); +- +- /* if we're a quad, we may need to bootstrap other CPUs */ +- do_quad_bootstrap(); +- +- /* FIXME: this is rather a poor hack to prevent the CPU +- * activating softirqs while it's supposed to be waiting for +- * permission to proceed. Without this, the new per CPU stuff +- * in the softirqs will fail */ +- local_irq_disable(); +- cpu_set(cpuid, cpu_callin_map); +- +- /* signal that we're done */ +- cpu_booted_map = 1; +- +- while (!cpu_isset(cpuid, smp_commenced_mask)) +- rep_nop(); +- local_irq_enable(); +- +- local_flush_tlb(); +- +- cpu_set(cpuid, cpu_online_map); +- wmb(); +- cpu_idle(); +-} +- +-/* Routine to kick start the given CPU and wait for it to report ready +- * (or timeout in startup). When this routine returns, the requested +- * CPU is either fully running and configured or known to be dead. 
+- * +- * We call this routine sequentially 1 CPU at a time, so no need for +- * locking */ +- +-static void __init do_boot_cpu(__u8 cpu) +-{ +- struct task_struct *idle; +- int timeout; +- unsigned long flags; +- int quad_boot = (1 << cpu) & voyager_quad_processors +- & ~(voyager_extended_vic_processors +- & voyager_allowed_boot_processors); +- +- /* This is the format of the CPI IDT gate (in real mode) which +- * we're hijacking to boot the CPU */ +- union IDTFormat { +- struct seg { +- __u16 Offset; +- __u16 Segment; +- } idt; +- __u32 val; +- } hijack_source; +- +- __u32 *hijack_vector; +- __u32 start_phys_address = setup_trampoline(); +- +- /* There's a clever trick to this: The linux trampoline is +- * compiled to begin at absolute location zero, so make the +- * address zero but have the data segment selector compensate +- * for the actual address */ +- hijack_source.idt.Offset = start_phys_address & 0x000F; +- hijack_source.idt.Segment = (start_phys_address >> 4) & 0xFFFF; +- +- cpucount++; +- alternatives_smp_switch(1); +- +- idle = fork_idle(cpu); +- if (IS_ERR(idle)) +- panic("failed fork for CPU%d", cpu); +- idle->thread.ip = (unsigned long)start_secondary; +- /* init_tasks (in sched.c) is indexed logically */ +- stack_start.sp = (void *)idle->thread.sp; +- +- init_gdt(cpu); +- per_cpu(current_task, cpu) = idle; +- early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); +- irq_ctx_init(cpu); +- +- /* Note: Don't modify initial ss override */ +- VDEBUG(("VOYAGER SMP: Booting CPU%d at 0x%lx[%x:%x], stack %p\n", cpu, +- (unsigned long)hijack_source.val, hijack_source.idt.Segment, +- hijack_source.idt.Offset, stack_start.sp)); +- +- /* init lowmem identity mapping */ +- clone_pgd_range(swapper_pg_dir, swapper_pg_dir + KERNEL_PGD_BOUNDARY, +- min_t(unsigned long, KERNEL_PGD_PTRS, KERNEL_PGD_BOUNDARY)); +- flush_tlb_all(); +- +- if (quad_boot) { +- printk("CPU %d: non extended Quad boot\n", cpu); +- hijack_vector = +- (__u32 *) +- phys_to_virt((VIC_CPU_BOOT_CPI + QIC_DEFAULT_CPI_BASE) * 4); +- *hijack_vector = hijack_source.val; +- } else { +- printk("CPU%d: extended VIC boot\n", cpu); +- hijack_vector = +- (__u32 *) +- phys_to_virt((VIC_CPU_BOOT_CPI + VIC_DEFAULT_CPI_BASE) * 4); +- *hijack_vector = hijack_source.val; +- /* VIC errata, may also receive interrupt at this address */ +- hijack_vector = +- (__u32 *) +- phys_to_virt((VIC_CPU_BOOT_ERRATA_CPI + +- VIC_DEFAULT_CPI_BASE) * 4); +- *hijack_vector = hijack_source.val; +- } +- /* All non-boot CPUs start with interrupts fully masked. Need +- * to lower the mask of the CPI we're about to send. 
We do +- * this in the VIC by masquerading as the processor we're +- * about to boot and lowering its interrupt mask */ +- local_irq_save(flags); +- if (quad_boot) { +- send_one_QIC_CPI(cpu, VIC_CPU_BOOT_CPI); +- } else { +- outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID); +- /* here we're altering registers belonging to `cpu' */ +- +- outb(VIC_BOOT_INTERRUPT_MASK, 0x21); +- /* now go back to our original identity */ +- outb(boot_cpu_id, VIC_PROCESSOR_ID); +- +- /* and boot the CPU */ +- +- send_CPI((1 << cpu), VIC_CPU_BOOT_CPI); +- } +- cpu_booted_map = 0; +- local_irq_restore(flags); +- +- /* now wait for it to become ready (or timeout) */ +- for (timeout = 0; timeout < 50000; timeout++) { +- if (cpu_booted_map) +- break; +- udelay(100); +- } +- /* reset the page table */ +- zap_low_mappings(); +- +- if (cpu_booted_map) { +- VDEBUG(("CPU%d: Booted successfully, back in CPU %d\n", +- cpu, smp_processor_id())); +- +- printk("CPU%d: ", cpu); +- print_cpu_info(&cpu_data(cpu)); +- wmb(); +- cpu_set(cpu, cpu_callout_map); +- cpu_set(cpu, cpu_present_map); +- } else { +- printk("CPU%d FAILED TO BOOT: ", cpu); +- if (* +- ((volatile unsigned char *)phys_to_virt(start_phys_address)) +- == 0xA5) +- printk("Stuck.\n"); +- else +- printk("Not responding.\n"); +- +- cpucount--; +- } +-} +- +-void __init smp_boot_cpus(void) +-{ +- int i; +- +- /* CAT BUS initialisation must be done after the memory */ +- /* FIXME: The L4 has a catbus too, it just needs to be +- * accessed in a totally different way */ +- if (voyager_level == 5) { +- voyager_cat_init(); +- +- /* now that the cat has probed the Voyager System Bus, sanity +- * check the cpu map */ +- if (((voyager_quad_processors | voyager_extended_vic_processors) +- & cpus_addr(voyager_phys_cpu_present_map)[0]) != +- cpus_addr(voyager_phys_cpu_present_map)[0]) { +- /* should panic */ +- printk("\n\n***WARNING*** " +- "Sanity check of CPU present map FAILED\n"); +- } +- } else if (voyager_level == 4) +- voyager_extended_vic_processors = +- cpus_addr(voyager_phys_cpu_present_map)[0]; +- +- /* this sets up the idle task to run on the current cpu */ +- voyager_extended_cpus = 1; +- /* Remove the global_irq_holder setting, it triggers a BUG() on +- * schedule at the moment */ +- //global_irq_holder = boot_cpu_id; +- +- /* FIXME: Need to do something about this but currently only works +- * on CPUs with a tsc which none of mine have. +- smp_tune_scheduling(); +- */ +- smp_store_cpu_info(boot_cpu_id); +- /* setup the jump vector */ +- initial_code = (unsigned long)initialize_secondary; +- printk("CPU%d: ", boot_cpu_id); +- print_cpu_info(&cpu_data(boot_cpu_id)); +- +- if (is_cpu_quad()) { +- /* booting on a Quad CPU */ +- printk("VOYAGER SMP: Boot CPU is Quad\n"); +- qic_setup(); +- do_quad_bootstrap(); +- } +- +- /* enable our own CPIs */ +- vic_enable_cpi(); +- +- cpu_set(boot_cpu_id, cpu_online_map); +- cpu_set(boot_cpu_id, cpu_callout_map); +- +- /* loop over all the extended VIC CPUs and boot them. 
The +- * Quad CPUs must be bootstrapped by their extended VIC cpu */ +- for (i = 0; i < nr_cpu_ids; i++) { +- if (i == boot_cpu_id || !cpu_isset(i, voyager_phys_cpu_present_map)) +- continue; +- do_boot_cpu(i); +- /* This udelay seems to be needed for the Quad boots +- * don't remove unless you know what you're doing */ +- udelay(1000); +- } +- /* we could compute the total bogomips here, but why bother?, +- * Code added from smpboot.c */ +- { +- unsigned long bogosum = 0; +- +- for_each_online_cpu(i) +- bogosum += cpu_data(i).loops_per_jiffy; +- printk(KERN_INFO "Total of %d processors activated " +- "(%lu.%02lu BogoMIPS).\n", +- cpucount + 1, bogosum / (500000 / HZ), +- (bogosum / (5000 / HZ)) % 100); +- } +- voyager_extended_cpus = hweight32(voyager_extended_vic_processors); +- printk("VOYAGER: Extended (interrupt handling CPUs): " +- "%d, non-extended: %d\n", voyager_extended_cpus, +- num_booting_cpus() - voyager_extended_cpus); +- /* that's it, switch to symmetric mode */ +- outb(0, VIC_PRIORITY_REGISTER); +- outb(0, VIC_CLAIM_REGISTER_0); +- outb(0, VIC_CLAIM_REGISTER_1); +- +- VDEBUG(("VOYAGER SMP: Booted with %d CPUs\n", num_booting_cpus())); +-} +- +-/* Reload the secondary CPUs task structure (this function does not +- * return ) */ +-static void __init initialize_secondary(void) +-{ +-#if 0 +- // AC kernels only +- set_current(hard_get_current()); +-#endif +- +- /* +- * We don't actually need to load the full TSS, +- * basically just the stack pointer and the eip. +- */ +- +- asm volatile ("movl %0,%%esp\n\t" +- "jmp *%1"::"r" (current->thread.sp), +- "r"(current->thread.ip)); +-} +- +-/* handle a Voyager SYS_INT -- If we don't, the base board will +- * panic the system. +- * +- * System interrupts occur because some problem was detected on the +- * various busses. To find out what you have to probe all the +- * hardware via the CAT bus. FIXME: At the moment we do nothing. */ +-void smp_vic_sys_interrupt(struct pt_regs *regs) +-{ +- ack_CPI(VIC_SYS_INT); +- printk("Voyager SYSTEM INTERRUPT\n"); +-} +- +-/* Handle a voyager CMN_INT; These interrupts occur either because of +- * a system status change or because a single bit memory error +- * occurred. FIXME: At the moment, ignore all this. */ +-void smp_vic_cmn_interrupt(struct pt_regs *regs) +-{ +- static __u8 in_cmn_int = 0; +- static DEFINE_SPINLOCK(cmn_int_lock); +- +- /* common ints are broadcast, so make sure we only do this once */ +- _raw_spin_lock(&cmn_int_lock); +- if (in_cmn_int) +- goto unlock_end; +- +- in_cmn_int++; +- _raw_spin_unlock(&cmn_int_lock); +- +- VDEBUG(("Voyager COMMON INTERRUPT\n")); +- +- if (voyager_level == 5) +- voyager_cat_do_common_interrupt(); +- +- _raw_spin_lock(&cmn_int_lock); +- in_cmn_int = 0; +- unlock_end: +- _raw_spin_unlock(&cmn_int_lock); +- ack_CPI(VIC_CMN_INT); +-} +- +-/* +- * Reschedule call back. Nothing to do, all the work is done +- * automatically when we return from the interrupt. */ +-static void smp_reschedule_interrupt(void) +-{ +- /* do nothing */ +-} +- +-static struct mm_struct *flush_mm; +-static unsigned long flush_va; +-static DEFINE_SPINLOCK(tlbstate_lock); +- +-/* +- * We cannot call mmdrop() because we are in interrupt context, +- * instead update mm->cpu_vm_mask. +- * +- * We need to reload %cr3 since the page tables may be going +- * away from under us.. 
+- */ +-static inline void voyager_leave_mm(unsigned long cpu) +-{ +- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) +- BUG(); +- cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); +- load_cr3(swapper_pg_dir); +-} +- +-/* +- * Invalidate call-back +- */ +-static void smp_invalidate_interrupt(void) +-{ +- __u8 cpu = smp_processor_id(); +- +- if (!test_bit(cpu, &smp_invalidate_needed)) +- return; +- /* This will flood messages. Don't uncomment unless you see +- * Problems with cross cpu invalidation +- VDEBUG(("VOYAGER SMP: CPU%d received INVALIDATE_CPI\n", +- smp_processor_id())); +- */ +- +- if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { +- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { +- if (flush_va == TLB_FLUSH_ALL) +- local_flush_tlb(); +- else +- __flush_tlb_one(flush_va); +- } else +- voyager_leave_mm(cpu); +- } +- smp_mb__before_clear_bit(); +- clear_bit(cpu, &smp_invalidate_needed); +- smp_mb__after_clear_bit(); +-} +- +-/* All the new flush operations for 2.4 */ +- +-/* This routine is called with a physical cpu mask */ +-static void +-voyager_flush_tlb_others(unsigned long cpumask, struct mm_struct *mm, +- unsigned long va) +-{ +- int stuck = 50000; +- +- if (!cpumask) +- BUG(); +- if ((cpumask & cpus_addr(cpu_online_map)[0]) != cpumask) +- BUG(); +- if (cpumask & (1 << smp_processor_id())) +- BUG(); +- if (!mm) +- BUG(); +- +- spin_lock(&tlbstate_lock); +- +- flush_mm = mm; +- flush_va = va; +- atomic_set_mask(cpumask, &smp_invalidate_needed); +- /* +- * We have to send the CPI only to +- * CPUs affected. +- */ +- send_CPI(cpumask, VIC_INVALIDATE_CPI); +- +- while (smp_invalidate_needed) { +- mb(); +- if (--stuck == 0) { +- printk("***WARNING*** Stuck doing invalidate CPI " +- "(CPU%d)\n", smp_processor_id()); +- break; +- } +- } +- +- /* Uncomment only to debug invalidation problems +- VDEBUG(("VOYAGER SMP: Completed invalidate CPI (CPU%d)\n", cpu)); +- */ +- +- flush_mm = NULL; +- flush_va = 0; +- spin_unlock(&tlbstate_lock); +-} +- +-void flush_tlb_current_task(void) +-{ +- struct mm_struct *mm = current->mm; +- unsigned long cpu_mask; +- +- preempt_disable(); +- +- cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); +- local_flush_tlb(); +- if (cpu_mask) +- voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); +- +- preempt_enable(); +-} +- +-void flush_tlb_mm(struct mm_struct *mm) +-{ +- unsigned long cpu_mask; +- +- preempt_disable(); +- +- cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); +- +- if (current->active_mm == mm) { +- if (current->mm) +- local_flush_tlb(); +- else +- voyager_leave_mm(smp_processor_id()); +- } +- if (cpu_mask) +- voyager_flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); +- +- preempt_enable(); +-} +- +-void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) +-{ +- struct mm_struct *mm = vma->vm_mm; +- unsigned long cpu_mask; +- +- preempt_disable(); +- +- cpu_mask = cpus_addr(mm->cpu_vm_mask)[0] & ~(1 << smp_processor_id()); +- if (current->active_mm == mm) { +- if (current->mm) +- __flush_tlb_one(va); +- else +- voyager_leave_mm(smp_processor_id()); +- } +- +- if (cpu_mask) +- voyager_flush_tlb_others(cpu_mask, mm, va); +- +- preempt_enable(); +-} +- +-EXPORT_SYMBOL(flush_tlb_page); +- +-/* enable the requested IRQs */ +-static void smp_enable_irq_interrupt(void) +-{ +- __u8 irq; +- __u8 cpu = get_cpu(); +- +- VDEBUG(("VOYAGER SMP: CPU%d enabling irq mask 0x%x\n", cpu, +- vic_irq_enable_mask[cpu])); +- +- spin_lock(&vic_irq_lock); +- for (irq = 0; irq < 16; 
irq++) { +- if (vic_irq_enable_mask[cpu] & (1 << irq)) +- enable_local_vic_irq(irq); +- } +- vic_irq_enable_mask[cpu] = 0; +- spin_unlock(&vic_irq_lock); +- +- put_cpu_no_resched(); +-} +- +-/* +- * CPU halt call-back +- */ +-static void smp_stop_cpu_function(void *dummy) +-{ +- VDEBUG(("VOYAGER SMP: CPU%d is STOPPING\n", smp_processor_id())); +- cpu_clear(smp_processor_id(), cpu_online_map); +- local_irq_disable(); +- for (;;) +- halt(); +-} +- +-/* execute a thread on a new CPU. The function to be called must be +- * previously set up. This is used to schedule a function for +- * execution on all CPUs - set up the function then broadcast a +- * function_interrupt CPI to come here on each CPU */ +-static void smp_call_function_interrupt(void) +-{ +- irq_enter(); +- generic_smp_call_function_interrupt(); +- __get_cpu_var(irq_stat).irq_call_count++; +- irq_exit(); +-} +- +-static void smp_call_function_single_interrupt(void) +-{ +- irq_enter(); +- generic_smp_call_function_single_interrupt(); +- __get_cpu_var(irq_stat).irq_call_count++; +- irq_exit(); +-} +- +-/* Sorry about the name. In an APIC based system, the APICs +- * themselves are programmed to send a timer interrupt. This is used +- * by linux to reschedule the processor. Voyager doesn't have this, +- * so we use the system clock to interrupt one processor, which in +- * turn, broadcasts a timer CPI to all the others --- we receive that +- * CPI here. We don't use this actually for counting so losing +- * ticks doesn't matter +- * +- * FIXME: For those CPUs which actually have a local APIC, we could +- * try to use it to trigger this interrupt instead of having to +- * broadcast the timer tick. Unfortunately, all my pentium DYADs have +- * no local APIC, so I can't do this +- * +- * This function is currently a placeholder and is unused in the code */ +-void smp_apic_timer_interrupt(struct pt_regs *regs) +-{ +- struct pt_regs *old_regs = set_irq_regs(regs); +- wrapper_smp_local_timer_interrupt(); +- set_irq_regs(old_regs); +-} +- +-/* All of the QUAD interrupt GATES */ +-void smp_qic_timer_interrupt(struct pt_regs *regs) +-{ +- struct pt_regs *old_regs = set_irq_regs(regs); +- ack_QIC_CPI(QIC_TIMER_CPI); +- wrapper_smp_local_timer_interrupt(); +- set_irq_regs(old_regs); +-} +- +-void smp_qic_invalidate_interrupt(struct pt_regs *regs) +-{ +- ack_QIC_CPI(QIC_INVALIDATE_CPI); +- smp_invalidate_interrupt(); +-} +- +-void smp_qic_reschedule_interrupt(struct pt_regs *regs) +-{ +- ack_QIC_CPI(QIC_RESCHEDULE_CPI); +- smp_reschedule_interrupt(); +-} +- +-void smp_qic_enable_irq_interrupt(struct pt_regs *regs) +-{ +- ack_QIC_CPI(QIC_ENABLE_IRQ_CPI); +- smp_enable_irq_interrupt(); +-} +- +-void smp_qic_call_function_interrupt(struct pt_regs *regs) +-{ +- ack_QIC_CPI(QIC_CALL_FUNCTION_CPI); +- smp_call_function_interrupt(); +-} +- +-void smp_qic_call_function_single_interrupt(struct pt_regs *regs) +-{ +- ack_QIC_CPI(QIC_CALL_FUNCTION_SINGLE_CPI); +- smp_call_function_single_interrupt(); +-} +- +-void smp_vic_cpi_interrupt(struct pt_regs *regs) +-{ +- struct pt_regs *old_regs = set_irq_regs(regs); +- __u8 cpu = smp_processor_id(); +- +- if (is_cpu_quad()) +- ack_QIC_CPI(VIC_CPI_LEVEL0); +- else +- ack_VIC_CPI(VIC_CPI_LEVEL0); +- +- if (test_and_clear_bit(VIC_TIMER_CPI, &vic_cpi_mailbox[cpu])) +- wrapper_smp_local_timer_interrupt(); +- if (test_and_clear_bit(VIC_INVALIDATE_CPI, &vic_cpi_mailbox[cpu])) +- smp_invalidate_interrupt(); +- if (test_and_clear_bit(VIC_RESCHEDULE_CPI, &vic_cpi_mailbox[cpu])) +- smp_reschedule_interrupt(); +- if 
(test_and_clear_bit(VIC_ENABLE_IRQ_CPI, &vic_cpi_mailbox[cpu])) +- smp_enable_irq_interrupt(); +- if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu])) +- smp_call_function_interrupt(); +- if (test_and_clear_bit(VIC_CALL_FUNCTION_SINGLE_CPI, &vic_cpi_mailbox[cpu])) +- smp_call_function_single_interrupt(); +- set_irq_regs(old_regs); +-} +- +-static void do_flush_tlb_all(void *info) +-{ +- unsigned long cpu = smp_processor_id(); +- +- __flush_tlb_all(); +- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) +- voyager_leave_mm(cpu); +-} +- +-/* flush the TLB of every active CPU in the system */ +-void flush_tlb_all(void) +-{ +- on_each_cpu(do_flush_tlb_all, 0, 1); +-} +- +-/* send a reschedule CPI to one CPU by physical CPU number*/ +-static void voyager_smp_send_reschedule(int cpu) +-{ +- send_one_CPI(cpu, VIC_RESCHEDULE_CPI); +-} +- +-int hard_smp_processor_id(void) +-{ +- __u8 i; +- __u8 cpumask = inb(VIC_PROC_WHO_AM_I); +- if ((cpumask & QUAD_IDENTIFIER) == QUAD_IDENTIFIER) +- return cpumask & 0x1F; +- +- for (i = 0; i < 8; i++) { +- if (cpumask & (1 << i)) +- return i; +- } +- printk("** WARNING ** Illegal cpuid returned by VIC: %d", cpumask); +- return 0; +-} +- +-int safe_smp_processor_id(void) +-{ +- return hard_smp_processor_id(); +-} +- +-/* broadcast a halt to all other CPUs */ +-static void voyager_smp_send_stop(void) +-{ +- smp_call_function(smp_stop_cpu_function, NULL, 1); +-} +- +-/* this function is triggered in time.c when a clock tick fires +- * we need to re-broadcast the tick to all CPUs */ +-void smp_vic_timer_interrupt(void) +-{ +- send_CPI_allbutself(VIC_TIMER_CPI); +- smp_local_timer_interrupt(); +-} +- +-/* local (per CPU) timer interrupt. It does both profiling and +- * process statistics/rescheduling. +- * +- * We do profiling in every local tick, statistics/rescheduling +- * happen only every 'profiling multiplier' ticks. The default +- * multiplier is 1 and it can be changed by writing the new multiplier +- * value into /proc/profile. +- */ +-void smp_local_timer_interrupt(void) +-{ +- int cpu = smp_processor_id(); +- long weight; +- +- profile_tick(CPU_PROFILING); +- if (--per_cpu(prof_counter, cpu) <= 0) { +- /* +- * The multiplier may have changed since the last time we got +- * to this point as a result of the user writing to +- * /proc/profile. In this case we need to adjust the APIC +- * timer accordingly. +- * +- * Interrupts are already masked off at this point. +- */ +- per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); +- if (per_cpu(prof_counter, cpu) != +- per_cpu(prof_old_multiplier, cpu)) { +- /* FIXME: need to update the vic timer tick here */ +- per_cpu(prof_old_multiplier, cpu) = +- per_cpu(prof_counter, cpu); +- } +- +- update_process_times(user_mode_vm(get_irq_regs())); +- } +- +- if (((1 << cpu) & voyager_extended_vic_processors) == 0) +- /* only extended VIC processors participate in +- * interrupt distribution */ +- return; +- +- /* +- * We take the 'long' return path, and there every subsystem +- * grabs the appropriate locks (kernel lock/ irq lock). +- * +- * we might want to decouple profiling from the 'long path', +- * and do the profiling totally in assembly. +- * +- * Currently this isn't too much of an issue (performance wise), +- * we can take more than 100K local irqs per second on a 100 MHz P5. +- */ +- +- if ((++vic_tick[cpu] & 0x7) != 0) +- return; +- /* get here every 16 ticks (about every 1/6 of a second) */ +- +- /* Change our priority to give someone else a chance at getting +- * the IRQ. 
The algorithm goes like this: +- * +- * In the VIC, the dynamically routed interrupt is always +- * handled by the lowest priority eligible (i.e. receiving +- * interrupts) CPU. If >1 eligible CPUs are equal lowest, the +- * lowest processor number gets it. +- * +- * The priority of a CPU is controlled by a special per-CPU +- * VIC priority register which is 3 bits wide 0 being lowest +- * and 7 highest priority.. +- * +- * Therefore we subtract the average number of interrupts from +- * the number we've fielded. If this number is negative, we +- * lower the activity count and if it is positive, we raise +- * it. +- * +- * I'm afraid this still leads to odd looking interrupt counts: +- * the totals are all roughly equal, but the individual ones +- * look rather skewed. +- * +- * FIXME: This algorithm is total crap when mixed with SMP +- * affinity code since we now try to even up the interrupt +- * counts when an affinity binding is keeping them on a +- * particular CPU*/ +- weight = (vic_intr_count[cpu] * voyager_extended_cpus +- - vic_intr_total) >> 4; +- weight += 4; +- if (weight > 7) +- weight = 7; +- if (weight < 0) +- weight = 0; +- +- outb((__u8) weight, VIC_PRIORITY_REGISTER); +- +-#ifdef VOYAGER_DEBUG +- if ((vic_tick[cpu] & 0xFFF) == 0) { +- /* print this message roughly every 25 secs */ +- printk("VOYAGER SMP: vic_tick[%d] = %lu, weight = %ld\n", +- cpu, vic_tick[cpu], weight); +- } +-#endif +-} +- +-/* setup the profiling timer */ +-int setup_profiling_timer(unsigned int multiplier) +-{ +- int i; +- +- if ((!multiplier)) +- return -EINVAL; +- +- /* +- * Set the new multiplier for each CPU. CPUs don't start using the +- * new values until the next timer interrupt in which they do process +- * accounting. +- */ +- for (i = 0; i < nr_cpu_ids; ++i) +- per_cpu(prof_multiplier, i) = multiplier; +- +- return 0; +-} +- +-/* This is a bit of a mess, but forced on us by the genirq changes +- * there's no genirq handler that really does what voyager wants +- * so hack it up with the simple IRQ handler */ +-static void handle_vic_irq(unsigned int irq, struct irq_desc *desc) +-{ +- before_handle_vic_irq(irq); +- handle_simple_irq(irq, desc); +- after_handle_vic_irq(irq); +-} +- +-/* The CPIs are handled in the per cpu 8259s, so they must be +- * enabled to be received: FIX: enabling the CPIs in the early +- * boot sequence interferes with bug checking; enable them later +- * on in smp_init */ +-#define VIC_SET_GATE(cpi, vector) \ +- set_intr_gate((cpi) + VIC_DEFAULT_CPI_BASE, (vector)) +-#define QIC_SET_GATE(cpi, vector) \ +- set_intr_gate((cpi) + QIC_DEFAULT_CPI_BASE, (vector)) +- +-void __init voyager_smp_intr_init(void) +-{ +- int i; +- +- /* initialize the per cpu irq mask to all disabled */ +- for (i = 0; i < nr_cpu_ids; i++) +- vic_irq_mask[i] = 0xFFFF; +- +- VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt); +- +- VIC_SET_GATE(VIC_SYS_INT, vic_sys_interrupt); +- VIC_SET_GATE(VIC_CMN_INT, vic_cmn_interrupt); +- +- QIC_SET_GATE(QIC_TIMER_CPI, qic_timer_interrupt); +- QIC_SET_GATE(QIC_INVALIDATE_CPI, qic_invalidate_interrupt); +- QIC_SET_GATE(QIC_RESCHEDULE_CPI, qic_reschedule_interrupt); +- QIC_SET_GATE(QIC_ENABLE_IRQ_CPI, qic_enable_irq_interrupt); +- QIC_SET_GATE(QIC_CALL_FUNCTION_CPI, qic_call_function_interrupt); +- +- /* now put the VIC descriptor into the first 48 IRQs +- * +- * This is for later: first 16 correspond to PC IRQs; next 16 +- * are Primary MC IRQs and final 16 are Secondary MC IRQs */ +- for (i = 0; i < 48; i++) +- set_irq_chip_and_handler(i, &vic_chip, 
handle_vic_irq); +-} +- +-/* send a CPI at level cpi to a set of cpus in cpuset (set 1 bit per +- * processor to receive CPI */ +-static void send_CPI(__u32 cpuset, __u8 cpi) +-{ +- int cpu; +- __u32 quad_cpuset = (cpuset & voyager_quad_processors); +- +- if (cpi < VIC_START_FAKE_CPI) { +- /* fake CPI are only used for booting, so send to the +- * extended quads as well---Quads must be VIC booted */ +- outb((__u8) (cpuset), VIC_CPI_Registers[cpi]); +- return; +- } +- if (quad_cpuset) +- send_QIC_CPI(quad_cpuset, cpi); +- cpuset &= ~quad_cpuset; +- cpuset &= 0xff; /* only first 8 CPUs vaild for VIC CPI */ +- if (cpuset == 0) +- return; +- for_each_online_cpu(cpu) { +- if (cpuset & (1 << cpu)) +- set_bit(cpi, &vic_cpi_mailbox[cpu]); +- } +- if (cpuset) +- outb((__u8) cpuset, VIC_CPI_Registers[VIC_CPI_LEVEL0]); +-} +- +-/* Acknowledge receipt of CPI in the QIC, clear in QIC hardware and +- * set the cache line to shared by reading it. +- * +- * DON'T make this inline otherwise the cache line read will be +- * optimised away +- * */ +-static int ack_QIC_CPI(__u8 cpi) +-{ +- __u8 cpu = hard_smp_processor_id(); +- +- cpi &= 7; +- +- outb(1 << cpi, QIC_INTERRUPT_CLEAR1); +- return voyager_quad_cpi_addr[cpu]->qic_cpi[cpi].cpi; +-} +- +-static void ack_special_QIC_CPI(__u8 cpi) +-{ +- switch (cpi) { +- case VIC_CMN_INT: +- outb(QIC_CMN_INT, QIC_INTERRUPT_CLEAR0); +- break; +- case VIC_SYS_INT: +- outb(QIC_SYS_INT, QIC_INTERRUPT_CLEAR0); +- break; +- } +- /* also clear at the VIC, just in case (nop for non-extended proc) */ +- ack_VIC_CPI(cpi); +-} +- +-/* Acknowledge receipt of CPI in the VIC (essentially an EOI) */ +-static void ack_VIC_CPI(__u8 cpi) +-{ +-#ifdef VOYAGER_DEBUG +- unsigned long flags; +- __u16 isr; +- __u8 cpu = smp_processor_id(); +- +- local_irq_save(flags); +- isr = vic_read_isr(); +- if ((isr & (1 << (cpi & 7))) == 0) { +- printk("VOYAGER SMP: CPU%d lost CPI%d\n", cpu, cpi); +- } +-#endif +- /* send specific EOI; the two system interrupts have +- * bit 4 set for a separate vector but behave as the +- * corresponding 3 bit intr */ +- outb_p(0x60 | (cpi & 7), 0x20); +- +-#ifdef VOYAGER_DEBUG +- if ((vic_read_isr() & (1 << (cpi & 7))) != 0) { +- printk("VOYAGER SMP: CPU%d still asserting CPI%d\n", cpu, cpi); +- } +- local_irq_restore(flags); +-#endif +-} +- +-/* cribbed with thanks from irq.c */ +-#define __byte(x,y) (((unsigned char *)&(y))[x]) +-#define cached_21(cpu) (__byte(0,vic_irq_mask[cpu])) +-#define cached_A1(cpu) (__byte(1,vic_irq_mask[cpu])) +- +-static unsigned int startup_vic_irq(unsigned int irq) +-{ +- unmask_vic_irq(irq); +- +- return 0; +-} +- +-/* The enable and disable routines. This is where we run into +- * conflicting architectural philosophy. Fundamentally, the voyager +- * architecture does not expect to have to disable interrupts globally +- * (the IRQ controllers belong to each CPU). The processor masquerade +- * which is used to start the system shouldn't be used in a running OS +- * since it will cause great confusion if two separate CPUs drive to +- * the same IRQ controller (I know, I've tried it). +- * +- * The solution is a variant on the NCR lazy SPL design: +- * +- * 1) To disable an interrupt, do nothing (other than set the +- * IRQ_DISABLED flag). This dares the interrupt actually to arrive. +- * +- * 2) If the interrupt dares to come in, raise the local mask against +- * it (this will result in all the CPU masks being raised +- * eventually). 
+- * +- * 3) To enable the interrupt, lower the mask on the local CPU and +- * broadcast an Interrupt enable CPI which causes all other CPUs to +- * adjust their masks accordingly. */ +- +-static void unmask_vic_irq(unsigned int irq) +-{ +- /* linux doesn't to processor-irq affinity, so enable on +- * all CPUs we know about */ +- int cpu = smp_processor_id(), real_cpu; +- __u16 mask = (1 << irq); +- __u32 processorList = 0; +- unsigned long flags; +- +- VDEBUG(("VOYAGER: unmask_vic_irq(%d) CPU%d affinity 0x%lx\n", +- irq, cpu, cpu_irq_affinity[cpu])); +- spin_lock_irqsave(&vic_irq_lock, flags); +- for_each_online_cpu(real_cpu) { +- if (!(voyager_extended_vic_processors & (1 << real_cpu))) +- continue; +- if (!(cpu_irq_affinity[real_cpu] & mask)) { +- /* irq has no affinity for this CPU, ignore */ +- continue; +- } +- if (real_cpu == cpu) { +- enable_local_vic_irq(irq); +- } else if (vic_irq_mask[real_cpu] & mask) { +- vic_irq_enable_mask[real_cpu] |= mask; +- processorList |= (1 << real_cpu); +- } +- } +- spin_unlock_irqrestore(&vic_irq_lock, flags); +- if (processorList) +- send_CPI(processorList, VIC_ENABLE_IRQ_CPI); +-} +- +-static void mask_vic_irq(unsigned int irq) +-{ +- /* lazy disable, do nothing */ +-} +- +-static void enable_local_vic_irq(unsigned int irq) +-{ +- __u8 cpu = smp_processor_id(); +- __u16 mask = ~(1 << irq); +- __u16 old_mask = vic_irq_mask[cpu]; +- +- vic_irq_mask[cpu] &= mask; +- if (vic_irq_mask[cpu] == old_mask) +- return; +- +- VDEBUG(("VOYAGER DEBUG: Enabling irq %d in hardware on CPU %d\n", +- irq, cpu)); +- +- if (irq & 8) { +- outb_p(cached_A1(cpu), 0xA1); +- (void)inb_p(0xA1); +- } else { +- outb_p(cached_21(cpu), 0x21); +- (void)inb_p(0x21); +- } +-} +- +-static void disable_local_vic_irq(unsigned int irq) +-{ +- __u8 cpu = smp_processor_id(); +- __u16 mask = (1 << irq); +- __u16 old_mask = vic_irq_mask[cpu]; +- +- if (irq == 7) +- return; +- +- vic_irq_mask[cpu] |= mask; +- if (old_mask == vic_irq_mask[cpu]) +- return; +- +- VDEBUG(("VOYAGER DEBUG: Disabling irq %d in hardware on CPU %d\n", +- irq, cpu)); +- +- if (irq & 8) { +- outb_p(cached_A1(cpu), 0xA1); +- (void)inb_p(0xA1); +- } else { +- outb_p(cached_21(cpu), 0x21); +- (void)inb_p(0x21); +- } +-} +- +-/* The VIC is level triggered, so the ack can only be issued after the +- * interrupt completes. However, we do Voyager lazy interrupt +- * handling here: It is an extremely expensive operation to mask an +- * interrupt in the vic, so we merely set a flag (IRQ_DISABLED). If +- * this interrupt actually comes in, then we mask and ack here to push +- * the interrupt off to another CPU */ +-static void before_handle_vic_irq(unsigned int irq) +-{ +- irq_desc_t *desc = irq_to_desc(irq); +- __u8 cpu = smp_processor_id(); +- +- _raw_spin_lock(&vic_irq_lock); +- vic_intr_total++; +- vic_intr_count[cpu]++; +- +- if (!(cpu_irq_affinity[cpu] & (1 << irq))) { +- /* The irq is not in our affinity mask, push it off +- * onto another CPU */ +- VDEBUG(("VOYAGER DEBUG: affinity triggered disable of irq %d " +- "on cpu %d\n", irq, cpu)); +- disable_local_vic_irq(irq); +- /* set IRQ_INPROGRESS to prevent the handler in irq.c from +- * actually calling the interrupt routine */ +- desc->status |= IRQ_REPLAY | IRQ_INPROGRESS; +- } else if (desc->status & IRQ_DISABLED) { +- /* Damn, the interrupt actually arrived, do the lazy +- * disable thing. 
The interrupt routine in irq.c will +- * not handle a IRQ_DISABLED interrupt, so nothing more +- * need be done here */ +- VDEBUG(("VOYAGER DEBUG: lazy disable of irq %d on CPU %d\n", +- irq, cpu)); +- disable_local_vic_irq(irq); +- desc->status |= IRQ_REPLAY; +- } else { +- desc->status &= ~IRQ_REPLAY; +- } +- +- _raw_spin_unlock(&vic_irq_lock); +-} +- +-/* Finish the VIC interrupt: basically mask */ +-static void after_handle_vic_irq(unsigned int irq) +-{ +- irq_desc_t *desc = irq_to_desc(irq); +- +- _raw_spin_lock(&vic_irq_lock); +- { +- unsigned int status = desc->status & ~IRQ_INPROGRESS; +-#ifdef VOYAGER_DEBUG +- __u16 isr; +-#endif +- +- desc->status = status; +- if ((status & IRQ_DISABLED)) +- disable_local_vic_irq(irq); +-#ifdef VOYAGER_DEBUG +- /* DEBUG: before we ack, check what's in progress */ +- isr = vic_read_isr(); +- if ((isr & (1 << irq) && !(status & IRQ_REPLAY)) == 0) { +- int i; +- __u8 cpu = smp_processor_id(); +- __u8 real_cpu; +- int mask; /* Um... initialize me??? --RR */ +- +- printk("VOYAGER SMP: CPU%d lost interrupt %d\n", +- cpu, irq); +- for_each_possible_cpu(real_cpu, mask) { +- +- outb(VIC_CPU_MASQUERADE_ENABLE | real_cpu, +- VIC_PROCESSOR_ID); +- isr = vic_read_isr(); +- if (isr & (1 << irq)) { +- printk +- ("VOYAGER SMP: CPU%d ack irq %d\n", +- real_cpu, irq); +- ack_vic_irq(irq); +- } +- outb(cpu, VIC_PROCESSOR_ID); +- } +- } +-#endif /* VOYAGER_DEBUG */ +- /* as soon as we ack, the interrupt is eligible for +- * receipt by another CPU so everything must be in +- * order here */ +- ack_vic_irq(irq); +- if (status & IRQ_REPLAY) { +- /* replay is set if we disable the interrupt +- * in the before_handle_vic_irq() routine, so +- * clear the in progress bit here to allow the +- * next CPU to handle this correctly */ +- desc->status &= ~(IRQ_REPLAY | IRQ_INPROGRESS); +- } +-#ifdef VOYAGER_DEBUG +- isr = vic_read_isr(); +- if ((isr & (1 << irq)) != 0) +- printk("VOYAGER SMP: after_handle_vic_irq() after " +- "ack irq=%d, isr=0x%x\n", irq, isr); +-#endif /* VOYAGER_DEBUG */ +- } +- _raw_spin_unlock(&vic_irq_lock); +- +- /* All code after this point is out of the main path - the IRQ +- * may be intercepted by another CPU if reasserted */ +-} +- +-/* Linux processor - interrupt affinity manipulations. +- * +- * For each processor, we maintain a 32 bit irq affinity mask. +- * Initially it is set to all 1's so every processor accepts every +- * interrupt. In this call, we change the processor's affinity mask: +- * +- * Change from enable to disable: +- * +- * If the interrupt ever comes in to the processor, we will disable it +- * and ack it to push it off to another CPU, so just accept the mask here. +- * +- * Change from disable to enable: +- * +- * change the mask and then do an interrupt enable CPI to re-enable on +- * the selected processors */ +- +-void set_vic_irq_affinity(unsigned int irq, const struct cpumask *mask) +-{ +- /* Only extended processors handle interrupts */ +- unsigned long real_mask; +- unsigned long irq_mask = 1 << irq; +- int cpu; +- +- real_mask = cpus_addr(*mask)[0] & voyager_extended_vic_processors; +- +- if (cpus_addr(*mask)[0] == 0) +- /* can't have no CPUs to accept the interrupt -- extremely +- * bad things will happen */ +- return; +- +- if (irq == 0) +- /* can't change the affinity of the timer IRQ. This +- * is due to the constraint in the voyager +- * architecture that the CPI also comes in on and IRQ +- * line and we have chosen IRQ0 for this. 
If you +- * raise the mask on this interrupt, the processor +- * will no-longer be able to accept VIC CPIs */ +- return; +- +- if (irq >= 32) +- /* You can only have 32 interrupts in a voyager system +- * (and 32 only if you have a secondary microchannel +- * bus) */ +- return; +- +- for_each_online_cpu(cpu) { +- unsigned long cpu_mask = 1 << cpu; +- +- if (cpu_mask & real_mask) { +- /* enable the interrupt for this cpu */ +- cpu_irq_affinity[cpu] |= irq_mask; +- } else { +- /* disable the interrupt for this cpu */ +- cpu_irq_affinity[cpu] &= ~irq_mask; +- } +- } +- /* this is magic, we now have the correct affinity maps, so +- * enable the interrupt. This will send an enable CPI to +- * those CPUs who need to enable it in their local masks, +- * causing them to correct for the new affinity . If the +- * interrupt is currently globally disabled, it will simply be +- * disabled again as it comes in (voyager lazy disable). If +- * the affinity map is tightened to disable the interrupt on a +- * cpu, it will be pushed off when it comes in */ +- unmask_vic_irq(irq); +-} +- +-static void ack_vic_irq(unsigned int irq) +-{ +- if (irq & 8) { +- outb(0x62, 0x20); /* Specific EOI to cascade */ +- outb(0x60 | (irq & 7), 0xA0); +- } else { +- outb(0x60 | (irq & 7), 0x20); +- } +-} +- +-/* enable the CPIs. In the VIC, the CPIs are delivered by the 8259 +- * but are not vectored by it. This means that the 8259 mask must be +- * lowered to receive them */ +-static __init void vic_enable_cpi(void) +-{ +- __u8 cpu = smp_processor_id(); +- +- /* just take a copy of the current mask (nop for boot cpu) */ +- vic_irq_mask[cpu] = vic_irq_mask[boot_cpu_id]; +- +- enable_local_vic_irq(VIC_CPI_LEVEL0); +- enable_local_vic_irq(VIC_CPI_LEVEL1); +- /* for sys int and cmn int */ +- enable_local_vic_irq(7); +- +- if (is_cpu_quad()) { +- outb(QIC_DEFAULT_MASK0, QIC_MASK_REGISTER0); +- outb(QIC_CPI_ENABLE, QIC_MASK_REGISTER1); +- VDEBUG(("VOYAGER SMP: QIC ENABLE CPI: CPU%d: MASK 0x%x\n", +- cpu, QIC_CPI_ENABLE)); +- } +- +- VDEBUG(("VOYAGER SMP: ENABLE CPI: CPU%d: MASK 0x%x\n", +- cpu, vic_irq_mask[cpu])); +-} +- +-void voyager_smp_dump() +-{ +- int old_cpu = smp_processor_id(), cpu; +- +- /* dump the interrupt masks of each processor */ +- for_each_online_cpu(cpu) { +- __u16 imr, isr, irr; +- unsigned long flags; +- +- local_irq_save(flags); +- outb(VIC_CPU_MASQUERADE_ENABLE | cpu, VIC_PROCESSOR_ID); +- imr = (inb(0xa1) << 8) | inb(0x21); +- outb(0x0a, 0xa0); +- irr = inb(0xa0) << 8; +- outb(0x0a, 0x20); +- irr |= inb(0x20); +- outb(0x0b, 0xa0); +- isr = inb(0xa0) << 8; +- outb(0x0b, 0x20); +- isr |= inb(0x20); +- outb(old_cpu, VIC_PROCESSOR_ID); +- local_irq_restore(flags); +- printk("\tCPU%d: mask=0x%x, IMR=0x%x, IRR=0x%x, ISR=0x%x\n", +- cpu, vic_irq_mask[cpu], imr, irr, isr); +-#if 0 +- /* These lines are put in to try to unstick an un ack'd irq */ +- if (isr != 0) { +- int irq; +- for (irq = 0; irq < 16; irq++) { +- if (isr & (1 << irq)) { +- printk("\tCPU%d: ack irq %d\n", +- cpu, irq); +- local_irq_save(flags); +- outb(VIC_CPU_MASQUERADE_ENABLE | cpu, +- VIC_PROCESSOR_ID); +- ack_vic_irq(irq); +- outb(old_cpu, VIC_PROCESSOR_ID); +- local_irq_restore(flags); +- } +- } +- } +-#endif +- } +-} +- +-void smp_voyager_power_off(void *dummy) +-{ +- if (smp_processor_id() == boot_cpu_id) +- voyager_power_off(); +- else +- smp_stop_cpu_function(NULL); +-} +- +-static void __init voyager_smp_prepare_cpus(unsigned int max_cpus) +-{ +- /* FIXME: ignore max_cpus for now */ +- smp_boot_cpus(); +-} +- +-static void __cpuinit 
voyager_smp_prepare_boot_cpu(void) +-{ +- init_gdt(smp_processor_id()); +- switch_to_new_gdt(); +- +- cpu_online_map = cpumask_of_cpu(smp_processor_id()); +- cpu_callout_map = cpumask_of_cpu(smp_processor_id()); +- cpu_callin_map = CPU_MASK_NONE; +- cpu_present_map = cpumask_of_cpu(smp_processor_id()); +- +-} +- +-static int __cpuinit voyager_cpu_up(unsigned int cpu) +-{ +- /* This only works at boot for x86. See "rewrite" above. */ +- if (cpu_isset(cpu, smp_commenced_mask)) +- return -ENOSYS; +- +- /* In case one didn't come up */ +- if (!cpu_isset(cpu, cpu_callin_map)) +- return -EIO; +- /* Unleash the CPU! */ +- cpu_set(cpu, smp_commenced_mask); +- while (!cpu_online(cpu)) +- mb(); +- return 0; +-} +- +-static void __init voyager_smp_cpus_done(unsigned int max_cpus) +-{ +- zap_low_mappings(); +-} +- +-void __init smp_setup_processor_id(void) +-{ +- current_thread_info()->cpu = hard_smp_processor_id(); +- x86_write_percpu(cpu_number, hard_smp_processor_id()); +-} +- +-static void voyager_send_call_func(const struct cpumask *callmask) +-{ +- __u32 mask = cpus_addr(*callmask)[0] & ~(1 << smp_processor_id()); +- send_CPI(mask, VIC_CALL_FUNCTION_CPI); +-} +- +-static void voyager_send_call_func_single(int cpu) +-{ +- send_CPI(1 << cpu, VIC_CALL_FUNCTION_SINGLE_CPI); +-} +- +-struct smp_ops smp_ops = { +- .smp_prepare_boot_cpu = voyager_smp_prepare_boot_cpu, +- .smp_prepare_cpus = voyager_smp_prepare_cpus, +- .cpu_up = voyager_cpu_up, +- .smp_cpus_done = voyager_smp_cpus_done, +- +- .smp_send_stop = voyager_smp_send_stop, +- .smp_send_reschedule = voyager_smp_send_reschedule, +- +- .send_call_func_ipi = voyager_send_call_func, +- .send_call_func_single_ipi = voyager_send_call_func_single, +-}; +Index: linux-2.6-tip/arch/x86/mach-voyager/voyager_thread.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mach-voyager/voyager_thread.c ++++ /dev/null +@@ -1,128 +0,0 @@ +-/* -*- mode: c; c-basic-offset: 8 -*- */ +- +-/* Copyright (C) 2001 +- * +- * Author: J.E.J.Bottomley@HansenPartnership.com +- * +- * This module provides the machine status monitor thread for the +- * voyager architecture. This allows us to monitor the machine +- * environment (temp, voltage, fan function) and the front panel and +- * internal UPS. 
If a fault is detected, this thread takes corrective +- * action (usually just informing init) +- * */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-struct task_struct *voyager_thread; +-static __u8 set_timeout; +- +-static int execute(const char *string) +-{ +- int ret; +- +- char *envp[] = { +- "HOME=/", +- "TERM=linux", +- "PATH=/sbin:/usr/sbin:/bin:/usr/bin", +- NULL, +- }; +- char *argv[] = { +- "/bin/bash", +- "-c", +- (char *)string, +- NULL, +- }; +- +- if ((ret = +- call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC)) != 0) { +- printk(KERN_ERR "Voyager failed to run \"%s\": %i\n", string, +- ret); +- } +- return ret; +-} +- +-static void check_from_kernel(void) +-{ +- if (voyager_status.switch_off) { +- +- /* FIXME: This should be configurable via proc */ +- execute("umask 600; echo 0 > /etc/initrunlvl; kill -HUP 1"); +- } else if (voyager_status.power_fail) { +- VDEBUG(("Voyager daemon detected AC power failure\n")); +- +- /* FIXME: This should be configureable via proc */ +- execute("umask 600; echo F > /etc/powerstatus; kill -PWR 1"); +- set_timeout = 1; +- } +-} +- +-static void check_continuing_condition(void) +-{ +- if (voyager_status.power_fail) { +- __u8 data; +- voyager_cat_psi(VOYAGER_PSI_SUBREAD, +- VOYAGER_PSI_AC_FAIL_REG, &data); +- if ((data & 0x1f) == 0) { +- /* all power restored */ +- printk(KERN_NOTICE +- "VOYAGER AC power restored, cancelling shutdown\n"); +- /* FIXME: should be user configureable */ +- execute +- ("umask 600; echo O > /etc/powerstatus; kill -PWR 1"); +- set_timeout = 0; +- } +- } +-} +- +-static int thread(void *unused) +-{ +- printk(KERN_NOTICE "Voyager starting monitor thread\n"); +- +- for (;;) { +- set_current_state(TASK_INTERRUPTIBLE); +- schedule_timeout(set_timeout ? HZ : MAX_SCHEDULE_TIMEOUT); +- +- VDEBUG(("Voyager Daemon awoken\n")); +- if (voyager_status.request_from_kernel == 0) { +- /* probably awoken from timeout */ +- check_continuing_condition(); +- } else { +- check_from_kernel(); +- voyager_status.request_from_kernel = 0; +- } +- } +-} +- +-static int __init voyager_thread_start(void) +-{ +- voyager_thread = kthread_run(thread, NULL, "kvoyagerd"); +- if (IS_ERR(voyager_thread)) { +- printk(KERN_ERR +- "Voyager: Failed to create system monitor thread.\n"); +- return PTR_ERR(voyager_thread); +- } +- return 0; +-} +- +-static void __exit voyager_thread_stop(void) +-{ +- kthread_stop(voyager_thread); +-} +- +-module_init(voyager_thread_start); +-module_exit(voyager_thread_stop); +Index: linux-2.6-tip/arch/x86/math-emu/get_address.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/math-emu/get_address.c ++++ linux-2.6-tip/arch/x86/math-emu/get_address.c +@@ -150,11 +150,9 @@ static long pm_address(u_char FPU_modrm, + #endif /* PARANOID */ + + switch (segment) { +- /* gs isn't used by the kernel, so it still has its +- user-space value. */ + case PREFIX_GS_ - 1: +- /* N.B. 
- movl %seg, mem is a 2 byte write regardless of prefix */ +- savesegment(gs, addr->selector); ++ /* user gs handling can be lazy, use special accessors */ ++ addr->selector = get_user_gs(FPU_info->regs); + break; + default: + addr->selector = PM_REG_(segment); +Index: linux-2.6-tip/arch/x86/mm/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/Makefile ++++ linux-2.6-tip/arch/x86/mm/Makefile +@@ -1,6 +1,8 @@ +-obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ ++obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ + pat.o pgtable.o gup.o + ++obj-$(CONFIG_SMP) += tlb.o ++ + obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o + + obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +@@ -8,11 +10,13 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetab + + obj-$(CONFIG_HIGHMEM) += highmem_32.o + ++obj-$(CONFIG_KMEMCHECK) += kmemcheck/ ++ + obj-$(CONFIG_MMIOTRACE) += mmiotrace.o + mmiotrace-y := kmmio.o pf_in.o mmio-mod.o + obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o + +-obj-$(CONFIG_NUMA) += numa_$(BITS).o ++obj-$(CONFIG_NUMA) += numa.o numa_$(BITS).o + obj-$(CONFIG_K8_NUMA) += k8topology_64.o + obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o + +Index: linux-2.6-tip/arch/x86/mm/extable.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/extable.c ++++ linux-2.6-tip/arch/x86/mm/extable.c +@@ -23,6 +23,12 @@ int fixup_exception(struct pt_regs *regs + + fixup = search_exception_tables(regs->ip); + if (fixup) { ++ /* If fixup is less than 16, it means uaccess error */ ++ if (fixup->fixup < 16) { ++ current_thread_info()->uaccess_err = -EFAULT; ++ regs->ip += fixup->fixup; ++ return 1; ++ } + regs->ip = fixup->fixup; + return 1; + } +Index: linux-2.6-tip/arch/x86/mm/fault.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/fault.c ++++ linux-2.6-tip/arch/x86/mm/fault.c +@@ -1,73 +1,81 @@ + /* + * Copyright (C) 1995 Linus Torvalds +- * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs. ++ * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. 
++ * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar + */ +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include + #include +-#include +-#include +-#include /* For unblank_screen() */ ++#include ++#include + #include + #include +-#include /* for max_low_pfn */ +-#include +-#include + #include + #include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + #include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + +-#include +-#include +-#include +-#include +-#include ++#include ++ ++#include + #include ++#include ++#include ++#include + #include +-#include + #include ++#include + + /* +- * Page fault error code bits +- * bit 0 == 0 means no page found, 1 means protection fault +- * bit 1 == 0 means read, 1 means write +- * bit 2 == 0 means kernel, 1 means user-mode +- * bit 3 == 1 means use of reserved bit detected +- * bit 4 == 1 means fault was an instruction fetch +- */ +-#define PF_PROT (1<<0) +-#define PF_WRITE (1<<1) +-#define PF_USER (1<<2) +-#define PF_RSVD (1<<3) +-#define PF_INSTR (1<<4) ++ * Page fault error code bits: ++ * ++ * bit 0 == 0: no page found 1: protection fault ++ * bit 1 == 0: read access 1: write access ++ * bit 2 == 0: kernel-mode access 1: user-mode access ++ * bit 3 == 1: use of reserved bit detected ++ * bit 4 == 1: fault was an instruction fetch ++ */ ++enum x86_pf_error_code { ++ ++ PF_PROT = 1 << 0, ++ PF_WRITE = 1 << 1, ++ PF_USER = 1 << 2, ++ PF_RSVD = 1 << 3, ++ PF_INSTR = 1 << 4, ++}; + ++/* ++ * Returns 0 if mmiotrace is disabled, or if the fault is not ++ * handled by mmiotrace: ++ */ + static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) + { +-#ifdef CONFIG_MMIOTRACE + if (unlikely(is_kmmio_active())) + if (kmmio_handler(regs, addr) == 1) + return -1; +-#endif + return 0; + } + + static inline int notify_page_fault(struct pt_regs *regs) + { +-#ifdef CONFIG_KPROBES + int ret = 0; + + /* kprobe_running() needs smp_processor_id() */ +- if (!user_mode_vm(regs)) { ++ if (kprobes_built_in() && !user_mode_vm(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + ret = 1; +@@ -75,29 +83,76 @@ static inline int notify_page_fault(stru + } + + return ret; +-#else +- return 0; +-#endif + } + + /* +- * X86_32 +- * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. +- * Check that here and ignore it. +- * +- * X86_64 +- * Sometimes the CPU reports invalid exceptions on prefetch. +- * Check that here and ignore it. ++ * Prefetch quirks: ++ * ++ * 32-bit mode: ++ * ++ * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. ++ * Check that here and ignore it. ++ * ++ * 64-bit mode: ++ * ++ * Sometimes the CPU reports invalid exceptions on prefetch. ++ * Check that here and ignore it. + * +- * Opcode checker based on code by Richard Brunner ++ * Opcode checker based on code by Richard Brunner. + */ +-static int is_prefetch(struct pt_regs *regs, unsigned long addr, +- unsigned long error_code) ++static inline int ++check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, ++ unsigned char opcode, int *prefetch) + { ++ unsigned char instr_hi = opcode & 0xf0; ++ unsigned char instr_lo = opcode & 0x0f; ++ ++ switch (instr_hi) { ++ case 0x20: ++ case 0x30: ++ /* ++ * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. 
++ * In X86_64 long mode, the CPU will signal invalid ++ * opcode if some of these prefixes are present so ++ * X86_64 will never get here anyway ++ */ ++ return ((instr_lo & 7) == 0x6); ++#ifdef CONFIG_X86_64 ++ case 0x40: ++ /* ++ * In AMD64 long mode 0x40..0x4F are valid REX prefixes ++ * Need to figure out under what instruction mode the ++ * instruction was issued. Could check the LDT for lm, ++ * but for now it's good enough to assume that long ++ * mode only uses well known segments or kernel. ++ */ ++ return (!user_mode(regs)) || (regs->cs == __USER_CS); ++#endif ++ case 0x60: ++ /* 0x64 thru 0x67 are valid prefixes in all modes. */ ++ return (instr_lo & 0xC) == 0x4; ++ case 0xF0: ++ /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */ ++ return !instr_lo || (instr_lo>>1) == 1; ++ case 0x00: ++ /* Prefetch instruction is 0x0F0D or 0x0F18 */ ++ if (probe_kernel_address(instr, opcode)) ++ return 0; ++ ++ *prefetch = (instr_lo == 0xF) && ++ (opcode == 0x0D || opcode == 0x18); ++ return 0; ++ default: ++ return 0; ++ } ++} ++ ++static int ++is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) ++{ ++ unsigned char *max_instr; + unsigned char *instr; +- int scan_more = 1; + int prefetch = 0; +- unsigned char *max_instr; + + /* + * If it was a exec (instruction fetch) fault on NX page, then +@@ -106,106 +161,170 @@ static int is_prefetch(struct pt_regs *r + if (error_code & PF_INSTR) + return 0; + +- instr = (unsigned char *)convert_ip_to_linear(current, regs); ++ instr = (void *)convert_ip_to_linear(current, regs); + max_instr = instr + 15; + + if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE) + return 0; + +- while (scan_more && instr < max_instr) { ++ while (instr < max_instr) { + unsigned char opcode; +- unsigned char instr_hi; +- unsigned char instr_lo; + + if (probe_kernel_address(instr, opcode)) + break; + +- instr_hi = opcode & 0xf0; +- instr_lo = opcode & 0x0f; + instr++; + +- switch (instr_hi) { +- case 0x20: +- case 0x30: +- /* +- * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes. +- * In X86_64 long mode, the CPU will signal invalid +- * opcode if some of these prefixes are present so +- * X86_64 will never get here anyway +- */ +- scan_more = ((instr_lo & 7) == 0x6); +- break; +-#ifdef CONFIG_X86_64 +- case 0x40: +- /* +- * In AMD64 long mode 0x40..0x4F are valid REX prefixes +- * Need to figure out under what instruction mode the +- * instruction was issued. Could check the LDT for lm, +- * but for now it's good enough to assume that long +- * mode only uses well known segments or kernel. +- */ +- scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS); ++ if (!check_prefetch_opcode(regs, instr, opcode, &prefetch)) + break; +-#endif +- case 0x60: +- /* 0x64 thru 0x67 are valid prefixes in all modes. */ +- scan_more = (instr_lo & 0xC) == 0x4; +- break; +- case 0xF0: +- /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. 
*/ +- scan_more = !instr_lo || (instr_lo>>1) == 1; +- break; +- case 0x00: +- /* Prefetch instruction is 0x0F0D or 0x0F18 */ +- scan_more = 0; +- +- if (probe_kernel_address(instr, opcode)) +- break; +- prefetch = (instr_lo == 0xF) && +- (opcode == 0x0D || opcode == 0x18); +- break; +- default: +- scan_more = 0; +- break; +- } + } + return prefetch; + } + +-static void force_sig_info_fault(int si_signo, int si_code, +- unsigned long address, struct task_struct *tsk) ++static void ++force_sig_info_fault(int si_signo, int si_code, unsigned long address, ++ struct task_struct *tsk) + { + siginfo_t info; + +- info.si_signo = si_signo; +- info.si_errno = 0; +- info.si_code = si_code; +- info.si_addr = (void __user *)address; ++ info.si_signo = si_signo; ++ info.si_errno = 0; ++ info.si_code = si_code; ++ info.si_addr = (void __user *)address; ++ + force_sig_info(si_signo, &info, tsk); + } + +-#ifdef CONFIG_X86_64 +-static int bad_address(void *p) ++DEFINE_SPINLOCK(pgd_lock); ++LIST_HEAD(pgd_list); ++ ++#ifdef CONFIG_X86_32 ++static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) + { +- unsigned long dummy; +- return probe_kernel_address((unsigned long *)p, dummy); ++ unsigned index = pgd_index(address); ++ pgd_t *pgd_k; ++ pud_t *pud, *pud_k; ++ pmd_t *pmd, *pmd_k; ++ ++ pgd += index; ++ pgd_k = init_mm.pgd + index; ++ ++ if (!pgd_present(*pgd_k)) ++ return NULL; ++ ++ /* ++ * set_pgd(pgd, *pgd_k); here would be useless on PAE ++ * and redundant with the set_pmd() on non-PAE. As would ++ * set_pud. ++ */ ++ pud = pud_offset(pgd, address); ++ pud_k = pud_offset(pgd_k, address); ++ if (!pud_present(*pud_k)) ++ return NULL; ++ ++ pmd = pmd_offset(pud, address); ++ pmd_k = pmd_offset(pud_k, address); ++ if (!pmd_present(*pmd_k)) ++ return NULL; ++ ++ if (!pmd_present(*pmd)) { ++ set_pmd(pmd, *pmd_k); ++ arch_flush_lazy_mmu_mode(); ++ } else { ++ BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); ++ } ++ ++ return pmd_k; ++} ++ ++void vmalloc_sync_all(void) ++{ ++ unsigned long address; ++ ++ if (SHARED_KERNEL_PMD) ++ return; ++ ++ for (address = VMALLOC_START & PMD_MASK; ++ address >= TASK_SIZE && address < FIXADDR_TOP; ++ address += PMD_SIZE) { ++ ++ unsigned long flags; ++ struct page *page; ++ ++ spin_lock_irqsave(&pgd_lock, flags); ++ list_for_each_entry(page, &pgd_list, lru) { ++ if (!vmalloc_sync_one(page_address(page), address)) ++ break; ++ } ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ } ++} ++ ++/* ++ * 32-bit: ++ * ++ * Handle a fault on the vmalloc or module mapping area ++ */ ++static noinline int vmalloc_fault(unsigned long address) ++{ ++ unsigned long pgd_paddr; ++ pmd_t *pmd_k; ++ pte_t *pte_k; ++ ++ /* Make sure we are in vmalloc area: */ ++ if (!(address >= VMALLOC_START && address < VMALLOC_END)) ++ return -1; ++ ++ /* ++ * Synchronize this task's top level page-table ++ * with the 'reference' page table. ++ * ++ * Do _not_ use "current" here. We might be inside ++ * an interrupt in the middle of a task switch.. ++ */ ++ pgd_paddr = read_cr3(); ++ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); ++ if (!pmd_k) ++ return -1; ++ ++ pte_k = pte_offset_kernel(pmd_k, address); ++ if (!pte_present(*pte_k)) ++ return -1; ++ ++ return 0; ++} ++ ++/* ++ * Did it hit the DOS screen memory VA from vm86 mode? 
++ */ ++static inline void ++check_v8086_mode(struct pt_regs *regs, unsigned long address, ++ struct task_struct *tsk) ++{ ++ unsigned long bit; ++ ++ if (!v8086_mode(regs)) ++ return; ++ ++ bit = (address - 0xA0000) >> PAGE_SHIFT; ++ if (bit < 32) ++ tsk->thread.screen_bitmap |= 1 << bit; + } +-#endif + + static void dump_pagetable(unsigned long address) + { +-#ifdef CONFIG_X86_32 + __typeof__(pte_val(__pte(0))) page; + + page = read_cr3(); + page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; ++ + #ifdef CONFIG_X86_PAE + printk("*pdpt = %016Lx ", page); + if ((page >> PAGE_SHIFT) < max_low_pfn + && page & _PAGE_PRESENT) { + page &= PAGE_MASK; + page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT) +- & (PTRS_PER_PMD - 1)]; ++ & (PTRS_PER_PMD - 1)]; + printk(KERN_CONT "*pde = %016Lx ", page); + page &= ~_PAGE_NX; + } +@@ -217,19 +336,145 @@ static void dump_pagetable(unsigned long + * We must not directly access the pte in the highpte + * case if the page table is located in highmem. + * And let's rather not kmap-atomic the pte, just in case +- * it's allocated already. ++ * it's allocated already: + */ + if ((page >> PAGE_SHIFT) < max_low_pfn + && (page & _PAGE_PRESENT) + && !(page & _PAGE_PSE)) { ++ + page &= PAGE_MASK; + page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) +- & (PTRS_PER_PTE - 1)]; ++ & (PTRS_PER_PTE - 1)]; + printk("*pte = %0*Lx ", sizeof(page)*2, (u64)page); + } + + printk("\n"); +-#else /* CONFIG_X86_64 */ ++} ++ ++#else /* CONFIG_X86_64: */ ++ ++void vmalloc_sync_all(void) ++{ ++ unsigned long address; ++ ++ for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; ++ address += PGDIR_SIZE) { ++ ++ const pgd_t *pgd_ref = pgd_offset_k(address); ++ unsigned long flags; ++ struct page *page; ++ ++ if (pgd_none(*pgd_ref)) ++ continue; ++ ++ spin_lock_irqsave(&pgd_lock, flags); ++ list_for_each_entry(page, &pgd_list, lru) { ++ pgd_t *pgd; ++ pgd = (pgd_t *)page_address(page) + pgd_index(address); ++ if (pgd_none(*pgd)) ++ set_pgd(pgd, *pgd_ref); ++ else ++ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); ++ } ++ spin_unlock_irqrestore(&pgd_lock, flags); ++ } ++} ++ ++/* ++ * 64-bit: ++ * ++ * Handle a fault on the vmalloc area ++ * ++ * This assumes no large pages in there. ++ */ ++static noinline int vmalloc_fault(unsigned long address) ++{ ++ pgd_t *pgd, *pgd_ref; ++ pud_t *pud, *pud_ref; ++ pmd_t *pmd, *pmd_ref; ++ pte_t *pte, *pte_ref; ++ ++ /* Make sure we are in vmalloc area: */ ++ if (!(address >= VMALLOC_START && address < VMALLOC_END)) ++ return -1; ++ ++ /* ++ * Copy kernel mappings over when needed. This can also ++ * happen within a race in page table update. 
In the later ++ * case just flush: ++ */ ++ pgd = pgd_offset(current->active_mm, address); ++ pgd_ref = pgd_offset_k(address); ++ if (pgd_none(*pgd_ref)) ++ return -1; ++ ++ if (pgd_none(*pgd)) ++ set_pgd(pgd, *pgd_ref); ++ else ++ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); ++ ++ /* ++ * Below here mismatches are bugs because these lower tables ++ * are shared: ++ */ ++ ++ pud = pud_offset(pgd, address); ++ pud_ref = pud_offset(pgd_ref, address); ++ if (pud_none(*pud_ref)) ++ return -1; ++ ++ if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) ++ BUG(); ++ ++ pmd = pmd_offset(pud, address); ++ pmd_ref = pmd_offset(pud_ref, address); ++ if (pmd_none(*pmd_ref)) ++ return -1; ++ ++ if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) ++ BUG(); ++ ++ pte_ref = pte_offset_kernel(pmd_ref, address); ++ if (!pte_present(*pte_ref)) ++ return -1; ++ ++ pte = pte_offset_kernel(pmd, address); ++ ++ /* ++ * Don't use pte_page here, because the mappings can point ++ * outside mem_map, and the NUMA hash lookup cannot handle ++ * that: ++ */ ++ if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) ++ BUG(); ++ ++ return 0; ++} ++ ++static const char errata93_warning[] = ++KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" ++KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" ++KERN_ERR "******* Please consider a BIOS update.\n" ++KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; ++ ++/* ++ * No vm86 mode in 64-bit mode: ++ */ ++static inline void ++check_v8086_mode(struct pt_regs *regs, unsigned long address, ++ struct task_struct *tsk) ++{ ++} ++ ++static int bad_address(void *p) ++{ ++ unsigned long dummy; ++ ++ return probe_kernel_address((unsigned long *)p, dummy); ++} ++ ++static void dump_pagetable(unsigned long address) ++{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; +@@ -238,102 +483,77 @@ static void dump_pagetable(unsigned long + pgd = (pgd_t *)read_cr3(); + + pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK); ++ + pgd += pgd_index(address); +- if (bad_address(pgd)) goto bad; ++ if (bad_address(pgd)) ++ goto bad; ++ + printk("PGD %lx ", pgd_val(*pgd)); +- if (!pgd_present(*pgd)) goto ret; ++ ++ if (!pgd_present(*pgd)) ++ goto out; + + pud = pud_offset(pgd, address); +- if (bad_address(pud)) goto bad; ++ if (bad_address(pud)) ++ goto bad; ++ + printk("PUD %lx ", pud_val(*pud)); + if (!pud_present(*pud) || pud_large(*pud)) +- goto ret; ++ goto out; + + pmd = pmd_offset(pud, address); +- if (bad_address(pmd)) goto bad; ++ if (bad_address(pmd)) ++ goto bad; ++ + printk("PMD %lx ", pmd_val(*pmd)); +- if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; ++ if (!pmd_present(*pmd) || pmd_large(*pmd)) ++ goto out; + + pte = pte_offset_kernel(pmd, address); +- if (bad_address(pte)) goto bad; ++ if (bad_address(pte)) ++ goto bad; ++ + printk("PTE %lx", pte_val(*pte)); +-ret: ++out: + printk("\n"); + return; + bad: + printk("BAD\n"); +-#endif + } + +-#ifdef CONFIG_X86_32 +-static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) +-{ +- unsigned index = pgd_index(address); +- pgd_t *pgd_k; +- pud_t *pud, *pud_k; +- pmd_t *pmd, *pmd_k; +- +- pgd += index; +- pgd_k = init_mm.pgd + index; ++#endif /* CONFIG_X86_64 */ + +- if (!pgd_present(*pgd_k)) +- return NULL; +- +- /* +- * set_pgd(pgd, *pgd_k); here would be useless on PAE +- * and redundant with the set_pmd() on non-PAE. As would +- * set_pud. 
+- */ +- +- pud = pud_offset(pgd, address); +- pud_k = pud_offset(pgd_k, address); +- if (!pud_present(*pud_k)) +- return NULL; +- +- pmd = pmd_offset(pud, address); +- pmd_k = pmd_offset(pud_k, address); +- if (!pmd_present(*pmd_k)) +- return NULL; +- if (!pmd_present(*pmd)) { +- set_pmd(pmd, *pmd_k); +- arch_flush_lazy_mmu_mode(); +- } else +- BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); +- return pmd_k; +-} +-#endif +- +-#ifdef CONFIG_X86_64 +-static const char errata93_warning[] = +-KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n" +-KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n" +-KERN_ERR "******* Please consider a BIOS update.\n" +-KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n"; +-#endif +- +-/* Workaround for K8 erratum #93 & buggy BIOS. +- BIOS SMM functions are required to use a specific workaround +- to avoid corruption of the 64bit RIP register on C stepping K8. +- A lot of BIOS that didn't get tested properly miss this. +- The OS sees this as a page fault with the upper 32bits of RIP cleared. +- Try to work around it here. +- Note we only handle faults in kernel here. +- Does nothing for X86_32 ++/* ++ * Workaround for K8 erratum #93 & buggy BIOS. ++ * ++ * BIOS SMM functions are required to use a specific workaround ++ * to avoid corruption of the 64bit RIP register on C stepping K8. ++ * ++ * A lot of BIOS that didn't get tested properly miss this. ++ * ++ * The OS sees this as a page fault with the upper 32bits of RIP cleared. ++ * Try to work around it here. ++ * ++ * Note we only handle faults in kernel here. ++ * Does nothing on 32-bit. + */ + static int is_errata93(struct pt_regs *regs, unsigned long address) + { + #ifdef CONFIG_X86_64 +- static int warned; ++ static int once; ++ + if (address != regs->ip) + return 0; ++ + if ((address >> 32) != 0) + return 0; ++ + address |= 0xffffffffUL << 32; + if ((address >= (u64)_stext && address <= (u64)_etext) || + (address >= MODULES_VADDR && address <= MODULES_END)) { +- if (!warned) { ++ if (!once) { + printk(errata93_warning); +- warned = 1; ++ once = 1; + } + regs->ip = address; + return 1; +@@ -343,16 +563,17 @@ static int is_errata93(struct pt_regs *r + } + + /* +- * Work around K8 erratum #100 K8 in compat mode occasionally jumps to illegal +- * addresses >4GB. We catch this in the page fault handler because these +- * addresses are not reachable. Just detect this case and return. Any code ++ * Work around K8 erratum #100 K8 in compat mode occasionally jumps ++ * to illegal addresses >4GB. ++ * ++ * We catch this in the page fault handler because these addresses ++ * are not reachable. Just detect this case and return. Any code + * segment in LDT is compatibility mode. + */ + static int is_errata100(struct pt_regs *regs, unsigned long address) + { + #ifdef CONFIG_X86_64 +- if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && +- (address >> 32)) ++ if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32)) + return 1; + #endif + return 0; +@@ -362,13 +583,15 @@ static int is_f00f_bug(struct pt_regs *r + { + #ifdef CONFIG_X86_F00F_BUG + unsigned long nr; ++ + /* +- * Pentium F0 0F C7 C8 bug workaround. 
++ * Pentium F0 0F C7 C8 bug workaround: + */ + if (boot_cpu_data.f00f_bug) { + nr = (address - idt_descr.address) >> 3; + + if (nr == 6) { ++ zap_rt_locks(); + do_invalid_op(regs, 0); + return 1; + } +@@ -377,62 +600,277 @@ static int is_f00f_bug(struct pt_regs *r + return 0; + } + +-static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, +- unsigned long address) ++static const char nx_warning[] = KERN_CRIT ++"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n"; ++ ++static void ++show_fault_oops(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) + { +-#ifdef CONFIG_X86_32 + if (!oops_may_print()) + return; +-#endif + +-#ifdef CONFIG_X86_PAE + if (error_code & PF_INSTR) { + unsigned int level; ++ + pte_t *pte = lookup_address(address, &level); + + if (pte && pte_present(*pte) && !pte_exec(*pte)) +- printk(KERN_CRIT "kernel tried to execute " +- "NX-protected page - exploit attempt? " +- "(uid: %d)\n", current_uid()); ++ printk(nx_warning, current_uid()); + } +-#endif + + printk(KERN_ALERT "BUG: unable to handle kernel "); + if (address < PAGE_SIZE) + printk(KERN_CONT "NULL pointer dereference"); + else + printk(KERN_CONT "paging request"); ++ + printk(KERN_CONT " at %p\n", (void *) address); + printk(KERN_ALERT "IP:"); + printk_address(regs->ip, 1); ++ + dump_pagetable(address); + } + +-#ifdef CONFIG_X86_64 +-static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, +- unsigned long error_code) ++static noinline void ++pgtable_bad(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) + { +- unsigned long flags = oops_begin(); +- int sig = SIGKILL; + struct task_struct *tsk; ++ unsigned long flags; ++ int sig; ++ ++ flags = oops_begin(); ++ tsk = current; ++ sig = SIGKILL; + + printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", +- current->comm, address); ++ tsk->comm, address); + dump_pagetable(address); +- tsk = current; +- tsk->thread.cr2 = address; +- tsk->thread.trap_no = 14; +- tsk->thread.error_code = error_code; ++ ++ tsk->thread.cr2 = address; ++ tsk->thread.trap_no = 14; ++ tsk->thread.error_code = error_code; ++ + if (__die("Bad pagetable", regs, error_code)) + sig = 0; ++ ++ oops_end(flags, regs, sig); ++} ++ ++static noinline void ++no_context(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) ++{ ++ struct task_struct *tsk = current; ++ unsigned long *stackend; ++ unsigned long flags; ++ int sig; ++ ++ /* Are we prepared to handle this kernel fault? */ ++ if (fixup_exception(regs)) ++ return; ++ ++ /* ++ * 32-bit: ++ * ++ * Valid to do another page fault here, because if this fault ++ * had been triggered by is_prefetch fixup_exception would have ++ * handled it. ++ * ++ * 64-bit: ++ * ++ * Hall of shame of CPU/BIOS bugs. ++ */ ++ if (is_prefetch(regs, error_code, address)) ++ return; ++ ++ if (is_errata93(regs, address)) ++ return; ++ ++ /* ++ * Oops. The kernel tried to access some bad page. 
We'll have to ++ * terminate things with extreme prejudice: ++ */ ++ flags = oops_begin(); ++ ++ show_fault_oops(regs, error_code, address); ++ ++ stackend = end_of_stack(tsk); ++ if (*stackend != STACK_END_MAGIC) ++ printk(KERN_ALERT "Thread overran stack, or stack corrupted\n"); ++ ++ tsk->thread.cr2 = address; ++ tsk->thread.trap_no = 14; ++ tsk->thread.error_code = error_code; ++ ++ sig = SIGKILL; ++ if (__die("Oops", regs, error_code)) ++ sig = 0; ++ ++ /* Executive summary in case the body of the oops scrolled away */ ++ printk(KERN_EMERG "CR2: %016lx\n", address); ++ + oops_end(flags, regs, sig); + } +-#endif ++ ++/* ++ * Print out info about fatal segfaults, if the show_unhandled_signals ++ * sysctl is set: ++ */ ++static inline void ++show_signal_msg(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, struct task_struct *tsk) ++{ ++ if (!unhandled_signal(tsk, SIGSEGV)) ++ return; ++ ++ if (!printk_ratelimit()) ++ return; ++ ++ printk(KERN_CONT "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", ++ task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, ++ tsk->comm, task_pid_nr(tsk), address, ++ (void *)regs->ip, (void *)regs->sp, error_code); ++ ++ print_vma_addr(KERN_CONT " in ", regs->ip); ++ ++ printk(KERN_CONT "\n"); ++} ++ ++static void ++__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, int si_code) ++{ ++ struct task_struct *tsk = current; ++ ++ /* User mode accesses just cause a SIGSEGV */ ++ if (error_code & PF_USER) { ++ /* ++ * It's possible to have interrupts off here: ++ */ ++ local_irq_enable(); ++ ++ /* ++ * Valid to do another page fault here because this one came ++ * from user space: ++ */ ++ if (is_prefetch(regs, error_code, address)) ++ return; ++ ++ if (is_errata100(regs, address)) ++ return; ++ ++ if (unlikely(show_unhandled_signals)) ++ show_signal_msg(regs, error_code, address, tsk); ++ ++ /* Kernel addresses are always protection faults: */ ++ tsk->thread.cr2 = address; ++ tsk->thread.error_code = error_code | (address >= TASK_SIZE); ++ tsk->thread.trap_no = 14; ++ ++ force_sig_info_fault(SIGSEGV, si_code, address, tsk); ++ ++ return; ++ } ++ ++ if (is_f00f_bug(regs, address)) ++ return; ++ ++ no_context(regs, error_code, address); ++} ++ ++static noinline void ++bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) ++{ ++ __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR); ++} ++ ++static void ++__bad_area(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, int si_code) ++{ ++ struct mm_struct *mm = current->mm; ++ ++ /* ++ * Something tried to access memory that isn't in our memory map.. ++ * Fix it, but check if it's kernel or user first.. 
++ */ ++ up_read(&mm->mmap_sem); ++ ++ __bad_area_nosemaphore(regs, error_code, address, si_code); ++} ++ ++static noinline void ++bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address) ++{ ++ __bad_area(regs, error_code, address, SEGV_MAPERR); ++} ++ ++static noinline void ++bad_area_access_error(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) ++{ ++ __bad_area(regs, error_code, address, SEGV_ACCERR); ++} ++ ++/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */ ++static void ++out_of_memory(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address) ++{ ++ /* ++ * We ran out of memory, call the OOM killer, and return the userspace ++ * (which will retry the fault, or kill us if we got oom-killed): ++ */ ++ up_read(¤t->mm->mmap_sem); ++ ++ pagefault_out_of_memory(); ++} ++ ++static void ++do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address) ++{ ++ struct task_struct *tsk = current; ++ struct mm_struct *mm = tsk->mm; ++ ++ up_read(&mm->mmap_sem); ++ ++ /* Kernel mode? Handle exceptions or die: */ ++ if (!(error_code & PF_USER)) ++ no_context(regs, error_code, address); ++ ++ /* User-space => ok to do another page fault: */ ++ if (is_prefetch(regs, error_code, address)) ++ return; ++ ++ tsk->thread.cr2 = address; ++ tsk->thread.error_code = error_code; ++ tsk->thread.trap_no = 14; ++ ++ force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); ++} ++ ++static noinline void ++mm_fault_error(struct pt_regs *regs, unsigned long error_code, ++ unsigned long address, unsigned int fault) ++{ ++ if (fault & VM_FAULT_OOM) { ++ out_of_memory(regs, error_code, address); ++ } else { ++ if (fault & VM_FAULT_SIGBUS) ++ do_sigbus(regs, error_code, address); ++ else ++ BUG(); ++ } ++} + + static int spurious_fault_check(unsigned long error_code, pte_t *pte) + { + if ((error_code & PF_WRITE) && !pte_write(*pte)) + return 0; ++ + if ((error_code & PF_INSTR) && !pte_exec(*pte)) + return 0; + +@@ -440,21 +878,25 @@ static int spurious_fault_check(unsigned + } + + /* +- * Handle a spurious fault caused by a stale TLB entry. This allows +- * us to lazily refresh the TLB when increasing the permissions of a +- * kernel page (RO -> RW or NX -> X). Doing it eagerly is very +- * expensive since that implies doing a full cross-processor TLB +- * flush, even if no stale TLB entries exist on other processors. ++ * Handle a spurious fault caused by a stale TLB entry. ++ * ++ * This allows us to lazily refresh the TLB when increasing the ++ * permissions of a kernel page (RO -> RW or NX -> X). Doing it ++ * eagerly is very expensive since that implies doing a full ++ * cross-processor TLB flush, even if no stale TLB entries exist ++ * on other processors. ++ * + * There are no security implications to leaving a stale TLB when + * increasing the permissions on a page. + */ +-static int spurious_fault(unsigned long address, +- unsigned long error_code) ++static noinline int ++spurious_fault(unsigned long error_code, unsigned long address) + { + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; ++ int ret; + + /* Reserved-bit violation or user access to kernel space? 
*/ + if (error_code & (PF_USER | PF_RSVD)) +@@ -482,126 +924,77 @@ static int spurious_fault(unsigned long + if (!pte_present(*pte)) + return 0; + +- return spurious_fault_check(error_code, pte); +-} +- +-/* +- * X86_32 +- * Handle a fault on the vmalloc or module mapping area +- * +- * X86_64 +- * Handle a fault on the vmalloc area +- * +- * This assumes no large pages in there. +- */ +-static int vmalloc_fault(unsigned long address) +-{ +-#ifdef CONFIG_X86_32 +- unsigned long pgd_paddr; +- pmd_t *pmd_k; +- pte_t *pte_k; +- +- /* Make sure we are in vmalloc area */ +- if (!(address >= VMALLOC_START && address < VMALLOC_END)) +- return -1; ++ ret = spurious_fault_check(error_code, pte); ++ if (!ret) ++ return 0; + + /* +- * Synchronize this task's top level page-table +- * with the 'reference' page table. +- * +- * Do _not_ use "current" here. We might be inside +- * an interrupt in the middle of a task switch.. ++ * Make sure we have permissions in PMD. ++ * If not, then there's a bug in the page tables: + */ +- pgd_paddr = read_cr3(); +- pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); +- if (!pmd_k) +- return -1; +- pte_k = pte_offset_kernel(pmd_k, address); +- if (!pte_present(*pte_k)) +- return -1; +- return 0; +-#else +- pgd_t *pgd, *pgd_ref; +- pud_t *pud, *pud_ref; +- pmd_t *pmd, *pmd_ref; +- pte_t *pte, *pte_ref; ++ ret = spurious_fault_check(error_code, (pte_t *) pmd); ++ WARN_ONCE(!ret, "PMD has incorrect permission bits\n"); + +- /* Make sure we are in vmalloc area */ +- if (!(address >= VMALLOC_START && address < VMALLOC_END)) +- return -1; ++ return ret; ++} + +- /* Copy kernel mappings over when needed. This can also +- happen within a race in page table update. In the later +- case just flush. */ ++int show_unhandled_signals = 1; + +- pgd = pgd_offset(current->active_mm, address); +- pgd_ref = pgd_offset_k(address); +- if (pgd_none(*pgd_ref)) +- return -1; +- if (pgd_none(*pgd)) +- set_pgd(pgd, *pgd_ref); +- else +- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); ++static inline int ++access_error(unsigned long error_code, int write, struct vm_area_struct *vma) ++{ ++ if (write) { ++ /* write, present and write, not present: */ ++ if (unlikely(!(vma->vm_flags & VM_WRITE))) ++ return 1; ++ return 0; ++ } + +- /* Below here mismatches are bugs because these lower tables +- are shared */ ++ /* read, present: */ ++ if (unlikely(error_code & PF_PROT)) ++ return 1; ++ ++ /* read, not present: */ ++ if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))) ++ return 1; + +- pud = pud_offset(pgd, address); +- pud_ref = pud_offset(pgd_ref, address); +- if (pud_none(*pud_ref)) +- return -1; +- if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref)) +- BUG(); +- pmd = pmd_offset(pud, address); +- pmd_ref = pmd_offset(pud_ref, address); +- if (pmd_none(*pmd_ref)) +- return -1; +- if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref)) +- BUG(); +- pte_ref = pte_offset_kernel(pmd_ref, address); +- if (!pte_present(*pte_ref)) +- return -1; +- pte = pte_offset_kernel(pmd, address); +- /* Don't use pte_page here, because the mappings can point +- outside mem_map, and the NUMA hash lookup cannot handle +- that. */ +- if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) +- BUG(); + return 0; +-#endif + } + +-int show_unhandled_signals = 1; ++static int fault_in_kernel_space(unsigned long address) ++{ ++ return address >= TASK_SIZE_MAX; ++} + + /* + * This routine handles page faults. 
It determines the address, + * and the problem, and then passes it off to one of the appropriate + * routines. + */ +-#ifdef CONFIG_X86_64 +-asmlinkage +-#endif +-void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) ++dotraplinkage void __kprobes ++do_page_fault(struct pt_regs *regs, unsigned long error_code) + { +- struct task_struct *tsk; +- struct mm_struct *mm; + struct vm_area_struct *vma; ++ struct task_struct *tsk; + unsigned long address; +- int write, si_code; ++ struct mm_struct *mm; ++ int write; + int fault; +-#ifdef CONFIG_X86_64 +- unsigned long flags; +- int sig; +-#endif + + tsk = current; + mm = tsk->mm; ++ + prefetchw(&mm->mmap_sem); + +- /* get the address */ ++ /* Get the faulting address: */ + address = read_cr2(); + +- si_code = SEGV_MAPERR; ++ /* ++ * Detect and handle instructions that would cause a page fault for ++ * both a tracked kernel page and a userspace page. ++ */ ++ if (kmemcheck_active(regs)) ++ kmemcheck_hide(regs); + + if (unlikely(kmmio_fault(regs, address))) + return; +@@ -619,319 +1012,156 @@ void __kprobes do_page_fault(struct pt_r + * (error_code & 4) == 0, and that the fault was not a + * protection error (error_code & 9) == 0. + */ +-#ifdef CONFIG_X86_32 +- if (unlikely(address >= TASK_SIZE)) { +-#else +- if (unlikely(address >= TASK_SIZE64)) { +-#endif +- if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && +- vmalloc_fault(address) >= 0) +- return; ++ if (unlikely(fault_in_kernel_space(address))) { ++ if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) { ++ if (vmalloc_fault(address) >= 0) ++ return; ++ ++ if (kmemcheck_fault(regs, address, error_code)) ++ return; ++ } + +- /* Can handle a stale RO->RW TLB */ +- if (spurious_fault(address, error_code)) ++ /* Can handle a stale RO->RW TLB: */ ++ if (spurious_fault(error_code, address)) + return; + +- /* kprobes don't want to hook the spurious faults. */ ++ /* kprobes don't want to hook the spurious faults: */ + if (notify_page_fault(regs)) + return; + /* + * Don't take the mm semaphore here. If we fixup a prefetch +- * fault we could otherwise deadlock. ++ * fault we could otherwise deadlock: + */ +- goto bad_area_nosemaphore; +- } ++ bad_area_nosemaphore(regs, error_code, address); + +- /* kprobes don't want to hook the spurious faults. */ +- if (notify_page_fault(regs)) + return; ++ } + ++ /* kprobes don't want to hook the spurious faults: */ ++ if (unlikely(notify_page_fault(regs))) ++ return; + /* + * It's safe to allow irq's after cr2 has been saved and the + * vmalloc fault has been handled. + * + * User-mode registers count as a user access even for any +- * potential system fault or CPU buglet. ++ * potential system fault or CPU buglet: + */ + if (user_mode_vm(regs)) { + local_irq_enable(); + error_code |= PF_USER; +- } else if (regs->flags & X86_EFLAGS_IF) +- local_irq_enable(); ++ } else { ++ if (regs->flags & X86_EFLAGS_IF) ++ local_irq_enable(); ++ } + +-#ifdef CONFIG_X86_64 + if (unlikely(error_code & PF_RSVD)) +- pgtable_bad(address, regs, error_code); +-#endif ++ pgtable_bad(regs, error_code, address); ++ ++ perf_swcounter_event(PERF_COUNT_PAGE_FAULTS, 1, 0, regs); + + /* +- * If we're in an interrupt, have no user context or are running in an +- * atomic region then we must not take the fault. 
++ * If we're in an interrupt, have no user context or are running ++ * in an atomic region then we must not take the fault: + */ +- if (unlikely(in_atomic() || !mm)) +- goto bad_area_nosemaphore; ++ if (unlikely(in_atomic() || !mm || current->pagefault_disabled)) { ++ bad_area_nosemaphore(regs, error_code, address); ++ return; ++ } + + /* + * When running in the kernel we expect faults to occur only to +- * addresses in user space. All other faults represent errors in the +- * kernel and should generate an OOPS. Unfortunately, in the case of an +- * erroneous fault occurring in a code path which already holds mmap_sem +- * we will deadlock attempting to validate the fault against the +- * address space. Luckily the kernel only validly references user +- * space from well defined areas of code, which are listed in the +- * exceptions table. ++ * addresses in user space. All other faults represent errors in ++ * the kernel and should generate an OOPS. Unfortunately, in the ++ * case of an erroneous fault occurring in a code path which already ++ * holds mmap_sem we will deadlock attempting to validate the fault ++ * against the address space. Luckily the kernel only validly ++ * references user space from well defined areas of code, which are ++ * listed in the exceptions table. + * + * As the vast majority of faults will be valid we will only perform +- * the source reference check when there is a possibility of a deadlock. +- * Attempt to lock the address space, if we cannot we then validate the +- * source. If this is invalid we can skip the address space check, +- * thus avoiding the deadlock. ++ * the source reference check when there is a possibility of a ++ * deadlock. Attempt to lock the address space, if we cannot we then ++ * validate the source. If this is invalid we can skip the address ++ * space check, thus avoiding the deadlock: + */ +- if (!down_read_trylock(&mm->mmap_sem)) { ++ if (unlikely(!down_read_trylock(&mm->mmap_sem))) { + if ((error_code & PF_USER) == 0 && +- !search_exception_tables(regs->ip)) +- goto bad_area_nosemaphore; ++ !search_exception_tables(regs->ip)) { ++ bad_area_nosemaphore(regs, error_code, address); ++ return; ++ } + down_read(&mm->mmap_sem); ++ } else { ++ /* ++ * The above down_read_trylock() might have succeeded in ++ * which case we'll have missed the might_sleep() from ++ * down_read(): ++ */ ++ might_sleep(); + } + + vma = find_vma(mm, address); +- if (!vma) +- goto bad_area; +- if (vma->vm_start <= address) ++ if (unlikely(!vma)) { ++ bad_area(regs, error_code, address); ++ return; ++ } ++ if (likely(vma->vm_start <= address)) + goto good_area; +- if (!(vma->vm_flags & VM_GROWSDOWN)) +- goto bad_area; ++ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { ++ bad_area(regs, error_code, address); ++ return; ++ } + if (error_code & PF_USER) { + /* + * Accessing the stack below %sp is always a bug. + * The large cushion allows instructions like enter +- * and pusha to work. ("enter $65535,$31" pushes ++ * and pusha to work. ("enter $65535, $31" pushes + * 32 pointers and then decrements %sp by 65535.) + */ +- if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp) +- goto bad_area; +- } +- if (expand_stack(vma, address)) +- goto bad_area; +-/* +- * Ok, we have a good vm_area for this memory access, so +- * we can handle it.. 
+- */ +-good_area: +- si_code = SEGV_ACCERR; +- write = 0; +- switch (error_code & (PF_PROT|PF_WRITE)) { +- default: /* 3: write, present */ +- /* fall through */ +- case PF_WRITE: /* write, not present */ +- if (!(vma->vm_flags & VM_WRITE)) +- goto bad_area; +- write++; +- break; +- case PF_PROT: /* read, present */ +- goto bad_area; +- case 0: /* read, not present */ +- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))) +- goto bad_area; +- } +- +- /* +- * If for any reason at all we couldn't handle the fault, +- * make sure we exit gracefully rather than endlessly redo +- * the fault. +- */ +- fault = handle_mm_fault(mm, vma, address, write); +- if (unlikely(fault & VM_FAULT_ERROR)) { +- if (fault & VM_FAULT_OOM) +- goto out_of_memory; +- else if (fault & VM_FAULT_SIGBUS) +- goto do_sigbus; +- BUG(); +- } +- if (fault & VM_FAULT_MAJOR) +- tsk->maj_flt++; +- else +- tsk->min_flt++; +- +-#ifdef CONFIG_X86_32 +- /* +- * Did it hit the DOS screen memory VA from vm86 mode? +- */ +- if (v8086_mode(regs)) { +- unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT; +- if (bit < 32) +- tsk->thread.screen_bitmap |= 1 << bit; +- } +-#endif +- up_read(&mm->mmap_sem); +- return; +- +-/* +- * Something tried to access memory that isn't in our memory map.. +- * Fix it, but check if it's kernel or user first.. +- */ +-bad_area: +- up_read(&mm->mmap_sem); +- +-bad_area_nosemaphore: +- /* User mode accesses just cause a SIGSEGV */ +- if (error_code & PF_USER) { +- /* +- * It's possible to have interrupts off here. +- */ +- local_irq_enable(); +- +- /* +- * Valid to do another page fault here because this one came +- * from user space. +- */ +- if (is_prefetch(regs, address, error_code)) +- return; +- +- if (is_errata100(regs, address)) ++ if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) { ++ bad_area(regs, error_code, address); + return; +- +- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && +- printk_ratelimit()) { +- printk( +- "%s%s[%d]: segfault at %lx ip %p sp %p error %lx", +- task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, +- tsk->comm, task_pid_nr(tsk), address, +- (void *) regs->ip, (void *) regs->sp, error_code); +- print_vma_addr(" in ", regs->ip); +- printk("\n"); + } +- +- tsk->thread.cr2 = address; +- /* Kernel addresses are always protection faults */ +- tsk->thread.error_code = error_code | (address >= TASK_SIZE); +- tsk->thread.trap_no = 14; +- force_sig_info_fault(SIGSEGV, si_code, address, tsk); +- return; + } +- +- if (is_f00f_bug(regs, address)) +- return; +- +-no_context: +- /* Are we prepared to handle this kernel fault? */ +- if (fixup_exception(regs)) ++ if (unlikely(expand_stack(vma, address))) { ++ bad_area(regs, error_code, address); + return; ++ } + + /* +- * X86_32 +- * Valid to do another page fault here, because if this fault +- * had been triggered by is_prefetch fixup_exception would have +- * handled it. +- * +- * X86_64 +- * Hall of shame of CPU/BIOS bugs. ++ * Ok, we have a good vm_area for this memory access, so ++ * we can handle it.. + */ +- if (is_prefetch(regs, address, error_code)) +- return; ++good_area: ++ write = error_code & PF_WRITE; + +- if (is_errata93(regs, address)) ++ if (unlikely(access_error(error_code, write, vma))) { ++ bad_area_access_error(regs, error_code, address); + return; ++ } + +-/* +- * Oops. The kernel tried to access some bad page. We'll have to +- * terminate things with extreme prejudice. 
+- */ +-#ifdef CONFIG_X86_32 +- bust_spinlocks(1); +-#else +- flags = oops_begin(); +-#endif +- +- show_fault_oops(regs, error_code, address); +- +- tsk->thread.cr2 = address; +- tsk->thread.trap_no = 14; +- tsk->thread.error_code = error_code; +- +-#ifdef CONFIG_X86_32 +- die("Oops", regs, error_code); +- bust_spinlocks(0); +- do_exit(SIGKILL); +-#else +- sig = SIGKILL; +- if (__die("Oops", regs, error_code)) +- sig = 0; +- /* Executive summary in case the body of the oops scrolled away */ +- printk(KERN_EMERG "CR2: %016lx\n", address); +- oops_end(flags, regs, sig); +-#endif +- +-out_of_memory: + /* +- * We ran out of memory, call the OOM killer, and return the userspace +- * (which will retry the fault, or kill us if we got oom-killed). ++ * If for any reason at all we couldn't handle the fault, ++ * make sure we exit gracefully rather than endlessly redo ++ * the fault: + */ +- up_read(&mm->mmap_sem); +- pagefault_out_of_memory(); +- return; +- +-do_sigbus: +- up_read(&mm->mmap_sem); +- +- /* Kernel mode? Handle exceptions or die */ +- if (!(error_code & PF_USER)) +- goto no_context; +-#ifdef CONFIG_X86_32 +- /* User space => ok to do another page fault */ +- if (is_prefetch(regs, address, error_code)) +- return; +-#endif +- tsk->thread.cr2 = address; +- tsk->thread.error_code = error_code; +- tsk->thread.trap_no = 14; +- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); +-} +- +-DEFINE_SPINLOCK(pgd_lock); +-LIST_HEAD(pgd_list); +- +-void vmalloc_sync_all(void) +-{ +- unsigned long address; ++ fault = handle_mm_fault(mm, vma, address, write); + +-#ifdef CONFIG_X86_32 +- if (SHARED_KERNEL_PMD) ++ if (unlikely(fault & VM_FAULT_ERROR)) { ++ mm_fault_error(regs, error_code, address, fault); + return; +- +- for (address = VMALLOC_START & PMD_MASK; +- address >= TASK_SIZE && address < FIXADDR_TOP; +- address += PMD_SIZE) { +- unsigned long flags; +- struct page *page; +- +- spin_lock_irqsave(&pgd_lock, flags); +- list_for_each_entry(page, &pgd_list, lru) { +- if (!vmalloc_sync_one(page_address(page), +- address)) +- break; +- } +- spin_unlock_irqrestore(&pgd_lock, flags); + } +-#else /* CONFIG_X86_64 */ +- for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END; +- address += PGDIR_SIZE) { +- const pgd_t *pgd_ref = pgd_offset_k(address); +- unsigned long flags; +- struct page *page; + +- if (pgd_none(*pgd_ref)) +- continue; +- spin_lock_irqsave(&pgd_lock, flags); +- list_for_each_entry(page, &pgd_list, lru) { +- pgd_t *pgd; +- pgd = (pgd_t *)page_address(page) + pgd_index(address); +- if (pgd_none(*pgd)) +- set_pgd(pgd, *pgd_ref); +- else +- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); +- } +- spin_unlock_irqrestore(&pgd_lock, flags); ++ if (fault & VM_FAULT_MAJOR) { ++ tsk->maj_flt++; ++ perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MAJ, 1, 0, regs); ++ } else { ++ tsk->min_flt++; ++ perf_swcounter_event(PERF_COUNT_PAGE_FAULTS_MIN, 1, 0, regs); + } +-#endif ++ ++ check_v8086_mode(regs, address, tsk); ++ ++ up_read(&mm->mmap_sem); + } +Index: linux-2.6-tip/arch/x86/mm/highmem_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/highmem_32.c ++++ linux-2.6-tip/arch/x86/mm/highmem_32.c +@@ -1,11 +1,12 @@ + #include + #include ++#include /* for totalram_pages */ + + void *kmap(struct page *page) + { +- might_sleep(); + if (!PageHighMem(page)) + return page_address(page); ++ might_sleep(); + return kmap_high(page); + } + +@@ -18,6 +19,27 @@ void kunmap(struct page *page) + kunmap_high(page); + } + ++void 
kunmap_virt(void *ptr) ++{ ++ struct page *page; ++ ++ if ((unsigned long)ptr < PKMAP_ADDR(0)) ++ return; ++ page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); ++ kunmap(page); ++} ++ ++struct page *kmap_to_page(void *ptr) ++{ ++ struct page *page; ++ ++ if ((unsigned long)ptr < PKMAP_ADDR(0)) ++ return virt_to_page(ptr); ++ page = pte_page(pkmap_page_table[PKMAP_NR((unsigned long)ptr)]); ++ return page; ++} ++EXPORT_SYMBOL_GPL(kmap_to_page); /* PREEMPT_RT converts some modules to use this */ ++ + static void debug_kmap_atomic_prot(enum km_type type) + { + #ifdef CONFIG_DEBUG_HIGHMEM +@@ -69,12 +91,12 @@ static void debug_kmap_atomic_prot(enum + * However when holding an atomic kmap is is not legal to sleep, so atomic + * kmaps are appropriate for short, tight code paths only. + */ +-void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) ++void *__kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) + { + enum fixed_addresses idx; + unsigned long vaddr; + +- /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ ++ preempt_disable(); + pagefault_disable(); + + if (!PageHighMem(page)) +@@ -84,19 +106,24 @@ void *kmap_atomic_prot(struct page *page + + idx = type + KM_TYPE_NR*smp_processor_id(); + vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +- BUG_ON(!pte_none(*(kmap_pte-idx))); ++ WARN_ON_ONCE(!pte_none(*(kmap_pte-idx))); + set_pte(kmap_pte-idx, mk_pte(page, prot)); + arch_flush_lazy_mmu_mode(); + + return (void *)vaddr; + } + +-void *kmap_atomic(struct page *page, enum km_type type) ++void *__kmap_atomic_direct(struct page *page, enum km_type type) ++{ ++ return __kmap_atomic_prot(page, type, kmap_prot); ++} ++ ++void *__kmap_atomic(struct page *page, enum km_type type) + { + return kmap_atomic_prot(page, type, kmap_prot); + } + +-void kunmap_atomic(void *kvaddr, enum km_type type) ++void __kunmap_atomic(void *kvaddr, enum km_type type) + { + unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; + enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); +@@ -118,28 +145,21 @@ void kunmap_atomic(void *kvaddr, enum km + + arch_flush_lazy_mmu_mode(); + pagefault_enable(); ++ preempt_enable(); + } + +-/* This is the same as kmap_atomic() but can map memory that doesn't ++/* ++ * This is the same as kmap_atomic() but can map memory that doesn't + * have a struct page associated with it. 
+ */ +-void *kmap_atomic_pfn(unsigned long pfn, enum km_type type) ++void *__kmap_atomic_pfn(unsigned long pfn, enum km_type type) + { +- enum fixed_addresses idx; +- unsigned long vaddr; +- +- pagefault_disable(); +- +- idx = type + KM_TYPE_NR*smp_processor_id(); +- vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +- set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot)); +- arch_flush_lazy_mmu_mode(); +- +- return (void*) vaddr; ++ preempt_disable(); ++ return kmap_atomic_prot_pfn(pfn, type, kmap_prot); + } +-EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ ++EXPORT_SYMBOL_GPL(__kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */ + +-struct page *kmap_atomic_to_page(void *ptr) ++struct page *__kmap_atomic_to_page(void *ptr) + { + unsigned long idx, vaddr = (unsigned long)ptr; + pte_t *pte; +@@ -154,5 +174,30 @@ struct page *kmap_atomic_to_page(void *p + + EXPORT_SYMBOL(kmap); + EXPORT_SYMBOL(kunmap); +-EXPORT_SYMBOL(kmap_atomic); +-EXPORT_SYMBOL(kunmap_atomic); ++EXPORT_SYMBOL(kunmap_virt); ++EXPORT_SYMBOL(__kmap_atomic); ++EXPORT_SYMBOL(__kunmap_atomic); ++ ++void __init set_highmem_pages_init(void) ++{ ++ struct zone *zone; ++ int nid; ++ ++ for_each_zone(zone) { ++ unsigned long zone_start_pfn, zone_end_pfn; ++ ++ if (!is_highmem(zone)) ++ continue; ++ ++ zone_start_pfn = zone->zone_start_pfn; ++ zone_end_pfn = zone_start_pfn + zone->spanned_pages; ++ ++ nid = zone_to_nid(zone); ++ printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n", ++ zone->name, nid, zone_start_pfn, zone_end_pfn); ++ ++ add_highpages_with_active_regions(nid, zone_start_pfn, ++ zone_end_pfn); ++ } ++ totalram_pages += totalhigh_pages; ++} +Index: linux-2.6-tip/arch/x86/mm/init.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/mm/init.c +@@ -0,0 +1,394 @@ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++unsigned long __initdata e820_table_start; ++unsigned long __meminitdata e820_table_end; ++unsigned long __meminitdata e820_table_top; ++ ++enum bootmem_state bootmem_state = BEFORE_BOOTMEM; ++ ++int direct_gbpages ++#ifdef CONFIG_DIRECT_GBPAGES ++ = 1 ++#endif ++; ++ ++static void __init find_early_table_space(unsigned long end, int use_pse, ++ int use_gbpages) ++{ ++ unsigned long puds, pmds, ptes, tables, start; ++ ++ puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; ++ tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); ++ ++ if (use_gbpages) { ++ unsigned long extra; ++ ++ extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); ++ pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; ++ } else ++ pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; ++ ++ tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); ++ ++ if (use_pse) { ++ unsigned long extra; ++ ++ extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); ++#ifdef CONFIG_X86_32 ++ extra += PMD_SIZE; ++#endif ++ ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ } else ++ ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ ++ tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); ++ ++#ifdef CONFIG_X86_32 ++ /* for fixmap */ ++ tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE); ++#endif ++ ++ /* ++ * RED-PEN putting page tables only on node 0 could ++ * cause a hotspot and fill up ZONE_DMA. The page tables ++ * need roughly 0.5KB per GB. 
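The table-sizing logic in find_early_table_space() can be followed with plain arithmetic. The stand-alone sketch below mirrors the puds/pmds/ptes round-up for the 64-bit, no-gbpages path (8-byte table entries assumed, the 32-bit fixmap and PMD extras omitted) and prints how much memory the bootstrap page tables need for a hypothetical 4 GiB mapping.

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PMD_SHIFT  21
#define PUD_SHIFT  30
#define PAGE_SIZE  (1ULL << PAGE_SHIFT)
#define PMD_SIZE   (1ULL << PMD_SHIFT)
#define PUD_SIZE   (1ULL << PUD_SHIFT)

/* round a byte count up to whole pages, like roundup(..., PAGE_SIZE) */
static uint64_t roundup_page(uint64_t bytes)
{
	return (bytes + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
}

int main(void)
{
	uint64_t end = 4ULL << 30;	/* map the first 4 GiB */
	int use_pse = 1;		/* use 2 MiB pages for the bulk */
	uint64_t puds, pmds, ptes, extra, tables;

	puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
	pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
	tables = roundup_page(puds * 8) + roundup_page(pmds * 8);

	/* with PSE only the tail that is not 2 MiB aligned needs pte pages */
	extra = use_pse ? end - ((end >> PMD_SHIFT) << PMD_SHIFT) : end;
	ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
	tables += roundup_page(ptes * 8);

	printf("bootstrap page tables for %llu MiB: %llu KiB\n",
	       (unsigned long long)(end >> 20),
	       (unsigned long long)(tables >> 10));
	return 0;
}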
++ */ ++#ifdef CONFIG_X86_32 ++ start = 0x7000; ++ e820_table_start = find_e820_area(start, max_pfn_mapped<>= PAGE_SHIFT; ++ e820_table_end = e820_table_start; ++ e820_table_top = e820_table_start + (tables >> PAGE_SHIFT); ++ ++ printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", ++ end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT); ++} ++ ++struct map_range { ++ unsigned long start; ++ unsigned long end; ++ unsigned page_size_mask; ++}; ++ ++#ifdef CONFIG_X86_32 ++#define NR_RANGE_MR 3 ++#else /* CONFIG_X86_64 */ ++#define NR_RANGE_MR 5 ++#endif ++ ++static int __meminit save_mr(struct map_range *mr, int nr_range, ++ unsigned long start_pfn, unsigned long end_pfn, ++ unsigned long page_size_mask) ++{ ++ if (start_pfn < end_pfn) { ++ if (nr_range >= NR_RANGE_MR) ++ panic("run out of range for init_memory_mapping\n"); ++ mr[nr_range].start = start_pfn<> PAGE_SHIFT; ++ pos = start_pfn << PAGE_SHIFT; ++#ifdef CONFIG_X86_32 ++ /* ++ * Don't use a large page for the first 2/4MB of memory ++ * because there are often fixed size MTRRs in there ++ * and overlapping MTRRs into large pages can cause ++ * slowdowns. ++ */ ++ if (pos == 0) ++ end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT); ++ else ++ end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) ++ << (PMD_SHIFT - PAGE_SHIFT); ++#else /* CONFIG_X86_64 */ ++ end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) ++ << (PMD_SHIFT - PAGE_SHIFT); ++#endif ++ if (end_pfn > (end >> PAGE_SHIFT)) ++ end_pfn = end >> PAGE_SHIFT; ++ if (start_pfn < end_pfn) { ++ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); ++ pos = end_pfn << PAGE_SHIFT; ++ } ++ ++ /* big page (2M) range */ ++ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) ++ << (PMD_SHIFT - PAGE_SHIFT); ++#ifdef CONFIG_X86_32 ++ end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); ++#else /* CONFIG_X86_64 */ ++ end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) ++ << (PUD_SHIFT - PAGE_SHIFT); ++ if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) ++ end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); ++#endif ++ ++ if (start_pfn < end_pfn) { ++ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, ++ page_size_mask & (1<>PUD_SHIFT) ++ << (PUD_SHIFT - PAGE_SHIFT); ++ end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); ++ if (start_pfn < end_pfn) { ++ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, ++ page_size_mask & ++ ((1<>PMD_SHIFT) ++ << (PMD_SHIFT - PAGE_SHIFT); ++ end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); ++ if (start_pfn < end_pfn) { ++ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, ++ page_size_mask & (1<>PAGE_SHIFT; ++ end_pfn = end>>PAGE_SHIFT; ++ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); ++ ++ /* try to merge same page size and continuous */ ++ for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { ++ unsigned long old_start; ++ if (mr[i].end != mr[i+1].start || ++ mr[i].page_size_mask != mr[i+1].page_size_mask) ++ continue; ++ /* move it */ ++ old_start = mr[i].start; ++ memmove(&mr[i], &mr[i+1], ++ (nr_range - 1 - i) * sizeof(struct map_range)); ++ mr[i--].start = old_start; ++ nr_range--; ++ } ++ ++ for (i = 0; i < nr_range; i++) ++ printk(KERN_DEBUG " %010lx - %010lx page %s\n", ++ mr[i].start, mr[i].end, ++ (mr[i].page_size_mask & (1< e820_table_start) ++ reserve_early(e820_table_start << PAGE_SHIFT, ++ e820_table_end << PAGE_SHIFT, "PGTABLE"); ++ ++ if (bootmem_state == BEFORE_BOOTMEM) ++ early_memtest(start, end); ++ ++ return ret >> PAGE_SHIFT; ++} ++ ++ ++/* ++ * devmem_is_allowed() checks to see if 
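The head/2M/tail splitting in init_memory_mapping() ends with a coalescing pass: neighbouring map_range entries that touch and use the same page size are folded together with memmove(). Below is a stand-alone model of that merge loop, using made-up pfn ranges and a mask value standing in for 1<<PG_LEVEL_2M.

#include <stdio.h>
#include <string.h>

struct map_range {
	unsigned long start;	/* pfn */
	unsigned long end;	/* pfn */
	unsigned page_size_mask;
};

/* coalesce neighbours that touch and use the same page size, mirroring
 * the memmove() loop in init_memory_mapping() */
static int merge_ranges(struct map_range *mr, int nr_range)
{
	int i;

	for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
		unsigned long old_start;

		if (mr[i].end != mr[i + 1].start ||
		    mr[i].page_size_mask != mr[i + 1].page_size_mask)
			continue;
		old_start = mr[i].start;
		memmove(&mr[i], &mr[i + 1],
			(nr_range - 1 - i) * sizeof(struct map_range));
		mr[i--].start = old_start;
		nr_range--;
	}
	return nr_range;
}

int main(void)
{
	/* the first two chunks both ended up as 4k ranges (mask 0),
	 * so they collapse into one */
	struct map_range mr[3] = {
		{ 0x000, 0x200, 0 },
		{ 0x200, 0x400, 0 },
		{ 0x400, 0x800, 1 << 1 },	/* 2M pages */
	};
	int i, n = merge_ranges(mr, 3);

	for (i = 0; i < n; i++)
		printf("%05lx - %05lx mask %x\n",
		       mr[i].start, mr[i].end, mr[i].page_size_mask);
	return 0;
}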
/dev/mem access to a certain address ++ * is valid. The argument is a physical page number. ++ * ++ * ++ * On x86, access has to be given to the first megabyte of ram because that area ++ * contains bios code and data regions used by X and dosemu and similar apps. ++ * Access has to be given to non-kernel-ram areas as well, these contain the PCI ++ * mmio resources as well as potential bios/acpi data regions. ++ */ ++int devmem_is_allowed(unsigned long pagenr) ++{ ++ if (pagenr <= 256) ++ return 1; ++ if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) ++ return 0; ++ if (!page_is_ram(pagenr)) ++ return 1; ++ return 0; ++} ++ ++void free_init_pages(char *what, unsigned long begin, unsigned long end) ++{ ++ unsigned long addr = begin; ++ ++ if (addr >= end) ++ return; ++ ++ /* ++ * If debugging page accesses then do not free this memory but ++ * mark them not present - any buggy init-section access will ++ * create a kernel page fault: ++ */ ++#ifdef CONFIG_DEBUG_PAGEALLOC ++ printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", ++ begin, PAGE_ALIGN(end)); ++ set_memory_np(begin, (end - begin) >> PAGE_SHIFT); ++#else ++ /* ++ * We just marked the kernel text read only above, now that ++ * we are going to free part of that, we need to make that ++ * writeable first. ++ */ ++ set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); ++ ++ printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); ++ ++ for (; addr < end; addr += PAGE_SIZE) { ++ ClearPageReserved(virt_to_page(addr)); ++ init_page_count(virt_to_page(addr)); ++ memset((void *)(addr & ~(PAGE_SIZE-1)), ++ POISON_FREE_INITMEM, PAGE_SIZE); ++ free_page(addr); ++ totalram_pages++; ++ } ++#endif ++} ++ ++void free_initmem(void) ++{ ++ free_init_pages("unused kernel memory", ++ (unsigned long)(&__init_begin), ++ (unsigned long)(&__init_end)); ++} ++ ++#ifdef CONFIG_BLK_DEV_INITRD ++void free_initrd_mem(unsigned long start, unsigned long end) ++{ ++ free_init_pages("initrd memory", start, end); ++} ++#endif +Index: linux-2.6-tip/arch/x86/mm/init_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/init_32.c ++++ linux-2.6-tip/arch/x86/mm/init_32.c +@@ -49,31 +49,23 @@ + #include + #include + #include +-#include +- +-unsigned int __VMALLOC_RESERVE = 128 << 20; ++#include + + unsigned long max_low_pfn_mapped; + unsigned long max_pfn_mapped; + +-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + unsigned long highstart_pfn, highend_pfn; + + static noinline int do_test_wp_bit(void); + +- +-static unsigned long __initdata table_start; +-static unsigned long __meminitdata table_end; +-static unsigned long __meminitdata table_top; +- +-static int __initdata after_init_bootmem; ++bool __read_mostly __vmalloc_start_set = false; + + static __init void *alloc_low_page(void) + { +- unsigned long pfn = table_end++; ++ unsigned long pfn = e820_table_end++; + void *adr; + +- if (pfn >= table_top) ++ if (pfn >= e820_table_top) + panic("alloc_low_page: ran out of memory"); + + adr = __va(pfn * PAGE_SIZE); +@@ -89,14 +81,20 @@ static __init void *alloc_low_page(void) + static pmd_t * __init one_md_table_init(pgd_t *pgd) + { + pud_t *pud; +- pmd_t *pmd_table; ++ pmd_t *pmd_table = NULL; + + #ifdef CONFIG_X86_PAE + if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { +- if (after_init_bootmem) ++ switch (bootmem_state) { ++ case DURING_BOOTMEM: + pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); +- else ++ break; ++ case BEFORE_BOOTMEM: + pmd_table = (pmd_t *)alloc_low_page(); ++ break; ++ 
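devmem_is_allowed() is a pure policy function, so it is easy to model outside the kernel. In the sketch below iomem_is_exclusive() and page_is_ram() are stubs (nothing exclusive, RAM assumed to be the first 512 MiB); the decision order matches the helper added above: the legacy first megabyte is always allowed, exclusive regions never, non-RAM areas (PCI mmio, BIOS/ACPI) are allowed, and ordinary RAM above 1 MiB is refused.

#include <stdio.h>

/* stand-ins for the kernel predicates consulted by devmem_is_allowed() */
static int iomem_is_exclusive(unsigned long phys) { (void)phys; return 0; }
static int page_is_ram(unsigned long pagenr) { return pagenr < (512 << 8); }

static int devmem_is_allowed(unsigned long pagenr)
{
	if (pagenr <= 256)			/* first MiB: BIOS, X, dosemu */
		return 1;
	if (iomem_is_exclusive(pagenr << 12))
		return 0;
	if (!page_is_ram(pagenr))		/* mmio / ACPI regions */
		return 1;
	return 0;				/* normal RAM above 1 MiB */
}

int main(void)
{
	unsigned long pfns[] = { 0x10, 0x100, 0x200, 0x80000 };
	unsigned int i;

	for (i = 0; i < sizeof(pfns) / sizeof(pfns[0]); i++)
		printf("pfn %#lx: %s\n", pfns[i],
		       devmem_is_allowed(pfns[i]) ? "allowed" : "refused");
	return 0;
}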
default: ++ panic("after bootmem call one_md_table_init\n"); ++ } + paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); + set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); + pud = pud_offset(pgd, 0); +@@ -120,15 +118,21 @@ static pte_t * __init one_page_table_ini + if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { + pte_t *page_table = NULL; + +- if (after_init_bootmem) { +-#ifdef CONFIG_DEBUG_PAGEALLOC ++ switch (bootmem_state) { ++ case DURING_BOOTMEM: ++#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) + page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); + #endif + if (!page_table) + page_table = + (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); +- } else ++ break; ++ case BEFORE_BOOTMEM: + page_table = (pte_t *)alloc_low_page(); ++ break; ++ default: ++ panic("after bootmem call one_page_table_init\n"); ++ } + + paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); + set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); +@@ -138,6 +142,23 @@ static pte_t * __init one_page_table_ini + return pte_offset_kernel(pmd, 0); + } + ++pmd_t * __init populate_extra_pmd(unsigned long vaddr) ++{ ++ int pgd_idx = pgd_index(vaddr); ++ int pmd_idx = pmd_index(vaddr); ++ ++ return one_md_table_init(swapper_pg_dir + pgd_idx) + pmd_idx; ++} ++ ++pte_t * __init populate_extra_pte(unsigned long vaddr) ++{ ++ int pte_idx = pte_index(vaddr); ++ pmd_t *pmd; ++ ++ pmd = populate_extra_pmd(vaddr); ++ return one_page_table_init(pmd) + pte_idx; ++} ++ + static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd, + unsigned long vaddr, pte_t *lastpte) + { +@@ -154,12 +175,12 @@ static pte_t *__init page_table_kmap_che + if (pmd_idx_kmap_begin != pmd_idx_kmap_end + && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin + && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end +- && ((__pa(pte) >> PAGE_SHIFT) < table_start +- || (__pa(pte) >> PAGE_SHIFT) >= table_end)) { ++ && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start ++ || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) { + pte_t *newpte; + int i; + +- BUG_ON(after_init_bootmem); ++ BUG_ON(bootmem_state != BEFORE_BOOTMEM); + newpte = alloc_low_page(); + for (i = 0; i < PTRS_PER_PTE; i++) + set_pte(newpte + i, pte[i]); +@@ -228,11 +249,14 @@ static inline int is_kernel_text(unsigne + * of max_low_pfn pages, by creating page tables starting from address + * PAGE_OFFSET: + */ +-static void __init kernel_physical_mapping_init(pgd_t *pgd_base, +- unsigned long start_pfn, +- unsigned long end_pfn, +- int use_pse) ++unsigned long __init ++kernel_physical_mapping_init(unsigned long start, ++ unsigned long end, ++ unsigned long page_size_mask) + { ++ int use_pse = page_size_mask == (1<> PAGE_SHIFT; ++ end_pfn = end >> PAGE_SHIFT; ++ + /* + * First iteration will setup identity mapping using large/small pages + * based on use_pse, with other attributes same as set by +@@ -355,26 +382,6 @@ repeat: + mapping_iter = 2; + goto repeat; + } +-} +- +-/* +- * devmem_is_allowed() checks to see if /dev/mem access to a certain address +- * is valid. The argument is a physical page number. +- * +- * +- * On x86, access has to be given to the first megabyte of ram because that area +- * contains bios code and data regions used by X and dosemu and similar apps. +- * Access has to be given to non-kernel-ram areas as well, these contain the PCI +- * mmio resources as well as potential bios/acpi data regions. 
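The switch above is the pattern that replaces the old after_init_bootmem flag: each page-table helper dispatches on the current bootmem phase and picks the allocator that is legal there. A stand-alone sketch of that dispatch follows, with calloc() standing in for the three real allocators; note the exact legal phases differ per helper (one_page_table_init() panics after bootmem, spp_getpage() panics before it).

#include <stdio.h>
#include <stdlib.h>

/* the three phases that replace the old after_init_bootmem flag */
enum bootmem_state { BEFORE_BOOTMEM, DURING_BOOTMEM, AFTER_BOOTMEM };

static enum bootmem_state bootmem_state = BEFORE_BOOTMEM;

/* stand-ins for the three allocators a page-table helper may use */
static void *alloc_low_page(void)     { return calloc(1, 4096); }
static void *alloc_bootmem_page(void) { return calloc(1, 4096); }
static void *get_zeroed_page(void)    { return calloc(1, 4096); }

/* same shape as one_page_table_init()/spp_getpage(): dispatch on the
 * phase (the real helpers panic for phases they must not be called in) */
static void *alloc_pagetable_page(void)
{
	switch (bootmem_state) {
	case BEFORE_BOOTMEM:
		return alloc_low_page();
	case DURING_BOOTMEM:
		return alloc_bootmem_page();
	case AFTER_BOOTMEM:
		return get_zeroed_page();
	}
	return NULL;
}

int main(void)
{
	void *p;

	p = alloc_pagetable_page();	/* early: from the e820 window */
	printf("before bootmem: %p\n", p);

	bootmem_state = AFTER_BOOTMEM;
	p = alloc_pagetable_page();	/* late: from the page allocator */
	printf("after bootmem:  %p\n", p);
	return 0;
}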
+- */ +-int devmem_is_allowed(unsigned long pagenr) +-{ +- if (pagenr <= 256) +- return 1; +- if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) +- return 0; +- if (!page_is_ram(pagenr)) +- return 1; + return 0; + } + +@@ -470,22 +477,10 @@ void __init add_highpages_with_active_re + work_with_active_regions(nid, add_highpages_work_fn, &data); + } + +-#ifndef CONFIG_NUMA +-static void __init set_highmem_pages_init(void) +-{ +- add_highpages_with_active_regions(0, highstart_pfn, highend_pfn); +- +- totalram_pages += totalhigh_pages; +-} +-#endif /* !CONFIG_NUMA */ +- + #else + static inline void permanent_kmaps_init(pgd_t *pgd_base) + { + } +-static inline void set_highmem_pages_init(void) +-{ +-} + #endif /* CONFIG_HIGHMEM */ + + void __init native_pagetable_setup_start(pgd_t *base) +@@ -543,8 +538,9 @@ void __init native_pagetable_setup_done( + * be partially populated, and so it avoids stomping on any existing + * mappings. + */ +-static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base) ++void __init early_ioremap_page_table_range_init(void) + { ++ pgd_t *pgd_base = swapper_pg_dir; + unsigned long vaddr, end; + + /* +@@ -639,7 +635,7 @@ static int __init noexec_setup(char *str + } + early_param("noexec", noexec_setup); + +-static void __init set_nx(void) ++void __init set_nx(void) + { + unsigned int v[4], l, h; + +@@ -675,75 +671,97 @@ static int __init parse_highmem(char *ar + } + early_param("highmem", parse_highmem); + ++#define MSG_HIGHMEM_TOO_BIG \ ++ "highmem size (%luMB) is bigger than pages available (%luMB)!\n" ++ ++#define MSG_LOWMEM_TOO_SMALL \ ++ "highmem size (%luMB) results in <64MB lowmem, ignoring it!\n" + /* +- * Determine low and high memory ranges: ++ * All of RAM fits into lowmem - but if user wants highmem ++ * artificially via the highmem=x boot parameter then create ++ * it: + */ +-void __init find_low_pfn_range(void) ++void __init lowmem_pfn_init(void) + { +- /* it could update max_pfn */ +- + /* max_low_pfn is 0, we already have early_res support */ +- + max_low_pfn = max_pfn; +- if (max_low_pfn > MAXMEM_PFN) { +- if (highmem_pages == -1) +- highmem_pages = max_pfn - MAXMEM_PFN; +- if (highmem_pages + MAXMEM_PFN < max_pfn) +- max_pfn = MAXMEM_PFN + highmem_pages; +- if (highmem_pages + MAXMEM_PFN > max_pfn) { +- printk(KERN_WARNING "only %luMB highmem pages " +- "available, ignoring highmem size of %uMB.\n", +- pages_to_mb(max_pfn - MAXMEM_PFN), ++ ++ if (highmem_pages == -1) ++ highmem_pages = 0; ++#ifdef CONFIG_HIGHMEM ++ if (highmem_pages >= max_pfn) { ++ printk(KERN_ERR MSG_HIGHMEM_TOO_BIG, ++ pages_to_mb(highmem_pages), pages_to_mb(max_pfn)); ++ highmem_pages = 0; ++ } ++ if (highmem_pages) { ++ if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) { ++ printk(KERN_ERR MSG_LOWMEM_TOO_SMALL, + pages_to_mb(highmem_pages)); + highmem_pages = 0; + } +- max_low_pfn = MAXMEM_PFN; ++ max_low_pfn -= highmem_pages; ++ } ++#else ++ if (highmem_pages) ++ printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n"); ++#endif ++} ++ ++#define MSG_HIGHMEM_TOO_SMALL \ ++ "only %luMB highmem pages available, ignoring highmem size of %luMB!\n" ++ ++#define MSG_HIGHMEM_TRIMMED \ ++ "Warning: only 4GB will be used. 
Use a HIGHMEM64G enabled kernel!\n" ++/* ++ * We have more RAM than fits into lowmem - we try to put it into ++ * highmem, also taking the highmem=x boot parameter into account: ++ */ ++void __init highmem_pfn_init(void) ++{ ++ max_low_pfn = MAXMEM_PFN; ++ ++ if (highmem_pages == -1) ++ highmem_pages = max_pfn - MAXMEM_PFN; ++ ++ if (highmem_pages + MAXMEM_PFN < max_pfn) ++ max_pfn = MAXMEM_PFN + highmem_pages; ++ ++ if (highmem_pages + MAXMEM_PFN > max_pfn) { ++ printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL, ++ pages_to_mb(max_pfn - MAXMEM_PFN), ++ pages_to_mb(highmem_pages)); ++ highmem_pages = 0; ++ } + #ifndef CONFIG_HIGHMEM +- /* Maximum memory usable is what is directly addressable */ +- printk(KERN_WARNING "Warning only %ldMB will be used.\n", +- MAXMEM>>20); +- if (max_pfn > MAX_NONPAE_PFN) +- printk(KERN_WARNING +- "Use a HIGHMEM64G enabled kernel.\n"); +- else +- printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); +- max_pfn = MAXMEM_PFN; ++ /* Maximum memory usable is what is directly addressable */ ++ printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20); ++ if (max_pfn > MAX_NONPAE_PFN) ++ printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n"); ++ else ++ printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n"); ++ max_pfn = MAXMEM_PFN; + #else /* !CONFIG_HIGHMEM */ + #ifndef CONFIG_HIGHMEM64G +- if (max_pfn > MAX_NONPAE_PFN) { +- max_pfn = MAX_NONPAE_PFN; +- printk(KERN_WARNING "Warning only 4GB will be used." +- "Use a HIGHMEM64G enabled kernel.\n"); +- } ++ if (max_pfn > MAX_NONPAE_PFN) { ++ max_pfn = MAX_NONPAE_PFN; ++ printk(KERN_WARNING MSG_HIGHMEM_TRIMMED); ++ } + #endif /* !CONFIG_HIGHMEM64G */ + #endif /* !CONFIG_HIGHMEM */ +- } else { +- if (highmem_pages == -1) +- highmem_pages = 0; +-#ifdef CONFIG_HIGHMEM +- if (highmem_pages >= max_pfn) { +- printk(KERN_ERR "highmem size specified (%uMB) is " +- "bigger than pages available (%luMB)!.\n", +- pages_to_mb(highmem_pages), +- pages_to_mb(max_pfn)); +- highmem_pages = 0; +- } +- if (highmem_pages) { +- if (max_low_pfn - highmem_pages < +- 64*1024*1024/PAGE_SIZE){ +- printk(KERN_ERR "highmem size %uMB results in " +- "smaller than 64MB lowmem, ignoring it.\n" +- , pages_to_mb(highmem_pages)); +- highmem_pages = 0; +- } +- max_low_pfn -= highmem_pages; +- } +-#else +- if (highmem_pages) +- printk(KERN_ERR "ignoring highmem size on non-highmem" +- " kernel!\n"); +-#endif +- } ++} ++ ++/* ++ * Determine low and high memory ranges: ++ */ ++void __init find_low_pfn_range(void) ++{ ++ /* it could update max_pfn */ ++ ++ if (max_pfn <= MAXMEM_PFN) ++ lowmem_pfn_init(); ++ else ++ highmem_pfn_init(); + } + + #ifndef CONFIG_NEED_MULTIPLE_NODES +@@ -769,6 +787,8 @@ void __init initmem_init(unsigned long s + #ifdef CONFIG_FLATMEM + max_mapnr = num_physpages; + #endif ++ __vmalloc_start_set = true; ++ + printk(KERN_NOTICE "%ldMB LOWMEM available.\n", + pages_to_mb(max_low_pfn)); + +@@ -790,176 +810,64 @@ static void __init zone_sizes_init(void) + free_area_init_nodes(max_zone_pfns); + } + ++static unsigned long __init setup_node_bootmem(int nodeid, ++ unsigned long start_pfn, ++ unsigned long end_pfn, ++ unsigned long bootmap) ++{ ++ unsigned long bootmap_size; ++ ++ /* don't touch min_low_pfn */ ++ bootmap_size = init_bootmem_node(NODE_DATA(nodeid), ++ bootmap >> PAGE_SHIFT, ++ start_pfn, end_pfn); ++ printk(KERN_INFO " node %d low ram: %08lx - %08lx\n", ++ nodeid, start_pfn<> PAGE_SHIFT, +- min_low_pfn, max_low_pfn); + printk(KERN_INFO " mapped low ram: 0 - %08lx\n", + max_pfn_mapped<> PUD_SHIFT; +- tables = 
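find_low_pfn_range() now just chooses between lowmem_pfn_init() and highmem_pfn_init(). The stand-alone sketch below reproduces that arithmetic for a machine with 2 GiB of RAM and no highmem= override, with CONFIG_HIGHMEM assumed; MAXMEM_PFN is taken to be roughly 896 MiB worth of pages, the real value depends on the vmalloc reserve.

#include <stdio.h>

#define PAGE_SHIFT 12
#define MAXMEM_PFN (896UL << (20 - PAGE_SHIFT))	/* assumed lowmem ceiling */

static unsigned long max_pfn, max_low_pfn;
static unsigned long highmem_pages = (unsigned long)-1;	/* no highmem= */

static unsigned long pages_to_mb(unsigned long p) { return p >> (20 - PAGE_SHIFT); }

/* all of RAM fits below the lowmem ceiling; honour highmem= if sane */
static void lowmem_pfn_init(void)
{
	max_low_pfn = max_pfn;
	if (highmem_pages == (unsigned long)-1)
		highmem_pages = 0;
	if (highmem_pages >= max_pfn)
		highmem_pages = 0;
	if (highmem_pages &&
	    max_low_pfn - highmem_pages < (64UL << 20) / 4096)
		highmem_pages = 0;
	max_low_pfn -= highmem_pages;
}

/* more RAM than lowmem can hold: everything above the ceiling is highmem */
static void highmem_pfn_init(void)
{
	max_low_pfn = MAXMEM_PFN;
	if (highmem_pages == (unsigned long)-1)
		highmem_pages = max_pfn - MAXMEM_PFN;
	if (highmem_pages + MAXMEM_PFN < max_pfn)
		max_pfn = MAXMEM_PFN + highmem_pages;
	if (highmem_pages + MAXMEM_PFN > max_pfn)
		highmem_pages = 0;
}

int main(void)
{
	max_pfn = 2048UL << (20 - PAGE_SHIFT);	/* 2 GiB of RAM */

	if (max_pfn <= MAXMEM_PFN)
		lowmem_pfn_init();
	else
		highmem_pfn_init();

	printf("lowmem %lu MiB, highmem %lu MiB\n",
	       pages_to_mb(max_low_pfn), pages_to_mb(max_pfn - max_low_pfn));
	return 0;
}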
PAGE_ALIGN(puds * sizeof(pud_t)); +- +- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; +- tables += PAGE_ALIGN(pmds * sizeof(pmd_t)); +- +- if (use_pse) { +- unsigned long extra; ++ printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<>PMD_SHIFT) << PMD_SHIFT); +- extra += PMD_SIZE; +- ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; +- } else +- ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; ++ for_each_online_node(nodeid) { ++ unsigned long start_pfn, end_pfn; + +- tables += PAGE_ALIGN(ptes * sizeof(pte_t)); +- +- /* for fixmap */ +- tables += PAGE_ALIGN(__end_of_fixed_addresses * sizeof(pte_t)); +- +- /* +- * RED-PEN putting page tables only on node 0 could +- * cause a hotspot and fill up ZONE_DMA. The page tables +- * need roughly 0.5KB per GB. +- */ +- start = 0x7000; +- table_start = find_e820_area(start, max_pfn_mapped<>= PAGE_SHIFT; +- table_end = table_start; +- table_top = table_start + (tables>>PAGE_SHIFT); +- +- printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", +- end, table_start << PAGE_SHIFT, +- (table_start << PAGE_SHIFT) + tables); +-} +- +-unsigned long __init_refok init_memory_mapping(unsigned long start, +- unsigned long end) +-{ +- pgd_t *pgd_base = swapper_pg_dir; +- unsigned long start_pfn, end_pfn; +- unsigned long big_page_start; +-#ifdef CONFIG_DEBUG_PAGEALLOC +- /* +- * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. +- * This will simplify cpa(), which otherwise needs to support splitting +- * large pages into small in interrupt context, etc. +- */ +- int use_pse = 0; ++#ifdef CONFIG_NEED_MULTIPLE_NODES ++ start_pfn = node_start_pfn[nodeid]; ++ end_pfn = node_end_pfn[nodeid]; ++ if (start_pfn > max_low_pfn) ++ continue; ++ if (end_pfn > max_low_pfn) ++ end_pfn = max_low_pfn; + #else +- int use_pse = cpu_has_pse; ++ start_pfn = 0; ++ end_pfn = max_low_pfn; + #endif +- +- /* +- * Find space for the kernel direct mapping tables. +- */ +- if (!after_init_bootmem) +- find_early_table_space(end, use_pse); +- +-#ifdef CONFIG_X86_PAE +- set_nx(); +- if (nx_enabled) +- printk(KERN_INFO "NX (Execute Disable) protection: active\n"); +-#endif +- +- /* Enable PSE if available */ +- if (cpu_has_pse) +- set_in_cr4(X86_CR4_PSE); +- +- /* Enable PGE if available */ +- if (cpu_has_pge) { +- set_in_cr4(X86_CR4_PGE); +- __supported_pte_mask |= _PAGE_GLOBAL; ++ bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, ++ bootmap); + } +- +- /* +- * Don't use a large page for the first 2/4MB of memory +- * because there are often fixed size MTRRs in there +- * and overlapping MTRRs into large pages can cause +- * slowdowns. +- */ +- big_page_start = PMD_SIZE; +- +- if (start < big_page_start) { +- start_pfn = start >> PAGE_SHIFT; +- end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT); +- } else { +- /* head is not big page alignment ? */ +- start_pfn = start >> PAGE_SHIFT; +- end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) +- << (PMD_SHIFT - PAGE_SHIFT); +- } +- if (start_pfn < end_pfn) +- kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0); +- +- /* big page range */ +- start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) +- << (PMD_SHIFT - PAGE_SHIFT); +- if (start_pfn < (big_page_start >> PAGE_SHIFT)) +- start_pfn = big_page_start >> PAGE_SHIFT; +- end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); +- if (start_pfn < end_pfn) +- kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, +- use_pse); +- +- /* tail is not big page alignment ? 
*/ +- start_pfn = end_pfn; +- if (start_pfn > (big_page_start>>PAGE_SHIFT)) { +- end_pfn = end >> PAGE_SHIFT; +- if (start_pfn < end_pfn) +- kernel_physical_mapping_init(pgd_base, start_pfn, +- end_pfn, 0); +- } +- +- early_ioremap_page_table_range_init(pgd_base); +- +- load_cr3(swapper_pg_dir); +- +- __flush_tlb_all(); +- +- if (!after_init_bootmem) +- reserve_early(table_start << PAGE_SHIFT, +- table_end << PAGE_SHIFT, "PGTABLE"); +- +- if (!after_init_bootmem) +- early_memtest(start, end); +- +- return end >> PAGE_SHIFT; + } + +- + /* + * paging_init() sets up the page tables - note that the first 8MB are + * already mapped by head.S. +@@ -1024,6 +932,8 @@ void __init mem_init(void) + /* this will put all low memory onto the freelists */ + totalram_pages += free_all_bootmem(); + ++ bootmem_state = AFTER_BOOTMEM; ++ + reservedpages = 0; + for (tmp = 0; tmp < max_low_pfn; tmp++) + /* +@@ -1155,17 +1065,47 @@ static noinline int do_test_wp_bit(void) + const int rodata_test_data = 0xC3; + EXPORT_SYMBOL_GPL(rodata_test_data); + ++static int kernel_set_to_readonly; ++ ++void set_kernel_text_rw(void) ++{ ++ unsigned long start = PFN_ALIGN(_text); ++ unsigned long size = PFN_ALIGN(_etext) - start; ++ ++ if (!kernel_set_to_readonly) ++ return; ++ ++ pr_debug("Set kernel text: %lx - %lx for read write\n", ++ start, start+size); ++ ++ set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT); ++} ++ ++void set_kernel_text_ro(void) ++{ ++ unsigned long start = PFN_ALIGN(_text); ++ unsigned long size = PFN_ALIGN(_etext) - start; ++ ++ if (!kernel_set_to_readonly) ++ return; ++ ++ pr_debug("Set kernel text: %lx - %lx for read only\n", ++ start, start+size); ++ ++ set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); ++} ++ + void mark_rodata_ro(void) + { + unsigned long start = PFN_ALIGN(_text); + unsigned long size = PFN_ALIGN(_etext) - start; + +-#ifndef CONFIG_DYNAMIC_FTRACE +- /* Dynamic tracing modifies the kernel text section */ + set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); + printk(KERN_INFO "Write protecting the kernel text: %luk\n", + size >> 10); + ++ kernel_set_to_readonly = 1; ++ + #ifdef CONFIG_CPA_DEBUG + printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n", + start, start+size); +@@ -1174,7 +1114,6 @@ void mark_rodata_ro(void) + printk(KERN_INFO "Testing CPA: write protecting again\n"); + set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); + #endif +-#endif /* CONFIG_DYNAMIC_FTRACE */ + + start += size; + size = (unsigned long)__end_rodata - start; +@@ -1193,52 +1132,6 @@ void mark_rodata_ro(void) + } + #endif + +-void free_init_pages(char *what, unsigned long begin, unsigned long end) +-{ +-#ifdef CONFIG_DEBUG_PAGEALLOC +- /* +- * If debugging page accesses then do not free this memory but +- * mark them not present - any buggy init-section access will +- * create a kernel page fault: +- */ +- printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", +- begin, PAGE_ALIGN(end)); +- set_memory_np(begin, (end - begin) >> PAGE_SHIFT); +-#else +- unsigned long addr; +- +- /* +- * We just marked the kernel text read only above, now that +- * we are going to free part of that, we need to make that +- * writeable first. 
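kernel_set_to_readonly gates set_kernel_text_rw()/set_kernel_text_ro(): they only flip protections once mark_rodata_ro() has write-protected the text, which is what lets the CONFIG_DYNAMIC_FTRACE special case above go away. Below is a user-space analogy of that bracketing pattern using mmap()/mprotect() on an anonymous page; it is not kernel code, just the same guard-then-toggle shape a text patcher would use.

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

static unsigned char *text;
static size_t text_size;
static int kernel_set_to_readonly;

static void set_text_ro(void)
{
	if (!kernel_set_to_readonly)
		return;
	mprotect(text, text_size, PROT_READ);
}

static void set_text_rw(void)
{
	if (!kernel_set_to_readonly)
		return;
	mprotect(text, text_size, PROT_READ | PROT_WRITE);
}

int main(void)
{
	text_size = (size_t)sysconf(_SC_PAGESIZE);
	text = mmap(NULL, text_size, PROT_READ | PROT_WRITE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (text == MAP_FAILED)
		return 1;

	/* "mark_rodata_ro": write-protect and remember that we did */
	mprotect(text, text_size, PROT_READ);
	kernel_set_to_readonly = 1;

	/* a code patcher brackets its modification with rw/ro */
	set_text_rw();
	text[0] = 0x90;			/* poke a byte while writable */
	set_text_ro();

	printf("patched first byte to %#x\n", text[0]);
	return 0;
}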
+- */ +- set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); +- +- for (addr = begin; addr < end; addr += PAGE_SIZE) { +- ClearPageReserved(virt_to_page(addr)); +- init_page_count(virt_to_page(addr)); +- memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); +- free_page(addr); +- totalram_pages++; +- } +- printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); +-#endif +-} +- +-void free_initmem(void) +-{ +- free_init_pages("unused kernel memory", +- (unsigned long)(&__init_begin), +- (unsigned long)(&__init_end)); +-} +- +-#ifdef CONFIG_BLK_DEV_INITRD +-void free_initrd_mem(unsigned long start, unsigned long end) +-{ +- free_init_pages("initrd memory", start, end); +-} +-#endif +- + int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, + int flags) + { +Index: linux-2.6-tip/arch/x86/mm/init_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/init_64.c ++++ linux-2.6-tip/arch/x86/mm/init_64.c +@@ -48,6 +48,7 @@ + #include + #include + #include ++#include + + /* + * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. +@@ -59,14 +60,6 @@ unsigned long max_pfn_mapped; + + static unsigned long dma_reserve __initdata; + +-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +- +-int direct_gbpages +-#ifdef CONFIG_DIRECT_GBPAGES +- = 1 +-#endif +-; +- + static int __init parse_direct_gbpages_off(char *arg) + { + direct_gbpages = 0; +@@ -87,12 +80,10 @@ early_param("gbpages", parse_direct_gbpa + * around without checking the pgd every time. + */ + +-int after_bootmem; +- + pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; + EXPORT_SYMBOL_GPL(__supported_pte_mask); + +-static int do_not_nx __cpuinitdata; ++static int disable_nx __cpuinitdata; + + /* + * noexec=on|off +@@ -107,9 +98,9 @@ static int __init nonx_setup(char *str) + return -EINVAL; + if (!strncmp(str, "on", 2)) { + __supported_pte_mask |= _PAGE_NX; +- do_not_nx = 0; ++ disable_nx = 0; + } else if (!strncmp(str, "off", 3)) { +- do_not_nx = 1; ++ disable_nx = 1; + __supported_pte_mask &= ~_PAGE_NX; + } + return 0; +@@ -121,7 +112,7 @@ void __cpuinit check_efer(void) + unsigned long efer; + + rdmsrl(MSR_EFER, efer); +- if (!(efer & EFER_NX) || do_not_nx) ++ if (!(efer & EFER_NX) || disable_nx) + __supported_pte_mask &= ~_PAGE_NX; + } + +@@ -147,20 +138,26 @@ __setup("noexec32=", nonx32_setup); + + /* + * NOTE: This function is marked __ref because it calls __init function +- * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. ++ * (alloc_bootmem_pages). It's safe to do it ONLY when DURING_BOOTMEM. + */ + static __ref void *spp_getpage(void) + { +- void *ptr; ++ void *ptr = NULL; + +- if (after_bootmem) +- ptr = (void *) get_zeroed_page(GFP_ATOMIC); +- else ++ switch (bootmem_state) { ++ case AFTER_BOOTMEM: ++ ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); ++ break; ++ case DURING_BOOTMEM: + ptr = alloc_bootmem_pages(PAGE_SIZE); ++ break; ++ default: ++ panic("calling spp_getpage before bootmem\n"); ++ } + + if (!ptr || ((unsigned long)ptr & ~PAGE_MASK)) { + panic("set_pte_phys: cannot allocate page data %s\n", +- after_bootmem ? "after bootmem" : ""); ++ bootmem_state == AFTER_BOOTMEM ? 
"after bootmem" : ""); + } + + pr_debug("spp_getpage %p\n", ptr); +@@ -168,34 +165,51 @@ static __ref void *spp_getpage(void) + return ptr; + } + +-void +-set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) ++static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr) + { +- pud_t *pud; +- pmd_t *pmd; +- pte_t *pte; ++ if (pgd_none(*pgd)) { ++ pud_t *pud = (pud_t *)spp_getpage(); ++ pgd_populate(&init_mm, pgd, pud); ++ if (pud != pud_offset(pgd, 0)) ++ printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n", ++ pud, pud_offset(pgd, 0)); ++ } ++ return pud_offset(pgd, vaddr); ++} + +- pud = pud_page + pud_index(vaddr); ++static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr) ++{ + if (pud_none(*pud)) { +- pmd = (pmd_t *) spp_getpage(); ++ pmd_t *pmd = (pmd_t *) spp_getpage(); + pud_populate(&init_mm, pud, pmd); +- if (pmd != pmd_offset(pud, 0)) { ++ if (pmd != pmd_offset(pud, 0)) + printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n", +- pmd, pmd_offset(pud, 0)); +- return; +- } ++ pmd, pmd_offset(pud, 0)); + } +- pmd = pmd_offset(pud, vaddr); ++ return pmd_offset(pud, vaddr); ++} ++ ++static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr) ++{ + if (pmd_none(*pmd)) { +- pte = (pte_t *) spp_getpage(); ++ pte_t *pte = (pte_t *) spp_getpage(); + pmd_populate_kernel(&init_mm, pmd, pte); +- if (pte != pte_offset_kernel(pmd, 0)) { ++ if (pte != pte_offset_kernel(pmd, 0)) + printk(KERN_ERR "PAGETABLE BUG #02!\n"); +- return; +- } + } ++ return pte_offset_kernel(pmd, vaddr); ++} ++ ++void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte) ++{ ++ pud_t *pud; ++ pmd_t *pmd; ++ pte_t *pte; ++ ++ pud = pud_page + pud_index(vaddr); ++ pmd = fill_pmd(pud, vaddr); ++ pte = fill_pte(pmd, vaddr); + +- pte = pte_offset_kernel(pmd, vaddr); + set_pte(pte, new_pte); + + /* +@@ -205,8 +219,7 @@ set_pte_vaddr_pud(pud_t *pud_page, unsig + __flush_tlb_one(vaddr); + } + +-void +-set_pte_vaddr(unsigned long vaddr, pte_t pteval) ++void set_pte_vaddr(unsigned long vaddr, pte_t pteval) + { + pgd_t *pgd; + pud_t *pud_page; +@@ -223,6 +236,24 @@ set_pte_vaddr(unsigned long vaddr, pte_t + set_pte_vaddr_pud(pud_page, vaddr, pteval); + } + ++pmd_t * __init populate_extra_pmd(unsigned long vaddr) ++{ ++ pgd_t *pgd; ++ pud_t *pud; ++ ++ pgd = pgd_offset_k(vaddr); ++ pud = fill_pud(pgd, vaddr); ++ return fill_pmd(pud, vaddr); ++} ++ ++pte_t * __init populate_extra_pte(unsigned long vaddr) ++{ ++ pmd_t *pmd; ++ ++ pmd = populate_extra_pmd(vaddr); ++ return fill_pte(pmd, vaddr); ++} ++ + /* + * Create large page table mappings for a range of physical addresses. 
+ */ +@@ -291,23 +322,20 @@ void __init cleanup_highmap(void) + } + } + +-static unsigned long __initdata table_start; +-static unsigned long __meminitdata table_end; +-static unsigned long __meminitdata table_top; +- + static __ref void *alloc_low_page(unsigned long *phys) + { +- unsigned long pfn = table_end++; ++ unsigned long pfn; + void *adr; + +- if (after_bootmem) { +- adr = (void *)get_zeroed_page(GFP_ATOMIC); ++ if (bootmem_state == AFTER_BOOTMEM) { ++ adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); + *phys = __pa(adr); + + return adr; + } + +- if (pfn >= table_top) ++ pfn = e820_table_end++; ++ if (pfn >= e820_table_top) + panic("alloc_low_page: ran out of memory"); + + adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE); +@@ -318,7 +346,7 @@ static __ref void *alloc_low_page(unsign + + static __ref void unmap_low_page(void *adr) + { +- if (after_bootmem) ++ if (bootmem_state == AFTER_BOOTMEM) + return; + + early_iounmap(adr, PAGE_SIZE); +@@ -337,7 +365,7 @@ phys_pte_init(pte_t *pte_page, unsigned + for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) { + + if (addr >= end) { +- if (!after_bootmem) { ++ if (bootmem_state != AFTER_BOOTMEM) { + for(; i < PTRS_PER_PTE; i++, pte++) + set_pte(pte, __pte(0)); + } +@@ -393,7 +421,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned + pgprot_t new_prot = prot; + + if (address >= end) { +- if (!after_bootmem) { ++ if (bootmem_state != AFTER_BOOTMEM) { + for (; i < PTRS_PER_PMD; i++, pmd++) + set_pmd(pmd, __pmd(0)); + } +@@ -479,7 +507,7 @@ phys_pud_init(pud_t *pud_page, unsigned + if (addr >= end) + break; + +- if (!after_bootmem && ++ if (bootmem_state != AFTER_BOOTMEM && + !e820_any_mapped(addr, addr+PUD_SIZE, 0)) { + set_pud(pud, __pud(0)); + continue; +@@ -547,58 +575,10 @@ phys_pud_update(pgd_t *pgd, unsigned lon + return phys_pud_init(pud, addr, end, page_size_mask); + } + +-static void __init find_early_table_space(unsigned long end, int use_pse, +- int use_gbpages) +-{ +- unsigned long puds, pmds, ptes, tables, start; +- +- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; +- tables = roundup(puds * sizeof(pud_t), PAGE_SIZE); +- if (use_gbpages) { +- unsigned long extra; +- extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); +- pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; +- } else +- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; +- tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE); +- +- if (use_pse) { +- unsigned long extra; +- extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); +- ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; +- } else +- ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; +- tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE); +- +- /* +- * RED-PEN putting page tables only on node 0 could +- * cause a hotspot and fill up ZONE_DMA. The page tables +- * need roughly 0.5KB per GB. 
+- */ +- start = 0x8000; +- table_start = find_e820_area(start, end, tables, PAGE_SIZE); +- if (table_start == -1UL) +- panic("Cannot find space for the kernel page tables"); +- +- table_start >>= PAGE_SHIFT; +- table_end = table_start; +- table_top = table_start + (tables >> PAGE_SHIFT); +- +- printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n", +- end, table_start << PAGE_SHIFT, table_top << PAGE_SHIFT); +-} +- +-static void __init init_gbpages(void) +-{ +- if (direct_gbpages && cpu_has_gbpages) +- printk(KERN_INFO "Using GB pages for direct mapping\n"); +- else +- direct_gbpages = 0; +-} +- +-static unsigned long __meminit kernel_physical_mapping_init(unsigned long start, +- unsigned long end, +- unsigned long page_size_mask) ++unsigned long __init ++kernel_physical_mapping_init(unsigned long start, ++ unsigned long end, ++ unsigned long page_size_mask) + { + + unsigned long next, last_map_addr = end; +@@ -635,176 +615,6 @@ static unsigned long __meminit kernel_ph + return last_map_addr; + } + +-struct map_range { +- unsigned long start; +- unsigned long end; +- unsigned page_size_mask; +-}; +- +-#define NR_RANGE_MR 5 +- +-static int save_mr(struct map_range *mr, int nr_range, +- unsigned long start_pfn, unsigned long end_pfn, +- unsigned long page_size_mask) +-{ +- +- if (start_pfn < end_pfn) { +- if (nr_range >= NR_RANGE_MR) +- panic("run out of range for init_memory_mapping\n"); +- mr[nr_range].start = start_pfn<> PAGE_SHIFT; +- pos = start_pfn << PAGE_SHIFT; +- end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT) +- << (PMD_SHIFT - PAGE_SHIFT); +- if (end_pfn > (end >> PAGE_SHIFT)) +- end_pfn = end >> PAGE_SHIFT; +- if (start_pfn < end_pfn) { +- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); +- pos = end_pfn << PAGE_SHIFT; +- } +- +- /* big page (2M) range*/ +- start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT) +- << (PMD_SHIFT - PAGE_SHIFT); +- end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT) +- << (PUD_SHIFT - PAGE_SHIFT); +- if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT))) +- end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)); +- if (start_pfn < end_pfn) { +- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, +- page_size_mask & (1<>PUD_SHIFT) +- << (PUD_SHIFT - PAGE_SHIFT); +- end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); +- if (start_pfn < end_pfn) { +- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, +- page_size_mask & +- ((1<>PMD_SHIFT) +- << (PMD_SHIFT - PAGE_SHIFT); +- end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); +- if (start_pfn < end_pfn) { +- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, +- page_size_mask & (1<>PAGE_SHIFT; +- end_pfn = end>>PAGE_SHIFT; +- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); +- +- /* try to merge same page size and continuous */ +- for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { +- unsigned long old_start; +- if (mr[i].end != mr[i+1].start || +- mr[i].page_size_mask != mr[i+1].page_size_mask) +- continue; +- /* move it */ +- old_start = mr[i].start; +- memmove(&mr[i], &mr[i+1], +- (nr_range - 1 - i) * sizeof (struct map_range)); +- mr[i--].start = old_start; +- nr_range--; +- } +- +- for (i = 0; i < nr_range; i++) +- printk(KERN_DEBUG " %010lx - %010lx page %s\n", +- mr[i].start, mr[i].end, +- (mr[i].page_size_mask & (1< table_start) +- reserve_early(table_start << PAGE_SHIFT, +- table_end << PAGE_SHIFT, "PGTABLE"); +- +- printk(KERN_INFO "last_map_addr: %lx end: %lx\n", +- last_map_addr, end); +- +- if (!after_bootmem) +- early_memtest(start, end); +- +- 
return last_map_addr >> PAGE_SHIFT; +-} +- + #ifndef CONFIG_NUMA + void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) + { +@@ -876,28 +686,6 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to + + #endif /* CONFIG_MEMORY_HOTPLUG */ + +-/* +- * devmem_is_allowed() checks to see if /dev/mem access to a certain address +- * is valid. The argument is a physical page number. +- * +- * +- * On x86, access has to be given to the first megabyte of ram because that area +- * contains bios code and data regions used by X and dosemu and similar apps. +- * Access has to be given to non-kernel-ram areas as well, these contain the PCI +- * mmio resources as well as potential bios/acpi data regions. +- */ +-int devmem_is_allowed(unsigned long pagenr) +-{ +- if (pagenr <= 256) +- return 1; +- if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) +- return 0; +- if (!page_is_ram(pagenr)) +- return 1; +- return 0; +-} +- +- + static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, + kcore_modules, kcore_vsyscall; + +@@ -910,8 +698,6 @@ void __init mem_init(void) + + /* clear_bss() already clear the empty_zero_page */ + +- reservedpages = 0; +- + /* this will put all low memory onto the freelists */ + #ifdef CONFIG_NUMA + totalram_pages = numa_free_all_bootmem(); +@@ -919,9 +705,9 @@ void __init mem_init(void) + totalram_pages = free_all_bootmem(); + #endif + ++ bootmem_state = AFTER_BOOTMEM; + absent_pages = absent_pages_in_range(0, max_pfn); + reservedpages = max_pfn - totalram_pages - absent_pages; +- after_bootmem = 1; + + codesize = (unsigned long) &_etext - (unsigned long) &_text; + datasize = (unsigned long) &_edata - (unsigned long) &_etext; +@@ -947,46 +733,39 @@ void __init mem_init(void) + initsize >> 10); + } + +-void free_init_pages(char *what, unsigned long begin, unsigned long end) ++#ifdef CONFIG_DEBUG_RODATA ++const int rodata_test_data = 0xC3; ++EXPORT_SYMBOL_GPL(rodata_test_data); ++ ++static int kernel_set_to_readonly; ++ ++void set_kernel_text_rw(void) + { +- unsigned long addr = begin; ++ unsigned long start = PFN_ALIGN(_stext); ++ unsigned long end = PFN_ALIGN(__start_rodata); + +- if (addr >= end) ++ if (!kernel_set_to_readonly) + return; + +- /* +- * If debugging page accesses then do not free this memory but +- * mark them not present - any buggy init-section access will +- * create a kernel page fault: +- */ +-#ifdef CONFIG_DEBUG_PAGEALLOC +- printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", +- begin, PAGE_ALIGN(end)); +- set_memory_np(begin, (end - begin) >> PAGE_SHIFT); +-#else +- printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); ++ pr_debug("Set kernel text: %lx - %lx for read write\n", ++ start, end); + +- for (; addr < end; addr += PAGE_SIZE) { +- ClearPageReserved(virt_to_page(addr)); +- init_page_count(virt_to_page(addr)); +- memset((void *)(addr & ~(PAGE_SIZE-1)), +- POISON_FREE_INITMEM, PAGE_SIZE); +- free_page(addr); +- totalram_pages++; +- } +-#endif ++ set_memory_rw(start, (end - start) >> PAGE_SHIFT); + } + +-void free_initmem(void) ++void set_kernel_text_ro(void) + { +- free_init_pages("unused kernel memory", +- (unsigned long)(&__init_begin), +- (unsigned long)(&__init_end)); +-} ++ unsigned long start = PFN_ALIGN(_stext); ++ unsigned long end = PFN_ALIGN(__start_rodata); + +-#ifdef CONFIG_DEBUG_RODATA +-const int rodata_test_data = 0xC3; +-EXPORT_SYMBOL_GPL(rodata_test_data); ++ if (!kernel_set_to_readonly) ++ return; ++ ++ pr_debug("Set kernel text: %lx - %lx for read only\n", ++ start, end); ++ ++ 
set_memory_ro(start, (end - start) >> PAGE_SHIFT); ++} + + void mark_rodata_ro(void) + { +@@ -994,15 +773,12 @@ void mark_rodata_ro(void) + unsigned long rodata_start = + ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; + +-#ifdef CONFIG_DYNAMIC_FTRACE +- /* Dynamic tracing modifies the kernel text section */ +- start = rodata_start; +-#endif +- + printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", + (end - start) >> 10); + set_memory_ro(start, (end - start) >> PAGE_SHIFT); + ++ kernel_set_to_readonly = 1; ++ + /* + * The rodata section (but not the kernel text!) should also be + * not-executable. +@@ -1022,13 +798,6 @@ void mark_rodata_ro(void) + + #endif + +-#ifdef CONFIG_BLK_DEV_INITRD +-void free_initrd_mem(unsigned long start, unsigned long end) +-{ +- free_init_pages("initrd memory", start, end); +-} +-#endif +- + int __init reserve_bootmem_generic(unsigned long phys, unsigned long len, + int flags) + { +Index: linux-2.6-tip/arch/x86/mm/iomap_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/iomap_32.c ++++ linux-2.6-tip/arch/x86/mm/iomap_32.c +@@ -31,16 +31,28 @@ int is_io_mapping_possible(resource_size + } + EXPORT_SYMBOL_GPL(is_io_mapping_possible); + +-/* Map 'pfn' using fixed map 'type' and protections 'prot' +- */ +-void * +-iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) ++void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) + { + enum fixed_addresses idx; + unsigned long vaddr; + ++ preempt_disable(); + pagefault_disable(); + ++ idx = type + KM_TYPE_NR * smp_processor_id(); ++ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); ++ set_pte(kmap_pte - idx, pfn_pte(pfn, prot)); ++ arch_flush_lazy_mmu_mode(); ++ ++ return (void *)vaddr; ++} ++ ++/* ++ * Map 'pfn' using fixed map 'type' and protections 'prot' ++ */ ++void * ++iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot) ++{ + /* + * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS. 
+ * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the +@@ -50,12 +62,7 @@ iomap_atomic_prot_pfn(unsigned long pfn, + if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC)) + prot = PAGE_KERNEL_UC_MINUS; + +- idx = type + KM_TYPE_NR*smp_processor_id(); +- vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); +- set_pte(kmap_pte-idx, pfn_pte(pfn, prot)); +- arch_flush_lazy_mmu_mode(); +- +- return (void*) vaddr; ++ return kmap_atomic_prot_pfn(pfn, type, prot); + } + EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); + +@@ -76,5 +83,6 @@ iounmap_atomic(void *kvaddr, enum km_typ + + arch_flush_lazy_mmu_mode(); + pagefault_enable(); ++ preempt_enable(); + } + EXPORT_SYMBOL_GPL(iounmap_atomic); +Index: linux-2.6-tip/arch/x86/mm/ioremap.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/ioremap.c ++++ linux-2.6-tip/arch/x86/mm/ioremap.c +@@ -22,13 +22,17 @@ + #include + #include + +-#ifdef CONFIG_X86_64 +- +-static inline int phys_addr_valid(unsigned long addr) ++static inline int phys_addr_valid(resource_size_t addr) + { +- return addr < (1UL << boot_cpu_data.x86_phys_bits); ++#ifdef CONFIG_PHYS_ADDR_T_64BIT ++ return !(addr >> boot_cpu_data.x86_phys_bits); ++#else ++ return 1; ++#endif + } + ++#ifdef CONFIG_X86_64 ++ + unsigned long __phys_addr(unsigned long x) + { + if (x >= __START_KERNEL_map) { +@@ -38,8 +42,7 @@ unsigned long __phys_addr(unsigned long + } else { + VIRTUAL_BUG_ON(x < PAGE_OFFSET); + x -= PAGE_OFFSET; +- VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM : +- !phys_addr_valid(x)); ++ VIRTUAL_BUG_ON(!phys_addr_valid(x)); + } + return x; + } +@@ -56,10 +59,8 @@ bool __virt_addr_valid(unsigned long x) + if (x < PAGE_OFFSET) + return false; + x -= PAGE_OFFSET; +- if (system_state == SYSTEM_BOOTING ? +- x > MAXMEM : !phys_addr_valid(x)) { ++ if (!phys_addr_valid(x)) + return false; +- } + } + + return pfn_valid(x >> PAGE_SHIFT); +@@ -68,18 +69,12 @@ EXPORT_SYMBOL(__virt_addr_valid); + + #else + +-static inline int phys_addr_valid(unsigned long addr) +-{ +- return 1; +-} +- + #ifdef CONFIG_DEBUG_VIRTUAL + unsigned long __phys_addr(unsigned long x) + { +- /* VMALLOC_* aren't constants; not available at the boot time */ ++ /* VMALLOC_* aren't constants */ + VIRTUAL_BUG_ON(x < PAGE_OFFSET); +- VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING && +- is_vmalloc_addr((void *) x)); ++ VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x)); + return x - PAGE_OFFSET; + } + EXPORT_SYMBOL(__phys_addr); +@@ -89,7 +84,9 @@ bool __virt_addr_valid(unsigned long x) + { + if (x < PAGE_OFFSET) + return false; +- if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x)) ++ if (__vmalloc_start_set && is_vmalloc_addr((void *) x)) ++ return false; ++ if (x >= FIXADDR_START) + return false; + return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT); + } +@@ -348,7 +345,7 @@ EXPORT_SYMBOL(ioremap_nocache); + * + * Must be freed with iounmap. 
+ */ +-void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) ++void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size) + { + if (pat_enabled) + return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, +@@ -508,13 +505,19 @@ static inline pte_t * __init early_iorem + return &bm_pte[pte_index(addr)]; + } + ++static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; ++ + void __init early_ioremap_init(void) + { + pmd_t *pmd; ++ int i; + + if (early_ioremap_debug) + printk(KERN_INFO "early_ioremap_init()\n"); + ++ for (i = 0; i < FIX_BTMAPS_SLOTS; i++) ++ slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); ++ + pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); + memset(bm_pte, 0, sizeof(bm_pte)); + pmd_populate_kernel(&init_mm, pmd, bm_pte); +@@ -544,7 +547,7 @@ void __init early_ioremap_reset(void) + } + + static void __init __early_set_fixmap(enum fixed_addresses idx, +- unsigned long phys, pgprot_t flags) ++ phys_addr_t phys, pgprot_t flags) + { + unsigned long addr = __fix_to_virt(idx); + pte_t *pte; +@@ -563,7 +566,7 @@ static void __init __early_set_fixmap(en + } + + static inline void __init early_set_fixmap(enum fixed_addresses idx, +- unsigned long phys, pgprot_t prot) ++ phys_addr_t phys, pgprot_t prot) + { + if (after_paging_init) + __set_fixmap(idx, phys, prot); +@@ -581,6 +584,7 @@ static inline void __init early_clear_fi + + static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; + static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; ++ + static int __init check_early_ioremap_leak(void) + { + int count = 0; +@@ -602,9 +606,11 @@ static int __init check_early_ioremap_le + } + late_initcall(check_early_ioremap_leak); + +-static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot) ++static void __init __iomem * ++__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) + { +- unsigned long offset, last_addr; ++ unsigned long offset; ++ resource_size_t last_addr; + unsigned int nrpages; + enum fixed_addresses idx0, idx; + int i, slot; +@@ -620,15 +626,15 @@ static void __init __iomem *__early_iore + } + + if (slot < 0) { +- printk(KERN_INFO "early_iomap(%08lx, %08lx) not found slot\n", +- phys_addr, size); ++ printk(KERN_INFO "early_iomap(%08llx, %08lx) not found slot\n", ++ (u64)phys_addr, size); + WARN_ON(1); + return NULL; + } + + if (early_ioremap_debug) { +- printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ", +- phys_addr, size, slot); ++ printk(KERN_INFO "early_ioremap(%08llx, %08lx) [%d] => ", ++ (u64)phys_addr, size, slot); + dump_stack(); + } + +@@ -668,20 +674,22 @@ static void __init __iomem *__early_iore + --nrpages; + } + if (early_ioremap_debug) +- printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0)); ++ printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]); + +- prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0)); ++ prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); + return prev_map[slot]; + } + + /* Remap an IO device */ +-void __init __iomem *early_ioremap(unsigned long phys_addr, unsigned long size) ++void __init __iomem * ++early_ioremap(resource_size_t phys_addr, unsigned long size) + { + return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO); + } + + /* Remap memory */ +-void __init __iomem *early_memremap(unsigned long phys_addr, unsigned long size) ++void __init __iomem * ++early_memremap(resource_size_t phys_addr, unsigned long size) + { + return __early_ioremap(phys_addr, size, 
PAGE_KERNEL); + } +@@ -738,8 +746,3 @@ void __init early_iounmap(void __iomem * + } + prev_map[slot] = NULL; + } +- +-void __this_fixmap_does_not_exist(void) +-{ +- WARN_ON(1); +-} +Index: linux-2.6-tip/arch/x86/mm/kmemcheck/Makefile +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/mm/kmemcheck/Makefile +@@ -0,0 +1 @@ ++obj-y := error.o kmemcheck.o opcode.o pte.o selftest.o shadow.o +Index: linux-2.6-tip/arch/x86/mm/kmemcheck/error.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/mm/kmemcheck/error.c +@@ -0,0 +1,228 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "error.h" ++#include "shadow.h" ++ ++enum kmemcheck_error_type { ++ KMEMCHECK_ERROR_INVALID_ACCESS, ++ KMEMCHECK_ERROR_BUG, ++}; ++ ++#define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT) ++ ++struct kmemcheck_error { ++ enum kmemcheck_error_type type; ++ ++ union { ++ /* KMEMCHECK_ERROR_INVALID_ACCESS */ ++ struct { ++ /* Kind of access that caused the error */ ++ enum kmemcheck_shadow state; ++ /* Address and size of the erroneous read */ ++ unsigned long address; ++ unsigned int size; ++ }; ++ }; ++ ++ struct pt_regs regs; ++ struct stack_trace trace; ++ unsigned long trace_entries[32]; ++ ++ /* We compress it to a char. */ ++ unsigned char shadow_copy[SHADOW_COPY_SIZE]; ++ unsigned char memory_copy[SHADOW_COPY_SIZE]; ++}; ++ ++/* ++ * Create a ring queue of errors to output. We can't call printk() directly ++ * from the kmemcheck traps, since this may call the console drivers and ++ * result in a recursive fault. ++ */ ++static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE]; ++static unsigned int error_count; ++static unsigned int error_rd; ++static unsigned int error_wr; ++static unsigned int error_missed_count; ++ ++static struct kmemcheck_error *error_next_wr(void) ++{ ++ struct kmemcheck_error *e; ++ ++ if (error_count == ARRAY_SIZE(error_fifo)) { ++ ++error_missed_count; ++ return NULL; ++ } ++ ++ e = &error_fifo[error_wr]; ++ if (++error_wr == ARRAY_SIZE(error_fifo)) ++ error_wr = 0; ++ ++error_count; ++ return e; ++} ++ ++static struct kmemcheck_error *error_next_rd(void) ++{ ++ struct kmemcheck_error *e; ++ ++ if (error_count == 0) ++ return NULL; ++ ++ e = &error_fifo[error_rd]; ++ if (++error_rd == ARRAY_SIZE(error_fifo)) ++ error_rd = 0; ++ --error_count; ++ return e; ++} ++ ++void kmemcheck_error_recall(void) ++{ ++ static const char *desc[] = { ++ [KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated", ++ [KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized", ++ [KMEMCHECK_SHADOW_INITIALIZED] = "initialized", ++ [KMEMCHECK_SHADOW_FREED] = "freed", ++ }; ++ ++ static const char short_desc[] = { ++ [KMEMCHECK_SHADOW_UNALLOCATED] = 'a', ++ [KMEMCHECK_SHADOW_UNINITIALIZED] = 'u', ++ [KMEMCHECK_SHADOW_INITIALIZED] = 'i', ++ [KMEMCHECK_SHADOW_FREED] = 'f', ++ }; ++ ++ struct kmemcheck_error *e; ++ unsigned int i; ++ ++ e = error_next_rd(); ++ if (!e) ++ return; ++ ++ switch (e->type) { ++ case KMEMCHECK_ERROR_INVALID_ACCESS: ++ printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read " ++ "from %s memory (%p)\n", ++ 8 * e->size, e->state < ARRAY_SIZE(desc) ? 
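The kmemcheck error ring above is deliberately lossy: the trap side may not sleep or call printk(), so a full queue just drops the report and bumps a counter that the tasklet prints later. A stand-alone model of error_next_wr()/error_next_rd() with a 4-entry queue:

#include <stdio.h>

#define QUEUE_SIZE 4

struct report { int id; };

static struct report fifo[QUEUE_SIZE];
static unsigned int count, rd, wr, missed;

/* writer side: called from trap context, so it must never block --
 * when the queue is full the report is dropped and counted */
static struct report *next_wr(void)
{
	struct report *r;

	if (count == QUEUE_SIZE) {
		++missed;
		return NULL;
	}
	r = &fifo[wr];
	if (++wr == QUEUE_SIZE)
		wr = 0;
	++count;
	return r;
}

/* reader side: drained later from tasklet context */
static struct report *next_rd(void)
{
	struct report *r;

	if (count == 0)
		return NULL;
	r = &fifo[rd];
	if (++rd == QUEUE_SIZE)
		rd = 0;
	--count;
	return r;
}

int main(void)
{
	struct report *r;
	int i;

	for (i = 0; i < 6; i++) {	/* two more than the queue holds */
		r = next_wr();
		if (r)
			r->id = i;
	}
	while ((r = next_rd()))
		printf("report %d\n", r->id);
	printf("lost %u reports\n", missed);
	return 0;
}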
++ desc[e->state] : "(invalid shadow state)", ++ (void *) e->address); ++ ++ printk(KERN_INFO); ++ for (i = 0; i < SHADOW_COPY_SIZE; ++i) ++ printk("%02x", e->memory_copy[i]); ++ printk("\n"); ++ ++ printk(KERN_INFO); ++ for (i = 0; i < SHADOW_COPY_SIZE; ++i) { ++ if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) ++ printk(" %c", short_desc[e->shadow_copy[i]]); ++ else ++ printk(" ?"); ++ } ++ printk("\n"); ++ printk(KERN_INFO "%*c\n", 2 + 2 ++ * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); ++ break; ++ case KMEMCHECK_ERROR_BUG: ++ printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n"); ++ break; ++ } ++ ++ __show_regs(&e->regs, 1); ++ print_stack_trace(&e->trace, 0); ++} ++ ++static void do_wakeup(unsigned long data) ++{ ++ while (error_count > 0) ++ kmemcheck_error_recall(); ++ ++ if (error_missed_count > 0) { ++ printk(KERN_WARNING "kmemcheck: Lost %d error reports because " ++ "the queue was too small\n", error_missed_count); ++ error_missed_count = 0; ++ } ++} ++ ++static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0); ++ ++/* ++ * Save the context of an error report. ++ */ ++void kmemcheck_error_save(enum kmemcheck_shadow state, ++ unsigned long address, unsigned int size, struct pt_regs *regs) ++{ ++ static unsigned long prev_ip; ++ ++ struct kmemcheck_error *e; ++ void *shadow_copy; ++ void *memory_copy; ++ ++ /* Don't report several adjacent errors from the same EIP. */ ++ if (regs->ip == prev_ip) ++ return; ++ prev_ip = regs->ip; ++ ++ e = error_next_wr(); ++ if (!e) ++ return; ++ ++ e->type = KMEMCHECK_ERROR_INVALID_ACCESS; ++ ++ e->state = state; ++ e->address = address; ++ e->size = size; ++ ++ /* Save regs */ ++ memcpy(&e->regs, regs, sizeof(*regs)); ++ ++ /* Save stack trace */ ++ e->trace.nr_entries = 0; ++ e->trace.entries = e->trace_entries; ++ e->trace.max_entries = ARRAY_SIZE(e->trace_entries); ++ e->trace.skip = 0; ++ save_stack_trace_bp(&e->trace, regs->bp); ++ ++ /* Round address down to nearest 16 bytes */ ++ shadow_copy = kmemcheck_shadow_lookup(address ++ & ~(SHADOW_COPY_SIZE - 1)); ++ BUG_ON(!shadow_copy); ++ ++ memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE); ++ ++ kmemcheck_show_addr(address); ++ memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1)); ++ memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE); ++ kmemcheck_hide_addr(address); ++ ++ tasklet_hi_schedule_first(&kmemcheck_tasklet); ++} ++ ++/* ++ * Save the context of a kmemcheck bug. 
++ */ ++void kmemcheck_error_save_bug(struct pt_regs *regs) ++{ ++ struct kmemcheck_error *e; ++ ++ e = error_next_wr(); ++ if (!e) ++ return; ++ ++ e->type = KMEMCHECK_ERROR_BUG; ++ ++ memcpy(&e->regs, regs, sizeof(*regs)); ++ ++ e->trace.nr_entries = 0; ++ e->trace.entries = e->trace_entries; ++ e->trace.max_entries = ARRAY_SIZE(e->trace_entries); ++ e->trace.skip = 1; ++ save_stack_trace(&e->trace); ++ ++ tasklet_hi_schedule_first(&kmemcheck_tasklet); ++} +Index: linux-2.6-tip/arch/x86/mm/kmemcheck/error.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/mm/kmemcheck/error.h +@@ -0,0 +1,15 @@ ++#ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H ++#define ARCH__X86__MM__KMEMCHECK__ERROR_H ++ ++#include ++ ++#include "shadow.h" ++ ++void kmemcheck_error_save(enum kmemcheck_shadow state, ++ unsigned long address, unsigned int size, struct pt_regs *regs); ++ ++void kmemcheck_error_save_bug(struct pt_regs *regs); ++ ++void kmemcheck_error_recall(void); ++ ++#endif +Index: linux-2.6-tip/arch/x86/mm/kmemcheck/kmemcheck.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/mm/kmemcheck/kmemcheck.c +@@ -0,0 +1,637 @@ ++/** ++ * kmemcheck - a heavyweight memory checker for the linux kernel ++ * Copyright (C) 2007, 2008 Vegard Nossum ++ * (With a lot of help from Ingo Molnar and Pekka Enberg.) ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License (version 2) as ++ * published by the Free Software Foundation. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++#include "error.h" ++#include "opcode.h" ++#include "pte.h" ++#include "selftest.h" ++#include "shadow.h" ++ ++ ++#ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT ++# define KMEMCHECK_ENABLED 0 ++#endif ++ ++#ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT ++# define KMEMCHECK_ENABLED 1 ++#endif ++ ++#ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT ++# define KMEMCHECK_ENABLED 2 ++#endif ++ ++int kmemcheck_enabled = KMEMCHECK_ENABLED; ++ ++void __init kmemcheck_init(void) ++{ ++#ifdef CONFIG_SMP ++ /* ++ * Limit SMP to use a single CPU. We rely on the fact that this code ++ * runs before SMP is set up. ++ */ ++ if (setup_max_cpus > 1) { ++ printk(KERN_INFO ++ "kmemcheck: Limiting number of CPUs to 1.\n"); ++ setup_max_cpus = 1; ++ } ++#endif ++ ++ if (!kmemcheck_selftest()) { ++ printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n"); ++ kmemcheck_enabled = 0; ++ return; ++ } ++ ++ printk(KERN_INFO "kmemcheck: Initialized\n"); ++} ++ ++/* ++ * We need to parse the kmemcheck= option before any memory is allocated. 
++ */ ++static int __init param_kmemcheck(char *str) ++{ ++ if (!str) ++ return -EINVAL; ++ ++ sscanf(str, "%d", &kmemcheck_enabled); ++ return 0; ++} ++ ++early_param("kmemcheck", param_kmemcheck); ++ ++int kmemcheck_show_addr(unsigned long address) ++{ ++ pte_t *pte; ++ ++ pte = kmemcheck_pte_lookup(address); ++ if (!pte) ++ return 0; ++ ++ set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); ++ __flush_tlb_one(address); ++ return 1; ++} ++ ++int kmemcheck_hide_addr(unsigned long address) ++{ ++ pte_t *pte; ++ ++ pte = kmemcheck_pte_lookup(address); ++ if (!pte) ++ return 0; ++ ++ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); ++ __flush_tlb_one(address); ++ return 1; ++} ++ ++struct kmemcheck_context { ++ bool busy; ++ int balance; ++ ++ /* ++ * There can be at most two memory operands to an instruction, but ++ * each address can cross a page boundary -- so we may need up to ++ * four addresses that must be hidden/revealed for each fault. ++ */ ++ unsigned long addr[4]; ++ unsigned long n_addrs; ++ unsigned long flags; ++ ++ /* Data size of the instruction that caused a fault. */ ++ unsigned int size; ++}; ++ ++static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context); ++ ++bool kmemcheck_active(struct pt_regs *regs) ++{ ++ struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); ++ ++ return data->balance > 0; ++} ++ ++/* Save an address that needs to be shown/hidden */ ++static void kmemcheck_save_addr(unsigned long addr) ++{ ++ struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); ++ ++ BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr)); ++ data->addr[data->n_addrs++] = addr; ++} ++ ++static unsigned int kmemcheck_show_all(void) ++{ ++ struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); ++ unsigned int i; ++ unsigned int n; ++ ++ n = 0; ++ for (i = 0; i < data->n_addrs; ++i) ++ n += kmemcheck_show_addr(data->addr[i]); ++ ++ return n; ++} ++ ++static unsigned int kmemcheck_hide_all(void) ++{ ++ struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); ++ unsigned int i; ++ unsigned int n; ++ ++ n = 0; ++ for (i = 0; i < data->n_addrs; ++i) ++ n += kmemcheck_hide_addr(data->addr[i]); ++ ++ return n; ++} ++ ++/* ++ * Called from the #PF handler. ++ */ ++void kmemcheck_show(struct pt_regs *regs) ++{ ++ struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); ++ ++ BUG_ON(!irqs_disabled()); ++ ++ if (unlikely(data->balance != 0)) { ++ kmemcheck_show_all(); ++ kmemcheck_error_save_bug(regs); ++ data->balance = 0; ++ return; ++ } ++ ++ /* ++ * None of the addresses actually belonged to kmemcheck. Note that ++ * this is not an error. ++ */ ++ if (kmemcheck_show_all() == 0) ++ return; ++ ++ ++data->balance; ++ ++ /* ++ * The IF needs to be cleared as well, so that the faulting ++ * instruction can run "uninterrupted". Otherwise, we might take ++ * an interrupt and start executing that before we've had a chance ++ * to hide the page again. ++ * ++ * NOTE: In the rare case of multiple faults, we must not override ++ * the original flags: ++ */ ++ if (!(regs->flags & X86_EFLAGS_TF)) ++ data->flags = regs->flags; ++ ++ regs->flags |= X86_EFLAGS_TF; ++ regs->flags &= ~X86_EFLAGS_IF; ++} ++ ++/* ++ * Called from the #DB handler. 
++ */ ++void kmemcheck_hide(struct pt_regs *regs) ++{ ++ struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); ++ int n; ++ ++ BUG_ON(!irqs_disabled()); ++ ++ if (data->balance == 0) ++ return; ++ ++ if (unlikely(data->balance != 1)) { ++ kmemcheck_show_all(); ++ kmemcheck_error_save_bug(regs); ++ data->n_addrs = 0; ++ data->balance = 0; ++ ++ if (!(data->flags & X86_EFLAGS_TF)) ++ regs->flags &= ~X86_EFLAGS_TF; ++ if (data->flags & X86_EFLAGS_IF) ++ regs->flags |= X86_EFLAGS_IF; ++ return; ++ } ++ ++ if (kmemcheck_enabled) ++ n = kmemcheck_hide_all(); ++ else ++ n = kmemcheck_show_all(); ++ ++ if (n == 0) ++ return; ++ ++ --data->balance; ++ ++ data->n_addrs = 0; ++ ++ if (!(data->flags & X86_EFLAGS_TF)) ++ regs->flags &= ~X86_EFLAGS_TF; ++ if (data->flags & X86_EFLAGS_IF) ++ regs->flags |= X86_EFLAGS_IF; ++} ++ ++void kmemcheck_show_pages(struct page *p, unsigned int n) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < n; ++i) { ++ unsigned long address; ++ pte_t *pte; ++ unsigned int level; ++ ++ address = (unsigned long) page_address(&p[i]); ++ pte = lookup_address(address, &level); ++ BUG_ON(!pte); ++ BUG_ON(level != PG_LEVEL_4K); ++ ++ set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); ++ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN)); ++ __flush_tlb_one(address); ++ } ++} ++ ++bool kmemcheck_page_is_tracked(struct page *p) ++{ ++ /* This will also check the "hidden" flag of the PTE. */ ++ return kmemcheck_pte_lookup((unsigned long) page_address(p)); ++} ++ ++void kmemcheck_hide_pages(struct page *p, unsigned int n) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < n; ++i) { ++ unsigned long address; ++ pte_t *pte; ++ unsigned int level; ++ ++ address = (unsigned long) page_address(&p[i]); ++ pte = lookup_address(address, &level); ++ BUG_ON(!pte); ++ BUG_ON(level != PG_LEVEL_4K); ++ ++ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); ++ set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN)); ++ __flush_tlb_one(address); ++ } ++} ++ ++/* Access may NOT cross page boundary */ ++static void kmemcheck_read_strict(struct pt_regs *regs, ++ unsigned long addr, unsigned int size) ++{ ++ void *shadow; ++ enum kmemcheck_shadow status; ++ ++ shadow = kmemcheck_shadow_lookup(addr); ++ if (!shadow) ++ return; ++ ++ kmemcheck_save_addr(addr); ++ status = kmemcheck_shadow_test(shadow, size); ++ if (status == KMEMCHECK_SHADOW_INITIALIZED) ++ return; ++ ++ if (kmemcheck_enabled) ++ kmemcheck_error_save(status, addr, size, regs); ++ ++ if (kmemcheck_enabled == 2) ++ kmemcheck_enabled = 0; ++ ++ /* Don't warn about it again. */ ++ kmemcheck_shadow_set(shadow, size); ++} ++ ++/* Access may cross page boundary */ ++static void kmemcheck_read(struct pt_regs *regs, ++ unsigned long addr, unsigned int size) ++{ ++ unsigned long page = addr & PAGE_MASK; ++ unsigned long next_addr = addr + size - 1; ++ unsigned long next_page = next_addr & PAGE_MASK; ++ ++ if (likely(page == next_page)) { ++ kmemcheck_read_strict(regs, addr, size); ++ return; ++ } ++ ++ /* ++ * What we do is basically to split the access across the ++ * two pages and handle each part separately. Yes, this means ++ * that we may now see reads that are 3 + 5 bytes, for ++ * example (and if both are uninitialized, there will be two ++ * reports), but it makes the code a lot simpler. 
++ */ ++ kmemcheck_read_strict(regs, addr, next_page - addr); ++ kmemcheck_read_strict(regs, next_page, next_addr - next_page); ++} ++ ++static void kmemcheck_write_strict(struct pt_regs *regs, ++ unsigned long addr, unsigned int size) ++{ ++ void *shadow; ++ ++ shadow = kmemcheck_shadow_lookup(addr); ++ if (!shadow) ++ return; ++ ++ kmemcheck_save_addr(addr); ++ kmemcheck_shadow_set(shadow, size); ++} ++ ++static void kmemcheck_write(struct pt_regs *regs, ++ unsigned long addr, unsigned int size) ++{ ++ unsigned long page = addr & PAGE_MASK; ++ unsigned long next_addr = addr + size - 1; ++ unsigned long next_page = next_addr & PAGE_MASK; ++ ++ if (likely(page == next_page)) { ++ kmemcheck_write_strict(regs, addr, size); ++ return; ++ } ++ ++ /* See comment in kmemcheck_read(). */ ++ kmemcheck_write_strict(regs, addr, next_page - addr); ++ kmemcheck_write_strict(regs, next_page, next_addr - next_page); ++} ++ ++/* ++ * Copying is hard. We have two addresses, each of which may be split across ++ * a page (and each page will have different shadow addresses). ++ */ ++static void kmemcheck_copy(struct pt_regs *regs, ++ unsigned long src_addr, unsigned long dst_addr, unsigned int size) ++{ ++ uint8_t shadow[8]; ++ enum kmemcheck_shadow status; ++ ++ unsigned long page; ++ unsigned long next_addr; ++ unsigned long next_page; ++ ++ uint8_t *x; ++ unsigned int i; ++ unsigned int n; ++ ++ BUG_ON(size > sizeof(shadow)); ++ ++ page = src_addr & PAGE_MASK; ++ next_addr = src_addr + size - 1; ++ next_page = next_addr & PAGE_MASK; ++ ++ if (likely(page == next_page)) { ++ /* Same page */ ++ x = kmemcheck_shadow_lookup(src_addr); ++ if (x) { ++ kmemcheck_save_addr(src_addr); ++ for (i = 0; i < size; ++i) ++ shadow[i] = x[i]; ++ } else { ++ for (i = 0; i < size; ++i) ++ shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; ++ } ++ } else { ++ n = next_page - src_addr; ++ BUG_ON(n > sizeof(shadow)); ++ ++ /* First page */ ++ x = kmemcheck_shadow_lookup(src_addr); ++ if (x) { ++ kmemcheck_save_addr(src_addr); ++ for (i = 0; i < n; ++i) ++ shadow[i] = x[i]; ++ } else { ++ /* Not tracked */ ++ for (i = 0; i < n; ++i) ++ shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; ++ } ++ ++ /* Second page */ ++ x = kmemcheck_shadow_lookup(next_page); ++ if (x) { ++ kmemcheck_save_addr(next_page); ++ for (i = n; i < size; ++i) ++ shadow[i] = x[i - n]; ++ } else { ++ /* Not tracked */ ++ for (i = n; i < size; ++i) ++ shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; ++ } ++ } ++ ++ page = dst_addr & PAGE_MASK; ++ next_addr = dst_addr + size - 1; ++ next_page = next_addr & PAGE_MASK; ++ ++ if (likely(page == next_page)) { ++ /* Same page */ ++ x = kmemcheck_shadow_lookup(dst_addr); ++ if (x) { ++ kmemcheck_save_addr(dst_addr); ++ for (i = 0; i < size; ++i) { ++ x[i] = shadow[i]; ++ shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; ++ } ++ } ++ } else { ++ n = next_page - dst_addr; ++ BUG_ON(n > sizeof(shadow)); ++ ++ /* First page */ ++ x = kmemcheck_shadow_lookup(dst_addr); ++ if (x) { ++ kmemcheck_save_addr(dst_addr); ++ for (i = 0; i < n; ++i) { ++ x[i] = shadow[i]; ++ shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; ++ } ++ } ++ ++ /* Second page */ ++ x = kmemcheck_shadow_lookup(next_page); ++ if (x) { ++ kmemcheck_save_addr(next_page); ++ for (i = n; i < size; ++i) { ++ x[i - n] = shadow[i]; ++ shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; ++ } ++ } ++ } ++ ++ status = kmemcheck_shadow_test(shadow, size); ++ if (status == KMEMCHECK_SHADOW_INITIALIZED) ++ return; ++ ++ if (kmemcheck_enabled) ++ kmemcheck_error_save(status, src_addr, size, regs); ++ ++ if 
(kmemcheck_enabled == 2) ++ kmemcheck_enabled = 0; ++} ++ ++enum kmemcheck_method { ++ KMEMCHECK_READ, ++ KMEMCHECK_WRITE, ++}; ++ ++static void kmemcheck_access(struct pt_regs *regs, ++ unsigned long fallback_address, enum kmemcheck_method fallback_method) ++{ ++ const uint8_t *insn; ++ const uint8_t *insn_primary; ++ unsigned int size; ++ ++ struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); ++ ++ /* Recursive fault -- ouch. */ ++ if (data->busy) { ++ kmemcheck_show_addr(fallback_address); ++ kmemcheck_error_save_bug(regs); ++ return; ++ } ++ ++ data->busy = true; ++ ++ insn = (const uint8_t *) regs->ip; ++ insn_primary = kmemcheck_opcode_get_primary(insn); ++ ++ kmemcheck_opcode_decode(insn, &size); ++ ++ switch (insn_primary[0]) { ++#ifdef CONFIG_KMEMCHECK_BITOPS_OK ++ /* AND, OR, XOR */ ++ /* ++ * Unfortunately, these instructions have to be excluded from ++ * our regular checking since they access only some (and not ++ * all) bits. This clears out "bogus" bitfield-access warnings. ++ */ ++ case 0x80: ++ case 0x81: ++ case 0x82: ++ case 0x83: ++ switch ((insn_primary[1] >> 3) & 7) { ++ /* OR */ ++ case 1: ++ /* AND */ ++ case 4: ++ /* XOR */ ++ case 6: ++ kmemcheck_write(regs, fallback_address, size); ++ goto out; ++ ++ /* ADD */ ++ case 0: ++ /* ADC */ ++ case 2: ++ /* SBB */ ++ case 3: ++ /* SUB */ ++ case 5: ++ /* CMP */ ++ case 7: ++ break; ++ } ++ break; ++#endif ++ ++ /* MOVS, MOVSB, MOVSW, MOVSD */ ++ case 0xa4: ++ case 0xa5: ++ /* ++ * These instructions are special because they take two ++ * addresses, but we only get one page fault. ++ */ ++ kmemcheck_copy(regs, regs->si, regs->di, size); ++ goto out; ++ ++ /* CMPS, CMPSB, CMPSW, CMPSD */ ++ case 0xa6: ++ case 0xa7: ++ kmemcheck_read(regs, regs->si, size); ++ kmemcheck_read(regs, regs->di, size); ++ goto out; ++ } ++ ++ /* ++ * If the opcode isn't special in any way, we use the data from the ++ * page fault handler to determine the address and type of memory ++ * access. ++ */ ++ switch (fallback_method) { ++ case KMEMCHECK_READ: ++ kmemcheck_read(regs, fallback_address, size); ++ goto out; ++ case KMEMCHECK_WRITE: ++ kmemcheck_write(regs, fallback_address, size); ++ goto out; ++ } ++ ++out: ++ data->busy = false; ++} ++ ++bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, ++ unsigned long error_code) ++{ ++ pte_t *pte; ++ ++ /* ++ * XXX: Is it safe to assume that memory accesses from virtual 86 ++ * mode or non-kernel code segments will _never_ access kernel ++ * memory (e.g. tracked pages)? For now, we need this to avoid ++ * invoking kmemcheck for PnP BIOS calls. ++ */ ++ if (regs->flags & X86_VM_MASK) ++ return false; ++ if (regs->cs != __KERNEL_CS) ++ return false; ++ ++ pte = kmemcheck_pte_lookup(address); ++ if (!pte) ++ return false; ++ ++ if (error_code & 2) ++ kmemcheck_access(regs, address, KMEMCHECK_WRITE); ++ else ++ kmemcheck_access(regs, address, KMEMCHECK_READ); ++ ++ kmemcheck_show(regs); ++ return true; ++} ++ ++bool kmemcheck_trap(struct pt_regs *regs) ++{ ++ if (!kmemcheck_active(regs)) ++ return false; ++ ++ /* We're done. 
*/ ++ kmemcheck_hide(regs); ++ return true; ++} +Index: linux-2.6-tip/arch/x86/mm/kmemcheck/opcode.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/mm/kmemcheck/opcode.c +@@ -0,0 +1,106 @@ ++#include ++ ++#include "opcode.h" ++ ++static bool opcode_is_prefix(uint8_t b) ++{ ++ return ++ /* Group 1 */ ++ b == 0xf0 || b == 0xf2 || b == 0xf3 ++ /* Group 2 */ ++ || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 ++ || b == 0x64 || b == 0x65 || b == 0x2e || b == 0x3e ++ /* Group 3 */ ++ || b == 0x66 ++ /* Group 4 */ ++ || b == 0x67; ++} ++ ++#ifdef CONFIG_X86_64 ++static bool opcode_is_rex_prefix(uint8_t b) ++{ ++ return (b & 0xf0) == 0x40; ++} ++#else ++static bool opcode_is_rex_prefix(uint8_t b) ++{ ++ return false; ++} ++#endif ++ ++#define REX_W (1 << 3) ++ ++/* ++ * This is a VERY crude opcode decoder. We only need to find the size of the ++ * load/store that caused our #PF and this should work for all the opcodes ++ * that we care about. Moreover, the ones who invented this instruction set ++ * should be shot. ++ */ ++void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size) ++{ ++ /* Default operand size */ ++ int operand_size_override = 4; ++ ++ /* prefixes */ ++ for (; opcode_is_prefix(*op); ++op) { ++ if (*op == 0x66) ++ operand_size_override = 2; ++ } ++ ++ /* REX prefix */ ++ if (opcode_is_rex_prefix(*op)) { ++ uint8_t rex = *op; ++ ++ ++op; ++ if (rex & REX_W) { ++ switch (*op) { ++ case 0x63: ++ *size = 4; ++ return; ++ case 0x0f: ++ ++op; ++ ++ switch (*op) { ++ case 0xb6: ++ case 0xbe: ++ *size = 1; ++ return; ++ case 0xb7: ++ case 0xbf: ++ *size = 2; ++ return; ++ } ++ ++ break; ++ } ++ ++ *size = 8; ++ return; ++ } ++ } ++ ++ /* escape opcode */ ++ if (*op == 0x0f) { ++ ++op; ++ ++ /* ++ * This is move with zero-extend and sign-extend, respectively; ++ * we don't have to think about 0xb6/0xbe, because this is ++ * already handled in the conditional below. ++ */ ++ if (*op == 0xb7 || *op == 0xbf) ++ operand_size_override = 2; ++ } ++ ++ *size = (*op & 1) ? 
operand_size_override : 1; ++} ++ ++const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op) ++{ ++ /* skip prefixes */ ++ while (opcode_is_prefix(*op)) ++ ++op; ++ if (opcode_is_rex_prefix(*op)) ++ ++op; ++ return op; ++} +Index: linux-2.6-tip/arch/x86/mm/kmemcheck/opcode.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/mm/kmemcheck/opcode.h +@@ -0,0 +1,9 @@ ++#ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H ++#define ARCH__X86__MM__KMEMCHECK__OPCODE_H ++ ++#include ++ ++void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size); ++const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op); ++ ++#endif +Index: linux-2.6-tip/arch/x86/mm/kmemcheck/pte.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/mm/kmemcheck/pte.c +@@ -0,0 +1,22 @@ ++#include ++ ++#include ++ ++#include "pte.h" ++ ++pte_t *kmemcheck_pte_lookup(unsigned long address) ++{ ++ pte_t *pte; ++ unsigned int level; ++ ++ pte = lookup_address(address, &level); ++ if (!pte) ++ return NULL; ++ if (level != PG_LEVEL_4K) ++ return NULL; ++ if (!pte_hidden(*pte)) ++ return NULL; ++ ++ return pte; ++} ++ +Index: linux-2.6-tip/arch/x86/mm/kmemcheck/pte.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/mm/kmemcheck/pte.h +@@ -0,0 +1,10 @@ ++#ifndef ARCH__X86__MM__KMEMCHECK__PTE_H ++#define ARCH__X86__MM__KMEMCHECK__PTE_H ++ ++#include ++ ++#include ++ ++pte_t *kmemcheck_pte_lookup(unsigned long address); ++ ++#endif +Index: linux-2.6-tip/arch/x86/mm/kmemcheck/selftest.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/mm/kmemcheck/selftest.c +@@ -0,0 +1,69 @@ ++#include ++ ++#include "opcode.h" ++#include "selftest.h" ++ ++struct selftest_opcode { ++ unsigned int expected_size; ++ const uint8_t *insn; ++ const char *desc; ++}; ++ ++static const struct selftest_opcode selftest_opcodes[] = { ++ /* REP MOVS */ ++ {1, "\xf3\xa4", "rep movsb , "}, ++ {4, "\xf3\xa5", "rep movsl , "}, ++ ++ /* MOVZX / MOVZXD */ ++ {1, "\x66\x0f\xb6\x51\xf8", "movzwq , "}, ++ {1, "\x0f\xb6\x51\xf8", "movzwq , "}, ++ ++ /* MOVSX / MOVSXD */ ++ {1, "\x66\x0f\xbe\x51\xf8", "movswq , "}, ++ {1, "\x0f\xbe\x51\xf8", "movswq , "}, ++ ++#ifdef CONFIG_X86_64 ++ /* MOVZX / MOVZXD */ ++ {1, "\x49\x0f\xb6\x51\xf8", "movzbq , "}, ++ {2, "\x49\x0f\xb7\x51\xf8", "movzbq , "}, ++ ++ /* MOVSX / MOVSXD */ ++ {1, "\x49\x0f\xbe\x51\xf8", "movsbq , "}, ++ {2, "\x49\x0f\xbf\x51\xf8", "movsbq , "}, ++ {4, "\x49\x63\x51\xf8", "movslq , "}, ++#endif ++}; ++ ++static bool selftest_opcode_one(const struct selftest_opcode *op) ++{ ++ unsigned size; ++ ++ kmemcheck_opcode_decode(op->insn, &size); ++ ++ if (size == op->expected_size) ++ return true; ++ ++ printk(KERN_WARNING "kmemcheck: opcode %s: expected size %d, got %d\n", ++ op->desc, op->expected_size, size); ++ return false; ++} ++ ++static bool selftest_opcodes_all(void) ++{ ++ bool pass = true; ++ unsigned int i; ++ ++ for (i = 0; i < ARRAY_SIZE(selftest_opcodes); ++i) ++ pass = pass && selftest_opcode_one(&selftest_opcodes[i]); ++ ++ return pass; ++} ++ ++bool kmemcheck_selftest(void) ++{ ++ bool pass = true; ++ ++ pass = pass && selftest_opcodes_all(); ++ ++ return pass; ++} +Index: linux-2.6-tip/arch/x86/mm/kmemcheck/selftest.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/mm/kmemcheck/selftest.h +@@ 
-0,0 +1,6 @@ ++#ifndef ARCH_X86_MM_KMEMCHECK_SELFTEST_H ++#define ARCH_X86_MM_KMEMCHECK_SELFTEST_H ++ ++bool kmemcheck_selftest(void); ++ ++#endif +Index: linux-2.6-tip/arch/x86/mm/kmemcheck/shadow.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/mm/kmemcheck/shadow.c +@@ -0,0 +1,162 @@ ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "pte.h" ++#include "shadow.h" ++ ++/* ++ * Return the shadow address for the given address. Returns NULL if the ++ * address is not tracked. ++ * ++ * We need to be extremely careful not to follow any invalid pointers, ++ * because this function can be called for *any* possible address. ++ */ ++void *kmemcheck_shadow_lookup(unsigned long address) ++{ ++ pte_t *pte; ++ struct page *page; ++ ++ if (!virt_addr_valid(address)) ++ return NULL; ++ ++ pte = kmemcheck_pte_lookup(address); ++ if (!pte) ++ return NULL; ++ ++ page = virt_to_page(address); ++ if (!page->shadow) ++ return NULL; ++ return page->shadow + (address & (PAGE_SIZE - 1)); ++} ++ ++static void mark_shadow(void *address, unsigned int n, ++ enum kmemcheck_shadow status) ++{ ++ unsigned long addr = (unsigned long) address; ++ unsigned long last_addr = addr + n - 1; ++ unsigned long page = addr & PAGE_MASK; ++ unsigned long last_page = last_addr & PAGE_MASK; ++ unsigned int first_n; ++ void *shadow; ++ ++ /* If the memory range crosses a page boundary, stop there. */ ++ if (page == last_page) ++ first_n = n; ++ else ++ first_n = page + PAGE_SIZE - addr; ++ ++ shadow = kmemcheck_shadow_lookup(addr); ++ if (shadow) ++ memset(shadow, status, first_n); ++ ++ addr += first_n; ++ n -= first_n; ++ ++ /* Do full-page memset()s. */ ++ while (n >= PAGE_SIZE) { ++ shadow = kmemcheck_shadow_lookup(addr); ++ if (shadow) ++ memset(shadow, status, PAGE_SIZE); ++ ++ addr += PAGE_SIZE; ++ n -= PAGE_SIZE; ++ } ++ ++ /* Do the remaining page, if any. */ ++ if (n > 0) { ++ shadow = kmemcheck_shadow_lookup(addr); ++ if (shadow) ++ memset(shadow, status, n); ++ } ++} ++ ++void kmemcheck_mark_unallocated(void *address, unsigned int n) ++{ ++ mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED); ++} ++ ++void kmemcheck_mark_uninitialized(void *address, unsigned int n) ++{ ++ mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED); ++} ++ ++/* ++ * Fill the shadow memory of the given address such that the memory at that ++ * address is marked as being initialized. 
++ */ ++void kmemcheck_mark_initialized(void *address, unsigned int n) ++{ ++ mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED); ++} ++EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized); ++ ++void kmemcheck_mark_freed(void *address, unsigned int n) ++{ ++ mark_shadow(address, n, KMEMCHECK_SHADOW_FREED); ++} ++ ++void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < n; ++i) ++ kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE); ++} ++ ++void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < n; ++i) ++ kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE); ++} ++ ++void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n) ++{ ++ unsigned int i; ++ ++ for (i = 0; i < n; ++i) ++ kmemcheck_mark_initialized(page_address(&p[i]), PAGE_SIZE); ++} ++ ++enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) ++{ ++ uint8_t *x; ++ unsigned int i; ++ ++ x = shadow; ++ ++#ifdef CONFIG_KMEMCHECK_PARTIAL_OK ++ /* ++ * Make sure _some_ bytes are initialized. Gcc frequently generates ++ * code to access neighboring bytes. ++ */ ++ for (i = 0; i < size; ++i) { ++ if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) ++ return x[i]; ++ } ++#else ++ /* All bytes must be initialized. */ ++ for (i = 0; i < size; ++i) { ++ if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) ++ return x[i]; ++ } ++#endif ++ ++ return x[0]; ++} ++ ++void kmemcheck_shadow_set(void *shadow, unsigned int size) ++{ ++ uint8_t *x; ++ unsigned int i; ++ ++ x = shadow; ++ for (i = 0; i < size; ++i) ++ x[i] = KMEMCHECK_SHADOW_INITIALIZED; ++} +Index: linux-2.6-tip/arch/x86/mm/kmemcheck/shadow.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/mm/kmemcheck/shadow.h +@@ -0,0 +1,16 @@ ++#ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H ++#define ARCH__X86__MM__KMEMCHECK__SHADOW_H ++ ++enum kmemcheck_shadow { ++ KMEMCHECK_SHADOW_UNALLOCATED, ++ KMEMCHECK_SHADOW_UNINITIALIZED, ++ KMEMCHECK_SHADOW_INITIALIZED, ++ KMEMCHECK_SHADOW_FREED, ++}; ++ ++void *kmemcheck_shadow_lookup(unsigned long address); ++ ++enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); ++void kmemcheck_shadow_set(void *shadow, unsigned int size); ++ ++#endif +Index: linux-2.6-tip/arch/x86/mm/kmmio.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/kmmio.c ++++ linux-2.6-tip/arch/x86/mm/kmmio.c +@@ -310,7 +310,7 @@ static int post_kmmio_handler(unsigned l + struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); + + if (!ctx->active) { +- pr_warning("kmmio: spurious debug trap on CPU %d.\n", ++ pr_debug("kmmio: spurious debug trap on CPU %d.\n", + smp_processor_id()); + goto out; + } +Index: linux-2.6-tip/arch/x86/mm/memtest.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/memtest.c ++++ linux-2.6-tip/arch/x86/mm/memtest.c +@@ -9,44 +9,44 @@ + + #include + +-static void __init memtest(unsigned long start_phys, unsigned long size, +- unsigned pattern) ++static u64 patterns[] __initdata = { ++ 0, ++ 0xffffffffffffffffULL, ++ 0x5555555555555555ULL, ++ 0xaaaaaaaaaaaaaaaaULL, ++ 0x1111111111111111ULL, ++ 0x2222222222222222ULL, ++ 0x4444444444444444ULL, ++ 0x8888888888888888ULL, ++ 0x3333333333333333ULL, ++ 0x6666666666666666ULL, ++ 0x9999999999999999ULL, ++ 0xccccccccccccccccULL, ++ 0x7777777777777777ULL, ++ 0xbbbbbbbbbbbbbbbbULL, ++ 
0xddddddddddddddddULL, ++ 0xeeeeeeeeeeeeeeeeULL, ++ 0x7a6c7258554e494cULL, /* yeah ;-) */ ++}; ++ ++static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) + { +- unsigned long i; +- unsigned long *start; +- unsigned long start_bad; +- unsigned long last_bad; +- unsigned long val; +- unsigned long start_phys_aligned; +- unsigned long count; +- unsigned long incr; +- +- switch (pattern) { +- case 0: +- val = 0UL; +- break; +- case 1: +- val = -1UL; +- break; +- case 2: +-#ifdef CONFIG_X86_64 +- val = 0x5555555555555555UL; +-#else +- val = 0x55555555UL; +-#endif +- break; +- case 3: +-#ifdef CONFIG_X86_64 +- val = 0xaaaaaaaaaaaaaaaaUL; +-#else +- val = 0xaaaaaaaaUL; +-#endif +- break; +- default: +- return; +- } ++ printk(KERN_INFO " %016llx bad mem addr %010llx - %010llx reserved\n", ++ (unsigned long long) pattern, ++ (unsigned long long) start_bad, ++ (unsigned long long) end_bad); ++ reserve_early(start_bad, end_bad, "BAD RAM"); ++} + +- incr = sizeof(unsigned long); ++static void __init memtest(u64 pattern, u64 start_phys, u64 size) ++{ ++ u64 i, count; ++ u64 *start; ++ u64 start_bad, last_bad; ++ u64 start_phys_aligned; ++ size_t incr; ++ ++ incr = sizeof(pattern); + start_phys_aligned = ALIGN(start_phys, incr); + count = (size - (start_phys_aligned - start_phys))/incr; + start = __va(start_phys_aligned); +@@ -54,25 +54,42 @@ static void __init memtest(unsigned long + last_bad = 0; + + for (i = 0; i < count; i++) +- start[i] = val; ++ start[i] = pattern; + for (i = 0; i < count; i++, start++, start_phys_aligned += incr) { +- if (*start != val) { +- if (start_phys_aligned == last_bad + incr) { +- last_bad += incr; +- } else { +- if (start_bad) { +- printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved", +- val, start_bad, last_bad + incr); +- reserve_early(start_bad, last_bad + incr, "BAD RAM"); +- } +- start_bad = last_bad = start_phys_aligned; +- } ++ if (*start == pattern) ++ continue; ++ if (start_phys_aligned == last_bad + incr) { ++ last_bad += incr; ++ continue; + } ++ if (start_bad) ++ reserve_bad_mem(pattern, start_bad, last_bad + incr); ++ start_bad = last_bad = start_phys_aligned; + } +- if (start_bad) { +- printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved", +- val, start_bad, last_bad + incr); +- reserve_early(start_bad, last_bad + incr, "BAD RAM"); ++ if (start_bad) ++ reserve_bad_mem(pattern, start_bad, last_bad + incr); ++} ++ ++static void __init do_one_pass(u64 pattern, u64 start, u64 end) ++{ ++ u64 size = 0; ++ ++ while (start < end) { ++ start = find_e820_area_size(start, &size, 1); ++ ++ /* done ? 
*/ ++ if (start >= end) ++ break; ++ if (start + size > end) ++ size = end - start; ++ ++ printk(KERN_INFO " %010llx - %010llx pattern %016llx\n", ++ (unsigned long long) start, ++ (unsigned long long) start + size, ++ (unsigned long long) cpu_to_be64(pattern)); ++ memtest(pattern, start, size); ++ ++ start += size; + } + } + +@@ -83,6 +100,9 @@ static int __init parse_memtest(char *ar + { + if (arg) + memtest_pattern = simple_strtoul(arg, NULL, 0); ++ else ++ memtest_pattern = ARRAY_SIZE(patterns); ++ + return 0; + } + +@@ -90,33 +110,22 @@ early_param("memtest", parse_memtest); + + void __init early_memtest(unsigned long start, unsigned long end) + { +- u64 t_start, t_size; +- unsigned pattern; ++ unsigned int i; ++ unsigned int idx = 0; + + if (!memtest_pattern) + return; + +- printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern); +- for (pattern = 0; pattern < memtest_pattern; pattern++) { +- t_start = start; +- t_size = 0; +- while (t_start < end) { +- t_start = find_e820_area_size(t_start, &t_size, 1); +- +- /* done ? */ +- if (t_start >= end) +- break; +- if (t_start + t_size > end) +- t_size = end - t_start; +- +- printk(KERN_CONT "\n %010llx - %010llx pattern %d", +- (unsigned long long)t_start, +- (unsigned long long)t_start + t_size, pattern); +- +- memtest(t_start, t_size, pattern); ++ printk(KERN_INFO "early_memtest: # of tests: %d\n", memtest_pattern); ++ for (i = 0; i < memtest_pattern; i++) { ++ idx = i % ARRAY_SIZE(patterns); ++ do_one_pass(patterns[idx], start, end); ++ } + +- t_start += t_size; +- } ++ if (idx > 0) { ++ printk(KERN_INFO "early_memtest: wipe out " ++ "test pattern from memory\n"); ++ /* additional test with pattern 0 will do this */ ++ do_one_pass(0, start, end); + } +- printk(KERN_CONT "\n"); + } +Index: linux-2.6-tip/arch/x86/mm/mmap.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/mmap.c ++++ linux-2.6-tip/arch/x86/mm/mmap.c +@@ -4,7 +4,7 @@ + * Based on code by Ingo Molnar and Andi Kleen, copyrighted + * as follows: + * +- * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. ++ * Copyright 2003-2009 Red Hat Inc. + * All Rights Reserved. + * Copyright 2005 Andi Kleen, SUSE Labs. + * Copyright 2007 Jiri Kosina, SUSE Labs. 
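A minimal userspace sketch of the memtest pass rewritten above, assuming nothing beyond the diff itself: it fills a buffer with one 64-bit pattern, reads it back, and coalesces consecutive mismatching words into ranges, the same way memtest() merges adjacent bad addresses before handing them to reserve_bad_mem(). The buffer, the injected fault, and the word-index reporting are illustrative only and are not part of the kernel patch.

/*
 * Sketch only: approximates the fill/verify/coalesce loop of memtest().
 * The corrupted word at index 5 stands in for bad RAM.
 */
#include <stdio.h>
#include <stdint.h>

static void check_pattern(uint64_t *buf, size_t count, uint64_t pattern)
{
	size_t i, start_bad = 0, last_bad = 0;
	int in_bad = 0;

	for (i = 0; i < count; i++)
		buf[i] = pattern;

	buf[5] ^= 1;	/* pretend one word was corrupted */

	for (i = 0; i < count; i++) {
		if (buf[i] == pattern)
			continue;
		if (in_bad && i == last_bad + 1) {
			last_bad = i;	/* extend the current bad range */
			continue;
		}
		if (in_bad)
			printf("bad words %zu-%zu (pattern %016llx)\n",
			       start_bad, last_bad,
			       (unsigned long long)pattern);
		start_bad = last_bad = i;
		in_bad = 1;
	}
	if (in_bad)
		printf("bad words %zu-%zu (pattern %016llx)\n",
		       start_bad, last_bad, (unsigned long long)pattern);
}

int main(void)
{
	uint64_t buf[64];

	check_pattern(buf, 64, 0x5555555555555555ULL);
	return 0;
}

The kernel version does the same coalescing but over physical addresses stepped by sizeof(pattern), and reserves each merged range via reserve_early() instead of printing it.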
+Index: linux-2.6-tip/arch/x86/mm/mmio-mod.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/mmio-mod.c ++++ linux-2.6-tip/arch/x86/mm/mmio-mod.c +@@ -378,27 +378,34 @@ static void clear_trace_list(void) + } + + #ifdef CONFIG_HOTPLUG_CPU +-static cpumask_t downed_cpus; ++static cpumask_var_t downed_cpus; + + static void enter_uniprocessor(void) + { + int cpu; + int err; + ++ if (downed_cpus == NULL && ++ !alloc_cpumask_var(&downed_cpus, GFP_KERNEL)) { ++ pr_notice(NAME "Failed to allocate mask\n"); ++ goto out; ++ } ++ + get_online_cpus(); +- downed_cpus = cpu_online_map; +- cpu_clear(first_cpu(cpu_online_map), downed_cpus); ++ cpumask_copy(downed_cpus, cpu_online_mask); ++ cpumask_clear_cpu(cpumask_first(cpu_online_mask), downed_cpus); + if (num_online_cpus() > 1) + pr_notice(NAME "Disabling non-boot CPUs...\n"); + put_online_cpus(); + +- for_each_cpu_mask(cpu, downed_cpus) { ++ for_each_cpu(cpu, downed_cpus) { + err = cpu_down(cpu); + if (!err) + pr_info(NAME "CPU%d is down.\n", cpu); + else + pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); + } ++out: + if (num_online_cpus() > 1) + pr_warning(NAME "multiple CPUs still online, " + "may miss events.\n"); +@@ -411,10 +418,10 @@ static void __ref leave_uniprocessor(voi + int cpu; + int err; + +- if (cpus_weight(downed_cpus) == 0) ++ if (downed_cpus == NULL || cpumask_weight(downed_cpus) == 0) + return; + pr_notice(NAME "Re-enabling CPUs...\n"); +- for_each_cpu_mask(cpu, downed_cpus) { ++ for_each_cpu(cpu, downed_cpus) { + err = cpu_up(cpu); + if (!err) + pr_info(NAME "enabled CPU%d.\n", cpu); +Index: linux-2.6-tip/arch/x86/mm/numa.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/mm/numa.c +@@ -0,0 +1,67 @@ ++/* Common code for 32 and 64-bit NUMA */ ++#include ++#include ++#include ++ ++#ifdef CONFIG_DEBUG_PER_CPU_MAPS ++# define DBG(x...) printk(KERN_DEBUG x) ++#else ++# define DBG(x...) ++#endif ++ ++/* ++ * Which logical CPUs are on which nodes ++ */ ++cpumask_var_t node_to_cpumask_map[MAX_NUMNODES]; ++EXPORT_SYMBOL(node_to_cpumask_map); ++ ++/* ++ * Allocate node_to_cpumask_map based on number of available nodes ++ * Requires node_possible_map to be valid. ++ * ++ * Note: node_to_cpumask() is not valid until after this is done. ++ * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.) ++ */ ++void __init setup_node_to_cpumask_map(void) ++{ ++ unsigned int node, num = 0; ++ ++ /* setup nr_node_ids if not done yet */ ++ if (nr_node_ids == MAX_NUMNODES) { ++ for_each_node_mask(node, node_possible_map) ++ num = node; ++ nr_node_ids = num + 1; ++ } ++ ++ /* allocate the map */ ++ for (node = 0; node < nr_node_ids; node++) ++ alloc_bootmem_cpumask_var(&node_to_cpumask_map[node]); ++ ++ /* cpumask_of_node() will now work */ ++ pr_debug("Node to cpumask map for %d nodes\n", nr_node_ids); ++} ++ ++#ifdef CONFIG_DEBUG_PER_CPU_MAPS ++/* ++ * Returns a pointer to the bitmask of CPUs on Node 'node'. 
++ */ ++const struct cpumask *cpumask_of_node(int node) ++{ ++ if (node >= nr_node_ids) { ++ printk(KERN_WARNING ++ "cpumask_of_node(%d): node > nr_node_ids(%d)\n", ++ node, nr_node_ids); ++ dump_stack(); ++ return cpu_none_mask; ++ } ++ if (node_to_cpumask_map[node] == NULL) { ++ printk(KERN_WARNING ++ "cpumask_of_node(%d): no node_to_cpumask_map!\n", ++ node); ++ dump_stack(); ++ return cpu_online_mask; ++ } ++ return node_to_cpumask_map[node]; ++} ++EXPORT_SYMBOL(cpumask_of_node); ++#endif +Index: linux-2.6-tip/arch/x86/mm/numa_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/numa_32.c ++++ linux-2.6-tip/arch/x86/mm/numa_32.c +@@ -194,7 +194,7 @@ void *alloc_remap(int nid, unsigned long + size = ALIGN(size, L1_CACHE_BYTES); + + if (!allocation || (allocation + size) >= node_remap_end_vaddr[nid]) +- return 0; ++ return NULL; + + node_remap_alloc_vaddr[nid] += size; + memset(allocation, 0, size); +@@ -416,39 +416,14 @@ void __init initmem_init(unsigned long s + for_each_online_node(nid) + propagate_e820_map_node(nid); + +- for_each_online_node(nid) ++ for_each_online_node(nid) { + memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); ++ NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; ++ } + +- NODE_DATA(0)->bdata = &bootmem_node_data[0]; + setup_bootmem_allocator(); + } + +-void __init set_highmem_pages_init(void) +-{ +-#ifdef CONFIG_HIGHMEM +- struct zone *zone; +- int nid; +- +- for_each_zone(zone) { +- unsigned long zone_start_pfn, zone_end_pfn; +- +- if (!is_highmem(zone)) +- continue; +- +- zone_start_pfn = zone->zone_start_pfn; +- zone_end_pfn = zone_start_pfn + zone->spanned_pages; +- +- nid = zone_to_nid(zone); +- printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n", +- zone->name, nid, zone_start_pfn, zone_end_pfn); +- +- add_highpages_with_active_regions(nid, zone_start_pfn, +- zone_end_pfn); +- } +- totalram_pages += totalhigh_pages; +-#endif +-} +- + #ifdef CONFIG_MEMORY_HOTPLUG + static int paddr_to_nid(u64 addr) + { +Index: linux-2.6-tip/arch/x86/mm/numa_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/numa_64.c ++++ linux-2.6-tip/arch/x86/mm/numa_64.c +@@ -33,6 +33,15 @@ int numa_off __initdata; + static unsigned long __initdata nodemap_addr; + static unsigned long __initdata nodemap_size; + ++DEFINE_PER_CPU(int, node_number) = 0; ++EXPORT_PER_CPU_SYMBOL(node_number); ++ ++/* ++ * Map cpu index to node index ++ */ ++DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); ++EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); ++ + /* + * Given a shift value, try to populate memnodemap[] + * Returns : +@@ -640,3 +649,116 @@ void __init init_cpu_to_node(void) + #endif + + ++void __cpuinit numa_set_node(int cpu, int node) ++{ ++ int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); ++ ++ /* early setting, no percpu area yet */ ++ if (cpu_to_node_map) { ++ cpu_to_node_map[cpu] = node; ++ return; ++ } ++ ++#ifdef CONFIG_DEBUG_PER_CPU_MAPS ++ if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) { ++ printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu); ++ dump_stack(); ++ return; ++ } ++#endif ++ per_cpu(x86_cpu_to_node_map, cpu) = node; ++ ++ if (node != NUMA_NO_NODE) ++ per_cpu(node_number, cpu) = node; ++} ++ ++void __cpuinit numa_clear_node(int cpu) ++{ ++ numa_set_node(cpu, NUMA_NO_NODE); ++} ++ ++#ifndef CONFIG_DEBUG_PER_CPU_MAPS ++ ++void __cpuinit numa_add_cpu(int cpu) ++{ ++ cpumask_set_cpu(cpu, 
node_to_cpumask_map[early_cpu_to_node(cpu)]); ++} ++ ++void __cpuinit numa_remove_cpu(int cpu) ++{ ++ cpumask_clear_cpu(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]); ++} ++ ++#else /* CONFIG_DEBUG_PER_CPU_MAPS */ ++ ++/* ++ * --------- debug versions of the numa functions --------- ++ */ ++static void __cpuinit numa_set_cpumask(int cpu, int enable) ++{ ++ int node = early_cpu_to_node(cpu); ++ struct cpumask *mask; ++ char buf[64]; ++ ++ mask = node_to_cpumask_map[node]; ++ if (mask == NULL) { ++ printk(KERN_ERR "node_to_cpumask_map[%i] NULL\n", node); ++ dump_stack(); ++ return; ++ } ++ ++ if (enable) ++ cpumask_set_cpu(cpu, mask); ++ else ++ cpumask_clear_cpu(cpu, mask); ++ ++ cpulist_scnprintf(buf, sizeof(buf), mask); ++ printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", ++ enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf); ++} ++ ++void __cpuinit numa_add_cpu(int cpu) ++{ ++ numa_set_cpumask(cpu, 1); ++} ++ ++void __cpuinit numa_remove_cpu(int cpu) ++{ ++ numa_set_cpumask(cpu, 0); ++} ++ ++int cpu_to_node(int cpu) ++{ ++ if (early_per_cpu_ptr(x86_cpu_to_node_map)) { ++ printk(KERN_WARNING ++ "cpu_to_node(%d): usage too early!\n", cpu); ++ dump_stack(); ++ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; ++ } ++ return per_cpu(x86_cpu_to_node_map, cpu); ++} ++EXPORT_SYMBOL(cpu_to_node); ++ ++/* ++ * Same function as cpu_to_node() but used if called before the ++ * per_cpu areas are setup. ++ */ ++int early_cpu_to_node(int cpu) ++{ ++ if (early_per_cpu_ptr(x86_cpu_to_node_map)) ++ return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu]; ++ ++ if (!cpu_possible(cpu)) { ++ printk(KERN_WARNING ++ "early_cpu_to_node(%d): no per_cpu area!\n", cpu); ++ dump_stack(); ++ return NUMA_NO_NODE; ++ } ++ return per_cpu(x86_cpu_to_node_map, cpu); ++} ++ ++/* ++ * --------- end of debug versions of the numa functions --------- ++ */ ++ ++#endif /* CONFIG_DEBUG_PER_CPU_MAPS */ +Index: linux-2.6-tip/arch/x86/mm/pageattr.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/pageattr.c ++++ linux-2.6-tip/arch/x86/mm/pageattr.c +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -33,6 +34,7 @@ struct cpa_data { + unsigned long pfn; + unsigned force_split : 1; + int curpage; ++ struct page **pages; + }; + + /* +@@ -45,6 +47,7 @@ static DEFINE_SPINLOCK(cpa_lock); + + #define CPA_FLUSHTLB 1 + #define CPA_ARRAY 2 ++#define CPA_PAGES_ARRAY 4 + + #ifdef CONFIG_PROC_FS + static unsigned long direct_pages_count[PG_LEVEL_NUM]; +@@ -95,7 +98,7 @@ static inline unsigned long highmap_star + + static inline unsigned long highmap_end_pfn(void) + { +- return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; ++ return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT; + } + + #endif +@@ -201,10 +204,10 @@ static void cpa_flush_range(unsigned lon + } + } + +-static void cpa_flush_array(unsigned long *start, int numpages, int cache) ++static void cpa_flush_array(unsigned long *start, int numpages, int cache, ++ int in_flags, struct page **pages) + { + unsigned int i, level; +- unsigned long *addr; + + BUG_ON(irqs_disabled()); + +@@ -225,14 +228,22 @@ static void cpa_flush_array(unsigned lon + * will cause all other CPUs to flush the same + * cachelines: + */ +- for (i = 0, addr = start; i < numpages; i++, addr++) { +- pte_t *pte = lookup_address(*addr, &level); ++ for (i = 0; i < numpages; i++) { ++ unsigned long addr; ++ pte_t *pte; ++ ++ if (in_flags & CPA_PAGES_ARRAY) ++ addr = (unsigned 
long)page_address(pages[i]); ++ else ++ addr = start[i]; ++ ++ pte = lookup_address(addr, &level); + + /* + * Only flush present addresses: + */ + if (pte && (pte_val(*pte) & _PAGE_PRESENT)) +- clflush_cache_range((void *) *addr, PAGE_SIZE); ++ clflush_cache_range((void *)addr, PAGE_SIZE); + } + } + +@@ -464,7 +475,7 @@ static int split_large_page(pte_t *kpte, + + if (!debug_pagealloc) + spin_unlock(&cpa_lock); +- base = alloc_pages(GFP_KERNEL, 0); ++ base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); + if (!debug_pagealloc) + spin_lock(&cpa_lock); + if (!base) +@@ -482,6 +493,13 @@ static int split_large_page(pte_t *kpte, + pbase = (pte_t *)page_address(base); + paravirt_alloc_pte(&init_mm, page_to_pfn(base)); + ref_prot = pte_pgprot(pte_clrhuge(*kpte)); ++ /* ++ * If we ever want to utilize the PAT bit, we need to ++ * update this function to make sure it's converted from ++ * bit 12 to bit 7 when we cross from the 2MB level to ++ * the 4K level: ++ */ ++ WARN_ON_ONCE(pgprot_val(ref_prot) & _PAGE_PAT_LARGE); + + #ifdef CONFIG_X86_64 + if (level == PG_LEVEL_1G) { +@@ -577,7 +595,9 @@ static int __change_page_attr(struct cpa + unsigned int level; + pte_t *kpte, old_pte; + +- if (cpa->flags & CPA_ARRAY) ++ if (cpa->flags & CPA_PAGES_ARRAY) ++ address = (unsigned long)page_address(cpa->pages[cpa->curpage]); ++ else if (cpa->flags & CPA_ARRAY) + address = cpa->vaddr[cpa->curpage]; + else + address = *cpa->vaddr; +@@ -680,7 +700,9 @@ static int cpa_process_alias(struct cpa_ + * No need to redo, when the primary call touched the direct + * mapping already: + */ +- if (cpa->flags & CPA_ARRAY) ++ if (cpa->flags & CPA_PAGES_ARRAY) ++ vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]); ++ else if (cpa->flags & CPA_ARRAY) + vaddr = cpa->vaddr[cpa->curpage]; + else + vaddr = *cpa->vaddr; +@@ -691,7 +713,7 @@ static int cpa_process_alias(struct cpa_ + alias_cpa = *cpa; + temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); + alias_cpa.vaddr = &temp_cpa_vaddr; +- alias_cpa.flags &= ~CPA_ARRAY; ++ alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); + + + ret = __change_page_attr_set_clr(&alias_cpa, 0); +@@ -704,7 +726,7 @@ static int cpa_process_alias(struct cpa_ + * No need to redo, when the primary call touched the high + * mapping already: + */ +- if (within(vaddr, (unsigned long) _text, (unsigned long) _end)) ++ if (within(vaddr, (unsigned long) _text, _brk_end)) + return 0; + + /* +@@ -717,7 +739,7 @@ static int cpa_process_alias(struct cpa_ + alias_cpa = *cpa; + temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; + alias_cpa.vaddr = &temp_cpa_vaddr; +- alias_cpa.flags &= ~CPA_ARRAY; ++ alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); + + /* + * The high mapping range is imprecise, so ignore the return value. 
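A small sketch of the address-selection logic the CPA_PAGES_ARRAY changes above introduce, under the assumption that only the dispatch matters: depending on the flags, the current address comes from a page array, an address array, or a single contiguous range. The struct and macro names below (cpa_walk, FLAG_ARRAY, FLAG_PAGES_ARRAY) are invented for illustration and do not appear in the patch.

/*
 * Sketch only: mirrors how __change_page_attr() and cpa_process_alias()
 * pick the address for cpa->curpage after this patch.
 */
#include <stdio.h>

#define FLAG_ARRAY		0x2
#define FLAG_PAGES_ARRAY	0x4

struct cpa_walk {
	unsigned long *vaddr;	/* single address or array of addresses */
	unsigned long *pages;	/* stand-in for page_address(pages[i]) */
	int curpage;
	int flags;
};

static unsigned long current_address(struct cpa_walk *w)
{
	if (w->flags & FLAG_PAGES_ARRAY)
		return w->pages[w->curpage];
	if (w->flags & FLAG_ARRAY)
		return w->vaddr[w->curpage];
	return *w->vaddr;	/* contiguous range: *vaddr advances by numpages */
}

int main(void)
{
	unsigned long addrs[2] = { 0x1000, 0x3000 };
	struct cpa_walk w = { .vaddr = addrs, .curpage = 1, .flags = FLAG_ARRAY };

	printf("current address: %#lx\n", current_address(&w));
	return 0;
}

In the real code the page-array case also forces 4K granularity (cpa->numpages = 1), since large pages cannot be assumed for scattered struct page entries.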
+@@ -738,7 +760,7 @@ static int __change_page_attr_set_clr(st + */ + cpa->numpages = numpages; + /* for array changes, we can't use large page */ +- if (cpa->flags & CPA_ARRAY) ++ if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY)) + cpa->numpages = 1; + + if (!debug_pagealloc) +@@ -762,7 +784,7 @@ static int __change_page_attr_set_clr(st + */ + BUG_ON(cpa->numpages > numpages); + numpages -= cpa->numpages; +- if (cpa->flags & CPA_ARRAY) ++ if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) + cpa->curpage++; + else + *cpa->vaddr += cpa->numpages * PAGE_SIZE; +@@ -779,7 +801,8 @@ static inline int cache_attr(pgprot_t at + + static int change_page_attr_set_clr(unsigned long *addr, int numpages, + pgprot_t mask_set, pgprot_t mask_clr, +- int force_split, int array) ++ int force_split, int in_flag, ++ struct page **pages) + { + struct cpa_data cpa; + int ret, cache, checkalias; +@@ -794,15 +817,7 @@ static int change_page_attr_set_clr(unsi + return 0; + + /* Ensure we are PAGE_SIZE aligned */ +- if (!array) { +- if (*addr & ~PAGE_MASK) { +- *addr &= PAGE_MASK; +- /* +- * People should not be passing in unaligned addresses: +- */ +- WARN_ON_ONCE(1); +- } +- } else { ++ if (in_flag & CPA_ARRAY) { + int i; + for (i = 0; i < numpages; i++) { + if (addr[i] & ~PAGE_MASK) { +@@ -810,10 +825,24 @@ static int change_page_attr_set_clr(unsi + WARN_ON_ONCE(1); + } + } ++ } else if (!(in_flag & CPA_PAGES_ARRAY)) { ++ /* ++ * in_flag of CPA_PAGES_ARRAY implies it is aligned. ++ * No need to cehck in that case ++ */ ++ if (*addr & ~PAGE_MASK) { ++ *addr &= PAGE_MASK; ++ /* ++ * People should not be passing in unaligned addresses: ++ */ ++ WARN_ON_ONCE(1); ++ } + } + ++#if 0 + /* Must avoid aliasing mappings in the highmem code */ + kmap_flush_unused(); ++#endif + + vm_unmap_aliases(); + +@@ -825,6 +854,7 @@ static int change_page_attr_set_clr(unsi + arch_flush_lazy_mmu_mode(); + + cpa.vaddr = addr; ++ cpa.pages = pages; + cpa.numpages = numpages; + cpa.mask_set = mask_set; + cpa.mask_clr = mask_clr; +@@ -832,8 +862,8 @@ static int change_page_attr_set_clr(unsi + cpa.curpage = 0; + cpa.force_split = force_split; + +- if (array) +- cpa.flags |= CPA_ARRAY; ++ if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY)) ++ cpa.flags |= in_flag; + + /* No alias checking for _NX bit modifications */ + checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; +@@ -859,9 +889,10 @@ static int change_page_attr_set_clr(unsi + * wbindv): + */ + if (!ret && cpu_has_clflush) { +- if (cpa.flags & CPA_ARRAY) +- cpa_flush_array(addr, numpages, cache); +- else ++ if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { ++ cpa_flush_array(addr, numpages, cache, ++ cpa.flags, pages); ++ } else + cpa_flush_range(*addr, numpages, cache); + } else + cpa_flush_all(cache); +@@ -881,14 +912,28 @@ static inline int change_page_attr_set(u + pgprot_t mask, int array) + { + return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, +- array); ++ (array ? CPA_ARRAY : 0), NULL); + } + + static inline int change_page_attr_clear(unsigned long *addr, int numpages, + pgprot_t mask, int array) + { + return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, +- array); ++ (array ? 
CPA_ARRAY : 0), NULL); ++} ++ ++static inline int cpa_set_pages_array(struct page **pages, int numpages, ++ pgprot_t mask) ++{ ++ return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0, ++ CPA_PAGES_ARRAY, pages); ++} ++ ++static inline int cpa_clear_pages_array(struct page **pages, int numpages, ++ pgprot_t mask) ++{ ++ return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0, ++ CPA_PAGES_ARRAY, pages); + } + + int _set_memory_uc(unsigned long addr, int numpages) +@@ -1036,7 +1081,7 @@ int set_memory_np(unsigned long addr, in + int set_memory_4k(unsigned long addr, int numpages) + { + return change_page_attr_set_clr(&addr, numpages, __pgprot(0), +- __pgprot(0), 1, 0); ++ __pgprot(0), 1, 0, NULL); + } + + int set_pages_uc(struct page *page, int numpages) +@@ -1047,6 +1092,35 @@ int set_pages_uc(struct page *page, int + } + EXPORT_SYMBOL(set_pages_uc); + ++int set_pages_array_uc(struct page **pages, int addrinarray) ++{ ++ unsigned long start; ++ unsigned long end; ++ int i; ++ int free_idx; ++ ++ for (i = 0; i < addrinarray; i++) { ++ start = (unsigned long)page_address(pages[i]); ++ end = start + PAGE_SIZE; ++ if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) ++ goto err_out; ++ } ++ ++ if (cpa_set_pages_array(pages, addrinarray, ++ __pgprot(_PAGE_CACHE_UC_MINUS)) == 0) { ++ return 0; /* Success */ ++ } ++err_out: ++ free_idx = i; ++ for (i = 0; i < free_idx; i++) { ++ start = (unsigned long)page_address(pages[i]); ++ end = start + PAGE_SIZE; ++ free_memtype(start, end); ++ } ++ return -EINVAL; ++} ++EXPORT_SYMBOL(set_pages_array_uc); ++ + int set_pages_wb(struct page *page, int numpages) + { + unsigned long addr = (unsigned long)page_address(page); +@@ -1055,6 +1129,26 @@ int set_pages_wb(struct page *page, int + } + EXPORT_SYMBOL(set_pages_wb); + ++int set_pages_array_wb(struct page **pages, int addrinarray) ++{ ++ int retval; ++ unsigned long start; ++ unsigned long end; ++ int i; ++ ++ retval = cpa_clear_pages_array(pages, addrinarray, ++ __pgprot(_PAGE_CACHE_MASK)); ++ ++ for (i = 0; i < addrinarray; i++) { ++ start = (unsigned long)page_address(pages[i]); ++ end = start + PAGE_SIZE; ++ free_memtype(start, end); ++ } ++ ++ return retval; ++} ++EXPORT_SYMBOL(set_pages_array_wb); ++ + int set_pages_x(struct page *page, int numpages) + { + unsigned long addr = (unsigned long)page_address(page); +Index: linux-2.6-tip/arch/x86/mm/pat.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/pat.c ++++ linux-2.6-tip/arch/x86/mm/pat.c +@@ -31,7 +31,7 @@ + #ifdef CONFIG_X86_PAT + int __read_mostly pat_enabled = 1; + +-void __cpuinit pat_disable(char *reason) ++void __cpuinit pat_disable(const char *reason) + { + pat_enabled = 0; + printk(KERN_INFO "%s\n", reason); +@@ -43,6 +43,11 @@ static int __init nopat(char *str) + return 0; + } + early_param("nopat", nopat); ++#else ++static inline void pat_disable(const char *reason) ++{ ++ (void)reason; ++} + #endif + + +@@ -79,16 +84,20 @@ void pat_init(void) + if (!pat_enabled) + return; + +- /* Paranoia check. */ +- if (!cpu_has_pat && boot_pat_state) { +- /* +- * If this happens we are on a secondary CPU, but +- * switched to PAT on the boot CPU. We have no way to +- * undo PAT. 
+- */ +- printk(KERN_ERR "PAT enabled, " +- "but not supported by secondary CPU\n"); +- BUG(); ++ if (!cpu_has_pat) { ++ if (!boot_pat_state) { ++ pat_disable("PAT not supported by CPU."); ++ return; ++ } else { ++ /* ++ * If this happens we are on a secondary CPU, but ++ * switched to PAT on the boot CPU. We have no way to ++ * undo PAT. ++ */ ++ printk(KERN_ERR "PAT enabled, " ++ "but not supported by secondary CPU\n"); ++ BUG(); ++ } + } + + /* Set PWT to Write-Combining. All other bits stay the same */ +@@ -626,6 +635,33 @@ void unmap_devmem(unsigned long pfn, uns + } + + /* ++ * Change the memory type for the physial address range in kernel identity ++ * mapping space if that range is a part of identity map. ++ */ ++int kernel_map_sync_memtype(u64 base, unsigned long size, unsigned long flags) ++{ ++ unsigned long id_sz; ++ ++ if (!pat_enabled || base >= __pa(high_memory)) ++ return 0; ++ ++ id_sz = (__pa(high_memory) < base + size) ? ++ __pa(high_memory) - base : ++ size; ++ ++ if (ioremap_change_attr((unsigned long)__va(base), id_sz, flags) < 0) { ++ printk(KERN_INFO ++ "%s:%d ioremap_change_attr failed %s " ++ "for %Lx-%Lx\n", ++ current->comm, current->pid, ++ cattr_name(flags), ++ base, (unsigned long long)(base + size)); ++ return -EINVAL; ++ } ++ return 0; ++} ++ ++/* + * Internal interface to reserve a range of physical memory with prot. + * Reserved non RAM regions only and after successful reserve_memtype, + * this func also keeps identity mapping (if any) in sync with this new prot. +@@ -634,7 +670,7 @@ static int reserve_pfn_range(u64 paddr, + int strict_prot) + { + int is_ram = 0; +- int id_sz, ret; ++ int ret; + unsigned long flags; + unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK); + +@@ -672,23 +708,8 @@ static int reserve_pfn_range(u64 paddr, + flags); + } + +- /* Need to keep identity mapping in sync */ +- if (paddr >= __pa(high_memory)) +- return 0; +- +- id_sz = (__pa(high_memory) < paddr + size) ? +- __pa(high_memory) - paddr : +- size; +- +- if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) { ++ if (kernel_map_sync_memtype(paddr, size, flags) < 0) { + free_memtype(paddr, paddr + size); +- printk(KERN_ERR +- "%s:%d reserve_pfn_range ioremap_change_attr failed %s " +- "for %Lx-%Lx\n", +- current->comm, current->pid, +- cattr_name(flags), +- (unsigned long long)paddr, +- (unsigned long long)(paddr + size)); + return -EINVAL; + } + return 0; +Index: linux-2.6-tip/arch/x86/mm/pgtable.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/pgtable.c ++++ linux-2.6-tip/arch/x86/mm/pgtable.c +@@ -4,9 +4,11 @@ + #include + #include + ++#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO ++ + pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) + { +- return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); ++ return (pte_t *)__get_free_page(PGALLOC_GFP); + } + + pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) +@@ -14,9 +16,9 @@ pgtable_t pte_alloc_one(struct mm_struct + struct page *pte; + + #ifdef CONFIG_HIGHPTE +- pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); ++ pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0); + #else +- pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); ++ pte = alloc_pages(PGALLOC_GFP, 0); + #endif + if (pte) + pgtable_page_ctor(pte); +@@ -130,6 +132,7 @@ void pud_populate(struct mm_struct *mm, + reserved at the pmd (PDPT) level. 
*/ + set_pud(pudp, __pud(__pa(pmd) | _PAGE_PRESENT)); + ++ preempt_disable(); + /* + * According to Intel App note "TLBs, Paging-Structure Caches, + * and Their Invalidation", April 2007, document 317080-001, +@@ -138,6 +141,7 @@ void pud_populate(struct mm_struct *mm, + */ + if (mm == current->active_mm) + write_cr3(read_cr3()); ++ preempt_enable(); + } + #else /* !CONFIG_X86_PAE */ + +@@ -161,7 +165,7 @@ static int preallocate_pmds(pmd_t *pmds[ + bool failed = false; + + for(i = 0; i < PREALLOCATED_PMDS; i++) { +- pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); ++ pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP); + if (pmd == NULL) + failed = true; + pmds[i] = pmd; +@@ -228,7 +232,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) + pmd_t *pmds[PREALLOCATED_PMDS]; + unsigned long flags; + +- pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); ++ pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); + + if (pgd == NULL) + goto out; +@@ -313,6 +317,24 @@ int ptep_clear_flush_young(struct vm_are + return young; + } + ++/** ++ * reserve_top_address - reserves a hole in the top of kernel address space ++ * @reserve - size of hole to reserve ++ * ++ * Can be used to relocate the fixmap area and poke a hole in the top ++ * of kernel address space to make room for a hypervisor. ++ */ ++void __init reserve_top_address(unsigned long reserve) ++{ ++#ifdef CONFIG_X86_32 ++ BUG_ON(fixmaps_set > 0); ++ printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", ++ (int)-reserve); ++ __FIXADDR_TOP = -reserve - PAGE_SIZE; ++ __VMALLOC_RESERVE += reserve; ++#endif ++} ++ + int fixmaps_set; + + void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) +@@ -327,7 +349,8 @@ void __native_set_fixmap(enum fixed_addr + fixmaps_set++; + } + +-void native_set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t flags) ++void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, ++ pgprot_t flags) + { + __native_set_fixmap(idx, pfn_pte(phys >> PAGE_SHIFT, flags)); + } +Index: linux-2.6-tip/arch/x86/mm/pgtable_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/pgtable_32.c ++++ linux-2.6-tip/arch/x86/mm/pgtable_32.c +@@ -20,6 +20,8 @@ + #include + #include + ++unsigned int __VMALLOC_RESERVE = 128 << 20; ++ + /* + * Associate a virtual page frame with a given physical page frame + * and protection flags for that frame. +@@ -48,7 +50,7 @@ void set_pte_vaddr(unsigned long vaddr, + } + pte = pte_offset_kernel(pmd, vaddr); + if (pte_val(pteval)) +- set_pte_present(&init_mm, vaddr, pte, pteval); ++ set_pte_at(&init_mm, vaddr, pte, pteval); + else + pte_clear(&init_mm, vaddr, pte); + +@@ -97,22 +99,6 @@ void set_pmd_pfn(unsigned long vaddr, un + unsigned long __FIXADDR_TOP = 0xfffff000; + EXPORT_SYMBOL(__FIXADDR_TOP); + +-/** +- * reserve_top_address - reserves a hole in the top of kernel address space +- * @reserve - size of hole to reserve +- * +- * Can be used to relocate the fixmap area and poke a hole in the top +- * of kernel address space to make room for a hypervisor. +- */ +-void __init reserve_top_address(unsigned long reserve) +-{ +- BUG_ON(fixmaps_set > 0); +- printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", +- (int)-reserve); +- __FIXADDR_TOP = -reserve - PAGE_SIZE; +- __VMALLOC_RESERVE += reserve; +-} +- + /* + * vmalloc=size forces the vmalloc area to be exactly 'size' + * bytes. 
This can be used to increase (or decrease) the +Index: linux-2.6-tip/arch/x86/mm/srat_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/mm/srat_64.c ++++ linux-2.6-tip/arch/x86/mm/srat_64.c +@@ -20,7 +20,8 @@ + #include + #include + #include +-#include ++#include ++#include + + int acpi_numa __initdata; + +Index: linux-2.6-tip/arch/x86/mm/tlb.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/mm/tlb.c +@@ -0,0 +1,290 @@ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++ ++DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) ++ = { &init_mm, 0, }; ++ ++/* ++ * Smarter SMP flushing macros. ++ * c/o Linus Torvalds. ++ * ++ * These mean you can really definitely utterly forget about ++ * writing to user space from interrupts. (Its not allowed anyway). ++ * ++ * Optimizations Manfred Spraul ++ * ++ * More scalable flush, from Andi Kleen ++ * ++ * To avoid global state use 8 different call vectors. ++ * Each CPU uses a specific vector to trigger flushes on other ++ * CPUs. Depending on the received vector the target CPUs look into ++ * the right array slot for the flush data. ++ * ++ * With more than 8 CPUs they are hashed to the 8 available ++ * vectors. The limited global vector space forces us to this right now. ++ * In future when interrupts are split into per CPU domains this could be ++ * fixed, at the cost of triggering multiple IPIs in some cases. ++ */ ++ ++union smp_flush_state { ++ struct { ++ struct mm_struct *flush_mm; ++ unsigned long flush_va; ++ DECLARE_BITMAP(flush_cpumask, NR_CPUS); ++ raw_spinlock_t tlbstate_lock; ++ }; ++ char pad[CONFIG_X86_INTERNODE_CACHE_BYTES]; ++} ____cacheline_internodealigned_in_smp; ++ ++/* State is put into the per CPU data section, but padded ++ to a full cache line because other CPUs can access it and we don't ++ want false sharing in the per cpu data segment. */ ++static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS]; ++ ++/* ++ * We cannot call mmdrop() because we are in interrupt context, ++ * instead update mm->cpu_vm_mask. ++ */ ++void leave_mm(int cpu) ++{ ++ if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) ++ BUG(); ++ cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask); ++ load_cr3(swapper_pg_dir); ++} ++EXPORT_SYMBOL_GPL(leave_mm); ++ ++/* ++ * ++ * The flush IPI assumes that a thread switch happens in this order: ++ * [cpu0: the cpu that switches] ++ * 1) switch_mm() either 1a) or 1b) ++ * 1a) thread switch to a different mm ++ * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); ++ * Stop ipi delivery for the old mm. This is not synchronized with ++ * the other cpus, but smp_invalidate_interrupt ignore flush ipis ++ * for the wrong mm, and in the worst case we perform a superfluous ++ * tlb flush. ++ * 1a2) set cpu mmu_state to TLBSTATE_OK ++ * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 ++ * was in lazy tlb mode. ++ * 1a3) update cpu active_mm ++ * Now cpu0 accepts tlb flushes for the new mm. ++ * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask); ++ * Now the other cpus will send tlb flush ipis. ++ * 1a4) change cr3. ++ * 1b) thread switch without mm change ++ * cpu active_mm is correct, cpu0 already handles ++ * flush ipis. ++ * 1b1) set cpu mmu_state to TLBSTATE_OK ++ * 1b2) test_and_set the cpu bit in cpu_vm_mask. ++ * Atomically set the bit [other cpus will start sending flush ipis], ++ * and test the bit. 
++ * 1b3) if the bit was 0: leave_mm was called, flush the tlb. ++ * 2) switch %%esp, ie current ++ * ++ * The interrupt must handle 2 special cases: ++ * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm. ++ * - the cpu performs speculative tlb reads, i.e. even if the cpu only ++ * runs in kernel space, the cpu could load tlb entries for user space ++ * pages. ++ * ++ * The good news is that cpu mmu_state is local to each cpu, no ++ * write/read ordering problems. ++ */ ++ ++/* ++ * TLB flush IPI: ++ * ++ * 1) Flush the tlb entries if the cpu uses the mm that's being flushed. ++ * 2) Leave the mm if we are in the lazy tlb mode. ++ * ++ * Interrupts are disabled. ++ */ ++ ++/* ++ * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop ++ * but still used for documentation purpose but the usage is slightly ++ * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt ++ * entry calls in with the first parameter in %eax. Maybe define ++ * intrlinkage? ++ */ ++#ifdef CONFIG_X86_64 ++asmlinkage ++#endif ++void smp_invalidate_interrupt(struct pt_regs *regs) ++{ ++ unsigned int cpu; ++ unsigned int sender; ++ union smp_flush_state *f; ++ ++ cpu = smp_processor_id(); ++ /* ++ * orig_rax contains the negated interrupt vector. ++ * Use that to determine where the sender put the data. ++ */ ++ sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; ++ f = &flush_state[sender]; ++ ++ if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask))) ++ goto out; ++ /* ++ * This was a BUG() but until someone can quote me the ++ * line from the intel manual that guarantees an IPI to ++ * multiple CPUs is retried _only_ on the erroring CPUs ++ * its staying as a return ++ * ++ * BUG(); ++ */ ++ ++ if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) { ++ if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { ++ if (f->flush_va == TLB_FLUSH_ALL) ++ local_flush_tlb(); ++ else ++ __flush_tlb_one(f->flush_va); ++ } else ++ leave_mm(cpu); ++ } ++out: ++ ack_APIC_irq(); ++ smp_mb__before_clear_bit(); ++ cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); ++ smp_mb__after_clear_bit(); ++ inc_irq_stat(irq_tlb_count); ++} ++ ++static void flush_tlb_others_ipi(const struct cpumask *cpumask, ++ struct mm_struct *mm, unsigned long va) ++{ ++ unsigned int sender; ++ union smp_flush_state *f; ++ ++ /* Caller has disabled preemption */ ++ sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; ++ f = &flush_state[sender]; ++ ++ /* ++ * Could avoid this lock when ++ * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is ++ * probably not worth checking this for a cache-hot lock. ++ */ ++ spin_lock(&f->tlbstate_lock); ++ ++ f->flush_mm = mm; ++ f->flush_va = va; ++ cpumask_andnot(to_cpumask(f->flush_cpumask), ++ cpumask, cpumask_of(smp_processor_id())); ++ ++ /* ++ * We have to send the IPI only to ++ * CPUs affected. 
++ */ ++ apic->send_IPI_mask(to_cpumask(f->flush_cpumask), ++ INVALIDATE_TLB_VECTOR_START + sender); ++ ++ while (!cpumask_empty(to_cpumask(f->flush_cpumask))) ++ cpu_relax(); ++ ++ f->flush_mm = NULL; ++ f->flush_va = 0; ++ spin_unlock(&f->tlbstate_lock); ++} ++ ++void native_flush_tlb_others(const struct cpumask *cpumask, ++ struct mm_struct *mm, unsigned long va) ++{ ++ if (is_uv_system()) { ++ unsigned int cpu; ++ ++ cpu = get_cpu(); ++ cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu); ++ if (cpumask) ++ flush_tlb_others_ipi(cpumask, mm, va); ++ put_cpu(); ++ return; ++ } ++ flush_tlb_others_ipi(cpumask, mm, va); ++} ++ ++static int __cpuinit init_smp_flush(void) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(flush_state); i++) ++ spin_lock_init(&flush_state[i].tlbstate_lock); ++ ++ return 0; ++} ++core_initcall(init_smp_flush); ++ ++void flush_tlb_current_task(void) ++{ ++ struct mm_struct *mm = current->mm; ++ ++ preempt_disable(); ++ ++ local_flush_tlb(); ++ if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) ++ flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); ++ preempt_enable(); ++} ++ ++void flush_tlb_mm(struct mm_struct *mm) ++{ ++ preempt_disable(); ++ ++ if (current->active_mm == mm) { ++ if (current->mm) ++ local_flush_tlb(); ++ else ++ leave_mm(smp_processor_id()); ++ } ++ if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) ++ flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL); ++ ++ preempt_enable(); ++} ++ ++void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) ++{ ++ struct mm_struct *mm = vma->vm_mm; ++ ++ preempt_disable(); ++ ++ if (current->active_mm == mm) { ++ if (current->mm) ++ __flush_tlb_one(va); ++ else ++ leave_mm(smp_processor_id()); ++ } ++ ++ if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids) ++ flush_tlb_others(&mm->cpu_vm_mask, mm, va); ++ ++ preempt_enable(); ++} ++ ++static void do_flush_tlb_all(void *info) ++{ ++ unsigned long cpu = smp_processor_id(); ++ ++ __flush_tlb_all(); ++ if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) ++ leave_mm(cpu); ++} ++ ++void flush_tlb_all(void) ++{ ++ on_each_cpu(do_flush_tlb_all, NULL, 1); ++} +Index: linux-2.6-tip/arch/x86/oprofile/nmi_int.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/oprofile/nmi_int.c ++++ linux-2.6-tip/arch/x86/oprofile/nmi_int.c +@@ -40,8 +40,9 @@ static int profile_exceptions_notify(str + + switch (val) { + case DIE_NMI: +- if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu))) +- ret = NOTIFY_STOP; ++ case DIE_NMI_IPI: ++ model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)); ++ ret = NOTIFY_STOP; + break; + default: + break; +@@ -134,7 +135,7 @@ static void nmi_cpu_setup(void *dummy) + static struct notifier_block profile_exceptions_nb = { + .notifier_call = profile_exceptions_notify, + .next = NULL, +- .priority = 0 ++ .priority = 2 + }; + + static int nmi_setup(void) +Index: linux-2.6-tip/arch/x86/oprofile/op_model_p4.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/oprofile/op_model_p4.c ++++ linux-2.6-tip/arch/x86/oprofile/op_model_p4.c +@@ -380,7 +380,7 @@ static unsigned int get_stagger(void) + { + #ifdef CONFIG_SMP + int cpu = smp_processor_id(); +- return (cpu != first_cpu(per_cpu(cpu_sibling_map, cpu))); ++ return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); + #endif + return 0; + } +Index: linux-2.6-tip/arch/x86/oprofile/op_model_ppro.c 
+=================================================================== +--- linux-2.6-tip.orig/arch/x86/oprofile/op_model_ppro.c ++++ linux-2.6-tip/arch/x86/oprofile/op_model_ppro.c +@@ -18,7 +18,7 @@ + #include + #include + #include +-#include ++#include + + #include "op_x86_model.h" + #include "op_counter.h" +@@ -136,6 +136,13 @@ static int ppro_check_ctrs(struct pt_reg + u64 val; + int i; + ++ /* ++ * This can happen if perf counters are in use when ++ * we steal the die notifier NMI. ++ */ ++ if (unlikely(!reset_value)) ++ goto out; ++ + for (i = 0 ; i < num_counters; ++i) { + if (!reset_value[i]) + continue; +@@ -146,6 +153,7 @@ static int ppro_check_ctrs(struct pt_reg + } + } + ++out: + /* Only P6 based Pentium M need to re-unmask the apic vector but it + * doesn't hurt other P6 variant */ + apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); +Index: linux-2.6-tip/arch/x86/pci/amd_bus.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/pci/amd_bus.c ++++ linux-2.6-tip/arch/x86/pci/amd_bus.c +@@ -277,8 +277,8 @@ static int __init early_fill_mp_bus_info + { + int i; + int j; +- unsigned bus; +- unsigned slot; ++ unsigned uninitialized_var(bus); ++ unsigned uninitialized_var(slot); + int found; + int node; + int link; +Index: linux-2.6-tip/arch/x86/pci/common.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/pci/common.c ++++ linux-2.6-tip/arch/x86/pci/common.c +@@ -81,7 +81,7 @@ int pcibios_scanned; + * This interrupt-safe spinlock protects all accesses to PCI + * configuration space. + */ +-DEFINE_SPINLOCK(pci_config_lock); ++DEFINE_RAW_SPINLOCK(pci_config_lock); + + static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d) + { +@@ -90,7 +90,7 @@ static int __devinit can_skip_ioresource + return 0; + } + +-static struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitdata = { ++static const struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitconst = { + /* + * Systems where PCI IO resource ISA alignment can be skipped + * when the ISA enable bit in the bridge control is not set +@@ -183,7 +183,7 @@ static int __devinit assign_all_busses(c + } + #endif + +-static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { ++static const struct dmi_system_id __devinitconst pciprobe_dmi_table[] = { + #ifdef __i386__ + /* + * Laptops which need pci=assign-busses to see Cardbus cards +Index: linux-2.6-tip/arch/x86/pci/fixup.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/pci/fixup.c ++++ linux-2.6-tip/arch/x86/pci/fixup.c +@@ -356,7 +356,7 @@ static void __devinit pci_fixup_video(st + DECLARE_PCI_FIXUP_FINAL(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); + + +-static struct dmi_system_id __devinitdata msi_k8t_dmi_table[] = { ++static const struct dmi_system_id __devinitconst msi_k8t_dmi_table[] = { + { + .ident = "MSI-K8T-Neo2Fir", + .matches = { +@@ -413,7 +413,7 @@ DECLARE_PCI_FIXUP_RESUME(PCI_VENDOR_ID_V + */ + static u16 toshiba_line_size; + +-static struct dmi_system_id __devinitdata toshiba_ohci1394_dmi_table[] = { ++static const struct dmi_system_id __devinitconst toshiba_ohci1394_dmi_table[] = { + { + .ident = "Toshiba PS5 based laptop", + .matches = { +Index: linux-2.6-tip/arch/x86/pci/numaq_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/pci/numaq_32.c ++++ linux-2.6-tip/arch/x86/pci/numaq_32.c +@@ -5,7 +5,7 @@ + #include + 
#include + #include +-#include ++#include + #include + #include + +@@ -18,10 +18,6 @@ + + #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) + +-/* Where the IO area was mapped on multiquad, always 0 otherwise */ +-void *xquad_portio; +-EXPORT_SYMBOL(xquad_portio); +- + #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) + + #define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \ +Index: linux-2.6-tip/arch/x86/pci/pcbios.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/pci/pcbios.c ++++ linux-2.6-tip/arch/x86/pci/pcbios.c +@@ -7,7 +7,7 @@ + #include + #include + #include +-#include ++#include + + /* BIOS32 signature: "_32_" */ + #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) +Index: linux-2.6-tip/arch/x86/power/hibernate_asm_32.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/power/hibernate_asm_32.S ++++ linux-2.6-tip/arch/x86/power/hibernate_asm_32.S +@@ -8,7 +8,7 @@ + + #include + #include +-#include ++#include + #include + #include + +Index: linux-2.6-tip/arch/x86/power/hibernate_asm_64.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/power/hibernate_asm_64.S ++++ linux-2.6-tip/arch/x86/power/hibernate_asm_64.S +@@ -18,7 +18,7 @@ + .text + #include + #include +-#include ++#include + #include + #include + +Index: linux-2.6-tip/arch/x86/vdso/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/x86/vdso/Makefile ++++ linux-2.6-tip/arch/x86/vdso/Makefile +@@ -38,7 +38,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE + $(call if_changed,objcopy) + + CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ +- $(filter -g%,$(KBUILD_CFLAGS)) ++ $(filter -g%,$(KBUILD_CFLAGS)) $(call cc-option, -fno-stack-protector) + + $(vobjs): KBUILD_CFLAGS += $(CFL) + +Index: linux-2.6-tip/arch/x86/vdso/vma.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/vdso/vma.c ++++ linux-2.6-tip/arch/x86/vdso/vma.c +@@ -85,8 +85,8 @@ static unsigned long vdso_addr(unsigned + unsigned long addr, end; + unsigned offset; + end = (start + PMD_SIZE - 1) & PMD_MASK; +- if (end >= TASK_SIZE64) +- end = TASK_SIZE64; ++ if (end >= TASK_SIZE_MAX) ++ end = TASK_SIZE_MAX; + end -= len; + /* This loses some more bits than a modulo, but is cheaper */ + offset = get_random_int() & (PTRS_PER_PTE - 1); +Index: linux-2.6-tip/arch/x86/xen/Kconfig +=================================================================== +--- linux-2.6-tip.orig/arch/x86/xen/Kconfig ++++ linux-2.6-tip/arch/x86/xen/Kconfig +@@ -6,7 +6,7 @@ config XEN + bool "Xen guest support" + select PARAVIRT + select PARAVIRT_CLOCK +- depends on X86_64 || (X86_32 && X86_PAE && !(X86_VISWS || X86_VOYAGER)) ++ depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS) + depends on X86_CMPXCHG && X86_TSC + help + This is the Linux Xen port. 
Enabling this will allow the +Index: linux-2.6-tip/arch/x86/xen/Makefile +=================================================================== +--- linux-2.6-tip.orig/arch/x86/xen/Makefile ++++ linux-2.6-tip/arch/x86/xen/Makefile +@@ -6,7 +6,8 @@ CFLAGS_REMOVE_irq.o = -pg + endif + + obj-y := enlighten.o setup.o multicalls.o mmu.o irq.o \ +- time.o xen-asm_$(BITS).o grant-table.o suspend.o ++ time.o xen-asm.o xen-asm_$(BITS).o \ ++ grant-table.o suspend.o + + obj-$(CONFIG_SMP) += smp.o spinlock.o + obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o +\ No newline at end of file +Index: linux-2.6-tip/arch/x86/xen/enlighten.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/xen/enlighten.c ++++ linux-2.6-tip/arch/x86/xen/enlighten.c +@@ -61,40 +61,13 @@ DEFINE_PER_CPU(struct vcpu_info, xen_vcp + enum xen_domain_type xen_domain_type = XEN_NATIVE; + EXPORT_SYMBOL_GPL(xen_domain_type); + +-/* +- * Identity map, in addition to plain kernel map. This needs to be +- * large enough to allocate page table pages to allocate the rest. +- * Each page can map 2MB. +- */ +-static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; +- +-#ifdef CONFIG_X86_64 +-/* l3 pud for userspace vsyscall mapping */ +-static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; +-#endif /* CONFIG_X86_64 */ +- +-/* +- * Note about cr3 (pagetable base) values: +- * +- * xen_cr3 contains the current logical cr3 value; it contains the +- * last set cr3. This may not be the current effective cr3, because +- * its update may be being lazily deferred. However, a vcpu looking +- * at its own cr3 can use this value knowing that it everything will +- * be self-consistent. +- * +- * xen_current_cr3 contains the actual vcpu cr3; it is set once the +- * hypercall to set the vcpu cr3 is complete (so it may be a little +- * out of date, but it will never be set early). If one vcpu is +- * looking at another vcpu's cr3 value, it should use this variable. +- */ +-DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ +-DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ +- + struct start_info *xen_start_info; + EXPORT_SYMBOL_GPL(xen_start_info); + + struct shared_info xen_dummy_shared_info; + ++void *xen_initial_gdt; ++ + /* + * Point at some empty memory to start with. We map the real shared_info + * page as soon as fixmap is up and running. 
+@@ -114,14 +87,7 @@ struct shared_info *HYPERVISOR_shared_in + * + * 0: not available, 1: available + */ +-static int have_vcpu_info_placement = +-#ifdef CONFIG_X86_32 +- 1 +-#else +- 0 +-#endif +- ; +- ++static int have_vcpu_info_placement = 1; + + static void xen_vcpu_setup(int cpu) + { +@@ -137,7 +103,7 @@ static void xen_vcpu_setup(int cpu) + + vcpup = &per_cpu(xen_vcpu_info, cpu); + +- info.mfn = virt_to_mfn(vcpup); ++ info.mfn = arbitrary_virt_to_mfn(vcpup); + info.offset = offset_in_page(vcpup); + + printk(KERN_DEBUG "trying to map vcpu_info %d at %p, mfn %llx, offset %d\n", +@@ -237,7 +203,7 @@ static unsigned long xen_get_debugreg(in + return HYPERVISOR_get_debugreg(reg); + } + +-static void xen_leave_lazy(void) ++void xen_leave_lazy(void) + { + paravirt_leave_lazy(paravirt_get_lazy_mode()); + xen_mc_flush(); +@@ -335,8 +301,10 @@ static void xen_load_gdt(const struct de + frames = mcs.args; + + for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { +- frames[f] = virt_to_mfn(va); ++ frames[f] = arbitrary_virt_to_mfn((void *)va); ++ + make_lowmem_page_readonly((void *)va); ++ make_lowmem_page_readonly(mfn_to_virt(frames[f])); + } + + MULTI_set_gdt(mcs.mc, frames, size / sizeof(struct desc_struct)); +@@ -348,7 +316,7 @@ static void load_TLS_descriptor(struct t + unsigned int cpu, unsigned int i) + { + struct desc_struct *gdt = get_cpu_gdt_table(cpu); +- xmaddr_t maddr = virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); ++ xmaddr_t maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]); + struct multicall_space mc = __xen_mc_entry(0); + + MULTI_update_descriptor(mc.mc, maddr.maddr, t->tls_array[i]); +@@ -357,13 +325,14 @@ static void load_TLS_descriptor(struct t + static void xen_load_tls(struct thread_struct *t, unsigned int cpu) + { + /* +- * XXX sleazy hack: If we're being called in a lazy-cpu zone, +- * it means we're in a context switch, and %gs has just been +- * saved. This means we can zero it out to prevent faults on +- * exit from the hypervisor if the next process has no %gs. +- * Either way, it has been saved, and the new value will get +- * loaded properly. This will go away as soon as Xen has been +- * modified to not save/restore %gs for normal hypercalls. ++ * XXX sleazy hack: If we're being called in a lazy-cpu zone ++ * and lazy gs handling is enabled, it means we're in a ++ * context switch, and %gs has just been saved. This means we ++ * can zero it out to prevent faults on exit from the ++ * hypervisor if the next process has no %gs. Either way, it ++ * has been saved, and the new value will get loaded properly. ++ * This will go away as soon as Xen has been modified to not ++ * save/restore %gs for normal hypercalls. 
+ * + * On x86_64, this hack is not used for %gs, because gs points + * to KERNEL_GS_BASE (and uses it for PDA references), so we +@@ -375,7 +344,7 @@ static void xen_load_tls(struct thread_s + */ + if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { + #ifdef CONFIG_X86_32 +- loadsegment(gs, 0); ++ lazy_load_gs(0); + #else + loadsegment(fs, 0); + #endif +@@ -521,7 +490,7 @@ static void xen_write_gdt_entry(struct d + break; + + default: { +- xmaddr_t maddr = virt_to_machine(&dt[entry]); ++ xmaddr_t maddr = arbitrary_virt_to_machine(&dt[entry]); + + xen_mc_flush(); + if (HYPERVISOR_update_descriptor(maddr.maddr, *(u64 *)desc)) +@@ -587,94 +556,18 @@ static u32 xen_safe_apic_wait_icr_idle(v + return 0; + } + +-static struct apic_ops xen_basic_apic_ops = { +- .read = xen_apic_read, +- .write = xen_apic_write, +- .icr_read = xen_apic_icr_read, +- .icr_write = xen_apic_icr_write, +- .wait_icr_idle = xen_apic_wait_icr_idle, +- .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle, +-}; +- +-#endif +- +-static void xen_flush_tlb(void) ++static void set_xen_basic_apic_ops(void) + { +- struct mmuext_op *op; +- struct multicall_space mcs; +- +- preempt_disable(); +- +- mcs = xen_mc_entry(sizeof(*op)); +- +- op = mcs.args; +- op->cmd = MMUEXT_TLB_FLUSH_LOCAL; +- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); +- +- xen_mc_issue(PARAVIRT_LAZY_MMU); +- +- preempt_enable(); +-} +- +-static void xen_flush_tlb_single(unsigned long addr) +-{ +- struct mmuext_op *op; +- struct multicall_space mcs; +- +- preempt_disable(); +- +- mcs = xen_mc_entry(sizeof(*op)); +- op = mcs.args; +- op->cmd = MMUEXT_INVLPG_LOCAL; +- op->arg1.linear_addr = addr & PAGE_MASK; +- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); +- +- xen_mc_issue(PARAVIRT_LAZY_MMU); +- +- preempt_enable(); ++ apic->read = xen_apic_read; ++ apic->write = xen_apic_write; ++ apic->icr_read = xen_apic_icr_read; ++ apic->icr_write = xen_apic_icr_write; ++ apic->wait_icr_idle = xen_apic_wait_icr_idle; ++ apic->safe_wait_icr_idle = xen_safe_apic_wait_icr_idle; + } + +-static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, +- unsigned long va) +-{ +- struct { +- struct mmuext_op op; +- cpumask_t mask; +- } *args; +- cpumask_t cpumask = *cpus; +- struct multicall_space mcs; +- +- /* +- * A couple of (to be removed) sanity checks: +- * +- * - current CPU must not be in mask +- * - mask must exist :) +- */ +- BUG_ON(cpus_empty(cpumask)); +- BUG_ON(cpu_isset(smp_processor_id(), cpumask)); +- BUG_ON(!mm); +- +- /* If a CPU which we ran on has gone down, OK. 
*/ +- cpus_and(cpumask, cpumask, cpu_online_map); +- if (cpus_empty(cpumask)) +- return; +- +- mcs = xen_mc_entry(sizeof(*args)); +- args = mcs.args; +- args->mask = cpumask; +- args->op.arg2.vcpumask = &args->mask; +- +- if (va == TLB_FLUSH_ALL) { +- args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; +- } else { +- args->op.cmd = MMUEXT_INVLPG_MULTI; +- args->op.arg1.linear_addr = va; +- } +- +- MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); ++#endif + +- xen_mc_issue(PARAVIRT_LAZY_MMU); +-} + + static void xen_clts(void) + { +@@ -700,21 +593,6 @@ static void xen_write_cr0(unsigned long + xen_mc_issue(PARAVIRT_LAZY_CPU); + } + +-static void xen_write_cr2(unsigned long cr2) +-{ +- x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; +-} +- +-static unsigned long xen_read_cr2(void) +-{ +- return x86_read_percpu(xen_vcpu)->arch.cr2; +-} +- +-static unsigned long xen_read_cr2_direct(void) +-{ +- return x86_read_percpu(xen_vcpu_info.arch.cr2); +-} +- + static void xen_write_cr4(unsigned long cr4) + { + cr4 &= ~X86_CR4_PGE; +@@ -723,71 +601,6 @@ static void xen_write_cr4(unsigned long + native_write_cr4(cr4); + } + +-static unsigned long xen_read_cr3(void) +-{ +- return x86_read_percpu(xen_cr3); +-} +- +-static void set_current_cr3(void *v) +-{ +- x86_write_percpu(xen_current_cr3, (unsigned long)v); +-} +- +-static void __xen_write_cr3(bool kernel, unsigned long cr3) +-{ +- struct mmuext_op *op; +- struct multicall_space mcs; +- unsigned long mfn; +- +- if (cr3) +- mfn = pfn_to_mfn(PFN_DOWN(cr3)); +- else +- mfn = 0; +- +- WARN_ON(mfn == 0 && kernel); +- +- mcs = __xen_mc_entry(sizeof(*op)); +- +- op = mcs.args; +- op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; +- op->arg1.mfn = mfn; +- +- MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); +- +- if (kernel) { +- x86_write_percpu(xen_cr3, cr3); +- +- /* Update xen_current_cr3 once the batch has actually +- been submitted. */ +- xen_mc_callback(set_current_cr3, (void *)cr3); +- } +-} +- +-static void xen_write_cr3(unsigned long cr3) +-{ +- BUG_ON(preemptible()); +- +- xen_mc_batch(); /* disables interrupts */ +- +- /* Update while interrupts are disabled, so its atomic with +- respect to ipis */ +- x86_write_percpu(xen_cr3, cr3); +- +- __xen_write_cr3(true, cr3); +- +-#ifdef CONFIG_X86_64 +- { +- pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); +- if (user_pgd) +- __xen_write_cr3(false, __pa(user_pgd)); +- else +- __xen_write_cr3(false, 0); +- } +-#endif +- +- xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ +-} +- + static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) + { + int ret; +@@ -829,185 +642,6 @@ static int xen_write_msr_safe(unsigned i + return ret; + } + +-/* Early in boot, while setting up the initial pagetable, assume +- everything is pinned. */ +-static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) +-{ +-#ifdef CONFIG_FLATMEM +- BUG_ON(mem_map); /* should only be used early */ +-#endif +- make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); +-} +- +-/* Early release_pte assumes that all pts are pinned, since there's +- only init_mm and anything attached to that is pinned. 
*/ +-static void xen_release_pte_init(unsigned long pfn) +-{ +- make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); +-} +- +-static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) +-{ +- struct mmuext_op op; +- op.cmd = cmd; +- op.arg1.mfn = pfn_to_mfn(pfn); +- if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) +- BUG(); +-} +- +-/* This needs to make sure the new pte page is pinned iff its being +- attached to a pinned pagetable. */ +-static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) +-{ +- struct page *page = pfn_to_page(pfn); +- +- if (PagePinned(virt_to_page(mm->pgd))) { +- SetPagePinned(page); +- +- vm_unmap_aliases(); +- if (!PageHighMem(page)) { +- make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); +- if (level == PT_PTE && USE_SPLIT_PTLOCKS) +- pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); +- } else { +- /* make sure there are no stray mappings of +- this page */ +- kmap_flush_unused(); +- } +- } +-} +- +-static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn) +-{ +- xen_alloc_ptpage(mm, pfn, PT_PTE); +-} +- +-static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) +-{ +- xen_alloc_ptpage(mm, pfn, PT_PMD); +-} +- +-static int xen_pgd_alloc(struct mm_struct *mm) +-{ +- pgd_t *pgd = mm->pgd; +- int ret = 0; +- +- BUG_ON(PagePinned(virt_to_page(pgd))); +- +-#ifdef CONFIG_X86_64 +- { +- struct page *page = virt_to_page(pgd); +- pgd_t *user_pgd; +- +- BUG_ON(page->private != 0); +- +- ret = -ENOMEM; +- +- user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); +- page->private = (unsigned long)user_pgd; +- +- if (user_pgd != NULL) { +- user_pgd[pgd_index(VSYSCALL_START)] = +- __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); +- ret = 0; +- } +- +- BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); +- } +-#endif +- +- return ret; +-} +- +-static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) +-{ +-#ifdef CONFIG_X86_64 +- pgd_t *user_pgd = xen_get_user_pgd(pgd); +- +- if (user_pgd) +- free_page((unsigned long)user_pgd); +-#endif +-} +- +-/* This should never happen until we're OK to use struct page */ +-static void xen_release_ptpage(unsigned long pfn, unsigned level) +-{ +- struct page *page = pfn_to_page(pfn); +- +- if (PagePinned(page)) { +- if (!PageHighMem(page)) { +- if (level == PT_PTE && USE_SPLIT_PTLOCKS) +- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); +- make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); +- } +- ClearPagePinned(page); +- } +-} +- +-static void xen_release_pte(unsigned long pfn) +-{ +- xen_release_ptpage(pfn, PT_PTE); +-} +- +-static void xen_release_pmd(unsigned long pfn) +-{ +- xen_release_ptpage(pfn, PT_PMD); +-} +- +-#if PAGETABLE_LEVELS == 4 +-static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) +-{ +- xen_alloc_ptpage(mm, pfn, PT_PUD); +-} +- +-static void xen_release_pud(unsigned long pfn) +-{ +- xen_release_ptpage(pfn, PT_PUD); +-} +-#endif +- +-#ifdef CONFIG_HIGHPTE +-static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) +-{ +- pgprot_t prot = PAGE_KERNEL; +- +- if (PagePinned(page)) +- prot = PAGE_KERNEL_RO; +- +- if (0 && PageHighMem(page)) +- printk("mapping highpte %lx type %d prot %s\n", +- page_to_pfn(page), type, +- (unsigned long)pgprot_val(prot) & _PAGE_RW ? 
"WRITE" : "READ"); +- +- return kmap_atomic_prot(page, type, prot); +-} +-#endif +- +-#ifdef CONFIG_X86_32 +-static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) +-{ +- /* If there's an existing pte, then don't allow _PAGE_RW to be set */ +- if (pte_val_ma(*ptep) & _PAGE_PRESENT) +- pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & +- pte_val_ma(pte)); +- +- return pte; +-} +- +-/* Init-time set_pte while constructing initial pagetables, which +- doesn't allow RO pagetable pages to be remapped RW */ +-static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) +-{ +- pte = mask_rw_pte(ptep, pte); +- +- xen_set_pte(ptep, pte); +-} +-#endif +- +-static __init void xen_pagetable_setup_start(pgd_t *base) +-{ +-} +- + void xen_setup_shared_info(void) + { + if (!xen_feature(XENFEAT_auto_translated_physmap)) { +@@ -1028,37 +662,6 @@ void xen_setup_shared_info(void) + xen_setup_mfn_list_list(); + } + +-static __init void xen_pagetable_setup_done(pgd_t *base) +-{ +- xen_setup_shared_info(); +-} +- +-static __init void xen_post_allocator_init(void) +-{ +- pv_mmu_ops.set_pte = xen_set_pte; +- pv_mmu_ops.set_pmd = xen_set_pmd; +- pv_mmu_ops.set_pud = xen_set_pud; +-#if PAGETABLE_LEVELS == 4 +- pv_mmu_ops.set_pgd = xen_set_pgd; +-#endif +- +- /* This will work as long as patching hasn't happened yet +- (which it hasn't) */ +- pv_mmu_ops.alloc_pte = xen_alloc_pte; +- pv_mmu_ops.alloc_pmd = xen_alloc_pmd; +- pv_mmu_ops.release_pte = xen_release_pte; +- pv_mmu_ops.release_pmd = xen_release_pmd; +-#if PAGETABLE_LEVELS == 4 +- pv_mmu_ops.alloc_pud = xen_alloc_pud; +- pv_mmu_ops.release_pud = xen_release_pud; +-#endif +- +-#ifdef CONFIG_X86_64 +- SetPagePinned(virt_to_page(level3_user_vsyscall)); +-#endif +- xen_mark_init_mm_pinned(); +-} +- + /* This is called once we have the cpu_possible_map */ + void xen_setup_vcpu_info_placement(void) + { +@@ -1072,10 +675,10 @@ void xen_setup_vcpu_info_placement(void) + if (have_vcpu_info_placement) { + printk(KERN_INFO "Xen: using vcpu_info placement\n"); + +- pv_irq_ops.save_fl = xen_save_fl_direct; +- pv_irq_ops.restore_fl = xen_restore_fl_direct; +- pv_irq_ops.irq_disable = xen_irq_disable_direct; +- pv_irq_ops.irq_enable = xen_irq_enable_direct; ++ pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); ++ pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); ++ pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); ++ pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(xen_irq_enable_direct); + pv_mmu_ops.read_cr2 = xen_read_cr2_direct; + } + } +@@ -1133,49 +736,6 @@ static unsigned xen_patch(u8 type, u16 c + return ret; + } + +-static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) +-{ +- pte_t pte; +- +- phys >>= PAGE_SHIFT; +- +- switch (idx) { +- case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: +-#ifdef CONFIG_X86_F00F_BUG +- case FIX_F00F_IDT: +-#endif +-#ifdef CONFIG_X86_32 +- case FIX_WP_TEST: +- case FIX_VDSO: +-# ifdef CONFIG_HIGHMEM +- case FIX_KMAP_BEGIN ... FIX_KMAP_END: +-# endif +-#else +- case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: +-#endif +-#ifdef CONFIG_X86_LOCAL_APIC +- case FIX_APIC_BASE: /* maps dummy local APIC */ +-#endif +- pte = pfn_pte(phys, prot); +- break; +- +- default: +- pte = mfn_pte(phys, prot); +- break; +- } +- +- __native_set_fixmap(idx, pte); +- +-#ifdef CONFIG_X86_64 +- /* Replicate changes to map the vsyscall page into the user +- pagetable vsyscall mapping. 
*/ +- if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { +- unsigned long vaddr = __fix_to_virt(idx); +- set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); +- } +-#endif +-} +- + static const struct pv_info xen_info __initdata = { + .paravirt_enabled = 1, + .shared_kernel_pmd = 0, +@@ -1271,87 +831,6 @@ static const struct pv_apic_ops xen_apic + #endif + }; + +-static const struct pv_mmu_ops xen_mmu_ops __initdata = { +- .pagetable_setup_start = xen_pagetable_setup_start, +- .pagetable_setup_done = xen_pagetable_setup_done, +- +- .read_cr2 = xen_read_cr2, +- .write_cr2 = xen_write_cr2, +- +- .read_cr3 = xen_read_cr3, +- .write_cr3 = xen_write_cr3, +- +- .flush_tlb_user = xen_flush_tlb, +- .flush_tlb_kernel = xen_flush_tlb, +- .flush_tlb_single = xen_flush_tlb_single, +- .flush_tlb_others = xen_flush_tlb_others, +- +- .pte_update = paravirt_nop, +- .pte_update_defer = paravirt_nop, +- +- .pgd_alloc = xen_pgd_alloc, +- .pgd_free = xen_pgd_free, +- +- .alloc_pte = xen_alloc_pte_init, +- .release_pte = xen_release_pte_init, +- .alloc_pmd = xen_alloc_pte_init, +- .alloc_pmd_clone = paravirt_nop, +- .release_pmd = xen_release_pte_init, +- +-#ifdef CONFIG_HIGHPTE +- .kmap_atomic_pte = xen_kmap_atomic_pte, +-#endif +- +-#ifdef CONFIG_X86_64 +- .set_pte = xen_set_pte, +-#else +- .set_pte = xen_set_pte_init, +-#endif +- .set_pte_at = xen_set_pte_at, +- .set_pmd = xen_set_pmd_hyper, +- +- .ptep_modify_prot_start = __ptep_modify_prot_start, +- .ptep_modify_prot_commit = __ptep_modify_prot_commit, +- +- .pte_val = xen_pte_val, +- .pte_flags = native_pte_flags, +- .pgd_val = xen_pgd_val, +- +- .make_pte = xen_make_pte, +- .make_pgd = xen_make_pgd, +- +-#ifdef CONFIG_X86_PAE +- .set_pte_atomic = xen_set_pte_atomic, +- .set_pte_present = xen_set_pte_at, +- .pte_clear = xen_pte_clear, +- .pmd_clear = xen_pmd_clear, +-#endif /* CONFIG_X86_PAE */ +- .set_pud = xen_set_pud_hyper, +- +- .make_pmd = xen_make_pmd, +- .pmd_val = xen_pmd_val, +- +-#if PAGETABLE_LEVELS == 4 +- .pud_val = xen_pud_val, +- .make_pud = xen_make_pud, +- .set_pgd = xen_set_pgd_hyper, +- +- .alloc_pud = xen_alloc_pte_init, +- .release_pud = xen_release_pte_init, +-#endif /* PAGETABLE_LEVELS == 4 */ +- +- .activate_mm = xen_activate_mm, +- .dup_mmap = xen_dup_mmap, +- .exit_mmap = xen_exit_mmap, +- +- .lazy_mode = { +- .enter = paravirt_enter_lazy_mmu, +- .leave = xen_leave_lazy, +- }, +- +- .set_fixmap = xen_set_fixmap, +-}; +- + static void xen_reboot(int reason) + { + struct sched_shutdown r = { .reason = reason }; +@@ -1394,223 +873,6 @@ static const struct machine_ops __initda + }; + + +-static void __init xen_reserve_top(void) +-{ +-#ifdef CONFIG_X86_32 +- unsigned long top = HYPERVISOR_VIRT_START; +- struct xen_platform_parameters pp; +- +- if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) +- top = pp.virt_start; +- +- reserve_top_address(-top); +-#endif /* CONFIG_X86_32 */ +-} +- +-/* +- * Like __va(), but returns address in the kernel mapping (which is +- * all we have until the physical memory mapping has been set up. 
+- */ +-static void *__ka(phys_addr_t paddr) +-{ +-#ifdef CONFIG_X86_64 +- return (void *)(paddr + __START_KERNEL_map); +-#else +- return __va(paddr); +-#endif +-} +- +-/* Convert a machine address to physical address */ +-static unsigned long m2p(phys_addr_t maddr) +-{ +- phys_addr_t paddr; +- +- maddr &= PTE_PFN_MASK; +- paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; +- +- return paddr; +-} +- +-/* Convert a machine address to kernel virtual */ +-static void *m2v(phys_addr_t maddr) +-{ +- return __ka(m2p(maddr)); +-} +- +-static void set_page_prot(void *addr, pgprot_t prot) +-{ +- unsigned long pfn = __pa(addr) >> PAGE_SHIFT; +- pte_t pte = pfn_pte(pfn, prot); +- +- if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) +- BUG(); +-} +- +-static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) +-{ +- unsigned pmdidx, pteidx; +- unsigned ident_pte; +- unsigned long pfn; +- +- ident_pte = 0; +- pfn = 0; +- for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { +- pte_t *pte_page; +- +- /* Reuse or allocate a page of ptes */ +- if (pmd_present(pmd[pmdidx])) +- pte_page = m2v(pmd[pmdidx].pmd); +- else { +- /* Check for free pte pages */ +- if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) +- break; +- +- pte_page = &level1_ident_pgt[ident_pte]; +- ident_pte += PTRS_PER_PTE; +- +- pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); +- } +- +- /* Install mappings */ +- for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { +- pte_t pte; +- +- if (pfn > max_pfn_mapped) +- max_pfn_mapped = pfn; +- +- if (!pte_none(pte_page[pteidx])) +- continue; +- +- pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); +- pte_page[pteidx] = pte; +- } +- } +- +- for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) +- set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); +- +- set_page_prot(pmd, PAGE_KERNEL_RO); +-} +- +-#ifdef CONFIG_X86_64 +-static void convert_pfn_mfn(void *v) +-{ +- pte_t *pte = v; +- int i; +- +- /* All levels are converted the same way, so just treat them +- as ptes. */ +- for (i = 0; i < PTRS_PER_PTE; i++) +- pte[i] = xen_make_pte(pte[i].pte); +-} +- +-/* +- * Set up the inital kernel pagetable. +- * +- * We can construct this by grafting the Xen provided pagetable into +- * head_64.S's preconstructed pagetables. We copy the Xen L2's into +- * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This +- * means that only the kernel has a physical mapping to start with - +- * but that's enough to get __va working. We need to fill in the rest +- * of the physical mapping once some sort of allocator has been set +- * up. 
+- */ +-static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, +- unsigned long max_pfn) +-{ +- pud_t *l3; +- pmd_t *l2; +- +- /* Zap identity mapping */ +- init_level4_pgt[0] = __pgd(0); +- +- /* Pre-constructed entries are in pfn, so convert to mfn */ +- convert_pfn_mfn(init_level4_pgt); +- convert_pfn_mfn(level3_ident_pgt); +- convert_pfn_mfn(level3_kernel_pgt); +- +- l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); +- l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); +- +- memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); +- memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); +- +- l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); +- l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); +- memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); +- +- /* Set up identity map */ +- xen_map_identity_early(level2_ident_pgt, max_pfn); +- +- /* Make pagetable pieces RO */ +- set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); +- set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); +- set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); +- set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); +- set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); +- set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); +- +- /* Pin down new L4 */ +- pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, +- PFN_DOWN(__pa_symbol(init_level4_pgt))); +- +- /* Unpin Xen-provided one */ +- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); +- +- /* Switch over */ +- pgd = init_level4_pgt; +- +- /* +- * At this stage there can be no user pgd, and no page +- * structure to attach it to, so make sure we just set kernel +- * pgd. +- */ +- xen_mc_batch(); +- __xen_write_cr3(true, __pa(pgd)); +- xen_mc_issue(PARAVIRT_LAZY_CPU); +- +- reserve_early(__pa(xen_start_info->pt_base), +- __pa(xen_start_info->pt_base + +- xen_start_info->nr_pt_frames * PAGE_SIZE), +- "XEN PAGETABLES"); +- +- return pgd; +-} +-#else /* !CONFIG_X86_64 */ +-static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; +- +-static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, +- unsigned long max_pfn) +-{ +- pmd_t *kernel_pmd; +- +- init_pg_tables_start = __pa(pgd); +- init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; +- max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); +- +- kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); +- memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); +- +- xen_map_identity_early(level2_kernel_pgt, max_pfn); +- +- memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); +- set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], +- __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); +- +- set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); +- set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); +- set_page_prot(empty_zero_page, PAGE_KERNEL_RO); +- +- pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); +- +- xen_write_cr3(__pa(swapper_pg_dir)); +- +- pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); +- +- return swapper_pg_dir; +-} +-#endif /* CONFIG_X86_64 */ +- + /* First C function to be called on Xen boot */ + asmlinkage void __init xen_start_kernel(void) + { +@@ -1639,7 +901,7 @@ asmlinkage void __init xen_start_kernel( + /* + * set up the basic apic ops. 
+ */ +- apic_ops = &xen_basic_apic_ops; ++ set_xen_basic_apic_ops(); + #endif + + if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { +@@ -1650,10 +912,18 @@ asmlinkage void __init xen_start_kernel( + machine_ops = xen_machine_ops; + + #ifdef CONFIG_X86_64 +- /* Disable until direct per-cpu data access. */ +- have_vcpu_info_placement = 0; +- x86_64_init_pda(); ++ /* ++ * Setup percpu state. We only need to do this for 64-bit ++ * because 32-bit already has %fs set properly. ++ */ ++ load_percpu_segment(0); + #endif ++ /* ++ * The only reliable way to retain the initial address of the ++ * percpu gdt_page is to remember it here, so we can go and ++ * mark it RW later, when the initial percpu area is freed. ++ */ ++ xen_initial_gdt = &per_cpu(gdt_page, 0); + + xen_smp_init(); + +Index: linux-2.6-tip/arch/x86/xen/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/xen/irq.c ++++ linux-2.6-tip/arch/x86/xen/irq.c +@@ -19,27 +19,12 @@ void xen_force_evtchn_callback(void) + (void)HYPERVISOR_xen_version(0, NULL); + } + +-static void __init __xen_init_IRQ(void) +-{ +- int i; +- +- /* Create identity vector->irq map */ +- for(i = 0; i < NR_VECTORS; i++) { +- int cpu; +- +- for_each_possible_cpu(cpu) +- per_cpu(vector_irq, cpu)[i] = i; +- } +- +- xen_init_IRQ(); +-} +- + static unsigned long xen_save_fl(void) + { + struct vcpu_info *vcpu; + unsigned long flags; + +- vcpu = x86_read_percpu(xen_vcpu); ++ vcpu = percpu_read(xen_vcpu); + + /* flag has opposite sense of mask */ + flags = !vcpu->evtchn_upcall_mask; +@@ -50,6 +35,7 @@ static unsigned long xen_save_fl(void) + */ + return (-flags) & X86_EFLAGS_IF; + } ++PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl); + + static void xen_restore_fl(unsigned long flags) + { +@@ -62,7 +48,7 @@ static void xen_restore_fl(unsigned long + make sure we're don't switch CPUs between getting the vcpu + pointer and updating the mask. */ + preempt_disable(); +- vcpu = x86_read_percpu(xen_vcpu); ++ vcpu = percpu_read(xen_vcpu); + vcpu->evtchn_upcall_mask = flags; + preempt_enable_no_resched(); + +@@ -76,6 +62,7 @@ static void xen_restore_fl(unsigned long + xen_force_evtchn_callback(); + } + } ++PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); + + static void xen_irq_disable(void) + { +@@ -83,9 +70,10 @@ static void xen_irq_disable(void) + make sure we're don't switch CPUs between getting the vcpu + pointer and updating the mask. */ + preempt_disable(); +- x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; ++ percpu_read(xen_vcpu)->evtchn_upcall_mask = 1; + preempt_enable_no_resched(); + } ++PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); + + static void xen_irq_enable(void) + { +@@ -96,7 +84,7 @@ static void xen_irq_enable(void) + the caller is confused and is trying to re-enable interrupts + on an indeterminate processor. 
*/ + +- vcpu = x86_read_percpu(xen_vcpu); ++ vcpu = percpu_read(xen_vcpu); + vcpu->evtchn_upcall_mask = 0; + + /* Doesn't matter if we get preempted here, because any +@@ -106,6 +94,7 @@ static void xen_irq_enable(void) + if (unlikely(vcpu->evtchn_upcall_pending)) + xen_force_evtchn_callback(); + } ++PV_CALLEE_SAVE_REGS_THUNK(xen_irq_enable); + + static void xen_safe_halt(void) + { +@@ -123,11 +112,13 @@ static void xen_halt(void) + } + + static const struct pv_irq_ops xen_irq_ops __initdata = { +- .init_IRQ = __xen_init_IRQ, +- .save_fl = xen_save_fl, +- .restore_fl = xen_restore_fl, +- .irq_disable = xen_irq_disable, +- .irq_enable = xen_irq_enable, ++ .init_IRQ = xen_init_IRQ, ++ ++ .save_fl = PV_CALLEE_SAVE(xen_save_fl), ++ .restore_fl = PV_CALLEE_SAVE(xen_restore_fl), ++ .irq_disable = PV_CALLEE_SAVE(xen_irq_disable), ++ .irq_enable = PV_CALLEE_SAVE(xen_irq_enable), ++ + .safe_halt = xen_safe_halt, + .halt = xen_halt, + #ifdef CONFIG_X86_64 +Index: linux-2.6-tip/arch/x86/xen/mmu.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/xen/mmu.c ++++ linux-2.6-tip/arch/x86/xen/mmu.c +@@ -47,6 +47,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -55,6 +56,8 @@ + + #include + #include ++#include ++#include + + #include "multicalls.h" + #include "mmu.h" +@@ -114,6 +117,37 @@ static inline void check_zero(void) + + #endif /* CONFIG_XEN_DEBUG_FS */ + ++ ++/* ++ * Identity map, in addition to plain kernel map. This needs to be ++ * large enough to allocate page table pages to allocate the rest. ++ * Each page can map 2MB. ++ */ ++static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; ++ ++#ifdef CONFIG_X86_64 ++/* l3 pud for userspace vsyscall mapping */ ++static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; ++#endif /* CONFIG_X86_64 */ ++ ++/* ++ * Note about cr3 (pagetable base) values: ++ * ++ * xen_cr3 contains the current logical cr3 value; it contains the ++ * last set cr3. This may not be the current effective cr3, because ++ * its update may be being lazily deferred. However, a vcpu looking ++ * at its own cr3 can use this value knowing that it everything will ++ * be self-consistent. ++ * ++ * xen_current_cr3 contains the actual vcpu cr3; it is set once the ++ * hypercall to set the vcpu cr3 is complete (so it may be a little ++ * out of date, but it will never be set early). If one vcpu is ++ * looking at another vcpu's cr3 value, it should use this variable. ++ */ ++DEFINE_PER_CPU(unsigned long, xen_cr3); /* cr3 stored as physaddr */ ++DEFINE_PER_CPU(unsigned long, xen_current_cr3); /* actual vcpu cr3 */ ++ ++ + /* + * Just beyond the highest usermode address. STACK_TOP_MAX has a + * redzone above it, so round it up to a PGD boundary. 
+@@ -242,6 +276,13 @@ void set_phys_to_machine(unsigned long p + p2m_top[topidx][idx] = mfn; + } + ++unsigned long arbitrary_virt_to_mfn(void *vaddr) ++{ ++ xmaddr_t maddr = arbitrary_virt_to_machine(vaddr); ++ ++ return PFN_DOWN(maddr.maddr); ++} ++ + xmaddr_t arbitrary_virt_to_machine(void *vaddr) + { + unsigned long address = (unsigned long)vaddr; +@@ -458,28 +499,33 @@ pteval_t xen_pte_val(pte_t pte) + { + return pte_mfn_to_pfn(pte.pte); + } ++PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); + + pgdval_t xen_pgd_val(pgd_t pgd) + { + return pte_mfn_to_pfn(pgd.pgd); + } ++PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val); + + pte_t xen_make_pte(pteval_t pte) + { + pte = pte_pfn_to_mfn(pte); + return native_make_pte(pte); + } ++PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); + + pgd_t xen_make_pgd(pgdval_t pgd) + { + pgd = pte_pfn_to_mfn(pgd); + return native_make_pgd(pgd); + } ++PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); + + pmdval_t xen_pmd_val(pmd_t pmd) + { + return pte_mfn_to_pfn(pmd.pmd); + } ++PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val); + + void xen_set_pud_hyper(pud_t *ptr, pud_t val) + { +@@ -556,12 +602,14 @@ pmd_t xen_make_pmd(pmdval_t pmd) + pmd = pte_pfn_to_mfn(pmd); + return native_make_pmd(pmd); + } ++PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); + + #if PAGETABLE_LEVELS == 4 + pudval_t xen_pud_val(pud_t pud) + { + return pte_mfn_to_pfn(pud.pud); + } ++PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); + + pud_t xen_make_pud(pudval_t pud) + { +@@ -569,6 +617,7 @@ pud_t xen_make_pud(pudval_t pud) + + return native_make_pud(pud); + } ++PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud); + + pgd_t *xen_get_user_pgd(pgd_t *pgd) + { +@@ -1063,18 +1112,14 @@ static void drop_other_mm_ref(void *info + struct mm_struct *mm = info; + struct mm_struct *active_mm; + +-#ifdef CONFIG_X86_64 +- active_mm = read_pda(active_mm); +-#else +- active_mm = __get_cpu_var(cpu_tlbstate).active_mm; +-#endif ++ active_mm = percpu_read(cpu_tlbstate.active_mm); + + if (active_mm == mm) + leave_mm(smp_processor_id()); + + /* If this cpu still has a stale cr3 reference, then make sure + it has been flushed. 
*/ +- if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) { ++ if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) { + load_cr3(swapper_pg_dir); + arch_flush_lazy_cpu_mode(); + } +@@ -1156,6 +1201,705 @@ void xen_exit_mmap(struct mm_struct *mm) + spin_unlock(&mm->page_table_lock); + } + ++static __init void xen_pagetable_setup_start(pgd_t *base) ++{ ++} ++ ++static __init void xen_pagetable_setup_done(pgd_t *base) ++{ ++ xen_setup_shared_info(); ++} ++ ++static void xen_write_cr2(unsigned long cr2) ++{ ++ percpu_read(xen_vcpu)->arch.cr2 = cr2; ++} ++ ++static unsigned long xen_read_cr2(void) ++{ ++ return percpu_read(xen_vcpu)->arch.cr2; ++} ++ ++unsigned long xen_read_cr2_direct(void) ++{ ++ return percpu_read(xen_vcpu_info.arch.cr2); ++} ++ ++static void xen_flush_tlb(void) ++{ ++ struct mmuext_op *op; ++ struct multicall_space mcs; ++ ++ preempt_disable(); ++ ++ mcs = xen_mc_entry(sizeof(*op)); ++ ++ op = mcs.args; ++ op->cmd = MMUEXT_TLB_FLUSH_LOCAL; ++ MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); ++ ++ xen_mc_issue(PARAVIRT_LAZY_MMU); ++ ++ preempt_enable(); ++} ++ ++static void xen_flush_tlb_single(unsigned long addr) ++{ ++ struct mmuext_op *op; ++ struct multicall_space mcs; ++ ++ preempt_disable(); ++ ++ mcs = xen_mc_entry(sizeof(*op)); ++ op = mcs.args; ++ op->cmd = MMUEXT_INVLPG_LOCAL; ++ op->arg1.linear_addr = addr & PAGE_MASK; ++ MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); ++ ++ xen_mc_issue(PARAVIRT_LAZY_MMU); ++ ++ preempt_enable(); ++} ++ ++static void xen_flush_tlb_others(const struct cpumask *cpus, ++ struct mm_struct *mm, unsigned long va) ++{ ++ struct { ++ struct mmuext_op op; ++ DECLARE_BITMAP(mask, NR_CPUS); ++ } *args; ++ struct multicall_space mcs; ++ ++ BUG_ON(cpumask_empty(cpus)); ++ BUG_ON(!mm); ++ ++ mcs = xen_mc_entry(sizeof(*args)); ++ args = mcs.args; ++ args->op.arg2.vcpumask = to_cpumask(args->mask); ++ ++ /* Remove us, and any offline CPUS. */ ++ cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask); ++ cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask)); ++ ++ if (va == TLB_FLUSH_ALL) { ++ args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; ++ } else { ++ args->op.cmd = MMUEXT_INVLPG_MULTI; ++ args->op.arg1.linear_addr = va; ++ } ++ ++ MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); ++ ++ xen_mc_issue(PARAVIRT_LAZY_MMU); ++} ++ ++static unsigned long xen_read_cr3(void) ++{ ++ return percpu_read(xen_cr3); ++} ++ ++static void set_current_cr3(void *v) ++{ ++ percpu_write(xen_current_cr3, (unsigned long)v); ++} ++ ++static void __xen_write_cr3(bool kernel, unsigned long cr3) ++{ ++ struct mmuext_op *op; ++ struct multicall_space mcs; ++ unsigned long mfn; ++ ++ if (cr3) ++ mfn = pfn_to_mfn(PFN_DOWN(cr3)); ++ else ++ mfn = 0; ++ ++ WARN_ON(mfn == 0 && kernel); ++ ++ mcs = __xen_mc_entry(sizeof(*op)); ++ ++ op = mcs.args; ++ op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; ++ op->arg1.mfn = mfn; ++ ++ MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); ++ ++ if (kernel) { ++ percpu_write(xen_cr3, cr3); ++ ++ /* Update xen_current_cr3 once the batch has actually ++ been submitted. 
*/ ++ xen_mc_callback(set_current_cr3, (void *)cr3); ++ } ++} ++ ++static void xen_write_cr3(unsigned long cr3) ++{ ++ BUG_ON(preemptible()); ++ ++ xen_mc_batch(); /* disables interrupts */ ++ ++ /* Update while interrupts are disabled, so its atomic with ++ respect to ipis */ ++ percpu_write(xen_cr3, cr3); ++ ++ __xen_write_cr3(true, cr3); ++ ++#ifdef CONFIG_X86_64 ++ { ++ pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); ++ if (user_pgd) ++ __xen_write_cr3(false, __pa(user_pgd)); ++ else ++ __xen_write_cr3(false, 0); ++ } ++#endif ++ ++ xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ ++} ++ ++static int xen_pgd_alloc(struct mm_struct *mm) ++{ ++ pgd_t *pgd = mm->pgd; ++ int ret = 0; ++ ++ BUG_ON(PagePinned(virt_to_page(pgd))); ++ ++#ifdef CONFIG_X86_64 ++ { ++ struct page *page = virt_to_page(pgd); ++ pgd_t *user_pgd; ++ ++ BUG_ON(page->private != 0); ++ ++ ret = -ENOMEM; ++ ++ user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); ++ page->private = (unsigned long)user_pgd; ++ ++ if (user_pgd != NULL) { ++ user_pgd[pgd_index(VSYSCALL_START)] = ++ __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); ++ ret = 0; ++ } ++ ++ BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); ++ } ++#endif ++ ++ return ret; ++} ++ ++static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) ++{ ++#ifdef CONFIG_X86_64 ++ pgd_t *user_pgd = xen_get_user_pgd(pgd); ++ ++ if (user_pgd) ++ free_page((unsigned long)user_pgd); ++#endif ++} ++ ++#ifdef CONFIG_HIGHPTE ++static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) ++{ ++ pgprot_t prot = PAGE_KERNEL; ++ ++ if (PagePinned(page)) ++ prot = PAGE_KERNEL_RO; ++ ++ if (0 && PageHighMem(page)) ++ printk("mapping highpte %lx type %d prot %s\n", ++ page_to_pfn(page), type, ++ (unsigned long)pgprot_val(prot) & _PAGE_RW ? "WRITE" : "READ"); ++ ++ return kmap_atomic_prot(page, type, prot); ++} ++#endif ++ ++#ifdef CONFIG_X86_32 ++static __init pte_t mask_rw_pte(pte_t *ptep, pte_t pte) ++{ ++ /* If there's an existing pte, then don't allow _PAGE_RW to be set */ ++ if (pte_val_ma(*ptep) & _PAGE_PRESENT) ++ pte = __pte_ma(((pte_val_ma(*ptep) & _PAGE_RW) | ~_PAGE_RW) & ++ pte_val_ma(pte)); ++ ++ return pte; ++} ++ ++/* Init-time set_pte while constructing initial pagetables, which ++ doesn't allow RO pagetable pages to be remapped RW */ ++static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) ++{ ++ pte = mask_rw_pte(ptep, pte); ++ ++ xen_set_pte(ptep, pte); ++} ++#endif ++ ++/* Early in boot, while setting up the initial pagetable, assume ++ everything is pinned. */ ++static __init void xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn) ++{ ++#ifdef CONFIG_FLATMEM ++ BUG_ON(mem_map); /* should only be used early */ ++#endif ++ make_lowmem_page_readonly(__va(PFN_PHYS(pfn))); ++} ++ ++/* Early release_pte assumes that all pts are pinned, since there's ++ only init_mm and anything attached to that is pinned. */ ++static void xen_release_pte_init(unsigned long pfn) ++{ ++ make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); ++} ++ ++static void pin_pagetable_pfn(unsigned cmd, unsigned long pfn) ++{ ++ struct mmuext_op op; ++ op.cmd = cmd; ++ op.arg1.mfn = pfn_to_mfn(pfn); ++ if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF)) ++ BUG(); ++} ++ ++/* This needs to make sure the new pte page is pinned iff its being ++ attached to a pinned pagetable. 
*/ ++static void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn, unsigned level) ++{ ++ struct page *page = pfn_to_page(pfn); ++ ++ if (PagePinned(virt_to_page(mm->pgd))) { ++ SetPagePinned(page); ++ ++ vm_unmap_aliases(); ++ if (!PageHighMem(page)) { ++ make_lowmem_page_readonly(__va(PFN_PHYS((unsigned long)pfn))); ++ if (level == PT_PTE && USE_SPLIT_PTLOCKS) ++ pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn); ++ } else { ++ /* make sure there are no stray mappings of ++ this page */ ++ kmap_flush_unused(); ++ } ++ } ++} ++ ++static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn) ++{ ++ xen_alloc_ptpage(mm, pfn, PT_PTE); ++} ++ ++static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn) ++{ ++ xen_alloc_ptpage(mm, pfn, PT_PMD); ++} ++ ++/* This should never happen until we're OK to use struct page */ ++static void xen_release_ptpage(unsigned long pfn, unsigned level) ++{ ++ struct page *page = pfn_to_page(pfn); ++ ++ if (PagePinned(page)) { ++ if (!PageHighMem(page)) { ++ if (level == PT_PTE && USE_SPLIT_PTLOCKS) ++ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn); ++ make_lowmem_page_readwrite(__va(PFN_PHYS(pfn))); ++ } ++ ClearPagePinned(page); ++ } ++} ++ ++static void xen_release_pte(unsigned long pfn) ++{ ++ xen_release_ptpage(pfn, PT_PTE); ++} ++ ++static void xen_release_pmd(unsigned long pfn) ++{ ++ xen_release_ptpage(pfn, PT_PMD); ++} ++ ++#if PAGETABLE_LEVELS == 4 ++static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn) ++{ ++ xen_alloc_ptpage(mm, pfn, PT_PUD); ++} ++ ++static void xen_release_pud(unsigned long pfn) ++{ ++ xen_release_ptpage(pfn, PT_PUD); ++} ++#endif ++ ++void __init xen_reserve_top(void) ++{ ++#ifdef CONFIG_X86_32 ++ unsigned long top = HYPERVISOR_VIRT_START; ++ struct xen_platform_parameters pp; ++ ++ if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) ++ top = pp.virt_start; ++ ++ reserve_top_address(-top); ++#endif /* CONFIG_X86_32 */ ++} ++ ++/* ++ * Like __va(), but returns address in the kernel mapping (which is ++ * all we have until the physical memory mapping has been set up. 
++ */ ++static void *__ka(phys_addr_t paddr) ++{ ++#ifdef CONFIG_X86_64 ++ return (void *)(paddr + __START_KERNEL_map); ++#else ++ return __va(paddr); ++#endif ++} ++ ++/* Convert a machine address to physical address */ ++static unsigned long m2p(phys_addr_t maddr) ++{ ++ phys_addr_t paddr; ++ ++ maddr &= PTE_PFN_MASK; ++ paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; ++ ++ return paddr; ++} ++ ++/* Convert a machine address to kernel virtual */ ++static void *m2v(phys_addr_t maddr) ++{ ++ return __ka(m2p(maddr)); ++} ++ ++static void set_page_prot(void *addr, pgprot_t prot) ++{ ++ unsigned long pfn = __pa(addr) >> PAGE_SHIFT; ++ pte_t pte = pfn_pte(pfn, prot); ++ ++ if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) ++ BUG(); ++} ++ ++static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) ++{ ++ unsigned pmdidx, pteidx; ++ unsigned ident_pte; ++ unsigned long pfn; ++ ++ ident_pte = 0; ++ pfn = 0; ++ for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { ++ pte_t *pte_page; ++ ++ /* Reuse or allocate a page of ptes */ ++ if (pmd_present(pmd[pmdidx])) ++ pte_page = m2v(pmd[pmdidx].pmd); ++ else { ++ /* Check for free pte pages */ ++ if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) ++ break; ++ ++ pte_page = &level1_ident_pgt[ident_pte]; ++ ident_pte += PTRS_PER_PTE; ++ ++ pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); ++ } ++ ++ /* Install mappings */ ++ for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { ++ pte_t pte; ++ ++ if (pfn > max_pfn_mapped) ++ max_pfn_mapped = pfn; ++ ++ if (!pte_none(pte_page[pteidx])) ++ continue; ++ ++ pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); ++ pte_page[pteidx] = pte; ++ } ++ } ++ ++ for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) ++ set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); ++ ++ set_page_prot(pmd, PAGE_KERNEL_RO); ++} ++ ++#ifdef CONFIG_X86_64 ++static void convert_pfn_mfn(void *v) ++{ ++ pte_t *pte = v; ++ int i; ++ ++ /* All levels are converted the same way, so just treat them ++ as ptes. */ ++ for (i = 0; i < PTRS_PER_PTE; i++) ++ pte[i] = xen_make_pte(pte[i].pte); ++} ++ ++/* ++ * Set up the inital kernel pagetable. ++ * ++ * We can construct this by grafting the Xen provided pagetable into ++ * head_64.S's preconstructed pagetables. We copy the Xen L2's into ++ * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This ++ * means that only the kernel has a physical mapping to start with - ++ * but that's enough to get __va working. We need to fill in the rest ++ * of the physical mapping once some sort of allocator has been set ++ * up. 
++ */ ++__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, ++ unsigned long max_pfn) ++{ ++ pud_t *l3; ++ pmd_t *l2; ++ ++ /* Zap identity mapping */ ++ init_level4_pgt[0] = __pgd(0); ++ ++ /* Pre-constructed entries are in pfn, so convert to mfn */ ++ convert_pfn_mfn(init_level4_pgt); ++ convert_pfn_mfn(level3_ident_pgt); ++ convert_pfn_mfn(level3_kernel_pgt); ++ ++ l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); ++ l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); ++ ++ memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); ++ memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); ++ ++ l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); ++ l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); ++ memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); ++ ++ /* Set up identity map */ ++ xen_map_identity_early(level2_ident_pgt, max_pfn); ++ ++ /* Make pagetable pieces RO */ ++ set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); ++ set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); ++ set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); ++ set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); ++ set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); ++ set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); ++ ++ /* Pin down new L4 */ ++ pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, ++ PFN_DOWN(__pa_symbol(init_level4_pgt))); ++ ++ /* Unpin Xen-provided one */ ++ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); ++ ++ /* Switch over */ ++ pgd = init_level4_pgt; ++ ++ /* ++ * At this stage there can be no user pgd, and no page ++ * structure to attach it to, so make sure we just set kernel ++ * pgd. ++ */ ++ xen_mc_batch(); ++ __xen_write_cr3(true, __pa(pgd)); ++ xen_mc_issue(PARAVIRT_LAZY_CPU); ++ ++ reserve_early(__pa(xen_start_info->pt_base), ++ __pa(xen_start_info->pt_base + ++ xen_start_info->nr_pt_frames * PAGE_SIZE), ++ "XEN PAGETABLES"); ++ ++ return pgd; ++} ++#else /* !CONFIG_X86_64 */ ++static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; ++ ++__init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, ++ unsigned long max_pfn) ++{ ++ pmd_t *kernel_pmd; ++ ++ max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->pt_base) + ++ xen_start_info->nr_pt_frames * PAGE_SIZE + ++ 512*1024); ++ ++ kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); ++ memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); ++ ++ xen_map_identity_early(level2_kernel_pgt, max_pfn); ++ ++ memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); ++ set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], ++ __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); ++ ++ set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); ++ set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); ++ set_page_prot(empty_zero_page, PAGE_KERNEL_RO); ++ ++ pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); ++ ++ xen_write_cr3(__pa(swapper_pg_dir)); ++ ++ pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); ++ ++ return swapper_pg_dir; ++} ++#endif /* CONFIG_X86_64 */ ++ ++static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) ++{ ++ pte_t pte; ++ ++ phys >>= PAGE_SHIFT; ++ ++ switch (idx) { ++ case FIX_BTMAP_END ... FIX_BTMAP_BEGIN: ++#ifdef CONFIG_X86_F00F_BUG ++ case FIX_F00F_IDT: ++#endif ++#ifdef CONFIG_X86_32 ++ case FIX_WP_TEST: ++ case FIX_VDSO: ++# ifdef CONFIG_HIGHMEM ++ case FIX_KMAP_BEGIN ... FIX_KMAP_END: ++# endif ++#else ++ case VSYSCALL_LAST_PAGE ... 
VSYSCALL_FIRST_PAGE: ++#endif ++#ifdef CONFIG_X86_LOCAL_APIC ++ case FIX_APIC_BASE: /* maps dummy local APIC */ ++#endif ++ pte = pfn_pte(phys, prot); ++ break; ++ ++ default: ++ pte = mfn_pte(phys, prot); ++ break; ++ } ++ ++ __native_set_fixmap(idx, pte); ++ ++#ifdef CONFIG_X86_64 ++ /* Replicate changes to map the vsyscall page into the user ++ pagetable vsyscall mapping. */ ++ if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { ++ unsigned long vaddr = __fix_to_virt(idx); ++ set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); ++ } ++#endif ++} ++ ++__init void xen_post_allocator_init(void) ++{ ++ pv_mmu_ops.set_pte = xen_set_pte; ++ pv_mmu_ops.set_pmd = xen_set_pmd; ++ pv_mmu_ops.set_pud = xen_set_pud; ++#if PAGETABLE_LEVELS == 4 ++ pv_mmu_ops.set_pgd = xen_set_pgd; ++#endif ++ ++ /* This will work as long as patching hasn't happened yet ++ (which it hasn't) */ ++ pv_mmu_ops.alloc_pte = xen_alloc_pte; ++ pv_mmu_ops.alloc_pmd = xen_alloc_pmd; ++ pv_mmu_ops.release_pte = xen_release_pte; ++ pv_mmu_ops.release_pmd = xen_release_pmd; ++#if PAGETABLE_LEVELS == 4 ++ pv_mmu_ops.alloc_pud = xen_alloc_pud; ++ pv_mmu_ops.release_pud = xen_release_pud; ++#endif ++ ++#ifdef CONFIG_X86_64 ++ SetPagePinned(virt_to_page(level3_user_vsyscall)); ++#endif ++ xen_mark_init_mm_pinned(); ++} ++ ++ ++const struct pv_mmu_ops xen_mmu_ops __initdata = { ++ .pagetable_setup_start = xen_pagetable_setup_start, ++ .pagetable_setup_done = xen_pagetable_setup_done, ++ ++ .read_cr2 = xen_read_cr2, ++ .write_cr2 = xen_write_cr2, ++ ++ .read_cr3 = xen_read_cr3, ++ .write_cr3 = xen_write_cr3, ++ ++ .flush_tlb_user = xen_flush_tlb, ++ .flush_tlb_kernel = xen_flush_tlb, ++ .flush_tlb_single = xen_flush_tlb_single, ++ .flush_tlb_others = xen_flush_tlb_others, ++ ++ .pte_update = paravirt_nop, ++ .pte_update_defer = paravirt_nop, ++ ++ .pgd_alloc = xen_pgd_alloc, ++ .pgd_free = xen_pgd_free, ++ ++ .alloc_pte = xen_alloc_pte_init, ++ .release_pte = xen_release_pte_init, ++ .alloc_pmd = xen_alloc_pte_init, ++ .alloc_pmd_clone = paravirt_nop, ++ .release_pmd = xen_release_pte_init, ++ ++#ifdef CONFIG_HIGHPTE ++ .kmap_atomic_pte = xen_kmap_atomic_pte, ++#endif ++ ++#ifdef CONFIG_X86_64 ++ .set_pte = xen_set_pte, ++#else ++ .set_pte = xen_set_pte_init, ++#endif ++ .set_pte_at = xen_set_pte_at, ++ .set_pmd = xen_set_pmd_hyper, ++ ++ .ptep_modify_prot_start = __ptep_modify_prot_start, ++ .ptep_modify_prot_commit = __ptep_modify_prot_commit, ++ ++ .pte_val = PV_CALLEE_SAVE(xen_pte_val), ++ .pgd_val = PV_CALLEE_SAVE(xen_pgd_val), ++ ++ .make_pte = PV_CALLEE_SAVE(xen_make_pte), ++ .make_pgd = PV_CALLEE_SAVE(xen_make_pgd), ++ ++#ifdef CONFIG_X86_PAE ++ .set_pte_atomic = xen_set_pte_atomic, ++ .pte_clear = xen_pte_clear, ++ .pmd_clear = xen_pmd_clear, ++#endif /* CONFIG_X86_PAE */ ++ .set_pud = xen_set_pud_hyper, ++ ++ .make_pmd = PV_CALLEE_SAVE(xen_make_pmd), ++ .pmd_val = PV_CALLEE_SAVE(xen_pmd_val), ++ ++#if PAGETABLE_LEVELS == 4 ++ .pud_val = PV_CALLEE_SAVE(xen_pud_val), ++ .make_pud = PV_CALLEE_SAVE(xen_make_pud), ++ .set_pgd = xen_set_pgd_hyper, ++ ++ .alloc_pud = xen_alloc_pte_init, ++ .release_pud = xen_release_pte_init, ++#endif /* PAGETABLE_LEVELS == 4 */ ++ ++ .activate_mm = xen_activate_mm, ++ .dup_mmap = xen_dup_mmap, ++ .exit_mmap = xen_exit_mmap, ++ ++ .lazy_mode = { ++ .enter = paravirt_enter_lazy_mmu, ++ .leave = xen_leave_lazy, ++ }, ++ ++ .set_fixmap = xen_set_fixmap, ++}; ++ ++ + #ifdef CONFIG_XEN_DEBUG_FS + + static struct dentry *d_mmu_debug; +Index: linux-2.6-tip/arch/x86/xen/mmu.h 
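The Xen mmu.c changes above all funnel page-table updates through the multicall batch: operations are queued with xen_mc_entry(), submitted in a single hypercall by xen_mc_issue(), and bookkeeping such as the xen_current_cr3 update is deferred via xen_mc_callback() so that it only runs once the batch has actually been handed to the hypervisor. The standalone C sketch below models that queue/flush/callback ordering with invented names (mc_queue, mc_callback, mc_flush); it is an illustration of the pattern only, not the kernel's multicall implementation.

#include <stdio.h>

#define MC_BATCH 8

struct mc_entry { int op; unsigned long arg; };          /* queued "hypercall" */
struct mc_cb    { void (*fn)(void *data); void *data; }; /* deferred side effect */

static struct mc_entry entries[MC_BATCH];
static struct mc_cb callbacks[MC_BATCH];
static int mcidx, cbidx;

static void mc_flush(void);

/* Queue one operation; flush first if the batch is already full. */
static void mc_queue(int op, unsigned long arg)
{
	if (mcidx == MC_BATCH)
		mc_flush();
	entries[mcidx].op = op;
	entries[mcidx].arg = arg;
	mcidx++;
}

/* Register work to run only after the current batch has been submitted. */
static void mc_callback(void (*fn)(void *), void *data)
{
	if (cbidx == MC_BATCH)
		mc_flush();
	callbacks[cbidx].fn = fn;
	callbacks[cbidx].data = data;
	cbidx++;
}

/* Submit the whole batch, then run the callbacks strictly afterwards,
 * mirroring how set_current_cr3() is only called once __xen_write_cr3()'s
 * MMUEXT_NEW_BASEPTR operation has really been issued. */
static void mc_flush(void)
{
	int i;

	for (i = 0; i < mcidx; i++)
		printf("submit op=%d arg=%lx\n", entries[i].op, entries[i].arg);
	mcidx = 0;

	for (i = 0; i < cbidx; i++)
		callbacks[i].fn(callbacks[i].data);
	cbidx = 0;
}

static void update_current_cr3(void *v)
{
	printf("deferred cr3 bookkeeping: %lx\n", (unsigned long)v);
}

int main(void)
{
	mc_queue(1, 0x1000);                             /* e.g. a new base pointer */
	mc_callback(update_current_cr3, (void *)0x1000); /* runs only after submission */
	mc_flush();                                      /* one submission, then callback */
	return 0;
}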
+=================================================================== +--- linux-2.6-tip.orig/arch/x86/xen/mmu.h ++++ linux-2.6-tip/arch/x86/xen/mmu.h +@@ -54,4 +54,7 @@ pte_t xen_ptep_modify_prot_start(struct + void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte); + ++unsigned long xen_read_cr2_direct(void); ++ ++extern const struct pv_mmu_ops xen_mmu_ops; + #endif /* _XEN_MMU_H */ +Index: linux-2.6-tip/arch/x86/xen/multicalls.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/xen/multicalls.c ++++ linux-2.6-tip/arch/x86/xen/multicalls.c +@@ -39,6 +39,7 @@ struct mc_buffer { + struct multicall_entry entries[MC_BATCH]; + #if MC_DEBUG + struct multicall_entry debug[MC_BATCH]; ++ void *caller[MC_BATCH]; + #endif + unsigned char args[MC_ARGS]; + struct callback { +@@ -154,11 +155,12 @@ void xen_mc_flush(void) + ret, smp_processor_id()); + dump_stack(); + for (i = 0; i < b->mcidx; i++) { +- printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\n", ++ printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\t%pF\n", + i+1, b->mcidx, + b->debug[i].op, + b->debug[i].args[0], +- b->entries[i].result); ++ b->entries[i].result, ++ b->caller[i]); + } + } + #endif +@@ -168,8 +170,6 @@ void xen_mc_flush(void) + } else + BUG_ON(b->argidx != 0); + +- local_irq_restore(flags); +- + for (i = 0; i < b->cbidx; i++) { + struct callback *cb = &b->callbacks[i]; + +@@ -177,7 +177,9 @@ void xen_mc_flush(void) + } + b->cbidx = 0; + +- BUG_ON(ret); ++ local_irq_restore(flags); ++ ++ WARN_ON(ret); + } + + struct multicall_space __xen_mc_entry(size_t args) +@@ -197,6 +199,9 @@ struct multicall_space __xen_mc_entry(si + } + + ret.mc = &b->entries[b->mcidx]; ++#ifdef MC_DEBUG ++ b->caller[b->mcidx] = __builtin_return_address(0); ++#endif + b->mcidx++; + ret.args = &b->args[argidx]; + b->argidx = argidx + args; +Index: linux-2.6-tip/arch/x86/xen/multicalls.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/xen/multicalls.h ++++ linux-2.6-tip/arch/x86/xen/multicalls.h +@@ -41,7 +41,7 @@ static inline void xen_mc_issue(unsigned + xen_mc_flush(); + + /* restore flags saved in xen_mc_batch */ +- local_irq_restore(x86_read_percpu(xen_mc_irq_flags)); ++ local_irq_restore(percpu_read(xen_mc_irq_flags)); + } + + /* Set up a callback to be called when the current batch is flushed */ +Index: linux-2.6-tip/arch/x86/xen/smp.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/xen/smp.c ++++ linux-2.6-tip/arch/x86/xen/smp.c +@@ -50,11 +50,7 @@ static irqreturn_t xen_call_function_sin + */ + static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) + { +-#ifdef CONFIG_X86_32 +- __get_cpu_var(irq_stat).irq_resched_count++; +-#else +- add_pda(irq_resched_count, 1); +-#endif ++ inc_irq_stat(irq_resched_count); + + return IRQ_HANDLED; + } +@@ -78,7 +74,7 @@ static __cpuinit void cpu_bringup(void) + xen_setup_cpu_clockevents(); + + cpu_set(cpu, cpu_online_map); +- x86_write_percpu(cpu_state, CPU_ONLINE); ++ percpu_write(cpu_state, CPU_ONLINE); + wmb(); + + /* We can take interrupts now: we're officially "up". 
*/ +@@ -162,7 +158,7 @@ static void __init xen_fill_possible_map + rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); + if (rc >= 0) { + num_processors++; +- cpu_set(i, cpu_possible_map); ++ set_cpu_possible(i, true); + } + } + } +@@ -174,7 +170,7 @@ static void __init xen_smp_prepare_boot_ + + /* We've switched to the "real" per-cpu gdt, so make sure the + old memory can be recycled */ +- make_lowmem_page_readwrite(&per_cpu_var(gdt_page)); ++ make_lowmem_page_readwrite(xen_initial_gdt); + + xen_setup_vcpu_info_placement(); + } +@@ -201,7 +197,7 @@ static void __init xen_smp_prepare_cpus( + while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { + for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--) + continue; +- cpu_clear(cpu, cpu_possible_map); ++ set_cpu_possible(cpu, false); + } + + for_each_possible_cpu (cpu) { +@@ -214,7 +210,7 @@ static void __init xen_smp_prepare_cpus( + if (IS_ERR(idle)) + panic("failed fork for CPU %d", cpu); + +- cpu_set(cpu, cpu_present_map); ++ set_cpu_present(cpu, true); + } + } + +@@ -223,6 +219,7 @@ cpu_initialize_context(unsigned int cpu, + { + struct vcpu_guest_context *ctxt; + struct desc_struct *gdt; ++ unsigned long gdt_mfn; + + if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) + return 0; +@@ -239,6 +236,8 @@ cpu_initialize_context(unsigned int cpu, + ctxt->user_regs.ss = __KERNEL_DS; + #ifdef CONFIG_X86_32 + ctxt->user_regs.fs = __KERNEL_PERCPU; ++#else ++ ctxt->gs_base_kernel = per_cpu_offset(cpu); + #endif + ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; + ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ +@@ -250,9 +249,12 @@ cpu_initialize_context(unsigned int cpu, + ctxt->ldt_ents = 0; + + BUG_ON((unsigned long)gdt & ~PAGE_MASK); ++ ++ gdt_mfn = arbitrary_virt_to_mfn(gdt); + make_lowmem_page_readonly(gdt); ++ make_lowmem_page_readonly(mfn_to_virt(gdt_mfn)); + +- ctxt->gdt_frames[0] = virt_to_mfn(gdt); ++ ctxt->gdt_frames[0] = gdt_mfn; + ctxt->gdt_ents = GDT_ENTRIES; + + ctxt->user_regs.cs = __KERNEL_CS; +@@ -283,23 +285,14 @@ static int __cpuinit xen_cpu_up(unsigned + struct task_struct *idle = idle_task(cpu); + int rc; + +-#ifdef CONFIG_X86_64 +- /* Allocate node local memory for AP pdas */ +- WARN_ON(cpu == 0); +- if (cpu > 0) { +- rc = get_local_pda(cpu); +- if (rc) +- return rc; +- } +-#endif +- +-#ifdef CONFIG_X86_32 +- init_gdt(cpu); + per_cpu(current_task, cpu) = idle; ++#ifdef CONFIG_X86_32 + irq_ctx_init(cpu); + #else +- cpu_pda(cpu)->pcurrent = idle; + clear_tsk_thread_flag(idle, TIF_FORK); ++ per_cpu(kernel_stack, cpu) = ++ (unsigned long)task_stack_page(idle) - ++ KERNEL_STACK_OFFSET + THREAD_SIZE; + #endif + xen_setup_timer(cpu); + xen_init_lock_cpu(cpu); +@@ -445,11 +438,7 @@ static irqreturn_t xen_call_function_int + { + irq_enter(); + generic_smp_call_function_interrupt(); +-#ifdef CONFIG_X86_32 +- __get_cpu_var(irq_stat).irq_call_count++; +-#else +- add_pda(irq_call_count, 1); +-#endif ++ inc_irq_stat(irq_call_count); + irq_exit(); + + return IRQ_HANDLED; +@@ -459,11 +448,7 @@ static irqreturn_t xen_call_function_sin + { + irq_enter(); + generic_smp_call_function_single_interrupt(); +-#ifdef CONFIG_X86_32 +- __get_cpu_var(irq_stat).irq_call_count++; +-#else +- add_pda(irq_call_count, 1); +-#endif ++ inc_irq_stat(irq_call_count); + irq_exit(); + + return IRQ_HANDLED; +Index: linux-2.6-tip/arch/x86/xen/suspend.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/xen/suspend.c ++++ linux-2.6-tip/arch/x86/xen/suspend.c +@@ -6,6 +6,7 @@ + + 
#include + #include ++#include + + #include "xen-ops.h" + #include "mmu.h" +Index: linux-2.6-tip/arch/x86/xen/xen-asm.S +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/xen/xen-asm.S +@@ -0,0 +1,142 @@ ++/* ++ * Asm versions of Xen pv-ops, suitable for either direct use or ++ * inlining. The inline versions are the same as the direct-use ++ * versions, with the pre- and post-amble chopped off. ++ * ++ * This code is encoded for size rather than absolute efficiency, with ++ * a view to being able to inline as much as possible. ++ * ++ * We only bother with direct forms (ie, vcpu in percpu data) of the ++ * operations here; the indirect forms are better handled in C, since ++ * they're generally too large to inline anyway. ++ */ ++ ++#include ++#include ++#include ++ ++#include "xen-asm.h" ++ ++/* ++ * Enable events. This clears the event mask and tests the pending ++ * event status with one and operation. If there are pending events, ++ * then enter the hypervisor to get them handled. ++ */ ++ENTRY(xen_irq_enable_direct) ++ /* Unmask events */ ++ movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask ++ ++ /* ++ * Preempt here doesn't matter because that will deal with any ++ * pending interrupts. The pending check may end up being run ++ * on the wrong CPU, but that doesn't hurt. ++ */ ++ ++ /* Test for pending */ ++ testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending ++ jz 1f ++ ++2: call check_events ++1: ++ENDPATCH(xen_irq_enable_direct) ++ ret ++ ENDPROC(xen_irq_enable_direct) ++ RELOC(xen_irq_enable_direct, 2b+1) ++ ++ ++/* ++ * Disabling events is simply a matter of making the event mask ++ * non-zero. ++ */ ++ENTRY(xen_irq_disable_direct) ++ movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask ++ENDPATCH(xen_irq_disable_direct) ++ ret ++ ENDPROC(xen_irq_disable_direct) ++ RELOC(xen_irq_disable_direct, 0) ++ ++/* ++ * (xen_)save_fl is used to get the current interrupt enable status. ++ * Callers expect the status to be in X86_EFLAGS_IF, and other bits ++ * may be set in the return value. We take advantage of this by ++ * making sure that X86_EFLAGS_IF has the right value (and other bits ++ * in that byte are 0), but other bits in the return value are ++ * undefined. We need to toggle the state of the bit, because Xen and ++ * x86 use opposite senses (mask vs enable). ++ */ ++ENTRY(xen_save_fl_direct) ++ testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask ++ setz %ah ++ addb %ah, %ah ++ENDPATCH(xen_save_fl_direct) ++ ret ++ ENDPROC(xen_save_fl_direct) ++ RELOC(xen_save_fl_direct, 0) ++ ++ ++/* ++ * In principle the caller should be passing us a value return from ++ * xen_save_fl_direct, but for robustness sake we test only the ++ * X86_EFLAGS_IF flag rather than the whole byte. After setting the ++ * interrupt mask state, it checks for unmasked pending events and ++ * enters the hypervisor to get them delivered if so. ++ */ ++ENTRY(xen_restore_fl_direct) ++#ifdef CONFIG_X86_64 ++ testw $X86_EFLAGS_IF, %di ++#else ++ testb $X86_EFLAGS_IF>>8, %ah ++#endif ++ setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask ++ /* ++ * Preempt here doesn't matter because that will deal with any ++ * pending interrupts. The pending check may end up being run ++ * on the wrong CPU, but that doesn't hurt. 
++ */ ++ ++ /* check for unmasked and pending */ ++ cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending ++ jz 1f ++2: call check_events ++1: ++ENDPATCH(xen_restore_fl_direct) ++ ret ++ ENDPROC(xen_restore_fl_direct) ++ RELOC(xen_restore_fl_direct, 2b+1) ++ ++ ++/* ++ * Force an event check by making a hypercall, but preserve regs ++ * before making the call. ++ */ ++check_events: ++#ifdef CONFIG_X86_32 ++ push %eax ++ push %ecx ++ push %edx ++ call xen_force_evtchn_callback ++ pop %edx ++ pop %ecx ++ pop %eax ++#else ++ push %rax ++ push %rcx ++ push %rdx ++ push %rsi ++ push %rdi ++ push %r8 ++ push %r9 ++ push %r10 ++ push %r11 ++ call xen_force_evtchn_callback ++ pop %r11 ++ pop %r10 ++ pop %r9 ++ pop %r8 ++ pop %rdi ++ pop %rsi ++ pop %rdx ++ pop %rcx ++ pop %rax ++#endif ++ ret +Index: linux-2.6-tip/arch/x86/xen/xen-asm.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/x86/xen/xen-asm.h +@@ -0,0 +1,12 @@ ++#ifndef _XEN_XEN_ASM_H ++#define _XEN_XEN_ASM_H ++ ++#include ++ ++#define RELOC(x, v) .globl x##_reloc; x##_reloc=v ++#define ENDPATCH(x) .globl x##_end; x##_end=. ++ ++/* Pseudo-flag used for virtual NMI, which we don't implement yet */ ++#define XEN_EFLAGS_NMI 0x80000000 ++ ++#endif +Index: linux-2.6-tip/arch/x86/xen/xen-asm_32.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/xen/xen-asm_32.S ++++ linux-2.6-tip/arch/x86/xen/xen-asm_32.S +@@ -1,117 +1,43 @@ + /* +- Asm versions of Xen pv-ops, suitable for either direct use or inlining. +- The inline versions are the same as the direct-use versions, with the +- pre- and post-amble chopped off. +- +- This code is encoded for size rather than absolute efficiency, +- with a view to being able to inline as much as possible. +- +- We only bother with direct forms (ie, vcpu in pda) of the operations +- here; the indirect forms are better handled in C, since they're +- generally too large to inline anyway. ++ * Asm versions of Xen pv-ops, suitable for either direct use or ++ * inlining. The inline versions are the same as the direct-use ++ * versions, with the pre- and post-amble chopped off. ++ * ++ * This code is encoded for size rather than absolute efficiency, with ++ * a view to being able to inline as much as possible. ++ * ++ * We only bother with direct forms (ie, vcpu in pda) of the ++ * operations here; the indirect forms are better handled in C, since ++ * they're generally too large to inline anyway. + */ + +-#include +- +-#include + #include +-#include + #include + #include + + #include + +-#define RELOC(x, v) .globl x##_reloc; x##_reloc=v +-#define ENDPATCH(x) .globl x##_end; x##_end=. +- +-/* Pseudo-flag used for virtual NMI, which we don't implement yet */ +-#define XEN_EFLAGS_NMI 0x80000000 +- +-/* +- Enable events. This clears the event mask and tests the pending +- event status with one and operation. If there are pending +- events, then enter the hypervisor to get them handled. +- */ +-ENTRY(xen_irq_enable_direct) +- /* Unmask events */ +- movb $0, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask +- +- /* Preempt here doesn't matter because that will deal with +- any pending interrupts. The pending check may end up being +- run on the wrong CPU, but that doesn't hurt. 
*/ +- +- /* Test for pending */ +- testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending +- jz 1f +- +-2: call check_events +-1: +-ENDPATCH(xen_irq_enable_direct) +- ret +- ENDPROC(xen_irq_enable_direct) +- RELOC(xen_irq_enable_direct, 2b+1) +- ++#include "xen-asm.h" + + /* +- Disabling events is simply a matter of making the event mask +- non-zero. ++ * Force an event check by making a hypercall, but preserve regs ++ * before making the call. + */ +-ENTRY(xen_irq_disable_direct) +- movb $1, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask +-ENDPATCH(xen_irq_disable_direct) +- ret +- ENDPROC(xen_irq_disable_direct) +- RELOC(xen_irq_disable_direct, 0) +- +-/* +- (xen_)save_fl is used to get the current interrupt enable status. +- Callers expect the status to be in X86_EFLAGS_IF, and other bits +- may be set in the return value. We take advantage of this by +- making sure that X86_EFLAGS_IF has the right value (and other bits +- in that byte are 0), but other bits in the return value are +- undefined. We need to toggle the state of the bit, because +- Xen and x86 use opposite senses (mask vs enable). +- */ +-ENTRY(xen_save_fl_direct) +- testb $0xff, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask +- setz %ah +- addb %ah,%ah +-ENDPATCH(xen_save_fl_direct) +- ret +- ENDPROC(xen_save_fl_direct) +- RELOC(xen_save_fl_direct, 0) +- +- +-/* +- In principle the caller should be passing us a value return +- from xen_save_fl_direct, but for robustness sake we test only +- the X86_EFLAGS_IF flag rather than the whole byte. After +- setting the interrupt mask state, it checks for unmasked +- pending events and enters the hypervisor to get them delivered +- if so. +- */ +-ENTRY(xen_restore_fl_direct) +- testb $X86_EFLAGS_IF>>8, %ah +- setz PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_mask +- /* Preempt here doesn't matter because that will deal with +- any pending interrupts. The pending check may end up being +- run on the wrong CPU, but that doesn't hurt. */ +- +- /* check for unmasked and pending */ +- cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info)+XEN_vcpu_info_pending +- jz 1f +-2: call check_events +-1: +-ENDPATCH(xen_restore_fl_direct) ++check_events: ++ push %eax ++ push %ecx ++ push %edx ++ call xen_force_evtchn_callback ++ pop %edx ++ pop %ecx ++ pop %eax + ret +- ENDPROC(xen_restore_fl_direct) +- RELOC(xen_restore_fl_direct, 2b+1) + + /* +- We can't use sysexit directly, because we're not running in ring0. +- But we can easily fake it up using iret. Assuming xen_sysexit +- is jumped to with a standard stack frame, we can just strip it +- back to a standard iret frame and use iret. ++ * We can't use sysexit directly, because we're not running in ring0. ++ * But we can easily fake it up using iret. Assuming xen_sysexit is ++ * jumped to with a standard stack frame, we can just strip it back to ++ * a standard iret frame and use iret. + */ + ENTRY(xen_sysexit) + movl PT_EAX(%esp), %eax /* Shouldn't be necessary? */ +@@ -122,33 +48,31 @@ ENTRY(xen_sysexit) + ENDPROC(xen_sysexit) + + /* +- This is run where a normal iret would be run, with the same stack setup: +- 8: eflags +- 4: cs +- esp-> 0: eip +- +- This attempts to make sure that any pending events are dealt +- with on return to usermode, but there is a small window in +- which an event can happen just before entering usermode. If +- the nested interrupt ends up setting one of the TIF_WORK_MASK +- pending work flags, they will not be tested again before +- returning to usermode. 
This means that a process can end up +- with pending work, which will be unprocessed until the process +- enters and leaves the kernel again, which could be an +- unbounded amount of time. This means that a pending signal or +- reschedule event could be indefinitely delayed. +- +- The fix is to notice a nested interrupt in the critical +- window, and if one occurs, then fold the nested interrupt into +- the current interrupt stack frame, and re-process it +- iteratively rather than recursively. This means that it will +- exit via the normal path, and all pending work will be dealt +- with appropriately. +- +- Because the nested interrupt handler needs to deal with the +- current stack state in whatever form its in, we keep things +- simple by only using a single register which is pushed/popped +- on the stack. ++ * This is run where a normal iret would be run, with the same stack setup: ++ * 8: eflags ++ * 4: cs ++ * esp-> 0: eip ++ * ++ * This attempts to make sure that any pending events are dealt with ++ * on return to usermode, but there is a small window in which an ++ * event can happen just before entering usermode. If the nested ++ * interrupt ends up setting one of the TIF_WORK_MASK pending work ++ * flags, they will not be tested again before returning to ++ * usermode. This means that a process can end up with pending work, ++ * which will be unprocessed until the process enters and leaves the ++ * kernel again, which could be an unbounded amount of time. This ++ * means that a pending signal or reschedule event could be ++ * indefinitely delayed. ++ * ++ * The fix is to notice a nested interrupt in the critical window, and ++ * if one occurs, then fold the nested interrupt into the current ++ * interrupt stack frame, and re-process it iteratively rather than ++ * recursively. This means that it will exit via the normal path, and ++ * all pending work will be dealt with appropriately. ++ * ++ * Because the nested interrupt handler needs to deal with the current ++ * stack state in whatever form its in, we keep things simple by only ++ * using a single register which is pushed/popped on the stack. + */ + ENTRY(xen_iret) + /* test eflags for special cases */ +@@ -158,13 +82,15 @@ ENTRY(xen_iret) + push %eax + ESP_OFFSET=4 # bytes pushed onto stack + +- /* Store vcpu_info pointer for easy access. Do it this +- way to avoid having to reload %fs */ ++ /* ++ * Store vcpu_info pointer for easy access. Do it this way to ++ * avoid having to reload %fs ++ */ + #ifdef CONFIG_SMP + GET_THREAD_INFO(%eax) +- movl TI_cpu(%eax),%eax +- movl __per_cpu_offset(,%eax,4),%eax +- mov per_cpu__xen_vcpu(%eax),%eax ++ movl TI_cpu(%eax), %eax ++ movl __per_cpu_offset(,%eax,4), %eax ++ mov per_cpu__xen_vcpu(%eax), %eax + #else + movl per_cpu__xen_vcpu, %eax + #endif +@@ -172,37 +98,46 @@ ENTRY(xen_iret) + /* check IF state we're restoring */ + testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp) + +- /* Maybe enable events. Once this happens we could get a +- recursive event, so the critical region starts immediately +- afterwards. However, if that happens we don't end up +- resuming the code, so we don't have to be worried about +- being preempted to another CPU. */ ++ /* ++ * Maybe enable events. Once this happens we could get a ++ * recursive event, so the critical region starts immediately ++ * afterwards. However, if that happens we don't end up ++ * resuming the code, so we don't have to be worried about ++ * being preempted to another CPU. 
++ */ + setz XEN_vcpu_info_mask(%eax) + xen_iret_start_crit: + + /* check for unmasked and pending */ + cmpw $0x0001, XEN_vcpu_info_pending(%eax) + +- /* If there's something pending, mask events again so we +- can jump back into xen_hypervisor_callback */ ++ /* ++ * If there's something pending, mask events again so we can ++ * jump back into xen_hypervisor_callback ++ */ + sete XEN_vcpu_info_mask(%eax) + + popl %eax + +- /* From this point on the registers are restored and the stack +- updated, so we don't need to worry about it if we're preempted */ ++ /* ++ * From this point on the registers are restored and the stack ++ * updated, so we don't need to worry about it if we're ++ * preempted ++ */ + iret_restore_end: + +- /* Jump to hypervisor_callback after fixing up the stack. +- Events are masked, so jumping out of the critical +- region is OK. */ ++ /* ++ * Jump to hypervisor_callback after fixing up the stack. ++ * Events are masked, so jumping out of the critical region is ++ * OK. ++ */ + je xen_hypervisor_callback + + 1: iret + xen_iret_end_crit: +-.section __ex_table,"a" ++.section __ex_table, "a" + .align 4 +- .long 1b,iret_exc ++ .long 1b, iret_exc + .previous + + hyper_iret: +@@ -212,55 +147,55 @@ hyper_iret: + .globl xen_iret_start_crit, xen_iret_end_crit + + /* +- This is called by xen_hypervisor_callback in entry.S when it sees +- that the EIP at the time of interrupt was between xen_iret_start_crit +- and xen_iret_end_crit. We're passed the EIP in %eax so we can do +- a more refined determination of what to do. +- +- The stack format at this point is: +- ---------------- +- ss : (ss/esp may be present if we came from usermode) +- esp : +- eflags } outer exception info +- cs } +- eip } +- ---------------- <- edi (copy dest) +- eax : outer eax if it hasn't been restored +- ---------------- +- eflags } nested exception info +- cs } (no ss/esp because we're nested +- eip } from the same ring) +- orig_eax }<- esi (copy src) +- - - - - - - - - +- fs } +- es } +- ds } SAVE_ALL state +- eax } +- : : +- ebx }<- esp +- ---------------- +- +- In order to deliver the nested exception properly, we need to shift +- everything from the return addr up to the error code so it +- sits just under the outer exception info. This means that when we +- handle the exception, we do it in the context of the outer exception +- rather than starting a new one. +- +- The only caveat is that if the outer eax hasn't been +- restored yet (ie, it's still on stack), we need to insert +- its value into the SAVE_ALL state before going on, since +- it's usermode state which we eventually need to restore. ++ * This is called by xen_hypervisor_callback in entry.S when it sees ++ * that the EIP at the time of interrupt was between ++ * xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in ++ * %eax so we can do a more refined determination of what to do. 
++ * ++ * The stack format at this point is: ++ * ---------------- ++ * ss : (ss/esp may be present if we came from usermode) ++ * esp : ++ * eflags } outer exception info ++ * cs } ++ * eip } ++ * ---------------- <- edi (copy dest) ++ * eax : outer eax if it hasn't been restored ++ * ---------------- ++ * eflags } nested exception info ++ * cs } (no ss/esp because we're nested ++ * eip } from the same ring) ++ * orig_eax }<- esi (copy src) ++ * - - - - - - - - ++ * fs } ++ * es } ++ * ds } SAVE_ALL state ++ * eax } ++ * : : ++ * ebx }<- esp ++ * ---------------- ++ * ++ * In order to deliver the nested exception properly, we need to shift ++ * everything from the return addr up to the error code so it sits ++ * just under the outer exception info. This means that when we ++ * handle the exception, we do it in the context of the outer ++ * exception rather than starting a new one. ++ * ++ * The only caveat is that if the outer eax hasn't been restored yet ++ * (ie, it's still on stack), we need to insert its value into the ++ * SAVE_ALL state before going on, since it's usermode state which we ++ * eventually need to restore. + */ + ENTRY(xen_iret_crit_fixup) + /* +- Paranoia: Make sure we're really coming from kernel space. +- One could imagine a case where userspace jumps into the +- critical range address, but just before the CPU delivers a GP, +- it decides to deliver an interrupt instead. Unlikely? +- Definitely. Easy to avoid? Yes. The Intel documents +- explicitly say that the reported EIP for a bad jump is the +- jump instruction itself, not the destination, but some virtual +- environments get this wrong. ++ * Paranoia: Make sure we're really coming from kernel space. ++ * One could imagine a case where userspace jumps into the ++ * critical range address, but just before the CPU delivers a ++ * GP, it decides to deliver an interrupt instead. Unlikely? ++ * Definitely. Easy to avoid? Yes. The Intel documents ++ * explicitly say that the reported EIP for a bad jump is the ++ * jump instruction itself, not the destination, but some ++ * virtual environments get this wrong. + */ + movl PT_CS(%esp), %ecx + andl $SEGMENT_RPL_MASK, %ecx +@@ -270,15 +205,17 @@ ENTRY(xen_iret_crit_fixup) + lea PT_ORIG_EAX(%esp), %esi + lea PT_EFLAGS(%esp), %edi + +- /* If eip is before iret_restore_end then stack +- hasn't been restored yet. */ ++ /* ++ * If eip is before iret_restore_end then stack ++ * hasn't been restored yet. ++ */ + cmp $iret_restore_end, %eax + jae 1f + +- movl 0+4(%edi),%eax /* copy EAX (just above top of frame) */ ++ movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */ + movl %eax, PT_EAX(%esp) + +- lea ESP_OFFSET(%edi),%edi /* move dest up over saved regs */ ++ lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */ + + /* set up the copy */ + 1: std +@@ -286,20 +223,6 @@ ENTRY(xen_iret_crit_fixup) + rep movsl + cld + +- lea 4(%edi),%esp /* point esp to new frame */ ++ lea 4(%edi), %esp /* point esp to new frame */ + 2: jmp xen_do_upcall + +- +-/* +- Force an event check by making a hypercall, +- but preserve regs before making the call. 
+- */ +-check_events: +- push %eax +- push %ecx +- push %edx +- call xen_force_evtchn_callback +- pop %edx +- pop %ecx +- pop %eax +- ret +Index: linux-2.6-tip/arch/x86/xen/xen-asm_64.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/xen/xen-asm_64.S ++++ linux-2.6-tip/arch/x86/xen/xen-asm_64.S +@@ -1,174 +1,45 @@ + /* +- Asm versions of Xen pv-ops, suitable for either direct use or inlining. +- The inline versions are the same as the direct-use versions, with the +- pre- and post-amble chopped off. +- +- This code is encoded for size rather than absolute efficiency, +- with a view to being able to inline as much as possible. +- +- We only bother with direct forms (ie, vcpu in pda) of the operations +- here; the indirect forms are better handled in C, since they're +- generally too large to inline anyway. ++ * Asm versions of Xen pv-ops, suitable for either direct use or ++ * inlining. The inline versions are the same as the direct-use ++ * versions, with the pre- and post-amble chopped off. ++ * ++ * This code is encoded for size rather than absolute efficiency, with ++ * a view to being able to inline as much as possible. ++ * ++ * We only bother with direct forms (ie, vcpu in pda) of the ++ * operations here; the indirect forms are better handled in C, since ++ * they're generally too large to inline anyway. + */ + +-#include +- +-#include +-#include + #include ++#include ++#include + #include + + #include + +-#define RELOC(x, v) .globl x##_reloc; x##_reloc=v +-#define ENDPATCH(x) .globl x##_end; x##_end=. +- +-/* Pseudo-flag used for virtual NMI, which we don't implement yet */ +-#define XEN_EFLAGS_NMI 0x80000000 +- +-#if 1 +-/* +- x86-64 does not yet support direct access to percpu variables +- via a segment override, so we just need to make sure this code +- never gets used +- */ +-#define BUG ud2a +-#define PER_CPU_VAR(var, off) 0xdeadbeef +-#endif +- +-/* +- Enable events. This clears the event mask and tests the pending +- event status with one and operation. If there are pending +- events, then enter the hypervisor to get them handled. +- */ +-ENTRY(xen_irq_enable_direct) +- BUG +- +- /* Unmask events */ +- movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) +- +- /* Preempt here doesn't matter because that will deal with +- any pending interrupts. The pending check may end up being +- run on the wrong CPU, but that doesn't hurt. */ +- +- /* Test for pending */ +- testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) +- jz 1f +- +-2: call check_events +-1: +-ENDPATCH(xen_irq_enable_direct) +- ret +- ENDPROC(xen_irq_enable_direct) +- RELOC(xen_irq_enable_direct, 2b+1) +- +-/* +- Disabling events is simply a matter of making the event mask +- non-zero. +- */ +-ENTRY(xen_irq_disable_direct) +- BUG +- +- movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) +-ENDPATCH(xen_irq_disable_direct) +- ret +- ENDPROC(xen_irq_disable_direct) +- RELOC(xen_irq_disable_direct, 0) +- +-/* +- (xen_)save_fl is used to get the current interrupt enable status. +- Callers expect the status to be in X86_EFLAGS_IF, and other bits +- may be set in the return value. We take advantage of this by +- making sure that X86_EFLAGS_IF has the right value (and other bits +- in that byte are 0), but other bits in the return value are +- undefined. We need to toggle the state of the bit, because +- Xen and x86 use opposite senses (mask vs enable). 
+- */ +-ENTRY(xen_save_fl_direct) +- BUG +- +- testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) +- setz %ah +- addb %ah,%ah +-ENDPATCH(xen_save_fl_direct) +- ret +- ENDPROC(xen_save_fl_direct) +- RELOC(xen_save_fl_direct, 0) +- +-/* +- In principle the caller should be passing us a value return +- from xen_save_fl_direct, but for robustness sake we test only +- the X86_EFLAGS_IF flag rather than the whole byte. After +- setting the interrupt mask state, it checks for unmasked +- pending events and enters the hypervisor to get them delivered +- if so. +- */ +-ENTRY(xen_restore_fl_direct) +- BUG +- +- testb $X86_EFLAGS_IF>>8, %ah +- setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) +- /* Preempt here doesn't matter because that will deal with +- any pending interrupts. The pending check may end up being +- run on the wrong CPU, but that doesn't hurt. */ +- +- /* check for unmasked and pending */ +- cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) +- jz 1f +-2: call check_events +-1: +-ENDPATCH(xen_restore_fl_direct) +- ret +- ENDPROC(xen_restore_fl_direct) +- RELOC(xen_restore_fl_direct, 2b+1) +- +- +-/* +- Force an event check by making a hypercall, +- but preserve regs before making the call. +- */ +-check_events: +- push %rax +- push %rcx +- push %rdx +- push %rsi +- push %rdi +- push %r8 +- push %r9 +- push %r10 +- push %r11 +- call xen_force_evtchn_callback +- pop %r11 +- pop %r10 +- pop %r9 +- pop %r8 +- pop %rdi +- pop %rsi +- pop %rdx +- pop %rcx +- pop %rax +- ret ++#include "xen-asm.h" + + ENTRY(xen_adjust_exception_frame) +- mov 8+0(%rsp),%rcx +- mov 8+8(%rsp),%r11 ++ mov 8+0(%rsp), %rcx ++ mov 8+8(%rsp), %r11 + ret $16 + + hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 + /* +- Xen64 iret frame: +- +- ss +- rsp +- rflags +- cs +- rip <-- standard iret frame +- +- flags +- +- rcx } +- r11 }<-- pushed by hypercall page +-rsp -> rax } ++ * Xen64 iret frame: ++ * ++ * ss ++ * rsp ++ * rflags ++ * cs ++ * rip <-- standard iret frame ++ * ++ * flags ++ * ++ * rcx } ++ * r11 }<-- pushed by hypercall page ++ * rsp->rax } + */ + ENTRY(xen_iret) + pushq $0 +@@ -177,8 +48,8 @@ ENDPATCH(xen_iret) + RELOC(xen_iret, 1b+1) + + /* +- sysexit is not used for 64-bit processes, so it's +- only ever used to return to 32-bit compat userspace. ++ * sysexit is not used for 64-bit processes, so it's only ever used to ++ * return to 32-bit compat userspace. 
+ */ + ENTRY(xen_sysexit) + pushq $__USER32_DS +@@ -193,13 +64,15 @@ ENDPATCH(xen_sysexit) + RELOC(xen_sysexit, 1b+1) + + ENTRY(xen_sysret64) +- /* We're already on the usermode stack at this point, but still +- with the kernel gs, so we can easily switch back */ +- movq %rsp, %gs:pda_oldrsp +- movq %gs:pda_kernelstack,%rsp ++ /* ++ * We're already on the usermode stack at this point, but ++ * still with the kernel gs, so we can easily switch back ++ */ ++ movq %rsp, PER_CPU_VAR(old_rsp) ++ movq PER_CPU_VAR(kernel_stack), %rsp + + pushq $__USER_DS +- pushq %gs:pda_oldrsp ++ pushq PER_CPU_VAR(old_rsp) + pushq %r11 + pushq $__USER_CS + pushq %rcx +@@ -210,13 +83,15 @@ ENDPATCH(xen_sysret64) + RELOC(xen_sysret64, 1b+1) + + ENTRY(xen_sysret32) +- /* We're already on the usermode stack at this point, but still +- with the kernel gs, so we can easily switch back */ +- movq %rsp, %gs:pda_oldrsp +- movq %gs:pda_kernelstack, %rsp ++ /* ++ * We're already on the usermode stack at this point, but ++ * still with the kernel gs, so we can easily switch back ++ */ ++ movq %rsp, PER_CPU_VAR(old_rsp) ++ movq PER_CPU_VAR(kernel_stack), %rsp + + pushq $__USER32_DS +- pushq %gs:pda_oldrsp ++ pushq PER_CPU_VAR(old_rsp) + pushq %r11 + pushq $__USER32_CS + pushq %rcx +@@ -227,28 +102,27 @@ ENDPATCH(xen_sysret32) + RELOC(xen_sysret32, 1b+1) + + /* +- Xen handles syscall callbacks much like ordinary exceptions, +- which means we have: +- - kernel gs +- - kernel rsp +- - an iret-like stack frame on the stack (including rcx and r11): +- ss +- rsp +- rflags +- cs +- rip +- r11 +- rsp-> rcx +- +- In all the entrypoints, we undo all that to make it look +- like a CPU-generated syscall/sysenter and jump to the normal +- entrypoint. ++ * Xen handles syscall callbacks much like ordinary exceptions, which ++ * means we have: ++ * - kernel gs ++ * - kernel rsp ++ * - an iret-like stack frame on the stack (including rcx and r11): ++ * ss ++ * rsp ++ * rflags ++ * cs ++ * rip ++ * r11 ++ * rsp->rcx ++ * ++ * In all the entrypoints, we undo all that to make it look like a ++ * CPU-generated syscall/sysenter and jump to the normal entrypoint. 
+ */ + + .macro undo_xen_syscall +- mov 0*8(%rsp),%rcx +- mov 1*8(%rsp),%r11 +- mov 5*8(%rsp),%rsp ++ mov 0*8(%rsp), %rcx ++ mov 1*8(%rsp), %r11 ++ mov 5*8(%rsp), %rsp + .endm + + /* Normal 64-bit system call target */ +@@ -275,7 +149,7 @@ ENDPROC(xen_sysenter_target) + + ENTRY(xen_syscall32_target) + ENTRY(xen_sysenter_target) +- lea 16(%rsp), %rsp /* strip %rcx,%r11 */ ++ lea 16(%rsp), %rsp /* strip %rcx, %r11 */ + mov $-ENOSYS, %rax + pushq $VGCF_in_syscall + jmp hypercall_iret +Index: linux-2.6-tip/arch/x86/xen/xen-head.S +=================================================================== +--- linux-2.6-tip.orig/arch/x86/xen/xen-head.S ++++ linux-2.6-tip/arch/x86/xen/xen-head.S +@@ -8,7 +8,7 @@ + + #include + #include +-#include ++#include + + #include + #include +Index: linux-2.6-tip/arch/x86/xen/xen-ops.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/xen/xen-ops.h ++++ linux-2.6-tip/arch/x86/xen/xen-ops.h +@@ -10,9 +10,12 @@ + extern const char xen_hypervisor_callback[]; + extern const char xen_failsafe_callback[]; + ++extern void *xen_initial_gdt; ++ + struct trap_info; + void xen_copy_trap_info(struct trap_info *traps); + ++DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info); + DECLARE_PER_CPU(unsigned long, xen_cr3); + DECLARE_PER_CPU(unsigned long, xen_current_cr3); + +@@ -22,6 +25,13 @@ extern struct shared_info *HYPERVISOR_sh + + void xen_setup_mfn_list_list(void); + void xen_setup_shared_info(void); ++void xen_setup_machphys_mapping(void); ++pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); ++void xen_ident_map_ISA(void); ++void xen_reserve_top(void); ++ ++void xen_leave_lazy(void); ++void xen_post_allocator_init(void); + + char * __init xen_memory_setup(void); + void __init xen_arch_setup(void); +Index: linux-2.6-tip/arch/xtensa/include/asm/ftrace.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/arch/xtensa/include/asm/ftrace.h +@@ -0,0 +1 @@ ++/* empty */ +Index: linux-2.6-tip/arch/xtensa/include/asm/swab.h +=================================================================== +--- linux-2.6-tip.orig/arch/xtensa/include/asm/swab.h ++++ linux-2.6-tip/arch/xtensa/include/asm/swab.h +@@ -11,7 +11,7 @@ + #ifndef _XTENSA_SWAB_H + #define _XTENSA_SWAB_H + +-#include ++#include + #include + + #define __SWAB_64_THRU_32__ +Index: linux-2.6-tip/arch/xtensa/kernel/irq.c +=================================================================== +--- linux-2.6-tip.orig/arch/xtensa/kernel/irq.c ++++ linux-2.6-tip/arch/xtensa/kernel/irq.c +@@ -99,7 +99,7 @@ int show_interrupts(struct seq_file *p, + seq_printf(p, "%10u ", kstat_irqs(i)); + #else + for_each_online_cpu(j) +- seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); ++ seq_printf(p, "%10u ", kstat_irqs_cpu(i, j)); + #endif + seq_printf(p, " %14s", irq_desc[i].chip->typename); + seq_printf(p, " %s", action->name); +Index: linux-2.6-tip/block/Kconfig +=================================================================== +--- linux-2.6-tip.orig/block/Kconfig ++++ linux-2.6-tip/block/Kconfig +@@ -44,22 +44,6 @@ config LBD + + If unsure, say N. + +-config BLK_DEV_IO_TRACE +- bool "Support for tracing block io actions" +- depends on SYSFS +- select RELAY +- select DEBUG_FS +- select TRACEPOINTS +- help +- Say Y here if you want to be able to trace the block layer actions +- on a given queue. Tracing allows you to see any traffic happening +- on a block device queue. 
For more information (and the userspace +- support tools needed), fetch the blktrace tools from: +- +- git://git.kernel.dk/blktrace.git +- +- If unsure, say N. +- + config BLK_DEV_BSG + bool "Block layer SG support v4 (EXPERIMENTAL)" + depends on EXPERIMENTAL +Index: linux-2.6-tip/block/Makefile +=================================================================== +--- linux-2.6-tip.orig/block/Makefile ++++ linux-2.6-tip/block/Makefile +@@ -13,6 +13,5 @@ obj-$(CONFIG_IOSCHED_AS) += as-iosched.o + obj-$(CONFIG_IOSCHED_DEADLINE) += deadline-iosched.o + obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o + +-obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o + obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o + obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o +Index: linux-2.6-tip/block/blk-softirq.c +=================================================================== +--- linux-2.6-tip.orig/block/blk-softirq.c ++++ linux-2.6-tip/block/blk-softirq.c +@@ -64,7 +64,7 @@ static int raise_blk_irq(int cpu, struct + data->info = rq; + data->flags = 0; + +- __smp_call_function_single(cpu, data); ++ __smp_call_function_single(cpu, data, 0); + return 0; + } + +Index: linux-2.6-tip/block/blk.h +=================================================================== +--- linux-2.6-tip.orig/block/blk.h ++++ linux-2.6-tip/block/blk.h +@@ -102,7 +102,7 @@ static inline int blk_cpu_to_group(int c + const struct cpumask *mask = cpu_coregroup_mask(cpu); + return cpumask_first(mask); + #elif defined(CONFIG_SCHED_SMT) +- return first_cpu(per_cpu(cpu_sibling_map, cpu)); ++ return cpumask_first(topology_thread_cpumask(cpu)); + #else + return cpu; + #endif +Index: linux-2.6-tip/block/blktrace.c +=================================================================== +--- linux-2.6-tip.orig/block/blktrace.c ++++ /dev/null +@@ -1,860 +0,0 @@ +-/* +- * Copyright (C) 2006 Jens Axboe +- * +- * This program is free software; you can redistribute it and/or modify +- * it under the terms of the GNU General Public License version 2 as +- * published by the Free Software Foundation. +- * +- * This program is distributed in the hope that it will be useful, +- * but WITHOUT ANY WARRANTY; without even the implied warranty of +- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +- * GNU General Public License for more details. +- * +- * You should have received a copy of the GNU General Public License +- * along with this program; if not, write to the Free Software +- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +- * +- */ +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-static unsigned int blktrace_seq __read_mostly = 1; +- +-/* Global reference count of probes */ +-static DEFINE_MUTEX(blk_probe_mutex); +-static atomic_t blk_probes_ref = ATOMIC_INIT(0); +- +-static int blk_register_tracepoints(void); +-static void blk_unregister_tracepoints(void); +- +-/* +- * Send out a notify message. 
+- */ +-static void trace_note(struct blk_trace *bt, pid_t pid, int action, +- const void *data, size_t len) +-{ +- struct blk_io_trace *t; +- +- t = relay_reserve(bt->rchan, sizeof(*t) + len); +- if (t) { +- const int cpu = smp_processor_id(); +- +- t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; +- t->time = ktime_to_ns(ktime_get()); +- t->device = bt->dev; +- t->action = action; +- t->pid = pid; +- t->cpu = cpu; +- t->pdu_len = len; +- memcpy((void *) t + sizeof(*t), data, len); +- } +-} +- +-/* +- * Send out a notify for this process, if we haven't done so since a trace +- * started +- */ +-static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk) +-{ +- tsk->btrace_seq = blktrace_seq; +- trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm)); +-} +- +-static void trace_note_time(struct blk_trace *bt) +-{ +- struct timespec now; +- unsigned long flags; +- u32 words[2]; +- +- getnstimeofday(&now); +- words[0] = now.tv_sec; +- words[1] = now.tv_nsec; +- +- local_irq_save(flags); +- trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words)); +- local_irq_restore(flags); +-} +- +-void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) +-{ +- int n; +- va_list args; +- unsigned long flags; +- char *buf; +- +- local_irq_save(flags); +- buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); +- va_start(args, fmt); +- n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args); +- va_end(args); +- +- trace_note(bt, 0, BLK_TN_MESSAGE, buf, n); +- local_irq_restore(flags); +-} +-EXPORT_SYMBOL_GPL(__trace_note_message); +- +-static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, +- pid_t pid) +-{ +- if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) +- return 1; +- if (sector < bt->start_lba || sector > bt->end_lba) +- return 1; +- if (bt->pid && pid != bt->pid) +- return 1; +- +- return 0; +-} +- +-/* +- * Data direction bit lookup +- */ +-static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK_TC_WRITE) }; +- +-/* The ilog2() calls fall out because they're constant */ +-#define MASK_TC_BIT(rw, __name) ( (rw & (1 << BIO_RW_ ## __name)) << \ +- (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name) ) +- +-/* +- * The worker for the various blk_add_trace*() types. Fills out a +- * blk_io_trace structure and places it in a per-cpu subbuffer. +- */ +-static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, +- int rw, u32 what, int error, int pdu_len, void *pdu_data) +-{ +- struct task_struct *tsk = current; +- struct blk_io_trace *t; +- unsigned long flags; +- unsigned long *sequence; +- pid_t pid; +- int cpu; +- +- if (unlikely(bt->trace_state != Blktrace_running)) +- return; +- +- what |= ddir_act[rw & WRITE]; +- what |= MASK_TC_BIT(rw, BARRIER); +- what |= MASK_TC_BIT(rw, SYNCIO); +- what |= MASK_TC_BIT(rw, AHEAD); +- what |= MASK_TC_BIT(rw, META); +- what |= MASK_TC_BIT(rw, DISCARD); +- +- pid = tsk->pid; +- if (unlikely(act_log_check(bt, what, sector, pid))) +- return; +- +- /* +- * A word about the locking here - we disable interrupts to reserve +- * some space in the relay per-cpu buffer, to prevent an irq +- * from coming in and stepping on our toes. 
+- */ +- local_irq_save(flags); +- +- if (unlikely(tsk->btrace_seq != blktrace_seq)) +- trace_note_tsk(bt, tsk); +- +- t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len); +- if (t) { +- cpu = smp_processor_id(); +- sequence = per_cpu_ptr(bt->sequence, cpu); +- +- t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; +- t->sequence = ++(*sequence); +- t->time = ktime_to_ns(ktime_get()); +- t->sector = sector; +- t->bytes = bytes; +- t->action = what; +- t->pid = pid; +- t->device = bt->dev; +- t->cpu = cpu; +- t->error = error; +- t->pdu_len = pdu_len; +- +- if (pdu_len) +- memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); +- } +- +- local_irq_restore(flags); +-} +- +-static struct dentry *blk_tree_root; +-static DEFINE_MUTEX(blk_tree_mutex); +- +-static void blk_trace_cleanup(struct blk_trace *bt) +-{ +- debugfs_remove(bt->msg_file); +- debugfs_remove(bt->dropped_file); +- relay_close(bt->rchan); +- free_percpu(bt->sequence); +- free_percpu(bt->msg_data); +- kfree(bt); +- mutex_lock(&blk_probe_mutex); +- if (atomic_dec_and_test(&blk_probes_ref)) +- blk_unregister_tracepoints(); +- mutex_unlock(&blk_probe_mutex); +-} +- +-int blk_trace_remove(struct request_queue *q) +-{ +- struct blk_trace *bt; +- +- bt = xchg(&q->blk_trace, NULL); +- if (!bt) +- return -EINVAL; +- +- if (bt->trace_state == Blktrace_setup || +- bt->trace_state == Blktrace_stopped) +- blk_trace_cleanup(bt); +- +- return 0; +-} +-EXPORT_SYMBOL_GPL(blk_trace_remove); +- +-static int blk_dropped_open(struct inode *inode, struct file *filp) +-{ +- filp->private_data = inode->i_private; +- +- return 0; +-} +- +-static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, +- size_t count, loff_t *ppos) +-{ +- struct blk_trace *bt = filp->private_data; +- char buf[16]; +- +- snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped)); +- +- return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf)); +-} +- +-static const struct file_operations blk_dropped_fops = { +- .owner = THIS_MODULE, +- .open = blk_dropped_open, +- .read = blk_dropped_read, +-}; +- +-static int blk_msg_open(struct inode *inode, struct file *filp) +-{ +- filp->private_data = inode->i_private; +- +- return 0; +-} +- +-static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, +- size_t count, loff_t *ppos) +-{ +- char *msg; +- struct blk_trace *bt; +- +- if (count > BLK_TN_MAX_MSG) +- return -EINVAL; +- +- msg = kmalloc(count, GFP_KERNEL); +- if (msg == NULL) +- return -ENOMEM; +- +- if (copy_from_user(msg, buffer, count)) { +- kfree(msg); +- return -EFAULT; +- } +- +- bt = filp->private_data; +- __trace_note_message(bt, "%s", msg); +- kfree(msg); +- +- return count; +-} +- +-static const struct file_operations blk_msg_fops = { +- .owner = THIS_MODULE, +- .open = blk_msg_open, +- .write = blk_msg_write, +-}; +- +-/* +- * Keep track of how many times we encountered a full subbuffer, to aid +- * the user space app in telling how many lost events there were. +- */ +-static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, +- void *prev_subbuf, size_t prev_padding) +-{ +- struct blk_trace *bt; +- +- if (!relay_buf_full(buf)) +- return 1; +- +- bt = buf->chan->private_data; +- atomic_inc(&bt->dropped); +- return 0; +-} +- +-static int blk_remove_buf_file_callback(struct dentry *dentry) +-{ +- struct dentry *parent = dentry->d_parent; +- debugfs_remove(dentry); +- +- /* +- * this will fail for all but the last file, but that is ok. 
what we +- * care about is the top level buts->name directory going away, when +- * the last trace file is gone. Then we don't have to rmdir() that +- * manually on trace stop, so it nicely solves the issue with +- * force killing of running traces. +- */ +- +- debugfs_remove(parent); +- return 0; +-} +- +-static struct dentry *blk_create_buf_file_callback(const char *filename, +- struct dentry *parent, +- int mode, +- struct rchan_buf *buf, +- int *is_global) +-{ +- return debugfs_create_file(filename, mode, parent, buf, +- &relay_file_operations); +-} +- +-static struct rchan_callbacks blk_relay_callbacks = { +- .subbuf_start = blk_subbuf_start_callback, +- .create_buf_file = blk_create_buf_file_callback, +- .remove_buf_file = blk_remove_buf_file_callback, +-}; +- +-/* +- * Setup everything required to start tracing +- */ +-int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, +- struct blk_user_trace_setup *buts) +-{ +- struct blk_trace *old_bt, *bt = NULL; +- struct dentry *dir = NULL; +- int ret, i; +- +- if (!buts->buf_size || !buts->buf_nr) +- return -EINVAL; +- +- strncpy(buts->name, name, BLKTRACE_BDEV_SIZE); +- buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0'; +- +- /* +- * some device names have larger paths - convert the slashes +- * to underscores for this to work as expected +- */ +- for (i = 0; i < strlen(buts->name); i++) +- if (buts->name[i] == '/') +- buts->name[i] = '_'; +- +- ret = -ENOMEM; +- bt = kzalloc(sizeof(*bt), GFP_KERNEL); +- if (!bt) +- goto err; +- +- bt->sequence = alloc_percpu(unsigned long); +- if (!bt->sequence) +- goto err; +- +- bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG); +- if (!bt->msg_data) +- goto err; +- +- ret = -ENOENT; +- +- if (!blk_tree_root) { +- blk_tree_root = debugfs_create_dir("block", NULL); +- if (!blk_tree_root) +- return -ENOMEM; +- } +- +- dir = debugfs_create_dir(buts->name, blk_tree_root); +- +- if (!dir) +- goto err; +- +- bt->dir = dir; +- bt->dev = dev; +- atomic_set(&bt->dropped, 0); +- +- ret = -EIO; +- bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops); +- if (!bt->dropped_file) +- goto err; +- +- bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops); +- if (!bt->msg_file) +- goto err; +- +- bt->rchan = relay_open("trace", dir, buts->buf_size, +- buts->buf_nr, &blk_relay_callbacks, bt); +- if (!bt->rchan) +- goto err; +- +- bt->act_mask = buts->act_mask; +- if (!bt->act_mask) +- bt->act_mask = (u16) -1; +- +- bt->start_lba = buts->start_lba; +- bt->end_lba = buts->end_lba; +- if (!bt->end_lba) +- bt->end_lba = -1ULL; +- +- bt->pid = buts->pid; +- bt->trace_state = Blktrace_setup; +- +- mutex_lock(&blk_probe_mutex); +- if (atomic_add_return(1, &blk_probes_ref) == 1) { +- ret = blk_register_tracepoints(); +- if (ret) +- goto probe_err; +- } +- mutex_unlock(&blk_probe_mutex); +- +- ret = -EBUSY; +- old_bt = xchg(&q->blk_trace, bt); +- if (old_bt) { +- (void) xchg(&q->blk_trace, old_bt); +- goto err; +- } +- +- return 0; +-probe_err: +- atomic_dec(&blk_probes_ref); +- mutex_unlock(&blk_probe_mutex); +-err: +- if (bt) { +- if (bt->msg_file) +- debugfs_remove(bt->msg_file); +- if (bt->dropped_file) +- debugfs_remove(bt->dropped_file); +- free_percpu(bt->sequence); +- free_percpu(bt->msg_data); +- if (bt->rchan) +- relay_close(bt->rchan); +- kfree(bt); +- } +- return ret; +-} +- +-int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, +- char __user *arg) +-{ +- struct blk_user_trace_setup buts; +- int ret; +- +- ret = copy_from_user(&buts, arg, 
sizeof(buts)); +- if (ret) +- return -EFAULT; +- +- ret = do_blk_trace_setup(q, name, dev, &buts); +- if (ret) +- return ret; +- +- if (copy_to_user(arg, &buts, sizeof(buts))) +- return -EFAULT; +- +- return 0; +-} +-EXPORT_SYMBOL_GPL(blk_trace_setup); +- +-int blk_trace_startstop(struct request_queue *q, int start) +-{ +- struct blk_trace *bt; +- int ret; +- +- if ((bt = q->blk_trace) == NULL) +- return -EINVAL; +- +- /* +- * For starting a trace, we can transition from a setup or stopped +- * trace. For stopping a trace, the state must be running +- */ +- ret = -EINVAL; +- if (start) { +- if (bt->trace_state == Blktrace_setup || +- bt->trace_state == Blktrace_stopped) { +- blktrace_seq++; +- smp_mb(); +- bt->trace_state = Blktrace_running; +- +- trace_note_time(bt); +- ret = 0; +- } +- } else { +- if (bt->trace_state == Blktrace_running) { +- bt->trace_state = Blktrace_stopped; +- relay_flush(bt->rchan); +- ret = 0; +- } +- } +- +- return ret; +-} +-EXPORT_SYMBOL_GPL(blk_trace_startstop); +- +-/** +- * blk_trace_ioctl: - handle the ioctls associated with tracing +- * @bdev: the block device +- * @cmd: the ioctl cmd +- * @arg: the argument data, if any +- * +- **/ +-int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) +-{ +- struct request_queue *q; +- int ret, start = 0; +- char b[BDEVNAME_SIZE]; +- +- q = bdev_get_queue(bdev); +- if (!q) +- return -ENXIO; +- +- mutex_lock(&bdev->bd_mutex); +- +- switch (cmd) { +- case BLKTRACESETUP: +- bdevname(bdev, b); +- ret = blk_trace_setup(q, b, bdev->bd_dev, arg); +- break; +- case BLKTRACESTART: +- start = 1; +- case BLKTRACESTOP: +- ret = blk_trace_startstop(q, start); +- break; +- case BLKTRACETEARDOWN: +- ret = blk_trace_remove(q); +- break; +- default: +- ret = -ENOTTY; +- break; +- } +- +- mutex_unlock(&bdev->bd_mutex); +- return ret; +-} +- +-/** +- * blk_trace_shutdown: - stop and cleanup trace structures +- * @q: the request queue associated with the device +- * +- **/ +-void blk_trace_shutdown(struct request_queue *q) +-{ +- if (q->blk_trace) { +- blk_trace_startstop(q, 0); +- blk_trace_remove(q); +- } +-} +- +-/* +- * blktrace probes +- */ +- +-/** +- * blk_add_trace_rq - Add a trace for a request oriented action +- * @q: queue the io is for +- * @rq: the source request +- * @what: the action +- * +- * Description: +- * Records an action against a request. Will log the bio offset + size. 
+- * +- **/ +-static void blk_add_trace_rq(struct request_queue *q, struct request *rq, +- u32 what) +-{ +- struct blk_trace *bt = q->blk_trace; +- int rw = rq->cmd_flags & 0x03; +- +- if (likely(!bt)) +- return; +- +- if (blk_discard_rq(rq)) +- rw |= (1 << BIO_RW_DISCARD); +- +- if (blk_pc_request(rq)) { +- what |= BLK_TC_ACT(BLK_TC_PC); +- __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, +- sizeof(rq->cmd), rq->cmd); +- } else { +- what |= BLK_TC_ACT(BLK_TC_FS); +- __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, +- rw, what, rq->errors, 0, NULL); +- } +-} +- +-static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq) +-{ +- blk_add_trace_rq(q, rq, BLK_TA_ABORT); +-} +- +-static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq) +-{ +- blk_add_trace_rq(q, rq, BLK_TA_INSERT); +-} +- +-static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq) +-{ +- blk_add_trace_rq(q, rq, BLK_TA_ISSUE); +-} +- +-static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq) +-{ +- blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); +-} +- +-static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq) +-{ +- blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); +-} +- +-/** +- * blk_add_trace_bio - Add a trace for a bio oriented action +- * @q: queue the io is for +- * @bio: the source bio +- * @what: the action +- * +- * Description: +- * Records an action against a bio. Will log the bio offset + size. +- * +- **/ +-static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, +- u32 what) +-{ +- struct blk_trace *bt = q->blk_trace; +- +- if (likely(!bt)) +- return; +- +- __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, +- !bio_flagged(bio, BIO_UPTODATE), 0, NULL); +-} +- +-static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio) +-{ +- blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); +-} +- +-static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio) +-{ +- blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); +-} +- +-static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio) +-{ +- blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); +-} +- +-static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio) +-{ +- blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); +-} +- +-static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio) +-{ +- blk_add_trace_bio(q, bio, BLK_TA_QUEUE); +-} +- +-static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw) +-{ +- if (bio) +- blk_add_trace_bio(q, bio, BLK_TA_GETRQ); +- else { +- struct blk_trace *bt = q->blk_trace; +- +- if (bt) +- __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL); +- } +-} +- +- +-static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw) +-{ +- if (bio) +- blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); +- else { +- struct blk_trace *bt = q->blk_trace; +- +- if (bt) +- __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL); +- } +-} +- +-static void blk_add_trace_plug(struct request_queue *q) +-{ +- struct blk_trace *bt = q->blk_trace; +- +- if (bt) +- __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); +-} +- +-static void blk_add_trace_unplug_io(struct request_queue *q) +-{ +- struct blk_trace *bt = q->blk_trace; +- +- if (bt) { +- unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; +- __be64 rpdu = cpu_to_be64(pdu); +- +- __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0, +- 
sizeof(rpdu), &rpdu); +- } +-} +- +-static void blk_add_trace_unplug_timer(struct request_queue *q) +-{ +- struct blk_trace *bt = q->blk_trace; +- +- if (bt) { +- unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; +- __be64 rpdu = cpu_to_be64(pdu); +- +- __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0, +- sizeof(rpdu), &rpdu); +- } +-} +- +-static void blk_add_trace_split(struct request_queue *q, struct bio *bio, +- unsigned int pdu) +-{ +- struct blk_trace *bt = q->blk_trace; +- +- if (bt) { +- __be64 rpdu = cpu_to_be64(pdu); +- +- __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, +- BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE), +- sizeof(rpdu), &rpdu); +- } +-} +- +-/** +- * blk_add_trace_remap - Add a trace for a remap operation +- * @q: queue the io is for +- * @bio: the source bio +- * @dev: target device +- * @from: source sector +- * @to: target sector +- * +- * Description: +- * Device mapper or raid target sometimes need to split a bio because +- * it spans a stripe (or similar). Add a trace for that action. +- * +- **/ +-static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, +- dev_t dev, sector_t from, sector_t to) +-{ +- struct blk_trace *bt = q->blk_trace; +- struct blk_io_trace_remap r; +- +- if (likely(!bt)) +- return; +- +- r.device = cpu_to_be32(dev); +- r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); +- r.sector = cpu_to_be64(to); +- +- __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, +- !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); +-} +- +-/** +- * blk_add_driver_data - Add binary message with driver-specific data +- * @q: queue the io is for +- * @rq: io request +- * @data: driver-specific data +- * @len: length of driver-specific data +- * +- * Description: +- * Some drivers might want to write driver-specific data per request. 
+- * +- **/ +-void blk_add_driver_data(struct request_queue *q, +- struct request *rq, +- void *data, size_t len) +-{ +- struct blk_trace *bt = q->blk_trace; +- +- if (likely(!bt)) +- return; +- +- if (blk_pc_request(rq)) +- __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, +- rq->errors, len, data); +- else +- __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, +- 0, BLK_TA_DRV_DATA, rq->errors, len, data); +-} +-EXPORT_SYMBOL_GPL(blk_add_driver_data); +- +-static int blk_register_tracepoints(void) +-{ +- int ret; +- +- ret = register_trace_block_rq_abort(blk_add_trace_rq_abort); +- WARN_ON(ret); +- ret = register_trace_block_rq_insert(blk_add_trace_rq_insert); +- WARN_ON(ret); +- ret = register_trace_block_rq_issue(blk_add_trace_rq_issue); +- WARN_ON(ret); +- ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue); +- WARN_ON(ret); +- ret = register_trace_block_rq_complete(blk_add_trace_rq_complete); +- WARN_ON(ret); +- ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce); +- WARN_ON(ret); +- ret = register_trace_block_bio_complete(blk_add_trace_bio_complete); +- WARN_ON(ret); +- ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); +- WARN_ON(ret); +- ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); +- WARN_ON(ret); +- ret = register_trace_block_bio_queue(blk_add_trace_bio_queue); +- WARN_ON(ret); +- ret = register_trace_block_getrq(blk_add_trace_getrq); +- WARN_ON(ret); +- ret = register_trace_block_sleeprq(blk_add_trace_sleeprq); +- WARN_ON(ret); +- ret = register_trace_block_plug(blk_add_trace_plug); +- WARN_ON(ret); +- ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer); +- WARN_ON(ret); +- ret = register_trace_block_unplug_io(blk_add_trace_unplug_io); +- WARN_ON(ret); +- ret = register_trace_block_split(blk_add_trace_split); +- WARN_ON(ret); +- ret = register_trace_block_remap(blk_add_trace_remap); +- WARN_ON(ret); +- return 0; +-} +- +-static void blk_unregister_tracepoints(void) +-{ +- unregister_trace_block_remap(blk_add_trace_remap); +- unregister_trace_block_split(blk_add_trace_split); +- unregister_trace_block_unplug_io(blk_add_trace_unplug_io); +- unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer); +- unregister_trace_block_plug(blk_add_trace_plug); +- unregister_trace_block_sleeprq(blk_add_trace_sleeprq); +- unregister_trace_block_getrq(blk_add_trace_getrq); +- unregister_trace_block_bio_queue(blk_add_trace_bio_queue); +- unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); +- unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); +- unregister_trace_block_bio_complete(blk_add_trace_bio_complete); +- unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce); +- unregister_trace_block_rq_complete(blk_add_trace_rq_complete); +- unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue); +- unregister_trace_block_rq_issue(blk_add_trace_rq_issue); +- unregister_trace_block_rq_insert(blk_add_trace_rq_insert); +- unregister_trace_block_rq_abort(blk_add_trace_rq_abort); +- +- tracepoint_synchronize_unregister(); +-} +Index: linux-2.6-tip/block/bsg.c +=================================================================== +--- linux-2.6-tip.orig/block/bsg.c ++++ linux-2.6-tip/block/bsg.c +@@ -249,7 +249,7 @@ bsg_map_hdr(struct bsg_device *bd, struc + { + struct request_queue *q = bd->queue; + struct request *rq, *next_rq = NULL; +- int ret, rw; ++ int ret, uninitialized_var(rw); + unsigned int dxfer_len; + void *dxferp = NULL; + +Index: 
linux-2.6-tip/block/cfq-iosched.c +=================================================================== +--- linux-2.6-tip.orig/block/cfq-iosched.c ++++ linux-2.6-tip/block/cfq-iosched.c +@@ -1539,6 +1539,7 @@ cfq_async_queue_prio(struct cfq_data *cf + return &cfqd->async_idle_cfqq; + default: + BUG(); ++ return NULL; + } + } + +Index: linux-2.6-tip/crypto/xor.c +=================================================================== +--- linux-2.6-tip.orig/crypto/xor.c ++++ linux-2.6-tip/crypto/xor.c +@@ -101,7 +101,12 @@ calibrate_xor_blocks(void) + void *b1, *b2; + struct xor_block_template *f, *fastest; + +- b1 = (void *) __get_free_pages(GFP_KERNEL, 2); ++ /* ++ * Note: Since the memory is not actually used for _anything_ but to ++ * test the XOR speed, we don't really want kmemcheck to warn about ++ * reading uninitialized bytes here. ++ */ ++ b1 = (void *) __get_free_pages(GFP_KERNEL | __GFP_NOTRACK, 2); + if (!b1) { + printk(KERN_WARNING "xor: Yikes! No memory available.\n"); + return -ENOMEM; +Index: linux-2.6-tip/drivers/acpi/acpica/exprep.c +=================================================================== +--- linux-2.6-tip.orig/drivers/acpi/acpica/exprep.c ++++ linux-2.6-tip/drivers/acpi/acpica/exprep.c +@@ -320,7 +320,7 @@ acpi_ex_prep_common_field_object(union a + u32 field_bit_position, u32 field_bit_length) + { + u32 access_bit_width; +- u32 byte_alignment; ++ u32 uninitialized_var(byte_alignment); + u32 nearest_byte_address; + + ACPI_FUNCTION_TRACE(ex_prep_common_field_object); +Index: linux-2.6-tip/drivers/acpi/acpica/nsxfeval.c +=================================================================== +--- linux-2.6-tip.orig/drivers/acpi/acpica/nsxfeval.c ++++ linux-2.6-tip/drivers/acpi/acpica/nsxfeval.c +@@ -469,6 +469,9 @@ acpi_walk_namespace(acpi_object_type typ + + ACPI_FUNCTION_TRACE(acpi_walk_namespace); + ++ if (acpi_disabled) ++ return_ACPI_STATUS(AE_NO_NAMESPACE); ++ + /* Parameter validation */ + + if ((type > ACPI_TYPE_LOCAL_MAX) || (!max_depth) || (!user_function)) { +Index: linux-2.6-tip/drivers/acpi/acpica/tbxface.c +=================================================================== +--- linux-2.6-tip.orig/drivers/acpi/acpica/tbxface.c ++++ linux-2.6-tip/drivers/acpi/acpica/tbxface.c +@@ -365,7 +365,7 @@ ACPI_EXPORT_SYMBOL(acpi_unload_table_id) + + /******************************************************************************* + * +- * FUNCTION: acpi_get_table ++ * FUNCTION: acpi_get_table_with_size + * + * PARAMETERS: Signature - ACPI signature of needed table + * Instance - Which instance (for SSDTs) +@@ -377,8 +377,9 @@ ACPI_EXPORT_SYMBOL(acpi_unload_table_id) + * + *****************************************************************************/ + acpi_status +-acpi_get_table(char *signature, +- u32 instance, struct acpi_table_header **out_table) ++acpi_get_table_with_size(char *signature, ++ u32 instance, struct acpi_table_header **out_table, ++ acpi_size *tbl_size) + { + u32 i; + u32 j; +@@ -408,6 +409,7 @@ acpi_get_table(char *signature, + acpi_tb_verify_table(&acpi_gbl_root_table_list.tables[i]); + if (ACPI_SUCCESS(status)) { + *out_table = acpi_gbl_root_table_list.tables[i].pointer; ++ *tbl_size = acpi_gbl_root_table_list.tables[i].length; + } + + if (!acpi_gbl_permanent_mmap) { +@@ -420,6 +422,15 @@ acpi_get_table(char *signature, + return (AE_NOT_FOUND); + } + ++acpi_status ++acpi_get_table(char *signature, ++ u32 instance, struct acpi_table_header **out_table) ++{ ++ acpi_size tbl_size; ++ ++ return acpi_get_table_with_size(signature, ++ instance, 
out_table, &tbl_size); ++} + ACPI_EXPORT_SYMBOL(acpi_get_table) + + /******************************************************************************* +Index: linux-2.6-tip/drivers/acpi/osl.c +=================================================================== +--- linux-2.6-tip.orig/drivers/acpi/osl.c ++++ linux-2.6-tip/drivers/acpi/osl.c +@@ -272,14 +272,21 @@ acpi_os_map_memory(acpi_physical_address + } + EXPORT_SYMBOL_GPL(acpi_os_map_memory); + +-void acpi_os_unmap_memory(void __iomem * virt, acpi_size size) ++void __ref acpi_os_unmap_memory(void __iomem *virt, acpi_size size) + { +- if (acpi_gbl_permanent_mmap) { ++ if (acpi_gbl_permanent_mmap) + iounmap(virt); +- } ++ else ++ __acpi_unmap_table(virt, size); + } + EXPORT_SYMBOL_GPL(acpi_os_unmap_memory); + ++void __init early_acpi_os_unmap_memory(void __iomem *virt, acpi_size size) ++{ ++ if (!acpi_gbl_permanent_mmap) ++ __acpi_unmap_table(virt, size); ++} ++ + #ifdef ACPI_FUTURE_USAGE + acpi_status + acpi_os_get_physical_address(void *virt, acpi_physical_address * phys) +@@ -792,12 +799,12 @@ void acpi_os_delete_lock(acpi_spinlock h + acpi_status + acpi_os_create_semaphore(u32 max_units, u32 initial_units, acpi_handle * handle) + { +- struct semaphore *sem = NULL; ++ struct compat_semaphore *sem = NULL; + +- sem = acpi_os_allocate(sizeof(struct semaphore)); ++ sem = acpi_os_allocate(sizeof(struct compat_semaphore)); + if (!sem) + return AE_NO_MEMORY; +- memset(sem, 0, sizeof(struct semaphore)); ++ memset(sem, 0, sizeof(struct compat_semaphore)); + + sema_init(sem, initial_units); + +@@ -818,7 +825,7 @@ acpi_os_create_semaphore(u32 max_units, + + acpi_status acpi_os_delete_semaphore(acpi_handle handle) + { +- struct semaphore *sem = (struct semaphore *)handle; ++ struct compat_semaphore *sem = (struct compat_semaphore *)handle; + + if (!sem) + return AE_BAD_PARAMETER; +@@ -838,7 +845,7 @@ acpi_status acpi_os_delete_semaphore(acp + acpi_status acpi_os_wait_semaphore(acpi_handle handle, u32 units, u16 timeout) + { + acpi_status status = AE_OK; +- struct semaphore *sem = (struct semaphore *)handle; ++ struct compat_semaphore *sem = (struct compat_semaphore *)handle; + long jiffies; + int ret = 0; + +@@ -879,7 +886,7 @@ acpi_status acpi_os_wait_semaphore(acpi_ + */ + acpi_status acpi_os_signal_semaphore(acpi_handle handle, u32 units) + { +- struct semaphore *sem = (struct semaphore *)handle; ++ struct compat_semaphore *sem = (struct compat_semaphore *)handle; + + if (!sem || (units < 1)) + return AE_BAD_PARAMETER; +Index: linux-2.6-tip/drivers/acpi/processor_idle.c +=================================================================== +--- linux-2.6-tip.orig/drivers/acpi/processor_idle.c ++++ linux-2.6-tip/drivers/acpi/processor_idle.c +@@ -828,8 +828,11 @@ static int acpi_idle_bm_check(void) + */ + static inline void acpi_idle_do_entry(struct acpi_processor_cx *cx) + { ++ u64 perf_flags; ++ + /* Don't trace irqs off for idle */ + stop_critical_timings(); ++ perf_flags = hw_perf_save_disable(); + if (cx->entry_method == ACPI_CSTATE_FFH) { + /* Call into architectural FFH based C-state */ + acpi_processor_ffh_cstate_enter(cx); +@@ -844,6 +847,7 @@ static inline void acpi_idle_do_entry(st + gets asserted in time to freeze execution properly. 
*/ + unused = inl(acpi_gbl_FADT.xpm_timer_block.address); + } ++ hw_perf_restore(perf_flags); + start_critical_timings(); + } + +@@ -958,7 +962,7 @@ static int acpi_idle_enter_simple(struct + } + + static int c3_cpu_count; +-static DEFINE_SPINLOCK(c3_lock); ++static DEFINE_RAW_SPINLOCK(c3_lock); + + /** + * acpi_idle_enter_bm - enters C3 with proper BM handling +Index: linux-2.6-tip/drivers/acpi/processor_perflib.c +=================================================================== +--- linux-2.6-tip.orig/drivers/acpi/processor_perflib.c ++++ linux-2.6-tip/drivers/acpi/processor_perflib.c +@@ -516,12 +516,12 @@ int acpi_processor_preregister_performan + continue; + } + +- if (!performance || !percpu_ptr(performance, i)) { ++ if (!performance || !per_cpu_ptr(performance, i)) { + retval = -EINVAL; + continue; + } + +- pr->performance = percpu_ptr(performance, i); ++ pr->performance = per_cpu_ptr(performance, i); + cpumask_set_cpu(i, pr->performance->shared_cpu_map); + if (acpi_processor_get_psd(pr)) { + retval = -EINVAL; +Index: linux-2.6-tip/drivers/acpi/sbs.c +=================================================================== +--- linux-2.6-tip.orig/drivers/acpi/sbs.c ++++ linux-2.6-tip/drivers/acpi/sbs.c +@@ -389,6 +389,8 @@ static int acpi_battery_get_state(struct + return result; + } + ++#if defined(CONFIG_ACPI_SYSFS_POWER) || defined(CONFIG_ACPI_PROCFS_POWER) ++ + static int acpi_battery_get_alarm(struct acpi_battery *battery) + { + return acpi_smbus_read(battery->sbs->hc, SMBUS_READ_WORD, +@@ -425,6 +427,8 @@ static int acpi_battery_set_alarm(struct + return ret; + } + ++#endif ++ + static int acpi_ac_get_present(struct acpi_sbs *sbs) + { + int result; +@@ -816,7 +820,10 @@ static int acpi_battery_add(struct acpi_ + + static void acpi_battery_remove(struct acpi_sbs *sbs, int id) + { ++#if defined(CONFIG_ACPI_SYSFS_POWER) || defined(CONFIG_ACPI_PROCFS_POWER) + struct acpi_battery *battery = &sbs->battery[id]; ++#endif ++ + #ifdef CONFIG_ACPI_SYSFS_POWER + if (battery->bat.dev) { + if (battery->have_sysfs_alarm) +Index: linux-2.6-tip/drivers/acpi/sleep.c +=================================================================== +--- linux-2.6-tip.orig/drivers/acpi/sleep.c ++++ linux-2.6-tip/drivers/acpi/sleep.c +@@ -24,6 +24,7 @@ + #include "sleep.h" + + u8 sleep_states[ACPI_S_STATE_COUNT]; ++static u32 acpi_target_sleep_state = ACPI_STATE_S0; + + static void acpi_sleep_tts_switch(u32 acpi_state) + { +@@ -77,7 +78,6 @@ static int acpi_sleep_prepare(u32 acpi_s + } + + #ifdef CONFIG_ACPI_SLEEP +-static u32 acpi_target_sleep_state = ACPI_STATE_S0; + /* + * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the + * user to request that behavior by using the 'acpi_old_suspend_ordering' +Index: linux-2.6-tip/drivers/acpi/tables.c +=================================================================== +--- linux-2.6-tip.orig/drivers/acpi/tables.c ++++ linux-2.6-tip/drivers/acpi/tables.c +@@ -181,14 +181,15 @@ acpi_table_parse_entries(char *id, + struct acpi_subtable_header *entry; + unsigned int count = 0; + unsigned long table_end; ++ acpi_size tbl_size; + + if (!handler) + return -EINVAL; + + if (strncmp(id, ACPI_SIG_MADT, 4) == 0) +- acpi_get_table(id, acpi_apic_instance, &table_header); ++ acpi_get_table_with_size(id, acpi_apic_instance, &table_header, &tbl_size); + else +- acpi_get_table(id, 0, &table_header); ++ acpi_get_table_with_size(id, 0, &table_header, &tbl_size); + + if (!table_header) { + printk(KERN_WARNING PREFIX "%4.4s not present\n", id); +@@ -206,8 +207,10 @@ 
acpi_table_parse_entries(char *id, + table_end) { + if (entry->type == entry_id + && (!max_entries || count++ < max_entries)) +- if (handler(entry, table_end)) ++ if (handler(entry, table_end)) { ++ early_acpi_os_unmap_memory((char *)table_header, tbl_size); + return -EINVAL; ++ } + + entry = (struct acpi_subtable_header *) + ((unsigned long)entry + entry->length); +@@ -217,6 +220,7 @@ acpi_table_parse_entries(char *id, + "%i found\n", id, entry_id, count - max_entries, count); + } + ++ early_acpi_os_unmap_memory((char *)table_header, tbl_size); + return count; + } + +@@ -241,17 +245,19 @@ acpi_table_parse_madt(enum acpi_madt_typ + int __init acpi_table_parse(char *id, acpi_table_handler handler) + { + struct acpi_table_header *table = NULL; ++ acpi_size tbl_size; + + if (!handler) + return -EINVAL; + + if (strncmp(id, ACPI_SIG_MADT, 4) == 0) +- acpi_get_table(id, acpi_apic_instance, &table); ++ acpi_get_table_with_size(id, acpi_apic_instance, &table, &tbl_size); + else +- acpi_get_table(id, 0, &table); ++ acpi_get_table_with_size(id, 0, &table, &tbl_size); + + if (table) { + handler(table); ++ early_acpi_os_unmap_memory(table, tbl_size); + return 0; + } else + return 1; +@@ -265,8 +271,9 @@ int __init acpi_table_parse(char *id, ac + static void __init check_multiple_madt(void) + { + struct acpi_table_header *table = NULL; ++ acpi_size tbl_size; + +- acpi_get_table(ACPI_SIG_MADT, 2, &table); ++ acpi_get_table_with_size(ACPI_SIG_MADT, 2, &table, &tbl_size); + if (table) { + printk(KERN_WARNING PREFIX + "BIOS bug: multiple APIC/MADT found," +@@ -275,6 +282,7 @@ static void __init check_multiple_madt(v + "If \"acpi_apic_instance=%d\" works better, " + "notify linux-acpi@vger.kernel.org\n", + acpi_apic_instance ? 0 : 2); ++ early_acpi_os_unmap_memory(table, tbl_size); + + } else + acpi_apic_instance = 0; +Index: linux-2.6-tip/drivers/ata/libata-core.c +=================================================================== +--- linux-2.6-tip.orig/drivers/ata/libata-core.c ++++ linux-2.6-tip/drivers/ata/libata-core.c +@@ -1484,7 +1484,7 @@ static int ata_hpa_resize(struct ata_dev + struct ata_eh_context *ehc = &dev->link->eh_context; + int print_info = ehc->i.flags & ATA_EHI_PRINTINFO; + u64 sectors = ata_id_n_sectors(dev->id); +- u64 native_sectors; ++ u64 uninitialized_var(native_sectors); + int rc; + + /* do we need to do it? 
*/ +Index: linux-2.6-tip/drivers/ata/libata-scsi.c +=================================================================== +--- linux-2.6-tip.orig/drivers/ata/libata-scsi.c ++++ linux-2.6-tip/drivers/ata/libata-scsi.c +@@ -3247,7 +3247,7 @@ void ata_scsi_scan_host(struct ata_port + int tries = 5; + struct ata_device *last_failed_dev = NULL; + struct ata_link *link; +- struct ata_device *dev; ++ struct ata_device *uninitialized_var(dev); + + if (ap->flags & ATA_FLAG_DISABLED) + return; +Index: linux-2.6-tip/drivers/ata/pata_atiixp.c +=================================================================== +--- linux-2.6-tip.orig/drivers/ata/pata_atiixp.c ++++ linux-2.6-tip/drivers/ata/pata_atiixp.c +@@ -140,7 +140,7 @@ static void atiixp_set_dmamode(struct at + wanted_pio = 3; + else if (adev->dma_mode == XFER_MW_DMA_0) + wanted_pio = 0; +- else BUG(); ++ else panic("atiixp_set_dmamode: unknown DMA mode!"); + + if (adev->pio_mode != wanted_pio) + atiixp_set_pio_timing(ap, adev, wanted_pio); +Index: linux-2.6-tip/drivers/ata/sata_via.c +=================================================================== +--- linux-2.6-tip.orig/drivers/ata/sata_via.c ++++ linux-2.6-tip/drivers/ata/sata_via.c +@@ -566,7 +566,7 @@ static int svia_init_one(struct pci_dev + static int printed_version; + unsigned int i; + int rc; +- struct ata_host *host; ++ struct ata_host *uninitialized_var(host); + int board_id = (int) ent->driver_data; + const unsigned *bar_sizes; + +Index: linux-2.6-tip/drivers/atm/ambassador.c +=================================================================== +--- linux-2.6-tip.orig/drivers/atm/ambassador.c ++++ linux-2.6-tip/drivers/atm/ambassador.c +@@ -2097,7 +2097,7 @@ static int __devinit amb_init (amb_dev * + { + loader_block lb; + +- u32 version; ++ u32 version = -1; + + if (amb_reset (dev, 1)) { + PRINTK (KERN_ERR, "card reset failed!"); +Index: linux-2.6-tip/drivers/atm/horizon.c +=================================================================== +--- linux-2.6-tip.orig/drivers/atm/horizon.c ++++ linux-2.6-tip/drivers/atm/horizon.c +@@ -2131,7 +2131,7 @@ static int atm_pcr_check (struct atm_tra + static int hrz_open (struct atm_vcc *atm_vcc) + { + int error; +- u16 channel; ++ u16 uninitialized_var(channel); + + struct atm_qos * qos; + struct atm_trafprm * txtp; +Index: linux-2.6-tip/drivers/base/cpu.c +=================================================================== +--- linux-2.6-tip.orig/drivers/base/cpu.c ++++ linux-2.6-tip/drivers/base/cpu.c +@@ -107,7 +107,7 @@ static SYSDEV_ATTR(crash_notes, 0400, sh + /* + * Print cpu online, possible, present, and system maps + */ +-static ssize_t print_cpus_map(char *buf, cpumask_t *map) ++static ssize_t print_cpus_map(char *buf, const struct cpumask *map) + { + int n = cpulist_scnprintf(buf, PAGE_SIZE-2, map); + +Index: linux-2.6-tip/drivers/base/iommu.c +=================================================================== +--- linux-2.6-tip.orig/drivers/base/iommu.c ++++ linux-2.6-tip/drivers/base/iommu.c +@@ -31,7 +31,7 @@ void register_iommu(struct iommu_ops *op + iommu_ops = ops; + } + +-bool iommu_found() ++bool iommu_found(void) + { + return iommu_ops != NULL; + } +Index: linux-2.6-tip/drivers/base/node.c +=================================================================== +--- linux-2.6-tip.orig/drivers/base/node.c ++++ linux-2.6-tip/drivers/base/node.c +@@ -24,7 +24,7 @@ static struct sysdev_class node_class = + static ssize_t node_read_cpumap(struct sys_device *dev, int type, char *buf) + { + struct node *node_dev = to_node(dev); +- 
node_to_cpumask_ptr(mask, node_dev->sysdev.id); ++ const struct cpumask *mask = cpumask_of_node(node_dev->sysdev.id); + int len; + + /* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */ +Index: linux-2.6-tip/drivers/base/platform.c +=================================================================== +--- linux-2.6-tip.orig/drivers/base/platform.c ++++ linux-2.6-tip/drivers/base/platform.c +@@ -611,7 +611,8 @@ static int platform_match(struct device + + #ifdef CONFIG_PM_SLEEP + +-static int platform_legacy_suspend(struct device *dev, pm_message_t mesg) ++static inline int ++platform_legacy_suspend(struct device *dev, pm_message_t mesg) + { + int ret = 0; + +@@ -621,7 +622,8 @@ static int platform_legacy_suspend(struc + return ret; + } + +-static int platform_legacy_suspend_late(struct device *dev, pm_message_t mesg) ++static inline int ++platform_legacy_suspend_late(struct device *dev, pm_message_t mesg) + { + struct platform_driver *drv = to_platform_driver(dev->driver); + struct platform_device *pdev; +@@ -634,7 +636,7 @@ static int platform_legacy_suspend_late( + return ret; + } + +-static int platform_legacy_resume_early(struct device *dev) ++static inline int platform_legacy_resume_early(struct device *dev) + { + struct platform_driver *drv = to_platform_driver(dev->driver); + struct platform_device *pdev; +@@ -647,7 +649,7 @@ static int platform_legacy_resume_early( + return ret; + } + +-static int platform_legacy_resume(struct device *dev) ++static inline int platform_legacy_resume(struct device *dev) + { + int ret = 0; + +Index: linux-2.6-tip/drivers/base/topology.c +=================================================================== +--- linux-2.6-tip.orig/drivers/base/topology.c ++++ linux-2.6-tip/drivers/base/topology.c +@@ -31,7 +31,10 @@ + #include + #include + +-#define define_one_ro(_name) \ ++#define define_one_ro_named(_name, _func) \ ++static SYSDEV_ATTR(_name, 0444, _func, NULL) ++ ++#define define_one_ro(_name) \ + static SYSDEV_ATTR(_name, 0444, show_##_name, NULL) + + #define define_id_show_func(name) \ +@@ -42,8 +45,8 @@ static ssize_t show_##name(struct sys_de + return sprintf(buf, "%d\n", topology_##name(cpu)); \ + } + +-#if defined(topology_thread_siblings) || defined(topology_core_siblings) +-static ssize_t show_cpumap(int type, cpumask_t *mask, char *buf) ++#if defined(topology_thread_cpumask) || defined(topology_core_cpumask) ++static ssize_t show_cpumap(int type, const struct cpumask *mask, char *buf) + { + ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf; + int n = 0; +@@ -65,7 +68,7 @@ static ssize_t show_##name(struct sys_de + struct sysdev_attribute *attr, char *buf) \ + { \ + unsigned int cpu = dev->id; \ +- return show_cpumap(0, &(topology_##name(cpu)), buf); \ ++ return show_cpumap(0, topology_##name(cpu), buf); \ + } + + #define define_siblings_show_list(name) \ +@@ -74,7 +77,7 @@ static ssize_t show_##name##_list(struct + char *buf) \ + { \ + unsigned int cpu = dev->id; \ +- return show_cpumap(1, &(topology_##name(cpu)), buf); \ ++ return show_cpumap(1, topology_##name(cpu), buf); \ + } + + #else +@@ -82,9 +85,7 @@ static ssize_t show_##name##_list(struct + static ssize_t show_##name(struct sys_device *dev, \ + struct sysdev_attribute *attr, char *buf) \ + { \ +- unsigned int cpu = dev->id; \ +- cpumask_t mask = topology_##name(cpu); \ +- return show_cpumap(0, &mask, buf); \ ++ return show_cpumap(0, topology_##name(dev->id), buf); \ + } + + #define define_siblings_show_list(name) \ +@@ -92,9 +93,7 @@ static ssize_t 
show_##name##_list(struct + struct sysdev_attribute *attr, \ + char *buf) \ + { \ +- unsigned int cpu = dev->id; \ +- cpumask_t mask = topology_##name(cpu); \ +- return show_cpumap(1, &mask, buf); \ ++ return show_cpumap(1, topology_##name(dev->id), buf); \ + } + #endif + +@@ -107,13 +106,13 @@ define_one_ro(physical_package_id); + define_id_show_func(core_id); + define_one_ro(core_id); + +-define_siblings_show_func(thread_siblings); +-define_one_ro(thread_siblings); +-define_one_ro(thread_siblings_list); +- +-define_siblings_show_func(core_siblings); +-define_one_ro(core_siblings); +-define_one_ro(core_siblings_list); ++define_siblings_show_func(thread_cpumask); ++define_one_ro_named(thread_siblings, show_thread_cpumask); ++define_one_ro_named(thread_siblings_list, show_thread_cpumask_list); ++ ++define_siblings_show_func(core_cpumask); ++define_one_ro_named(core_siblings, show_core_cpumask); ++define_one_ro_named(core_siblings_list, show_core_cpumask_list); + + static struct attribute *default_attrs[] = { + &attr_physical_package_id.attr, +Index: linux-2.6-tip/drivers/block/DAC960.c +=================================================================== +--- linux-2.6-tip.orig/drivers/block/DAC960.c ++++ linux-2.6-tip/drivers/block/DAC960.c +@@ -6646,7 +6646,8 @@ static long DAC960_gam_ioctl(struct file + (DAC960_ControllerInfo_T __user *) Argument; + DAC960_ControllerInfo_T ControllerInfo; + DAC960_Controller_T *Controller; +- int ControllerNumber; ++ int uninitialized_var(ControllerNumber); ++ + if (UserSpaceControllerInfo == NULL) + ErrorCode = -EINVAL; + else ErrorCode = get_user(ControllerNumber, +Index: linux-2.6-tip/drivers/char/ip2/ip2main.c +=================================================================== +--- linux-2.6-tip.orig/drivers/char/ip2/ip2main.c ++++ linux-2.6-tip/drivers/char/ip2/ip2main.c +@@ -3202,4 +3202,4 @@ static struct pci_device_id ip2main_pci_ + { } + }; + +-MODULE_DEVICE_TABLE(pci, ip2main_pci_tbl); ++MODULE_STATIC_DEVICE_TABLE(pci, ip2main_pci_tbl); +Index: linux-2.6-tip/drivers/char/ipmi/ipmi_msghandler.c +=================================================================== +--- linux-2.6-tip.orig/drivers/char/ipmi/ipmi_msghandler.c ++++ linux-2.6-tip/drivers/char/ipmi/ipmi_msghandler.c +@@ -1796,7 +1796,8 @@ int ipmi_request_settime(ipmi_user_t + int retries, + unsigned int retry_time_ms) + { +- unsigned char saddr, lun; ++ unsigned char uninitialized_var(saddr), ++ uninitialized_var(lun); + int rv; + + if (!user) +@@ -1828,7 +1829,8 @@ int ipmi_request_supply_msgs(ipmi_user_t + struct ipmi_recv_msg *supplied_recv, + int priority) + { +- unsigned char saddr, lun; ++ unsigned char uninitialized_var(saddr), ++ uninitialized_var(lun); + int rv; + + if (!user) +Index: linux-2.6-tip/drivers/char/isicom.c +=================================================================== +--- linux-2.6-tip.orig/drivers/char/isicom.c ++++ linux-2.6-tip/drivers/char/isicom.c +@@ -1585,7 +1585,7 @@ static unsigned int card_count; + static int __devinit isicom_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) + { +- unsigned int signature, index; ++ unsigned int uninitialized_var(signature), index; + int retval = -EPERM; + struct isi_board *board = NULL; + +Index: linux-2.6-tip/drivers/char/random.c +=================================================================== +--- linux-2.6-tip.orig/drivers/char/random.c ++++ linux-2.6-tip/drivers/char/random.c +@@ -241,6 +241,10 @@ + #include + #include + ++#ifdef CONFIG_GENERIC_HARDIRQS ++# include ++#endif ++ + #include + 
#include + #include +@@ -558,7 +562,7 @@ struct timer_rand_state { + unsigned dont_count_entropy:1; + }; + +-#ifndef CONFIG_SPARSE_IRQ ++#ifndef CONFIG_GENERIC_HARDIRQS + + static struct timer_rand_state *irq_timer_state[NR_IRQS]; + +@@ -619,8 +623,11 @@ static void add_timer_randomness(struct + preempt_disable(); + /* if over the trickle threshold, use only 1 in 4096 samples */ + if (input_pool.entropy_count > trickle_thresh && +- (__get_cpu_var(trickle_count)++ & 0xfff)) +- goto out; ++ (__get_cpu_var(trickle_count)++ & 0xfff)) { ++ preempt_enable(); ++ return; ++ } ++ preempt_enable(); + + sample.jiffies = jiffies; + sample.cycles = get_cycles(); +@@ -662,8 +669,6 @@ static void add_timer_randomness(struct + credit_entropy_bits(&input_pool, + min_t(int, fls(delta>>1), 11)); + } +-out: +- preempt_enable(); + } + + void add_input_randomness(unsigned int type, unsigned int code, +Index: linux-2.6-tip/drivers/char/rocket.c +=================================================================== +--- linux-2.6-tip.orig/drivers/char/rocket.c ++++ linux-2.6-tip/drivers/char/rocket.c +@@ -150,12 +150,14 @@ static Word_t aiop_intr_bits[AIOP_CTL_SI + AIOP_INTR_BIT_3 + }; + ++#ifdef CONFIG_PCI + static Word_t upci_aiop_intr_bits[AIOP_CTL_SIZE] = { + UPCI_AIOP_INTR_BIT_0, + UPCI_AIOP_INTR_BIT_1, + UPCI_AIOP_INTR_BIT_2, + UPCI_AIOP_INTR_BIT_3 + }; ++#endif + + static Byte_t RData[RDATASIZE] = { + 0x00, 0x09, 0xf6, 0x82, +@@ -227,7 +229,6 @@ static unsigned long nextLineNumber; + static int __init init_ISA(int i); + static void rp_wait_until_sent(struct tty_struct *tty, int timeout); + static void rp_flush_buffer(struct tty_struct *tty); +-static void rmSpeakerReset(CONTROLLER_T * CtlP, unsigned long model); + static unsigned char GetLineNumber(int ctrl, int aiop, int ch); + static unsigned char SetLineNumber(int ctrl, int aiop, int ch); + static void rp_start(struct tty_struct *tty); +@@ -241,11 +242,14 @@ static void sDisInterrupts(CHANNEL_T * C + static void sModemReset(CONTROLLER_T * CtlP, int chan, int on); + static void sPCIModemReset(CONTROLLER_T * CtlP, int chan, int on); + static int sWriteTxPrioByte(CHANNEL_T * ChP, Byte_t Data); ++#ifdef CONFIG_PCI ++static void rmSpeakerReset(CONTROLLER_T * CtlP, unsigned long model); + static int sPCIInitController(CONTROLLER_T * CtlP, int CtlNum, + ByteIO_t * AiopIOList, int AiopIOListSize, + WordIO_t ConfigIO, int IRQNum, Byte_t Frequency, + int PeriodicOnly, int altChanRingIndicator, + int UPCIRingInd); ++#endif + static int sInitController(CONTROLLER_T * CtlP, int CtlNum, ByteIO_t MudbacIO, + ByteIO_t * AiopIOList, int AiopIOListSize, + int IRQNum, Byte_t Frequency, int PeriodicOnly); +@@ -1751,7 +1755,7 @@ static struct pci_device_id __devinitdat + { PCI_DEVICE(PCI_VENDOR_ID_RP, PCI_ANY_ID) }, + { } + }; +-MODULE_DEVICE_TABLE(pci, rocket_pci_ids); ++MODULE_STATIC_DEVICE_TABLE(pci, rocket_pci_ids); + + /* + * Called when a PCI card is found. 
Retrieves and stores model information, +@@ -2533,6 +2537,7 @@ static int sInitController(CONTROLLER_T + return (CtlP->NumAiop); + } + ++#ifdef CONFIG_PCI + /*************************************************************************** + Function: sPCIInitController + Purpose: Initialization of controller global registers and controller +@@ -2652,6 +2657,7 @@ static int sPCIInitController(CONTROLLER + else + return (CtlP->NumAiop); + } ++#endif /* CONFIG_PCI */ + + /*************************************************************************** + Function: sReadAiopID +@@ -3142,6 +3148,7 @@ static void sPCIModemReset(CONTROLLER_T + sOutB(addr + chan, 0); /* apply or remove reset */ + } + ++#ifdef CONFIG_PCI + /* Resets the speaker controller on RocketModem II and III devices */ + static void rmSpeakerReset(CONTROLLER_T * CtlP, unsigned long model) + { +@@ -3160,6 +3167,7 @@ static void rmSpeakerReset(CONTROLLER_T + sOutB(addr, 0); + } + } ++#endif /* CONFIG_PCI */ + + /* Returns the line number given the controller (board), aiop and channel number */ + static unsigned char GetLineNumber(int ctrl, int aiop, int ch) +Index: linux-2.6-tip/drivers/char/rtc.c +=================================================================== +--- linux-2.6-tip.orig/drivers/char/rtc.c ++++ linux-2.6-tip/drivers/char/rtc.c +@@ -188,7 +188,9 @@ static int rtc_proc_open(struct inode *i + * timer (but you would need to have an awful timing before you'd trip on it) + */ + static unsigned long rtc_status; /* bitmapped status byte. */ ++#if defined(RTC_IRQ) || defined(CONFIG_PROC_FS) + static unsigned long rtc_freq; /* Current periodic IRQ rate */ ++#endif + static unsigned long rtc_irq_data; /* our output to the world */ + static unsigned long rtc_max_user_freq = 64; /* > this, need CAP_SYS_RESOURCE */ + +@@ -1074,7 +1076,9 @@ no_irq: + #endif + + #if defined(__alpha__) || defined(__mips__) ++#ifdef CONFIG_PROC_FS + rtc_freq = HZ; ++#endif + + /* Each operating system on an Alpha uses its own epoch. + Let's try to guess which one we are using now. 
*/ +@@ -1197,10 +1201,12 @@ static void rtc_dropped_irq(unsigned lon + + spin_unlock_irq(&rtc_lock); + ++#ifndef CONFIG_PREEMPT_RT + if (printk_ratelimit()) { + printk(KERN_WARNING "rtc: lost some interrupts at %ldHz.\n", + freq); + } ++#endif + + /* Now we have new data */ + wake_up_interruptible(&rtc_wait); +Index: linux-2.6-tip/drivers/char/specialix.c +=================================================================== +--- linux-2.6-tip.orig/drivers/char/specialix.c ++++ linux-2.6-tip/drivers/char/specialix.c +@@ -2359,7 +2359,7 @@ static struct pci_device_id specialx_pci + { PCI_DEVICE(PCI_VENDOR_ID_SPECIALIX, PCI_DEVICE_ID_SPECIALIX_IO8) }, + { } + }; +-MODULE_DEVICE_TABLE(pci, specialx_pci_tbl); ++MODULE_STATIC_DEVICE_TABLE(pci, specialx_pci_tbl); + + module_init(specialix_init_module); + module_exit(specialix_exit_module); +Index: linux-2.6-tip/drivers/char/sysrq.c +=================================================================== +--- linux-2.6-tip.orig/drivers/char/sysrq.c ++++ linux-2.6-tip/drivers/char/sysrq.c +@@ -25,6 +25,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -35,7 +36,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + +@@ -244,6 +245,7 @@ static void sysrq_handle_showregs(int ke + struct pt_regs *regs = get_irq_regs(); + if (regs) + show_regs(regs); ++ perf_counter_print_debug(); + } + static struct sysrq_key_op sysrq_showregs_op = { + .handler = sysrq_handle_showregs, +@@ -283,7 +285,7 @@ static void sysrq_ftrace_dump(int key, s + } + static struct sysrq_key_op sysrq_ftrace_dump_op = { + .handler = sysrq_ftrace_dump, +- .help_msg = "dumpZ-ftrace-buffer", ++ .help_msg = "dump-ftrace-buffer(Z)", + .action_msg = "Dump ftrace buffer", + .enable_mask = SYSRQ_ENABLE_DUMP, + }; +Index: linux-2.6-tip/drivers/clocksource/acpi_pm.c +=================================================================== +--- linux-2.6-tip.orig/drivers/clocksource/acpi_pm.c ++++ linux-2.6-tip/drivers/clocksource/acpi_pm.c +@@ -143,7 +143,7 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SE + #endif + + #ifndef CONFIG_X86_64 +-#include "mach_timer.h" ++#include + #define PMTMR_EXPECTED_RATE \ + ((CALIBRATE_LATCH * (PMTMR_TICKS_PER_SEC >> 10)) / (CLOCK_TICK_RATE>>10)) + /* +Index: linux-2.6-tip/drivers/clocksource/cyclone.c +=================================================================== +--- linux-2.6-tip.orig/drivers/clocksource/cyclone.c ++++ linux-2.6-tip/drivers/clocksource/cyclone.c +@@ -7,7 +7,7 @@ + #include + #include + +-#include "mach_timer.h" ++#include + + #define CYCLONE_CBAR_ADDR 0xFEB00CD0 /* base address ptr */ + #define CYCLONE_PMCC_OFFSET 0x51A0 /* offset to control register */ +Index: linux-2.6-tip/drivers/eisa/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/eisa/Kconfig ++++ linux-2.6-tip/drivers/eisa/Kconfig +@@ -3,7 +3,7 @@ + # + config EISA_VLB_PRIMING + bool "Vesa Local Bus priming" +- depends on X86_PC && EISA ++ depends on X86 && EISA + default n + ---help--- + Activate this option if your system contains a Vesa Local +@@ -24,11 +24,11 @@ config EISA_PCI_EISA + When in doubt, say Y. + + # Using EISA_VIRTUAL_ROOT on something other than an Alpha or +-# an X86_PC may lead to crashes... ++# an X86 may lead to crashes... 
+ + config EISA_VIRTUAL_ROOT + bool "EISA virtual root device" +- depends on EISA && (ALPHA || X86_PC) ++ depends on EISA && (ALPHA || X86) + default y + ---help--- + Activate this option if your system only have EISA bus +Index: linux-2.6-tip/drivers/firmware/dcdbas.c +=================================================================== +--- linux-2.6-tip.orig/drivers/firmware/dcdbas.c ++++ linux-2.6-tip/drivers/firmware/dcdbas.c +@@ -244,7 +244,7 @@ static ssize_t host_control_on_shutdown_ + */ + int dcdbas_smi_request(struct smi_cmd *smi_cmd) + { +- cpumask_t old_mask; ++ cpumask_var_t old_mask; + int ret = 0; + + if (smi_cmd->magic != SMI_CMD_MAGIC) { +@@ -254,8 +254,11 @@ int dcdbas_smi_request(struct smi_cmd *s + } + + /* SMI requires CPU 0 */ +- old_mask = current->cpus_allowed; +- set_cpus_allowed_ptr(current, &cpumask_of_cpu(0)); ++ if (!alloc_cpumask_var(&old_mask, GFP_KERNEL)) ++ return -ENOMEM; ++ ++ cpumask_copy(old_mask, ¤t->cpus_allowed); ++ set_cpus_allowed_ptr(current, cpumask_of(0)); + if (smp_processor_id() != 0) { + dev_dbg(&dcdbas_pdev->dev, "%s: failed to get CPU 0\n", + __func__); +@@ -275,7 +278,8 @@ int dcdbas_smi_request(struct smi_cmd *s + ); + + out: +- set_cpus_allowed_ptr(current, &old_mask); ++ set_cpus_allowed_ptr(current, old_mask); ++ free_cpumask_var(old_mask); + return ret; + } + +Index: linux-2.6-tip/drivers/firmware/iscsi_ibft.c +=================================================================== +--- linux-2.6-tip.orig/drivers/firmware/iscsi_ibft.c ++++ linux-2.6-tip/drivers/firmware/iscsi_ibft.c +@@ -938,8 +938,8 @@ static int __init ibft_init(void) + return -ENOMEM; + + if (ibft_addr) { +- printk(KERN_INFO "iBFT detected at 0x%lx.\n", +- virt_to_phys((void *)ibft_addr)); ++ printk(KERN_INFO "iBFT detected at 0x%llx.\n", ++ (u64)virt_to_phys((void *)ibft_addr)); + + rc = ibft_check_device(); + if (rc) +Index: linux-2.6-tip/drivers/gpu/drm/drm_proc.c +=================================================================== +--- linux-2.6-tip.orig/drivers/gpu/drm/drm_proc.c ++++ linux-2.6-tip/drivers/gpu/drm/drm_proc.c +@@ -678,9 +678,9 @@ static int drm__vma_info(char *buf, char + *start = &buf[offset]; + *eof = 0; + +- DRM_PROC_PRINT("vma use count: %d, high_memory = %p, 0x%08lx\n", ++ DRM_PROC_PRINT("vma use count: %d, high_memory = %p, 0x%llx\n", + atomic_read(&dev->vma_count), +- high_memory, virt_to_phys(high_memory)); ++ high_memory, (u64)virt_to_phys(high_memory)); + list_for_each_entry(pt, &dev->vmalist, head) { + if (!(vma = pt->vma)) + continue; +Index: linux-2.6-tip/drivers/hwmon/adt7473.c +=================================================================== +--- linux-2.6-tip.orig/drivers/hwmon/adt7473.c ++++ linux-2.6-tip/drivers/hwmon/adt7473.c +@@ -848,6 +848,8 @@ static ssize_t show_pwm_auto_temp(struct + } + /* shouldn't ever get here */ + BUG(); ++ ++ return 0; + } + + static ssize_t set_pwm_auto_temp(struct device *dev, +Index: linux-2.6-tip/drivers/hwmon/i5k_amb.c +=================================================================== +--- linux-2.6-tip.orig/drivers/hwmon/i5k_amb.c ++++ linux-2.6-tip/drivers/hwmon/i5k_amb.c +@@ -480,7 +480,7 @@ static unsigned long i5k_channel_pci_id( + case PCI_DEVICE_ID_INTEL_5400_ERR: + return PCI_DEVICE_ID_INTEL_5400_FBD0 + channel; + default: +- BUG(); ++ panic("i5k_channel_pci_id: unknown chipset!"); + } + } + +Index: linux-2.6-tip/drivers/i2c/busses/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/i2c/busses/Kconfig ++++ 
linux-2.6-tip/drivers/i2c/busses/Kconfig +@@ -56,6 +56,9 @@ config I2C_AMD756 + config I2C_AMD756_S4882 + tristate "SMBus multiplexing on the Tyan S4882" + depends on I2C_AMD756 && X86 && EXPERIMENTAL ++ # broke an Athlon 64 X2 Asus A8N-E with: ++ # http://redhat.com/~mingo/misc/config-Thu_Jul_17_11_34_08_CEST_2008.bad ++ depends on 0 + help + Enabling this option will add specific SMBus support for the Tyan + S4882 motherboard. On this 4-CPU board, the SMBus is multiplexed +@@ -150,6 +153,9 @@ config I2C_NFORCE2 + config I2C_NFORCE2_S4985 + tristate "SMBus multiplexing on the Tyan S4985" + depends on I2C_NFORCE2 && X86 && EXPERIMENTAL ++ # broke a T60 Core2Duo with: ++ # http://redhat.com/~mingo/misc/config-Thu_Jul_17_10_47_42_CEST_2008.bad ++ depends on 0 + help + Enabling this option will add specific SMBus support for the Tyan + S4985 motherboard. On this 4-CPU board, the SMBus is multiplexed +Index: linux-2.6-tip/drivers/ieee1394/csr1212.c +=================================================================== +--- linux-2.6-tip.orig/drivers/ieee1394/csr1212.c ++++ linux-2.6-tip/drivers/ieee1394/csr1212.c +@@ -35,6 +35,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -387,6 +388,7 @@ csr1212_new_descriptor_leaf(u8 dtype, u3 + if (!kv) + return NULL; + ++ kmemcheck_annotate_bitfield(kv->value.leaf.data[0]); + CSR1212_DESCRIPTOR_LEAF_SET_TYPE(kv, dtype); + CSR1212_DESCRIPTOR_LEAF_SET_SPECIFIER_ID(kv, specifier_id); + +Index: linux-2.6-tip/drivers/ieee1394/nodemgr.c +=================================================================== +--- linux-2.6-tip.orig/drivers/ieee1394/nodemgr.c ++++ linux-2.6-tip/drivers/ieee1394/nodemgr.c +@@ -10,6 +10,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -39,7 +40,10 @@ struct nodemgr_csr_info { + struct hpsb_host *host; + nodeid_t nodeid; + unsigned int generation; +- unsigned int speed_unverified:1; ++ ++ kmemcheck_define_bitfield(flags, { ++ unsigned int speed_unverified:1; ++ }); + }; + + +@@ -1295,6 +1299,7 @@ static void nodemgr_node_scan_one(struct + ci = kmalloc(sizeof(*ci), GFP_KERNEL); + if (!ci) + return; ++ kmemcheck_annotate_bitfield(ci->flags); + + ci->host = host; + ci->nodeid = nodeid; +Index: linux-2.6-tip/drivers/infiniband/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/infiniband/Kconfig ++++ linux-2.6-tip/drivers/infiniband/Kconfig +@@ -2,6 +2,7 @@ menuconfig INFINIBAND + tristate "InfiniBand support" + depends on PCI || BROKEN + depends on HAS_IOMEM ++ depends on 0 + ---help--- + Core support for InfiniBand (IB). 
Make sure to also select + any protocols you wish to use as well as drivers for your +Index: linux-2.6-tip/drivers/infiniband/hw/amso1100/c2_vq.c +=================================================================== +--- linux-2.6-tip.orig/drivers/infiniband/hw/amso1100/c2_vq.c ++++ linux-2.6-tip/drivers/infiniband/hw/amso1100/c2_vq.c +@@ -107,7 +107,7 @@ struct c2_vq_req *vq_req_alloc(struct c2 + r = kmalloc(sizeof(struct c2_vq_req), GFP_KERNEL); + if (r) { + init_waitqueue_head(&r->wait_object); +- r->reply_msg = (u64) NULL; ++ r->reply_msg = (u64) (long) NULL; + r->event = 0; + r->cm_id = NULL; + r->qp = NULL; +@@ -123,7 +123,7 @@ struct c2_vq_req *vq_req_alloc(struct c2 + */ + void vq_req_free(struct c2_dev *c2dev, struct c2_vq_req *r) + { +- r->reply_msg = (u64) NULL; ++ r->reply_msg = (u64) (long) NULL; + if (atomic_dec_and_test(&r->refcnt)) { + kfree(r); + } +@@ -151,7 +151,7 @@ void vq_req_get(struct c2_dev *c2dev, st + void vq_req_put(struct c2_dev *c2dev, struct c2_vq_req *r) + { + if (atomic_dec_and_test(&r->refcnt)) { +- if (r->reply_msg != (u64) NULL) ++ if (r->reply_msg != (u64) (long) NULL) + vq_repbuf_free(c2dev, + (void *) (unsigned long) r->reply_msg); + kfree(r); +@@ -258,3 +258,4 @@ void vq_repbuf_free(struct c2_dev *c2dev + { + kmem_cache_free(c2dev->host_msg_cache, reply); + } ++ +Index: linux-2.6-tip/drivers/infiniband/hw/ipath/ipath_driver.c +=================================================================== +--- linux-2.6-tip.orig/drivers/infiniband/hw/ipath/ipath_driver.c ++++ linux-2.6-tip/drivers/infiniband/hw/ipath/ipath_driver.c +@@ -2715,7 +2715,7 @@ static void ipath_hol_signal_up(struct i + * to prevent HoL blocking, then start the HoL timer that + * periodically continues, then stop procs, so they can detect + * link down if they want, and do something about it. +- * Timer may already be running, so use __mod_timer, not add_timer. ++ * Timer may already be running, so use mod_timer, not add_timer. + */ + void ipath_hol_down(struct ipath_devdata *dd) + { +@@ -2724,7 +2724,7 @@ void ipath_hol_down(struct ipath_devdata + dd->ipath_hol_next = IPATH_HOL_DOWNCONT; + dd->ipath_hol_timer.expires = jiffies + + msecs_to_jiffies(ipath_hol_timeout_ms); +- __mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires); ++ mod_timer(&dd->ipath_hol_timer, dd->ipath_hol_timer.expires); + } + + /* +@@ -2763,7 +2763,7 @@ void ipath_hol_event(unsigned long opaqu + else { + dd->ipath_hol_timer.expires = jiffies + + msecs_to_jiffies(ipath_hol_timeout_ms); +- __mod_timer(&dd->ipath_hol_timer, ++ mod_timer(&dd->ipath_hol_timer, + dd->ipath_hol_timer.expires); + } + } +Index: linux-2.6-tip/drivers/input/keyboard/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/input/keyboard/Kconfig ++++ linux-2.6-tip/drivers/input/keyboard/Kconfig +@@ -13,11 +13,11 @@ menuconfig INPUT_KEYBOARD + if INPUT_KEYBOARD + + config KEYBOARD_ATKBD +- tristate "AT keyboard" if EMBEDDED || !X86_PC ++ tristate "AT keyboard" if EMBEDDED || !X86 + default y + select SERIO + select SERIO_LIBPS2 +- select SERIO_I8042 if X86_PC ++ select SERIO_I8042 if X86 + select SERIO_GSCPS2 if GSC + help + Say Y here if you want to use a standard AT or PS/2 keyboard. 
Usually +Index: linux-2.6-tip/drivers/input/mouse/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/input/mouse/Kconfig ++++ linux-2.6-tip/drivers/input/mouse/Kconfig +@@ -17,7 +17,7 @@ config MOUSE_PS2 + default y + select SERIO + select SERIO_LIBPS2 +- select SERIO_I8042 if X86_PC ++ select SERIO_I8042 if X86 + select SERIO_GSCPS2 if GSC + help + Say Y here if you have a PS/2 mouse connected to your system. This +Index: linux-2.6-tip/drivers/input/touchscreen/htcpen.c +=================================================================== +--- linux-2.6-tip.orig/drivers/input/touchscreen/htcpen.c ++++ linux-2.6-tip/drivers/input/touchscreen/htcpen.c +@@ -47,12 +47,6 @@ static int invert_y; + module_param(invert_y, bool, 0644); + MODULE_PARM_DESC(invert_y, "If set, Y axis is inverted"); + +-static struct pnp_device_id pnp_ids[] = { +- { .id = "PNP0cc0" }, +- { .id = "" } +-}; +-MODULE_DEVICE_TABLE(pnp, pnp_ids); +- + static irqreturn_t htcpen_interrupt(int irq, void *handle) + { + struct input_dev *htcpen_dev = handle; +@@ -253,3 +247,4 @@ static void __exit htcpen_isa_exit(void) + + module_init(htcpen_isa_init); + module_exit(htcpen_isa_exit); ++ +Index: linux-2.6-tip/drivers/isdn/capi/capidrv.c +=================================================================== +--- linux-2.6-tip.orig/drivers/isdn/capi/capidrv.c ++++ linux-2.6-tip/drivers/isdn/capi/capidrv.c +@@ -1551,8 +1551,8 @@ static int decodeFVteln(char *teln, unsi + + static int FVteln2capi20(char *teln, u8 AdditionalInfo[1+2+2+31]) + { +- unsigned long bmask; +- int active; ++ unsigned long uninitialized_var(bmask); ++ int uninitialized_var(active); + int rc, i; + + rc = decodeFVteln(teln, &bmask, &active); +Index: linux-2.6-tip/drivers/isdn/hardware/eicon/maintidi.c +=================================================================== +--- linux-2.6-tip.orig/drivers/isdn/hardware/eicon/maintidi.c ++++ linux-2.6-tip/drivers/isdn/hardware/eicon/maintidi.c +@@ -959,7 +959,7 @@ static int process_idi_event (diva_strac + } + if (!strncmp("State\\Layer2 No1", path, pVar->path_length)) { + char* tmp = &pLib->lines[0].pInterface->Layer2[0]; +- dword l2_state; ++ dword uninitialized_var(l2_state); + diva_strace_read_uint (pVar, &l2_state); + + switch (l2_state) { +Index: linux-2.6-tip/drivers/isdn/hardware/eicon/message.c +=================================================================== +--- linux-2.6-tip.orig/drivers/isdn/hardware/eicon/message.c ++++ linux-2.6-tip/drivers/isdn/hardware/eicon/message.c +@@ -2682,7 +2682,7 @@ byte connect_b3_req(dword Id, word Numbe + if (!(fax_control_bits & T30_CONTROL_BIT_MORE_DOCUMENTS) + || (fax_feature_bits & T30_FEATURE_BIT_MORE_DOCUMENTS)) + { +- len = (byte)(&(((T30_INFO *) 0)->universal_6)); ++ len = (byte)(offsetof(T30_INFO, universal_6)); + fax_info_change = false; + if (ncpi->length >= 4) + { +@@ -2744,7 +2744,7 @@ byte connect_b3_req(dword Id, word Numbe + for (i = 0; i < w; i++) + ((T30_INFO *)(plci->fax_connect_info_buffer))->station_id[i] = fax_parms[4].info[1+i]; + ((T30_INFO *)(plci->fax_connect_info_buffer))->head_line_len = 0; +- len = (byte)(((T30_INFO *) 0)->station_id + 20); ++ len = (byte)(offsetof(T30_INFO, station_id) + 20); + w = fax_parms[5].length; + if (w > 20) + w = 20; +@@ -2778,7 +2778,7 @@ byte connect_b3_req(dword Id, word Numbe + } + else + { +- len = (byte)(&(((T30_INFO *) 0)->universal_6)); ++ len = (byte)(offsetof(T30_INFO, universal_6)); + } + fax_info_change = true; + +@@ -2881,7 +2881,7 @@ byte 
connect_b3_res(dword Id, word Numbe + && (plci->nsf_control_bits & T30_NSF_CONTROL_BIT_ENABLE_NSF) + && (plci->nsf_control_bits & T30_NSF_CONTROL_BIT_NEGOTIATE_RESP)) + { +- len = ((byte)(((T30_INFO *) 0)->station_id + 20)); ++ len = (byte)(offsetof(T30_INFO, station_id) + 20); + if (plci->fax_connect_info_length < len) + { + ((T30_INFO *)(plci->fax_connect_info_buffer))->station_id_len = 0; +@@ -3782,7 +3782,7 @@ static byte manufacturer_res(dword Id, w + break; + } + ncpi = &m_parms[1]; +- len = ((byte)(((T30_INFO *) 0)->station_id + 20)); ++ len = (byte)(offsetof(T30_INFO, station_id) + 20); + if (plci->fax_connect_info_length < len) + { + ((T30_INFO *)(plci->fax_connect_info_buffer))->station_id_len = 0; +@@ -6485,7 +6485,7 @@ static void nl_ind(PLCI *plci) + word info = 0; + word fax_feature_bits; + byte fax_send_edata_ack; +- static byte v120_header_buffer[2 + 3]; ++ static byte v120_header_buffer[2 + 3] __attribute__ ((aligned(8))); + static word fax_info[] = { + 0, /* T30_SUCCESS */ + _FAX_NO_CONNECTION, /* T30_ERR_NO_DIS_RECEIVED */ +@@ -6824,7 +6824,7 @@ static void nl_ind(PLCI *plci) + if ((plci->requested_options_conn | plci->requested_options | a->requested_options_table[plci->appl->Id-1]) + & ((1L << PRIVATE_FAX_SUB_SEP_PWD) | (1L << PRIVATE_FAX_NONSTANDARD))) + { +- i = ((word)(((T30_INFO *) 0)->station_id + 20)) + ((T30_INFO *)plci->NL.RBuffer->P)->head_line_len; ++ i = ((word)(offsetof(T30_INFO, station_id) + 20)) + ((T30_INFO *)plci->NL.RBuffer->P)->head_line_len; + while (i < plci->NL.RBuffer->length) + plci->ncpi_buffer[++len] = plci->NL.RBuffer->P[i++]; + } +@@ -7216,7 +7216,7 @@ static void nl_ind(PLCI *plci) + { + plci->RData[1].P = plci->RData[0].P; + plci->RData[1].PLength = plci->RData[0].PLength; +- plci->RData[0].P = v120_header_buffer + (-((int) v120_header_buffer) & 3); ++ plci->RData[0].P = v120_header_buffer; + if ((plci->NL.RBuffer->P[0] & V120_HEADER_EXTEND_BIT) || (plci->NL.RLength == 1)) + plci->RData[0].PLength = 1; + else +@@ -8395,6 +8395,7 @@ static word add_b23(PLCI *plci, API_PARS + /* copy head line to NLC */ + if(b3_config_parms[3].length) + { ++ byte *head_line = (void *) ((T30_INFO *)&nlc[1] + 1); + + pos = (byte)(fax_head_line_time (&(((T30_INFO *)&nlc[1])->station_id[20]))); + if (pos != 0) +@@ -8403,17 +8404,17 @@ static word add_b23(PLCI *plci, API_PARS + pos = 0; + else + { +- ((T30_INFO *)&nlc[1])->station_id[20 + pos++] = ' '; +- ((T30_INFO *)&nlc[1])->station_id[20 + pos++] = ' '; ++ head_line[pos++] = ' '; ++ head_line[pos++] = ' '; + len = (byte)b3_config_parms[2].length; + if (len > 20) + len = 20; + if (CAPI_MAX_DATE_TIME_LENGTH + 2 + len + 2 + b3_config_parms[3].length <= CAPI_MAX_HEAD_LINE_SPACE) + { + for (i = 0; i < len; i++) +- ((T30_INFO *)&nlc[1])->station_id[20 + pos++] = ((byte *)b3_config_parms[2].info)[1+i]; +- ((T30_INFO *)&nlc[1])->station_id[20 + pos++] = ' '; +- ((T30_INFO *)&nlc[1])->station_id[20 + pos++] = ' '; ++ head_line[pos++] = ((byte *)b3_config_parms[2].info)[1+i]; ++ head_line[pos++] = ' '; ++ head_line[pos++] = ' '; + } + } + } +@@ -8424,7 +8425,7 @@ static word add_b23(PLCI *plci, API_PARS + ((T30_INFO *)&nlc[1])->head_line_len = (byte)(pos + len); + nlc[0] += (byte)(pos + len); + for (i = 0; i < len; i++) +- ((T30_INFO *)&nlc[1])->station_id[20 + pos++] = ((byte *)b3_config_parms[3].info)[1+i]; ++ head_line[pos++] = ((byte *)b3_config_parms[3].info)[1+i]; + } + else + ((T30_INFO *)&nlc[1])->head_line_len = 0; +@@ -8453,7 +8454,7 @@ static word add_b23(PLCI *plci, API_PARS + fax_control_bits |= 
T30_CONTROL_BIT_ACCEPT_SEL_POLLING; + } + len = nlc[0]; +- pos = ((byte)(((T30_INFO *) 0)->station_id + 20)); ++ pos = (byte)(offsetof(T30_INFO, station_id) + 20); + if (pos < plci->fax_connect_info_length) + { + for (i = 1 + plci->fax_connect_info_buffer[pos]; i != 0; i--) +@@ -8505,7 +8506,7 @@ static word add_b23(PLCI *plci, API_PARS + } + + PUT_WORD(&(((T30_INFO *)&nlc[1])->control_bits_low), fax_control_bits); +- len = ((byte)(((T30_INFO *) 0)->station_id + 20)); ++ len = (byte)(offsetof(T30_INFO, station_id) + 20); + for (i = 0; i < len; i++) + plci->fax_connect_info_buffer[i] = nlc[1+i]; + ((T30_INFO *) plci->fax_connect_info_buffer)->head_line_len = 0; +@@ -15049,3 +15050,4 @@ static void diva_free_dma_descriptor (PL + } + + /*------------------------------------------------------------------*/ ++ +Index: linux-2.6-tip/drivers/isdn/hisax/config.c +=================================================================== +--- linux-2.6-tip.orig/drivers/isdn/hisax/config.c ++++ linux-2.6-tip/drivers/isdn/hisax/config.c +@@ -1980,7 +1980,7 @@ static struct pci_device_id hisax_pci_tb + { } /* Terminating entry */ + }; + +-MODULE_DEVICE_TABLE(pci, hisax_pci_tbl); ++MODULE_STATIC_DEVICE_TABLE(pci, hisax_pci_tbl); + #endif /* CONFIG_PCI */ + + module_init(HiSax_init); +Index: linux-2.6-tip/drivers/isdn/i4l/isdn_common.c +=================================================================== +--- linux-2.6-tip.orig/drivers/isdn/i4l/isdn_common.c ++++ linux-2.6-tip/drivers/isdn/i4l/isdn_common.c +@@ -1280,7 +1280,9 @@ isdn_ioctl(struct inode *inode, struct f + int ret; + int i; + char __user *p; ++#ifdef CONFIG_NETDEVICES + char *s; ++#endif + union iocpar { + char name[10]; + char bname[22]; +Index: linux-2.6-tip/drivers/isdn/i4l/isdn_ppp.c +=================================================================== +--- linux-2.6-tip.orig/drivers/isdn/i4l/isdn_ppp.c ++++ linux-2.6-tip/drivers/isdn/i4l/isdn_ppp.c +@@ -466,7 +466,7 @@ static int get_filter(void __user *arg, + *p = code; + return uprog.len; + } +-#endif /* CONFIG_IPPP_FILTER */ ++#endif + + /* + * ippp device ioctl +Index: linux-2.6-tip/drivers/isdn/icn/icn.c +=================================================================== +--- linux-2.6-tip.orig/drivers/isdn/icn/icn.c ++++ linux-2.6-tip/drivers/isdn/icn/icn.c +@@ -717,7 +717,7 @@ icn_sendbuf(int channel, int ack, struct + return 0; + if (card->sndcount[channel] > ICN_MAX_SQUEUE) + return 0; +- #warning TODO test headroom or use skb->nb to flag ACK ++ /* TODO test headroom or use skb->nb to flag ACK: */ + nskb = skb_clone(skb, GFP_ATOMIC); + if (nskb) { + /* Push ACK flag as one +Index: linux-2.6-tip/drivers/isdn/mISDN/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/isdn/mISDN/Kconfig ++++ linux-2.6-tip/drivers/isdn/mISDN/Kconfig +@@ -4,6 +4,9 @@ + + menuconfig MISDN + tristate "Modular ISDN driver" ++ # broken with: ++ # http://redhat.com/~mingo/misc/config-Sun_Jul_27_08_30_16_CEST_2008.bad ++ depends on 0 + help + Enable support for the modular ISDN driver. + +Index: linux-2.6-tip/drivers/isdn/sc/card.h +=================================================================== +--- linux-2.6-tip.orig/drivers/isdn/sc/card.h ++++ linux-2.6-tip/drivers/isdn/sc/card.h +@@ -82,7 +82,7 @@ typedef struct { + int ioport[MAX_IO_REGS]; /* Index to I/O ports */ + int shmem_pgport; /* port for the exp mem page reg. 
*/ + int shmem_magic; /* adapter magic number */ +- unsigned int rambase; /* Shared RAM base address */ ++ u8 __iomem *rambase; /* Shared RAM base address */ + unsigned int ramsize; /* Size of shared memory */ + RspMessage async_msg; /* Async response message */ + int want_async_messages; /* Snoop the Q ? */ +Index: linux-2.6-tip/drivers/isdn/sc/init.c +=================================================================== +--- linux-2.6-tip.orig/drivers/isdn/sc/init.c ++++ linux-2.6-tip/drivers/isdn/sc/init.c +@@ -27,7 +27,7 @@ static const char *boardname[] = { "Data + /* insmod set parameters */ + static unsigned int io[] = {0,0,0,0}; + static unsigned char irq[] = {0,0,0,0}; +-static unsigned long ram[] = {0,0,0,0}; ++static u8 __iomem * ram[] = {0,0,0,0}; + static int do_reset = 0; + + module_param_array(io, int, NULL, 0); +@@ -35,7 +35,7 @@ module_param_array(irq, int, NULL, 0); + module_param_array(ram, int, NULL, 0); + module_param(do_reset, bool, 0); + +-static int identify_board(unsigned long, unsigned int); ++static int identify_board(u8 __iomem *rambase, unsigned int iobase); + + static int __init sc_init(void) + { +@@ -153,7 +153,7 @@ static int __init sc_init(void) + outb(0xFF, io[b] + RESET_OFFSET); + msleep_interruptible(10000); + } +- pr_debug("RAM Base for board %d is 0x%lx, %s probe\n", b, ++ pr_debug("RAM Base for board %d is %p, %s probe\n", b, + ram[b], ram[b] == 0 ? "will" : "won't"); + + if(ram[b]) { +@@ -162,10 +162,10 @@ static int __init sc_init(void) + * Just look for a signature and ID the + * board model + */ +- if(request_region(ram[b], SRAM_PAGESIZE, "sc test")) { +- pr_debug("request_region for RAM base 0x%lx succeeded\n", ram[b]); ++ if (request_region((unsigned long)ram[b], SRAM_PAGESIZE, "sc test")) { ++ pr_debug("request_region for RAM base %p succeeded\n", ram[b]); + model = identify_board(ram[b], io[b]); +- release_region(ram[b], SRAM_PAGESIZE); ++ release_region((unsigned long)ram[b], SRAM_PAGESIZE); + } + } + else { +@@ -177,12 +177,12 @@ static int __init sc_init(void) + pr_debug("Checking RAM address 0x%x...\n", i); + if(request_region(i, SRAM_PAGESIZE, "sc test")) { + pr_debug(" request_region succeeded\n"); +- model = identify_board(i, io[b]); ++ model = identify_board((u8 __iomem *)i, io[b]); + release_region(i, SRAM_PAGESIZE); + if (model >= 0) { + pr_debug(" Identified a %s\n", + boardname[model]); +- ram[b] = i; ++ ram[b] = (u8 __iomem *)i; + break; + } + pr_debug(" Unidentifed or inaccessible\n"); +@@ -199,7 +199,7 @@ static int __init sc_init(void) + * Nope, there was no place in RAM for the + * board, or it couldn't be identified + */ +- pr_debug("Failed to find an adapter at 0x%lx\n", ram[b]); ++ pr_debug("Failed to find an adapter at %p\n", ram[b]); + continue; + } + +@@ -222,7 +222,7 @@ static int __init sc_init(void) + features = BRI_FEATURES; + break; + } +- switch(ram[b] >> 12 & 0x0F) { ++ switch((unsigned long)ram[b] >> 12 & 0x0F) { + case 0x0: + pr_debug("RAM Page register set to EXP_PAGE0\n"); + pgport = EXP_PAGE0; +@@ -358,10 +358,10 @@ static int __init sc_init(void) + pr_debug("Requesting I/O Port %#x\n", + sc_adapter[cinst]->ioport[IRQ_SELECT]); + sc_adapter[cinst]->rambase = ram[b]; +- request_region(sc_adapter[cinst]->rambase, SRAM_PAGESIZE, +- interface->id); ++ request_region((unsigned long)sc_adapter[cinst]->rambase, ++ SRAM_PAGESIZE, interface->id); + +- pr_info(" %s (%d) - %s %d channels IRQ %d, I/O Base 0x%x, RAM Base 0x%lx\n", ++ pr_info(" %s (%d) - %s %d channels IRQ %d, I/O Base 0x%x, RAM Base %p\n", + 
sc_adapter[cinst]->devicename, + sc_adapter[cinst]->driverId, + boardname[model], channels, irq[b], io[b], ram[b]); +@@ -400,7 +400,7 @@ static void __exit sc_exit(void) + /* + * Release shared RAM + */ +- release_region(sc_adapter[i]->rambase, SRAM_PAGESIZE); ++ release_region((unsigned long)sc_adapter[i]->rambase, SRAM_PAGESIZE); + + /* + * Release the IRQ +@@ -434,7 +434,7 @@ static void __exit sc_exit(void) + pr_info("SpellCaster ISA ISDN Adapter Driver Unloaded.\n"); + } + +-static int identify_board(unsigned long rambase, unsigned int iobase) ++static int identify_board(u8 __iomem *rambase, unsigned int iobase) + { + unsigned int pgport; + unsigned long sig; +@@ -444,15 +444,15 @@ static int identify_board(unsigned long + HWConfig_pl hwci; + int x; + +- pr_debug("Attempting to identify adapter @ 0x%lx io 0x%x\n", ++ pr_debug("Attempting to identify adapter @ %p io 0x%x\n", + rambase, iobase); + + /* + * Enable the base pointer + */ +- outb(rambase >> 12, iobase + 0x2c00); ++ outb((unsigned long)rambase >> 12, iobase + 0x2c00); + +- switch(rambase >> 12 & 0x0F) { ++ switch((unsigned long)rambase >> 12 & 0x0F) { + case 0x0: + pgport = iobase + PG0_OFFSET; + pr_debug("Page Register offset is 0x%x\n", PG0_OFFSET); +@@ -473,7 +473,7 @@ static int identify_board(unsigned long + pr_debug("Page Register offset is 0x%x\n", PG3_OFFSET); + break; + default: +- pr_debug("Invalid rambase 0x%lx\n", rambase); ++ pr_debug("Invalid rambase %p\n", rambase); + return -1; + } + +Index: linux-2.6-tip/drivers/isdn/sc/scioc.h +=================================================================== +--- linux-2.6-tip.orig/drivers/isdn/sc/scioc.h ++++ linux-2.6-tip/drivers/isdn/sc/scioc.h +@@ -86,7 +86,7 @@ typedef struct { + char load_ver[11]; + char proc_ver[11]; + int iobase; +- long rambase; ++ u8 __iomem *rambase; + char irq; + long ramsize; + char interface; +Index: linux-2.6-tip/drivers/isdn/sc/timer.c +=================================================================== +--- linux-2.6-tip.orig/drivers/isdn/sc/timer.c ++++ linux-2.6-tip/drivers/isdn/sc/timer.c +@@ -27,7 +27,7 @@ + static void setup_ports(int card) + { + +- outb((sc_adapter[card]->rambase >> 12), sc_adapter[card]->ioport[EXP_BASE]); ++ outb(((long)sc_adapter[card]->rambase >> 12), sc_adapter[card]->ioport[EXP_BASE]); + + /* And the IRQ */ + outb((sc_adapter[card]->interrupt | 0x80), +Index: linux-2.6-tip/drivers/lguest/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/lguest/Kconfig ++++ linux-2.6-tip/drivers/lguest/Kconfig +@@ -1,6 +1,6 @@ + config LGUEST + tristate "Linux hypervisor example code" +- depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX && !X86_VOYAGER ++ depends on X86_32 && EXPERIMENTAL && !X86_PAE && FUTEX + select HVC_DRIVER + ---help--- + This is a very simple module which allows you to run +Index: linux-2.6-tip/drivers/md/dm-raid1.c +=================================================================== +--- linux-2.6-tip.orig/drivers/md/dm-raid1.c ++++ linux-2.6-tip/drivers/md/dm-raid1.c +@@ -923,7 +923,7 @@ static int parse_features(struct mirror_ + static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) + { + int r; +- unsigned int nr_mirrors, m, args_used; ++ unsigned int nr_mirrors, m, uninitialized_var(args_used); + struct mirror_set *ms; + struct dm_dirty_log *dl; + +Index: linux-2.6-tip/drivers/media/dvb/dvb-usb/Kconfig +=================================================================== +--- 
linux-2.6-tip.orig/drivers/media/dvb/dvb-usb/Kconfig ++++ linux-2.6-tip/drivers/media/dvb/dvb-usb/Kconfig +@@ -235,6 +235,7 @@ config DVB_USB_OPERA1 + config DVB_USB_AF9005 + tristate "Afatech AF9005 DVB-T USB1.1 support" + depends on DVB_USB && EXPERIMENTAL ++ depends on 0 + select MEDIA_TUNER_MT2060 if !MEDIA_TUNER_CUSTOMIZE + select MEDIA_TUNER_QT1010 if !MEDIA_TUNER_CUSTOMIZE + help +Index: linux-2.6-tip/drivers/media/video/cx88/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/media/video/cx88/Kconfig ++++ linux-2.6-tip/drivers/media/video/cx88/Kconfig +@@ -1,6 +1,8 @@ + config VIDEO_CX88 + tristate "Conexant 2388x (bt878 successor) support" + depends on VIDEO_DEV && PCI && I2C && INPUT ++ # build failure, see config-Mon_Oct_20_13_45_14_CEST_2008.bad ++ depends on BROKEN + select I2C_ALGOBIT + select VIDEO_BTCX + select VIDEOBUF_DMA_SG +Index: linux-2.6-tip/drivers/memstick/core/mspro_block.c +=================================================================== +--- linux-2.6-tip.orig/drivers/memstick/core/mspro_block.c ++++ linux-2.6-tip/drivers/memstick/core/mspro_block.c +@@ -651,6 +651,7 @@ has_int_reg: + + default: + BUG(); ++ return -EINVAL; + } + } + +Index: linux-2.6-tip/drivers/message/fusion/mptbase.c +=================================================================== +--- linux-2.6-tip.orig/drivers/message/fusion/mptbase.c ++++ linux-2.6-tip/drivers/message/fusion/mptbase.c +@@ -126,7 +126,9 @@ static int mfcounter = 0; + * Public data... + */ + ++#ifdef CONFIG_PROC_FS + static struct proc_dir_entry *mpt_proc_root_dir; ++#endif + + #define WHOINIT_UNKNOWN 0xAA + +Index: linux-2.6-tip/drivers/message/i2o/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/message/i2o/Kconfig ++++ linux-2.6-tip/drivers/message/i2o/Kconfig +@@ -54,7 +54,7 @@ config I2O_EXT_ADAPTEC_DMA64 + + config I2O_CONFIG + tristate "I2O Configuration support" +- depends on VIRT_TO_BUS ++ depends on VIRT_TO_BUS && (BROKEN || !64BIT) + ---help--- + Say Y for support of the configuration interface for the I2O adapters. + If you have a RAID controller from Adaptec and you want to use the +@@ -66,6 +66,8 @@ config I2O_CONFIG + Note: If you want to use the new API you have to download the + i2o_config patch from http://i2o.shadowconnect.com/ + ++ Note: This is broken on 64-bit architectures. 
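
Note (illustrative aside, not patch content): the mspro_block.c hunk above adds an explicit "return -EINVAL;" after BUG() so that every path out of a non-void function returns a value; on kernel configurations where BUG() expands to very little, the compiler otherwise warns that control reaches the end of the function. A minimal C sketch of the same pattern, with hypothetical names and a user-space stand-in for BUG():

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for the kernel's BUG(); on some kernel configs BUG() expands
 * to almost nothing, so the compiler cannot assume it never returns. */
#define BUG() do { fprintf(stderr, "BUG at %s:%d\n", __FILE__, __LINE__); abort(); } while (0)

/* Hypothetical decoder: the explicit return after BUG() guarantees that
 * every path out of this non-void function returns a value. */
static int decode_command(int cmd)
{
	switch (cmd) {
	case 0: return 10;
	case 1: return 20;
	default:
		BUG();
		return -EINVAL;	/* keeps -Wreturn-type quiet */
	}
}

int main(void)
{
	printf("%d\n", decode_command(1));
	return 0;
}
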
++ + config I2O_CONFIG_OLD_IOCTL + bool "Enable ioctls (OBSOLETE)" + depends on I2O_CONFIG +Index: linux-2.6-tip/drivers/mfd/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/mfd/Kconfig ++++ linux-2.6-tip/drivers/mfd/Kconfig +@@ -210,6 +210,8 @@ config MFD_WM8350_I2C + tristate "Support Wolfson Microelectronics WM8350 with I2C" + select MFD_WM8350 + depends on I2C ++ # build failure ++ depends on 0 + help + The WM8350 is an integrated audio and power management + subsystem with watchdog and RTC functionality for embedded +Index: linux-2.6-tip/drivers/mfd/da903x.c +=================================================================== +--- linux-2.6-tip.orig/drivers/mfd/da903x.c ++++ linux-2.6-tip/drivers/mfd/da903x.c +@@ -75,6 +75,7 @@ static inline int __da903x_read(struct i + { + int ret; + ++ *val = 0; + ret = i2c_smbus_read_byte_data(client, reg); + if (ret < 0) { + dev_err(&client->dev, "failed reading at 0x%02x\n", reg); +Index: linux-2.6-tip/drivers/misc/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/misc/Kconfig ++++ linux-2.6-tip/drivers/misc/Kconfig +@@ -76,6 +76,34 @@ config IBM_ASM + information on the specific driver level and support statement + for your IBM server. + ++config HWLAT_DETECTOR ++ tristate "Testing module to detect hardware-induced latencies" ++ depends on DEBUG_FS ++ default m ++ ---help--- ++ A simple hardware latency detector. Use this module to detect ++ large latencies introduced by the behavior of the underlying ++ system firmware external to Linux. We do this using periodic ++ use of stop_machine to grab all available CPUs and measure ++ for unexplainable gaps in the CPU timestamp counter(s). By ++ default, the module is not enabled until the "enable" file ++ within the "hwlat_detector" debugfs directory is toggled. ++ ++ This module is often used to detect SMI (System Management ++ Interrupts) on x86 systems, though is not x86 specific. To ++ this end, we default to using a sample window of 1 second, ++ during which we will sample for 0.5 seconds. If an SMI or ++ similar event occurs during that time, it is recorded ++ into an 8K samples global ring buffer until retreived. ++ ++ WARNING: This software should never be enabled (it can be built ++ but should not be turned on after it is loaded) in a production ++ environment where high latencies are a concern since the ++ sampling mechanism actually introduces latencies for ++ regular tasks while the CPU(s) are being held. 
++ ++ If unsure, say N ++ + config PHANTOM + tristate "Sensable PHANToM (PCI)" + depends on PCI +@@ -162,7 +190,7 @@ config ENCLOSURE_SERVICES + config SGI_XP + tristate "Support communication between SGI SSIs" + depends on NET +- depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_64) && SMP ++ depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP + select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 + select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 + select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP +@@ -189,7 +217,7 @@ config HP_ILO + + config SGI_GRU + tristate "SGI GRU driver" +- depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP ++ depends on (X86_UV || IA64_SGI_UV || IA64_GENERIC) && SMP + default n + select MMU_NOTIFIER + ---help--- +@@ -218,6 +246,8 @@ config DELL_LAPTOP + depends on BACKLIGHT_CLASS_DEVICE + depends on RFKILL + depends on POWER_SUPPLY ++ # broken build with: config-Thu_Jan_15_01_30_52_CET_2009.bad ++ depends on 0 + default n + ---help--- + This driver adds support for rfkill and backlight control to Dell +Index: linux-2.6-tip/drivers/misc/c2port/core.c +=================================================================== +--- linux-2.6-tip.orig/drivers/misc/c2port/core.c ++++ linux-2.6-tip/drivers/misc/c2port/core.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -893,6 +894,7 @@ struct c2port_device *c2port_device_regi + c2dev = kmalloc(sizeof(struct c2port_device), GFP_KERNEL); + if (unlikely(!c2dev)) + return ERR_PTR(-ENOMEM); ++ kmemcheck_annotate_bitfield(c2dev->flags); + + ret = idr_pre_get(&c2port_idr, GFP_KERNEL); + if (!ret) { +Index: linux-2.6-tip/drivers/misc/ics932s401.c +=================================================================== +--- linux-2.6-tip.orig/drivers/misc/ics932s401.c ++++ linux-2.6-tip/drivers/misc/ics932s401.c +@@ -374,7 +374,7 @@ static ssize_t show_value(struct device + struct device_attribute *devattr, + char *buf) + { +- int x; ++ int x = 0; + + if (devattr == &dev_attr_usb_clock) + x = 48000; +@@ -392,7 +392,7 @@ static ssize_t show_spread(struct device + { + struct ics932s401_data *data = ics932s401_update_device(dev); + int reg; +- unsigned long val; ++ unsigned long val = 0; + + if (!(data->regs[ICS932S401_REG_CFG2] & ICS932S401_CFG1_SPREAD)) + return sprintf(buf, "0%%\n"); +Index: linux-2.6-tip/drivers/misc/sgi-gru/grufault.c +=================================================================== +--- linux-2.6-tip.orig/drivers/misc/sgi-gru/grufault.c ++++ linux-2.6-tip/drivers/misc/sgi-gru/grufault.c +@@ -282,8 +282,8 @@ static int gru_try_dropin(struct gru_thr + { + struct mm_struct *mm = gts->ts_mm; + struct vm_area_struct *vma; +- int pageshift, asid, write, ret; +- unsigned long paddr, gpa, vaddr; ++ int uninitialized_var(pageshift), asid, write, ret; ++ unsigned long uninitialized_var(paddr), gpa, vaddr; + + /* + * NOTE: The GRU contains magic hardware that eliminates races between +Index: linux-2.6-tip/drivers/misc/sgi-gru/grufile.c +=================================================================== +--- linux-2.6-tip.orig/drivers/misc/sgi-gru/grufile.c ++++ linux-2.6-tip/drivers/misc/sgi-gru/grufile.c +@@ -36,23 +36,11 @@ + #include + #include + #include ++#include + #include "gru.h" + #include "grulib.h" + #include "grutables.h" + +-#if defined CONFIG_X86_64 +-#include +-#include +-#define IS_UV() is_uv_system() +-#elif defined CONFIG_IA64 +-#include +-#include +-/* temp support for running on 
hardware simulator */ +-#define IS_UV() IS_MEDUSA() || ia64_platform_is("uv") +-#else +-#define IS_UV() 0 +-#endif +- + #include + #include + +@@ -381,7 +369,7 @@ static int __init gru_init(void) + char id[10]; + void *gru_start_vaddr; + +- if (!IS_UV()) ++ if (!is_uv_system()) + return 0; + + #if defined CONFIG_IA64 +@@ -451,7 +439,7 @@ static void __exit gru_exit(void) + int order = get_order(sizeof(struct gru_state) * + GRU_CHIPLETS_PER_BLADE); + +- if (!IS_UV()) ++ if (!is_uv_system()) + return; + + for (i = 0; i < GRU_CHIPLETS_PER_BLADE; i++) +Index: linux-2.6-tip/drivers/misc/sgi-xp/xp.h +=================================================================== +--- linux-2.6-tip.orig/drivers/misc/sgi-xp/xp.h ++++ linux-2.6-tip/drivers/misc/sgi-xp/xp.h +@@ -15,19 +15,19 @@ + + #include + +-#ifdef CONFIG_IA64 ++#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV ++#include ++#define is_uv() is_uv_system() ++#endif ++ ++#ifndef is_uv ++#define is_uv() 0 ++#endif ++ ++#if defined CONFIG_IA64 + #include + #include /* defines is_shub1() and is_shub2() */ + #define is_shub() ia64_platform_is("sn2") +-#ifdef CONFIG_IA64_SGI_UV +-#define is_uv() ia64_platform_is("uv") +-#else +-#define is_uv() 0 +-#endif +-#endif +-#ifdef CONFIG_X86_64 +-#include +-#define is_uv() is_uv_system() + #endif + + #ifndef is_shub1 +@@ -42,10 +42,6 @@ + #define is_shub() 0 + #endif + +-#ifndef is_uv +-#define is_uv() 0 +-#endif +- + #ifdef USE_DBUG_ON + #define DBUG_ON(condition) BUG_ON(condition) + #else +Index: linux-2.6-tip/drivers/misc/sgi-xp/xpc_main.c +=================================================================== +--- linux-2.6-tip.orig/drivers/misc/sgi-xp/xpc_main.c ++++ linux-2.6-tip/drivers/misc/sgi-xp/xpc_main.c +@@ -318,7 +318,7 @@ xpc_hb_checker(void *ignore) + + /* this thread was marked active by xpc_hb_init() */ + +- set_cpus_allowed_ptr(current, &cpumask_of_cpu(XPC_HB_CHECK_CPU)); ++ set_cpus_allowed_ptr(current, cpumask_of(XPC_HB_CHECK_CPU)); + + /* set our heartbeating to other partitions into motion */ + xpc_hb_check_timeout = jiffies + (xpc_hb_check_interval * HZ); +Index: linux-2.6-tip/drivers/mtd/devices/mtd_dataflash.c +=================================================================== +--- linux-2.6-tip.orig/drivers/mtd/devices/mtd_dataflash.c ++++ linux-2.6-tip/drivers/mtd/devices/mtd_dataflash.c +@@ -679,7 +679,7 @@ add_dataflash_otp(struct spi_device *spi + dev_set_drvdata(&spi->dev, priv); + + if (mtd_has_partitions()) { +- struct mtd_partition *parts; ++ struct mtd_partition *uninitialized_var(parts); + int nr_parts = 0; + + #ifdef CONFIG_MTD_CMDLINE_PARTS +Index: linux-2.6-tip/drivers/mtd/devices/phram.c +=================================================================== +--- linux-2.6-tip.orig/drivers/mtd/devices/phram.c ++++ linux-2.6-tip/drivers/mtd/devices/phram.c +@@ -235,7 +235,7 @@ static int phram_setup(const char *val, + { + char buf[64+12+12], *str = buf; + char *token[3]; +- char *name; ++ char *uninitialized_var(name); + uint32_t start; + uint32_t len; + int i, ret; +Index: linux-2.6-tip/drivers/mtd/nand/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/mtd/nand/Kconfig ++++ linux-2.6-tip/drivers/mtd/nand/Kconfig +@@ -273,7 +273,7 @@ config MTD_NAND_CAFE + + config MTD_NAND_CS553X + tristate "NAND support for CS5535/CS5536 (AMD Geode companion chip)" +- depends on X86_32 && (X86_PC || X86_GENERICARCH) ++ depends on X86_32 + help + The CS553x companion chips for the AMD Geode processor + include NAND flash 
controllers with built-in hardware ECC +Index: linux-2.6-tip/drivers/net/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/net/Kconfig ++++ linux-2.6-tip/drivers/net/Kconfig +@@ -776,6 +776,8 @@ config NET_VENDOR_SMC + config WD80x3 + tristate "WD80*3 support" + depends on NET_VENDOR_SMC && ISA ++ # broken build ++ depends on 0 + select CRC32 + help + If you have a network (Ethernet) card of this type, say Y and read +@@ -1162,6 +1164,8 @@ config EEXPRESS_PRO + config HPLAN_PLUS + tristate "HP PCLAN+ (27247B and 27252A) support" + depends on NET_ISA ++ # broken build with config-Mon_Jul_21_20_21_08_CEST_2008.bad ++ depends on 0 + select CRC32 + help + If you have a network (Ethernet) card of this type, say Y and read +@@ -2559,6 +2563,8 @@ config MYRI10GE_DCA + + config NETXEN_NIC + tristate "NetXen Multi port (1/10) Gigabit Ethernet NIC" ++ # build breakage ++ depends on 0 + depends on PCI + help + This enables the support for NetXen's Gigabit Ethernet card. +Index: linux-2.6-tip/drivers/net/Makefile +=================================================================== +--- linux-2.6-tip.orig/drivers/net/Makefile ++++ linux-2.6-tip/drivers/net/Makefile +@@ -111,7 +111,7 @@ ifeq ($(CONFIG_FEC_MPC52xx_MDIO),y) + obj-$(CONFIG_FEC_MPC52xx) += fec_mpc52xx_phy.o + endif + obj-$(CONFIG_68360_ENET) += 68360enet.o +-obj-$(CONFIG_WD80x3) += wd.o 8390.o ++obj-$(CONFIG_WD80x3) += wd.o 8390p.o + obj-$(CONFIG_EL2) += 3c503.o 8390p.o + obj-$(CONFIG_NE2000) += ne.o 8390p.o + obj-$(CONFIG_NE2_MCA) += ne2.o 8390p.o +Index: linux-2.6-tip/drivers/net/e1000/e1000_main.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/e1000/e1000_main.c ++++ linux-2.6-tip/drivers/net/e1000/e1000_main.c +@@ -2056,14 +2056,10 @@ void e1000_free_all_tx_resources(struct + static void e1000_unmap_and_free_tx_resource(struct e1000_adapter *adapter, + struct e1000_buffer *buffer_info) + { +- if (buffer_info->dma) { +- pci_unmap_page(adapter->pdev, +- buffer_info->dma, +- buffer_info->length, +- PCI_DMA_TODEVICE); +- buffer_info->dma = 0; +- } ++ buffer_info->dma = 0; + if (buffer_info->skb) { ++ skb_dma_unmap(&adapter->pdev->dev, buffer_info->skb, ++ DMA_TO_DEVICE); + dev_kfree_skb_any(buffer_info->skb); + buffer_info->skb = NULL; + } +@@ -2914,16 +2910,24 @@ static int e1000_tx_map(struct e1000_ada + unsigned int mss) + { + struct e1000_hw *hw = &adapter->hw; +- struct e1000_buffer *buffer_info; +- unsigned int len = skb->len; +- unsigned int offset = 0, size, count = 0, i; ++ unsigned int len = skb_headlen(skb); ++ unsigned int offset, size, count = 0, i; + unsigned int f; +- len -= skb->data_len; ++ dma_addr_t map; + + i = tx_ring->next_to_use; + ++ if (skb_dma_map(&adapter->pdev->dev, skb, DMA_TO_DEVICE)) { ++ dev_err(&adapter->pdev->dev, "TX DMA map failed\n"); ++ dev_kfree_skb(skb); ++ return -2; ++ } ++ ++ map = skb_shinfo(skb)->dma_maps[0]; ++ offset = 0; ++ + while (len) { +- buffer_info = &tx_ring->buffer_info[i]; ++ struct e1000_buffer *buffer_info = &tx_ring->buffer_info[i]; + size = min(len, max_per_txd); + /* Workaround for Controller erratum -- + * descriptor for non-tso packet in a linear SKB that follows a +@@ -2956,11 +2960,7 @@ static int e1000_tx_map(struct e1000_ada + size -= 4; + + buffer_info->length = size; +- buffer_info->dma = +- pci_map_single(adapter->pdev, +- skb->data + offset, +- size, +- PCI_DMA_TODEVICE); ++ buffer_info->dma = map + offset; + buffer_info->time_stamp = jiffies; + 
buffer_info->next_to_watch = i; + +@@ -2975,9 +2975,11 @@ static int e1000_tx_map(struct e1000_ada + + frag = &skb_shinfo(skb)->frags[f]; + len = frag->size; +- offset = frag->page_offset; ++ map = skb_shinfo(skb)->dma_maps[f + 1]; ++ offset = 0; + + while (len) { ++ struct e1000_buffer *buffer_info; + buffer_info = &tx_ring->buffer_info[i]; + size = min(len, max_per_txd); + /* Workaround for premature desc write-backs +@@ -2993,12 +2995,7 @@ static int e1000_tx_map(struct e1000_ada + size -= 4; + + buffer_info->length = size; +- buffer_info->dma = +- pci_map_page(adapter->pdev, +- frag->page, +- offset, +- size, +- PCI_DMA_TODEVICE); ++ buffer_info->dma = map + offset; + buffer_info->time_stamp = jiffies; + buffer_info->next_to_watch = i; + +@@ -3012,6 +3009,7 @@ static int e1000_tx_map(struct e1000_ada + i = (i == 0) ? tx_ring->count - 1 : i - 1; + tx_ring->buffer_info[i].skb = skb; + tx_ring->buffer_info[first].next_to_watch = i; ++ smp_wmb(); + + return count; + } +@@ -3290,9 +3288,7 @@ static int e1000_xmit_frame(struct sk_bu + (hw->mac_type == e1000_82573)) + e1000_transfer_dhcp_info(adapter, skb); + +- if (!spin_trylock_irqsave(&tx_ring->tx_lock, flags)) +- /* Collision - tell upper layer to requeue */ +- return NETDEV_TX_LOCKED; ++ spin_lock_irqsave(&tx_ring->tx_lock, flags); + + /* need: count + 2 desc gap to keep tail from touching + * head, otherwise try next time */ +@@ -3869,6 +3865,11 @@ static bool e1000_clean_tx_irq(struct e1 + /* Detect a transmit hang in hardware, this serializes the + * check with the clearing of time_stamp and movement of i */ + adapter->detect_tx_hung = false; ++ /* ++ * read barrier to make sure that the ->dma member and time ++ * stamp are updated fully ++ */ ++ smp_rmb(); + if (tx_ring->buffer_info[eop].dma && + time_after(jiffies, tx_ring->buffer_info[eop].time_stamp + + (adapter->tx_timeout_factor * HZ)) +Index: linux-2.6-tip/drivers/net/e1000e/netdev.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/e1000e/netdev.c ++++ linux-2.6-tip/drivers/net/e1000e/netdev.c +@@ -565,12 +565,10 @@ next_desc: + static void e1000_put_txbuf(struct e1000_adapter *adapter, + struct e1000_buffer *buffer_info) + { +- if (buffer_info->dma) { +- pci_unmap_page(adapter->pdev, buffer_info->dma, +- buffer_info->length, PCI_DMA_TODEVICE); +- buffer_info->dma = 0; +- } ++ buffer_info->dma = 0; + if (buffer_info->skb) { ++ skb_dma_unmap(&adapter->pdev->dev, buffer_info->skb, ++ DMA_TO_DEVICE); + dev_kfree_skb_any(buffer_info->skb); + buffer_info->skb = NULL; + } +@@ -683,6 +681,11 @@ static bool e1000_clean_tx_irq(struct e1 + * check with the clearing of time_stamp and movement of i + */ + adapter->detect_tx_hung = 0; ++ /* ++ * read barrier to make sure that the ->dma member and time ++ * stamp are updated fully ++ */ ++ smp_rmb(); + if (tx_ring->buffer_info[eop].dma && + time_after(jiffies, tx_ring->buffer_info[eop].time_stamp + + (adapter->tx_timeout_factor * HZ)) +@@ -3831,15 +3834,25 @@ static int e1000_tx_map(struct e1000_ada + unsigned int mss) + { + struct e1000_ring *tx_ring = adapter->tx_ring; +- struct e1000_buffer *buffer_info; +- unsigned int len = skb->len - skb->data_len; +- unsigned int offset = 0, size, count = 0, i; ++ unsigned int len = skb_headlen(skb); ++ unsigned int offset, size, count = 0, i; + unsigned int f; ++ dma_addr_t map; + + i = tx_ring->next_to_use; + ++ if (skb_dma_map(&adapter->pdev->dev, skb, DMA_TO_DEVICE)) { ++ dev_err(&adapter->pdev->dev, "TX DMA map failed\n"); ++ 
adapter->tx_dma_failed++; ++ dev_kfree_skb(skb); ++ return -2; ++ } ++ ++ map = skb_shinfo(skb)->dma_maps[0]; ++ offset = 0; ++ + while (len) { +- buffer_info = &tx_ring->buffer_info[i]; ++ struct e1000_buffer *buffer_info = &tx_ring->buffer_info[i]; + size = min(len, max_per_txd); + + /* Workaround for premature desc write-backs +@@ -3850,16 +3863,7 @@ static int e1000_tx_map(struct e1000_ada + buffer_info->length = size; + /* set time_stamp *before* dma to help avoid a possible race */ + buffer_info->time_stamp = jiffies; +- buffer_info->dma = +- pci_map_single(adapter->pdev, +- skb->data + offset, +- size, +- PCI_DMA_TODEVICE); +- if (pci_dma_mapping_error(adapter->pdev, buffer_info->dma)) { +- dev_err(&adapter->pdev->dev, "TX DMA map failed\n"); +- adapter->tx_dma_failed++; +- return -1; +- } ++ buffer_info->dma = map + offset; + buffer_info->next_to_watch = i; + + len -= size; +@@ -3875,9 +3879,11 @@ static int e1000_tx_map(struct e1000_ada + + frag = &skb_shinfo(skb)->frags[f]; + len = frag->size; +- offset = frag->page_offset; ++ map = skb_shinfo(skb)->dma_maps[f + 1]; ++ offset = 0; + + while (len) { ++ struct e1000_buffer *buffer_info; + buffer_info = &tx_ring->buffer_info[i]; + size = min(len, max_per_txd); + /* Workaround for premature desc write-backs +@@ -3887,20 +3893,7 @@ static int e1000_tx_map(struct e1000_ada + + buffer_info->length = size; + buffer_info->time_stamp = jiffies; +- buffer_info->dma = +- pci_map_page(adapter->pdev, +- frag->page, +- offset, +- size, +- PCI_DMA_TODEVICE); +- if (pci_dma_mapping_error(adapter->pdev, +- buffer_info->dma)) { +- dev_err(&adapter->pdev->dev, +- "TX DMA page map failed\n"); +- adapter->tx_dma_failed++; +- return -1; +- } +- ++ buffer_info->dma = map + offset; + buffer_info->next_to_watch = i; + + len -= size; +@@ -3920,6 +3913,7 @@ static int e1000_tx_map(struct e1000_ada + + tx_ring->buffer_info[i].skb = skb; + tx_ring->buffer_info[first].next_to_watch = i; ++ smp_wmb(); + + return count; + } +@@ -4138,9 +4132,7 @@ static int e1000_xmit_frame(struct sk_bu + if (adapter->hw.mac.tx_pkt_filtering) + e1000_transfer_dhcp_info(adapter, skb); + +- if (!spin_trylock_irqsave(&adapter->tx_queue_lock, irq_flags)) +- /* Collision - tell upper layer to requeue */ +- return NETDEV_TX_LOCKED; ++ spin_lock_irqsave(&adapter->tx_queue_lock, irq_flags); + + /* + * need: count + 2 desc gap to keep tail from touching +Index: linux-2.6-tip/drivers/net/ne3210.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/ne3210.c ++++ linux-2.6-tip/drivers/net/ne3210.c +@@ -150,7 +150,8 @@ static int __init ne3210_eisa_probe (str + if (phys_mem < virt_to_phys(high_memory)) { + printk(KERN_CRIT "ne3210.c: Card RAM overlaps with normal memory!!!\n"); + printk(KERN_CRIT "ne3210.c: Use EISA SCU to set card memory below 1MB,\n"); +- printk(KERN_CRIT "ne3210.c: or to an address above 0x%lx.\n", virt_to_phys(high_memory)); ++ printk(KERN_CRIT "ne3210.c: or to an address above 0x%llx.\n", ++ (u64)virt_to_phys(high_memory)); + printk(KERN_CRIT "ne3210.c: Driver NOT installed.\n"); + retval = -EINVAL; + goto out3; +Index: linux-2.6-tip/drivers/net/sfc/efx.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/sfc/efx.c ++++ linux-2.6-tip/drivers/net/sfc/efx.c +@@ -850,20 +850,27 @@ static void efx_fini_io(struct efx_nic * + * interrupts across them. 
*/ + static int efx_wanted_rx_queues(void) + { +- cpumask_t core_mask; ++ cpumask_var_t core_mask; + int count; + int cpu; + +- cpus_clear(core_mask); ++ if (!alloc_cpumask_var(&core_mask, GFP_KERNEL)) { ++ printk(KERN_WARNING ++ "efx.c: allocation failure, irq balancing hobbled\n"); ++ return 1; ++ } ++ ++ cpumask_clear(core_mask); + count = 0; + for_each_online_cpu(cpu) { +- if (!cpu_isset(cpu, core_mask)) { ++ if (!cpumask_test_cpu(cpu, core_mask)) { + ++count; +- cpus_or(core_mask, core_mask, +- topology_core_siblings(cpu)); ++ cpumask_or(core_mask, core_mask, ++ topology_core_cpumask(cpu)); + } + } + ++ free_cpumask_var(core_mask); + return count; + } + +Index: linux-2.6-tip/drivers/net/sfc/falcon.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/sfc/falcon.c ++++ linux-2.6-tip/drivers/net/sfc/falcon.c +@@ -338,10 +338,10 @@ static int falcon_alloc_special_buffer(s + nic_data->next_buffer_table += buffer->entries; + + EFX_LOG(efx, "allocating special buffers %d-%d at %llx+%x " +- "(virt %p phys %lx)\n", buffer->index, ++ "(virt %p phys %llx)\n", buffer->index, + buffer->index + buffer->entries - 1, +- (unsigned long long)buffer->dma_addr, len, +- buffer->addr, virt_to_phys(buffer->addr)); ++ (u64)buffer->dma_addr, len, ++ buffer->addr, (u64)virt_to_phys(buffer->addr)); + + return 0; + } +@@ -353,10 +353,10 @@ static void falcon_free_special_buffer(s + return; + + EFX_LOG(efx, "deallocating special buffers %d-%d at %llx+%x " +- "(virt %p phys %lx)\n", buffer->index, ++ "(virt %p phys %llx)\n", buffer->index, + buffer->index + buffer->entries - 1, +- (unsigned long long)buffer->dma_addr, buffer->len, +- buffer->addr, virt_to_phys(buffer->addr)); ++ (u64)buffer->dma_addr, buffer->len, ++ buffer->addr, (u64)virt_to_phys(buffer->addr)); + + pci_free_consistent(efx->pci_dev, buffer->len, buffer->addr, + buffer->dma_addr); +@@ -2343,10 +2343,10 @@ int falcon_probe_port(struct efx_nic *ef + FALCON_MAC_STATS_SIZE); + if (rc) + return rc; +- EFX_LOG(efx, "stats buffer at %llx (virt %p phys %lx)\n", +- (unsigned long long)efx->stats_buffer.dma_addr, ++ EFX_LOG(efx, "stats buffer at %llx (virt %p phys %llx)\n", ++ (u64)efx->stats_buffer.dma_addr, + efx->stats_buffer.addr, +- virt_to_phys(efx->stats_buffer.addr)); ++ (u64)virt_to_phys(efx->stats_buffer.addr)); + + return 0; + } +@@ -2921,9 +2921,9 @@ int falcon_probe_nic(struct efx_nic *efx + goto fail4; + BUG_ON(efx->irq_status.dma_addr & 0x0f); + +- EFX_LOG(efx, "INT_KER at %llx (virt %p phys %lx)\n", +- (unsigned long long)efx->irq_status.dma_addr, +- efx->irq_status.addr, virt_to_phys(efx->irq_status.addr)); ++ EFX_LOG(efx, "INT_KER at %llx (virt %p phys %llx)\n", ++ (u64)efx->irq_status.dma_addr, ++ efx->irq_status.addr, (u64)virt_to_phys(efx->irq_status.addr)); + + falcon_probe_spi_devices(efx); + +Index: linux-2.6-tip/drivers/net/sky2.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/sky2.c ++++ linux-2.6-tip/drivers/net/sky2.c +@@ -2748,7 +2748,7 @@ static u32 sky2_mhz(const struct sky2_hw + return 156; + + default: +- BUG(); ++ panic("sky2_mhz: unknown chip id!"); + } + } + +Index: linux-2.6-tip/drivers/net/wimax/i2400m/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/net/wimax/i2400m/Kconfig ++++ linux-2.6-tip/drivers/net/wimax/i2400m/Kconfig +@@ -13,6 +13,8 @@ comment "Enable MMC support to see WiMAX + config WIMAX_I2400M_USB + tristate "Intel Wireless WiMAX 
Connection 2400 over USB (including 5x50)" + depends on WIMAX && USB ++ # build failure: config-Thu_Jan__8_10_51_13_CET_2009.bad ++ depends on 0 + select WIMAX_I2400M + help + Select if you have a device based on the Intel WiMAX +Index: linux-2.6-tip/drivers/net/wireless/arlan-main.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/wireless/arlan-main.c ++++ linux-2.6-tip/drivers/net/wireless/arlan-main.c +@@ -1082,8 +1082,8 @@ static int __init arlan_probe_here(struc + if (arlan_check_fingerprint(memaddr)) + return -ENODEV; + +- printk(KERN_NOTICE "%s: Arlan found at %x, \n ", dev->name, +- (int) virt_to_phys((void*)memaddr)); ++ printk(KERN_NOTICE "%s: Arlan found at %llx, \n ", dev->name, ++ (u64) virt_to_phys((void*)memaddr)); + + ap->card = (void *) memaddr; + dev->mem_start = memaddr; +Index: linux-2.6-tip/drivers/net/wireless/b43/b43.h +=================================================================== +--- linux-2.6-tip.orig/drivers/net/wireless/b43/b43.h ++++ linux-2.6-tip/drivers/net/wireless/b43/b43.h +@@ -852,7 +852,8 @@ void b43warn(struct b43_wl *wl, const ch + void b43dbg(struct b43_wl *wl, const char *fmt, ...) + __attribute__ ((format(printf, 2, 3))); + #else /* DEBUG */ +-# define b43dbg(wl, fmt...) do { /* nothing */ } while (0) ++static inline void __attribute__ ((format(printf, 2, 3))) ++b43dbg(struct b43_wl *wl, const char *fmt, ...) { } + #endif /* DEBUG */ + + /* A WARN_ON variant that vanishes when b43 debugging is disabled. +Index: linux-2.6-tip/drivers/net/wireless/ray_cs.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/wireless/ray_cs.c ++++ linux-2.6-tip/drivers/net/wireless/ray_cs.c +@@ -294,7 +294,9 @@ static char hop_pattern_length[] = { 1, + JAPAN_TEST_HOP_MOD + }; + ++#ifdef CONFIG_PROC_FS + static char rcsid[] = "Raylink/WebGear wireless LAN - Corey "; ++#endif + + /*============================================================================= + ray_attach() creates an "instance" of the driver, allocating +Index: linux-2.6-tip/drivers/net/wireless/zd1201.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/wireless/zd1201.c ++++ linux-2.6-tip/drivers/net/wireless/zd1201.c +@@ -593,6 +593,9 @@ static inline int zd1201_getconfig16(str + int err; + __le16 zdval; + ++ /* initialize */ ++ *val = 0; ++ + err = zd1201_getconfig(zd, rid, &zdval, sizeof(__le16)); + if (err) + return err; +Index: linux-2.6-tip/drivers/oprofile/buffer_sync.c +=================================================================== +--- linux-2.6-tip.orig/drivers/oprofile/buffer_sync.c ++++ linux-2.6-tip/drivers/oprofile/buffer_sync.c +@@ -38,7 +38,7 @@ + + static LIST_HEAD(dying_tasks); + static LIST_HEAD(dead_tasks); +-static cpumask_t marked_cpus = CPU_MASK_NONE; ++static cpumask_var_t marked_cpus; + static DEFINE_SPINLOCK(task_mortuary); + static void process_task_mortuary(void); + +@@ -154,6 +154,10 @@ int sync_start(void) + { + int err; + ++ if (!alloc_cpumask_var(&marked_cpus, GFP_KERNEL)) ++ return -ENOMEM; ++ cpumask_clear(marked_cpus); ++ + start_cpu_work(); + + err = task_handoff_register(&task_free_nb); +@@ -179,6 +183,7 @@ out2: + task_handoff_unregister(&task_free_nb); + out1: + end_sync(); ++ free_cpumask_var(marked_cpus); + goto out; + } + +@@ -190,6 +195,7 @@ void sync_stop(void) + profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); + task_handoff_unregister(&task_free_nb); + end_sync(); ++ 
free_cpumask_var(marked_cpus); + } + + +@@ -456,10 +462,10 @@ static void mark_done(int cpu) + { + int i; + +- cpu_set(cpu, marked_cpus); ++ cpumask_set_cpu(cpu, marked_cpus); + + for_each_online_cpu(i) { +- if (!cpu_isset(i, marked_cpus)) ++ if (!cpumask_test_cpu(i, marked_cpus)) + return; + } + +@@ -468,7 +474,7 @@ static void mark_done(int cpu) + */ + process_task_mortuary(); + +- cpus_clear(marked_cpus); ++ cpumask_clear(marked_cpus); + } + + +Index: linux-2.6-tip/drivers/oprofile/cpu_buffer.c +=================================================================== +--- linux-2.6-tip.orig/drivers/oprofile/cpu_buffer.c ++++ linux-2.6-tip/drivers/oprofile/cpu_buffer.c +@@ -161,7 +161,7 @@ struct op_sample + { + entry->event = ring_buffer_lock_reserve + (op_ring_buffer_write, sizeof(struct op_sample) + +- size * sizeof(entry->sample->data[0]), &entry->irq_flags); ++ size * sizeof(entry->sample->data[0])); + if (entry->event) + entry->sample = ring_buffer_event_data(entry->event); + else +@@ -178,8 +178,7 @@ struct op_sample + + int op_cpu_buffer_write_commit(struct op_entry *entry) + { +- return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event, +- entry->irq_flags); ++ return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event); + } + + struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu) +Index: linux-2.6-tip/drivers/pci/dmar.c +=================================================================== +--- linux-2.6-tip.orig/drivers/pci/dmar.c ++++ linux-2.6-tip/drivers/pci/dmar.c +@@ -31,6 +31,8 @@ + #include + #include + #include ++#include ++#include + + #undef PREFIX + #define PREFIX "DMAR:" +@@ -42,6 +44,7 @@ + LIST_HEAD(dmar_drhd_units); + + static struct acpi_table_header * __initdata dmar_tbl; ++static acpi_size dmar_tbl_size; + + static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) + { +@@ -297,8 +300,9 @@ static int __init dmar_table_detect(void + acpi_status status = AE_OK; + + /* if we could find DMAR table, then there are DMAR devices */ +- status = acpi_get_table(ACPI_SIG_DMAR, 0, +- (struct acpi_table_header **)&dmar_tbl); ++ status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0, ++ (struct acpi_table_header **)&dmar_tbl, ++ &dmar_tbl_size); + + if (ACPI_SUCCESS(status) && !dmar_tbl) { + printk (KERN_WARNING PREFIX "Unable to map DMAR\n"); +@@ -498,6 +502,7 @@ void __init detect_intel_iommu(void) + iommu_detected = 1; + #endif + } ++ early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size); + dmar_tbl = NULL; + } + +@@ -515,6 +520,7 @@ int alloc_iommu(struct dmar_drhd_unit *d + return -ENOMEM; + + iommu->seq_id = iommu_allocated++; ++ sprintf (iommu->name, "dmar%d", iommu->seq_id); + + iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE); + if (!iommu->reg) { +@@ -757,6 +763,42 @@ int qi_flush_iotlb(struct intel_iommu *i + } + + /* ++ * Disable Queued Invalidation interface. ++ */ ++void dmar_disable_qi(struct intel_iommu *iommu) ++{ ++ unsigned long flags; ++ u32 sts; ++ cycles_t start_time = get_cycles(); ++ ++ if (!ecap_qis(iommu->ecap)) ++ return; ++ ++ spin_lock_irqsave(&iommu->register_lock, flags); ++ ++ sts = dmar_readq(iommu->reg + DMAR_GSTS_REG); ++ if (!(sts & DMA_GSTS_QIES)) ++ goto end; ++ ++ /* ++ * Give a chance to HW to complete the pending invalidation requests. 
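
Note (illustrative aside, not patch content): the oprofile and sfc hunks above convert fixed-size cpumask_t objects (static or on-stack) to heap-allocated cpumask_var_t, paired with alloc_cpumask_var()/free_cpumask_var() and the cpumask_* accessors, so the mask no longer grows with NR_CPUS in static or stack storage. A minimal kernel-style sketch of that allocate/use/free pattern, with a hypothetical helper name:

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/gfp.h>

/* Hypothetical helper mirroring the conversion above: track visited CPUs
 * in a heap-allocated mask instead of a cpumask_t in static storage. */
static int count_online_cpus_once(void)
{
	cpumask_var_t seen;
	int cpu, count = 0;

	if (!alloc_cpumask_var(&seen, GFP_KERNEL))
		return -ENOMEM;
	cpumask_clear(seen);

	for_each_online_cpu(cpu) {
		if (!cpumask_test_cpu(cpu, seen)) {
			cpumask_set_cpu(cpu, seen);
			count++;
		}
	}

	free_cpumask_var(seen);
	return count;
}
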
++ */ ++ while ((readl(iommu->reg + DMAR_IQT_REG) != ++ readl(iommu->reg + DMAR_IQH_REG)) && ++ (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time))) ++ cpu_relax(); ++ ++ iommu->gcmd &= ~DMA_GCMD_QIE; ++ ++ writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); ++ ++ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, ++ !(sts & DMA_GSTS_QIES), sts); ++end: ++ spin_unlock_irqrestore(&iommu->register_lock, flags); ++} ++ ++/* + * Enable Queued Invalidation interface. This is a must to support + * interrupt-remapping. Also used by DMA-remapping, which replaces + * register based IOTLB invalidation. +@@ -776,20 +818,20 @@ int dmar_enable_qi(struct intel_iommu *i + if (iommu->qi) + return 0; + +- iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL); ++ iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC); + if (!iommu->qi) + return -ENOMEM; + + qi = iommu->qi; + +- qi->desc = (void *)(get_zeroed_page(GFP_KERNEL)); ++ qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC)); + if (!qi->desc) { + kfree(qi); + iommu->qi = 0; + return -ENOMEM; + } + +- qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL); ++ qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC); + if (!qi->desc_status) { + free_page((unsigned long) qi->desc); + kfree(qi); +@@ -818,3 +860,254 @@ int dmar_enable_qi(struct intel_iommu *i + + return 0; + } ++ ++/* iommu interrupt handling. Most stuff are MSI-like. */ ++ ++enum faulttype { ++ DMA_REMAP, ++ INTR_REMAP, ++ UNKNOWN, ++}; ++ ++static const char *dma_remap_fault_reasons[] = ++{ ++ "Software", ++ "Present bit in root entry is clear", ++ "Present bit in context entry is clear", ++ "Invalid context entry", ++ "Access beyond MGAW", ++ "PTE Write access is not set", ++ "PTE Read access is not set", ++ "Next page table ptr is invalid", ++ "Root table address invalid", ++ "Context table ptr is invalid", ++ "non-zero reserved fields in RTP", ++ "non-zero reserved fields in CTP", ++ "non-zero reserved fields in PTE", ++}; ++ ++static const char *intr_remap_fault_reasons[] = ++{ ++ "Detected reserved fields in the decoded interrupt-remapped request", ++ "Interrupt index exceeded the interrupt-remapping table size", ++ "Present field in the IRTE entry is clear", ++ "Error accessing interrupt-remapping table pointed by IRTA_REG", ++ "Detected reserved fields in the IRTE entry", ++ "Blocked a compatibility format interrupt request", ++ "Blocked an interrupt request due to source-id verification failure", ++}; ++ ++#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) ++ ++const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type) ++{ ++ if (fault_reason >= 0x20 && (fault_reason <= 0x20 + ++ ARRAY_SIZE(intr_remap_fault_reasons))) { ++ *fault_type = INTR_REMAP; ++ return intr_remap_fault_reasons[fault_reason - 0x20]; ++ } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) { ++ *fault_type = DMA_REMAP; ++ return dma_remap_fault_reasons[fault_reason]; ++ } else { ++ *fault_type = UNKNOWN; ++ return "Unknown"; ++ } ++} ++ ++void dmar_msi_unmask(unsigned int irq) ++{ ++ struct intel_iommu *iommu = get_irq_data(irq); ++ unsigned long flag; ++ ++ /* unmask it */ ++ spin_lock_irqsave(&iommu->register_lock, flag); ++ writel(0, iommu->reg + DMAR_FECTL_REG); ++ /* Read a reg to force flush the post write */ ++ readl(iommu->reg + DMAR_FECTL_REG); ++ spin_unlock_irqrestore(&iommu->register_lock, flag); ++} ++ ++void dmar_msi_mask(unsigned int irq) ++{ ++ unsigned long flag; ++ struct intel_iommu *iommu = get_irq_data(irq); ++ ++ /* mask it */ ++ 
spin_lock_irqsave(&iommu->register_lock, flag); ++ writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG); ++ /* Read a reg to force flush the post write */ ++ readl(iommu->reg + DMAR_FECTL_REG); ++ spin_unlock_irqrestore(&iommu->register_lock, flag); ++} ++ ++void dmar_msi_write(int irq, struct msi_msg *msg) ++{ ++ struct intel_iommu *iommu = get_irq_data(irq); ++ unsigned long flag; ++ ++ spin_lock_irqsave(&iommu->register_lock, flag); ++ writel(msg->data, iommu->reg + DMAR_FEDATA_REG); ++ writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG); ++ writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG); ++ spin_unlock_irqrestore(&iommu->register_lock, flag); ++} ++ ++void dmar_msi_read(int irq, struct msi_msg *msg) ++{ ++ struct intel_iommu *iommu = get_irq_data(irq); ++ unsigned long flag; ++ ++ spin_lock_irqsave(&iommu->register_lock, flag); ++ msg->data = readl(iommu->reg + DMAR_FEDATA_REG); ++ msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG); ++ msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG); ++ spin_unlock_irqrestore(&iommu->register_lock, flag); ++} ++ ++static int dmar_fault_do_one(struct intel_iommu *iommu, int type, ++ u8 fault_reason, u16 source_id, unsigned long long addr) ++{ ++ const char *reason; ++ int fault_type; ++ ++ reason = dmar_get_fault_reason(fault_reason, &fault_type); ++ ++ if (fault_type == INTR_REMAP) ++ printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] " ++ "fault index %llx\n" ++ "INTR-REMAP:[fault reason %02d] %s\n", ++ (source_id >> 8), PCI_SLOT(source_id & 0xFF), ++ PCI_FUNC(source_id & 0xFF), addr >> 48, ++ fault_reason, reason); ++ else ++ printk(KERN_ERR ++ "DMAR:[%s] Request device [%02x:%02x.%d] " ++ "fault addr %llx \n" ++ "DMAR:[fault reason %02d] %s\n", ++ (type ? "DMA Read" : "DMA Write"), ++ (source_id >> 8), PCI_SLOT(source_id & 0xFF), ++ PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); ++ return 0; ++} ++ ++#define PRIMARY_FAULT_REG_LEN (16) ++irqreturn_t dmar_fault(int irq, void *dev_id) ++{ ++ struct intel_iommu *iommu = dev_id; ++ int reg, fault_index; ++ u32 fault_status; ++ unsigned long flag; ++ ++ spin_lock_irqsave(&iommu->register_lock, flag); ++ fault_status = readl(iommu->reg + DMAR_FSTS_REG); ++ if (fault_status) ++ printk(KERN_ERR "DRHD: handling fault status reg %x\n", ++ fault_status); ++ ++ /* TBD: ignore advanced fault log currently */ ++ if (!(fault_status & DMA_FSTS_PPF)) ++ goto clear_rest; ++ ++ fault_index = dma_fsts_fault_record_index(fault_status); ++ reg = cap_fault_reg_offset(iommu->cap); ++ while (1) { ++ u8 fault_reason; ++ u16 source_id; ++ u64 guest_addr; ++ int type; ++ u32 data; ++ ++ /* highest 32 bits */ ++ data = readl(iommu->reg + reg + ++ fault_index * PRIMARY_FAULT_REG_LEN + 12); ++ if (!(data & DMA_FRCD_F)) ++ break; ++ ++ fault_reason = dma_frcd_fault_reason(data); ++ type = dma_frcd_type(data); ++ ++ data = readl(iommu->reg + reg + ++ fault_index * PRIMARY_FAULT_REG_LEN + 8); ++ source_id = dma_frcd_source_id(data); ++ ++ guest_addr = dmar_readq(iommu->reg + reg + ++ fault_index * PRIMARY_FAULT_REG_LEN); ++ guest_addr = dma_frcd_page_addr(guest_addr); ++ /* clear the fault */ ++ writel(DMA_FRCD_F, iommu->reg + reg + ++ fault_index * PRIMARY_FAULT_REG_LEN + 12); ++ ++ spin_unlock_irqrestore(&iommu->register_lock, flag); ++ ++ dmar_fault_do_one(iommu, type, fault_reason, ++ source_id, guest_addr); ++ ++ fault_index++; ++ if (fault_index > cap_num_fault_regs(iommu->cap)) ++ fault_index = 0; ++ spin_lock_irqsave(&iommu->register_lock, flag); ++ } ++clear_rest: ++ /* clear all the other 
faults */ ++ fault_status = readl(iommu->reg + DMAR_FSTS_REG); ++ writel(fault_status, iommu->reg + DMAR_FSTS_REG); ++ ++ spin_unlock_irqrestore(&iommu->register_lock, flag); ++ return IRQ_HANDLED; ++} ++ ++int dmar_set_interrupt(struct intel_iommu *iommu) ++{ ++ int irq, ret; ++ ++ /* ++ * Check if the fault interrupt is already initialized. ++ */ ++ if (iommu->irq) ++ return 0; ++ ++ irq = create_irq(); ++ if (!irq) { ++ printk(KERN_ERR "IOMMU: no free vectors\n"); ++ return -EINVAL; ++ } ++ ++ set_irq_data(irq, iommu); ++ iommu->irq = irq; ++ ++ ret = arch_setup_dmar_msi(irq); ++ if (ret) { ++ set_irq_data(irq, NULL); ++ iommu->irq = 0; ++ destroy_irq(irq); ++ return 0; ++ } ++ ++ ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu); ++ if (ret) ++ printk(KERN_ERR "IOMMU: can't request irq\n"); ++ return ret; ++} ++ ++int __init enable_drhd_fault_handling(void) ++{ ++ struct dmar_drhd_unit *drhd; ++ ++ /* ++ * Enable fault control interrupt. ++ */ ++ for_each_drhd_unit(drhd) { ++ int ret; ++ struct intel_iommu *iommu = drhd->iommu; ++ ret = dmar_set_interrupt(iommu); ++ ++ if (ret) { ++ printk(KERN_ERR "DRHD %Lx: failed to enable fault, " ++ " interrupt, ret %d\n", ++ (unsigned long long)drhd->reg_base_addr, ret); ++ return -1; ++ } ++ } ++ ++ return 0; ++} +Index: linux-2.6-tip/drivers/pci/hotplug/cpqphp.h +=================================================================== +--- linux-2.6-tip.orig/drivers/pci/hotplug/cpqphp.h ++++ linux-2.6-tip/drivers/pci/hotplug/cpqphp.h +@@ -449,7 +449,7 @@ extern u8 cpqhp_disk_irq; + + /* inline functions */ + +-static inline char *slot_name(struct slot *slot) ++static inline const char *slot_name(struct slot *slot) + { + return hotplug_slot_name(slot->hotplug_slot); + } +Index: linux-2.6-tip/drivers/pci/hotplug/ibmphp_core.c +=================================================================== +--- linux-2.6-tip.orig/drivers/pci/hotplug/ibmphp_core.c ++++ linux-2.6-tip/drivers/pci/hotplug/ibmphp_core.c +@@ -1419,3 +1419,4 @@ static void __exit ibmphp_exit(void) + } + + module_init(ibmphp_init); ++module_exit(ibmphp_exit); +Index: linux-2.6-tip/drivers/pci/intel-iommu.c +=================================================================== +--- linux-2.6-tip.orig/drivers/pci/intel-iommu.c ++++ linux-2.6-tip/drivers/pci/intel-iommu.c +@@ -1010,194 +1010,6 @@ static int iommu_disable_translation(str + return 0; + } + +-/* iommu interrupt handling. Most stuff are MSI-like. 
*/ +- +-static const char *fault_reason_strings[] = +-{ +- "Software", +- "Present bit in root entry is clear", +- "Present bit in context entry is clear", +- "Invalid context entry", +- "Access beyond MGAW", +- "PTE Write access is not set", +- "PTE Read access is not set", +- "Next page table ptr is invalid", +- "Root table address invalid", +- "Context table ptr is invalid", +- "non-zero reserved fields in RTP", +- "non-zero reserved fields in CTP", +- "non-zero reserved fields in PTE", +-}; +-#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1) +- +-const char *dmar_get_fault_reason(u8 fault_reason) +-{ +- if (fault_reason > MAX_FAULT_REASON_IDX) +- return "Unknown"; +- else +- return fault_reason_strings[fault_reason]; +-} +- +-void dmar_msi_unmask(unsigned int irq) +-{ +- struct intel_iommu *iommu = get_irq_data(irq); +- unsigned long flag; +- +- /* unmask it */ +- spin_lock_irqsave(&iommu->register_lock, flag); +- writel(0, iommu->reg + DMAR_FECTL_REG); +- /* Read a reg to force flush the post write */ +- readl(iommu->reg + DMAR_FECTL_REG); +- spin_unlock_irqrestore(&iommu->register_lock, flag); +-} +- +-void dmar_msi_mask(unsigned int irq) +-{ +- unsigned long flag; +- struct intel_iommu *iommu = get_irq_data(irq); +- +- /* mask it */ +- spin_lock_irqsave(&iommu->register_lock, flag); +- writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG); +- /* Read a reg to force flush the post write */ +- readl(iommu->reg + DMAR_FECTL_REG); +- spin_unlock_irqrestore(&iommu->register_lock, flag); +-} +- +-void dmar_msi_write(int irq, struct msi_msg *msg) +-{ +- struct intel_iommu *iommu = get_irq_data(irq); +- unsigned long flag; +- +- spin_lock_irqsave(&iommu->register_lock, flag); +- writel(msg->data, iommu->reg + DMAR_FEDATA_REG); +- writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG); +- writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG); +- spin_unlock_irqrestore(&iommu->register_lock, flag); +-} +- +-void dmar_msi_read(int irq, struct msi_msg *msg) +-{ +- struct intel_iommu *iommu = get_irq_data(irq); +- unsigned long flag; +- +- spin_lock_irqsave(&iommu->register_lock, flag); +- msg->data = readl(iommu->reg + DMAR_FEDATA_REG); +- msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG); +- msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG); +- spin_unlock_irqrestore(&iommu->register_lock, flag); +-} +- +-static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type, +- u8 fault_reason, u16 source_id, unsigned long long addr) +-{ +- const char *reason; +- +- reason = dmar_get_fault_reason(fault_reason); +- +- printk(KERN_ERR +- "DMAR:[%s] Request device [%02x:%02x.%d] " +- "fault addr %llx \n" +- "DMAR:[fault reason %02d] %s\n", +- (type ? 
"DMA Read" : "DMA Write"), +- (source_id >> 8), PCI_SLOT(source_id & 0xFF), +- PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason); +- return 0; +-} +- +-#define PRIMARY_FAULT_REG_LEN (16) +-static irqreturn_t iommu_page_fault(int irq, void *dev_id) +-{ +- struct intel_iommu *iommu = dev_id; +- int reg, fault_index; +- u32 fault_status; +- unsigned long flag; +- +- spin_lock_irqsave(&iommu->register_lock, flag); +- fault_status = readl(iommu->reg + DMAR_FSTS_REG); +- +- /* TBD: ignore advanced fault log currently */ +- if (!(fault_status & DMA_FSTS_PPF)) +- goto clear_overflow; +- +- fault_index = dma_fsts_fault_record_index(fault_status); +- reg = cap_fault_reg_offset(iommu->cap); +- while (1) { +- u8 fault_reason; +- u16 source_id; +- u64 guest_addr; +- int type; +- u32 data; +- +- /* highest 32 bits */ +- data = readl(iommu->reg + reg + +- fault_index * PRIMARY_FAULT_REG_LEN + 12); +- if (!(data & DMA_FRCD_F)) +- break; +- +- fault_reason = dma_frcd_fault_reason(data); +- type = dma_frcd_type(data); +- +- data = readl(iommu->reg + reg + +- fault_index * PRIMARY_FAULT_REG_LEN + 8); +- source_id = dma_frcd_source_id(data); +- +- guest_addr = dmar_readq(iommu->reg + reg + +- fault_index * PRIMARY_FAULT_REG_LEN); +- guest_addr = dma_frcd_page_addr(guest_addr); +- /* clear the fault */ +- writel(DMA_FRCD_F, iommu->reg + reg + +- fault_index * PRIMARY_FAULT_REG_LEN + 12); +- +- spin_unlock_irqrestore(&iommu->register_lock, flag); +- +- iommu_page_fault_do_one(iommu, type, fault_reason, +- source_id, guest_addr); +- +- fault_index++; +- if (fault_index > cap_num_fault_regs(iommu->cap)) +- fault_index = 0; +- spin_lock_irqsave(&iommu->register_lock, flag); +- } +-clear_overflow: +- /* clear primary fault overflow */ +- fault_status = readl(iommu->reg + DMAR_FSTS_REG); +- if (fault_status & DMA_FSTS_PFO) +- writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG); +- +- spin_unlock_irqrestore(&iommu->register_lock, flag); +- return IRQ_HANDLED; +-} +- +-int dmar_set_interrupt(struct intel_iommu *iommu) +-{ +- int irq, ret; +- +- irq = create_irq(); +- if (!irq) { +- printk(KERN_ERR "IOMMU: no free vectors\n"); +- return -EINVAL; +- } +- +- set_irq_data(irq, iommu); +- iommu->irq = irq; +- +- ret = arch_setup_dmar_msi(irq); +- if (ret) { +- set_irq_data(irq, NULL); +- iommu->irq = 0; +- destroy_irq(irq); +- return 0; +- } +- +- /* Force fault register is cleared */ +- iommu_page_fault(irq, iommu); +- +- ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu); +- if (ret) +- printk(KERN_ERR "IOMMU: can't request irq\n"); +- return ret; +-} + + static int iommu_init_domains(struct intel_iommu *iommu) + { +@@ -1993,7 +1805,7 @@ static int __init init_dmars(void) + struct dmar_rmrr_unit *rmrr; + struct pci_dev *pdev; + struct intel_iommu *iommu; +- int i, ret, unit = 0; ++ int i, ret; + + /* + * for each drhd +@@ -2049,11 +1861,40 @@ static int __init init_dmars(void) + } + } + ++ /* ++ * Start from the sane iommu hardware state. ++ */ ++ for_each_drhd_unit(drhd) { ++ if (drhd->ignored) ++ continue; ++ ++ iommu = drhd->iommu; ++ ++ /* ++ * If the queued invalidation is already initialized by us ++ * (for example, while enabling interrupt-remapping) then ++ * we got the things already rolling from a sane state. ++ */ ++ if (iommu->qi) ++ continue; ++ ++ /* ++ * Clear any previous faults. ++ */ ++ dmar_fault(-1, iommu); ++ /* ++ * Disable queued invalidation if supported and already enabled ++ * before OS handover. 
++ */ ++ dmar_disable_qi(iommu); ++ } ++ + for_each_drhd_unit(drhd) { + if (drhd->ignored) + continue; + + iommu = drhd->iommu; ++ + if (dmar_enable_qi(iommu)) { + /* + * Queued Invalidate not enabled, use Register Based +@@ -2115,7 +1956,6 @@ static int __init init_dmars(void) + if (drhd->ignored) + continue; + iommu = drhd->iommu; +- sprintf (iommu->name, "dmar%d", unit++); + + iommu_flush_write_buffer(iommu); + +@@ -2290,11 +2130,13 @@ error: + return 0; + } + +-dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr, +- size_t size, int dir) ++static dma_addr_t intel_map_page(struct device *dev, struct page *page, ++ unsigned long offset, size_t size, ++ enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { +- return __intel_map_single(hwdev, paddr, size, dir, +- to_pci_dev(hwdev)->dma_mask); ++ return __intel_map_single(dev, page_to_phys(page) + offset, size, ++ dir, to_pci_dev(dev)->dma_mask); + } + + static void flush_unmaps(void) +@@ -2358,8 +2200,9 @@ static void add_unmap(struct dmar_domain + spin_unlock_irqrestore(&async_umap_flush_lock, flags); + } + +-void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, +- int dir) ++static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, ++ size_t size, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + struct pci_dev *pdev = to_pci_dev(dev); + struct dmar_domain *domain; +@@ -2403,8 +2246,14 @@ void intel_unmap_single(struct device *d + } + } + +-void *intel_alloc_coherent(struct device *hwdev, size_t size, +- dma_addr_t *dma_handle, gfp_t flags) ++static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size, ++ int dir) ++{ ++ intel_unmap_page(dev, dev_addr, size, dir, NULL); ++} ++ ++static void *intel_alloc_coherent(struct device *hwdev, size_t size, ++ dma_addr_t *dma_handle, gfp_t flags) + { + void *vaddr; + int order; +@@ -2427,8 +2276,8 @@ void *intel_alloc_coherent(struct device + return NULL; + } + +-void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, +- dma_addr_t dma_handle) ++static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, ++ dma_addr_t dma_handle) + { + int order; + +@@ -2441,8 +2290,9 @@ void intel_free_coherent(struct device * + + #define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg))) + +-void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, +- int nelems, int dir) ++static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, ++ int nelems, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + int i; + struct pci_dev *pdev = to_pci_dev(hwdev); +@@ -2499,8 +2349,8 @@ static int intel_nontranslate_map_sg(str + return nelems; + } + +-int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, +- int dir) ++static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, ++ enum dma_data_direction dir, struct dma_attrs *attrs) + { + void *addr; + int i; +@@ -2580,13 +2430,19 @@ int intel_map_sg(struct device *hwdev, s + return nelems; + } + +-static struct dma_mapping_ops intel_dma_ops = { ++static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr) ++{ ++ return !dma_addr; ++} ++ ++struct dma_map_ops intel_dma_ops = { + .alloc_coherent = intel_alloc_coherent, + .free_coherent = intel_free_coherent, +- .map_single = intel_map_single, +- .unmap_single = intel_unmap_single, + .map_sg = intel_map_sg, + .unmap_sg = intel_unmap_sg, ++ .map_page = intel_map_page, ++ .unmap_page = intel_unmap_page, ++ 
.mapping_error = intel_mapping_error, + }; + + static inline int iommu_domain_cache_init(void) +Index: linux-2.6-tip/drivers/pci/intr_remapping.c +=================================================================== +--- linux-2.6-tip.orig/drivers/pci/intr_remapping.c ++++ linux-2.6-tip/drivers/pci/intr_remapping.c +@@ -6,6 +6,7 @@ + #include + #include + #include ++#include + #include + #include "intr_remapping.h" + +@@ -20,7 +21,7 @@ struct irq_2_iommu { + u8 irte_mask; + }; + +-#ifdef CONFIG_SPARSE_IRQ ++#ifdef CONFIG_GENERIC_HARDIRQS + static struct irq_2_iommu *get_one_free_irq_2_iommu(int cpu) + { + struct irq_2_iommu *iommu; +@@ -116,21 +117,22 @@ int get_irte(int irq, struct irte *entry + { + int index; + struct irq_2_iommu *irq_iommu; ++ unsigned long flags; + + if (!entry) + return -1; + +- spin_lock(&irq_2_ir_lock); ++ spin_lock_irqsave(&irq_2_ir_lock, flags); + irq_iommu = valid_irq_2_iommu(irq); + if (!irq_iommu) { +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + return -1; + } + + index = irq_iommu->irte_index + irq_iommu->sub_handle; + *entry = *(irq_iommu->iommu->ir_table->base + index); + +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + return 0; + } + +@@ -140,6 +142,7 @@ int alloc_irte(struct intel_iommu *iommu + struct irq_2_iommu *irq_iommu; + u16 index, start_index; + unsigned int mask = 0; ++ unsigned long flags; + int i; + + if (!count) +@@ -169,7 +172,7 @@ int alloc_irte(struct intel_iommu *iommu + return -1; + } + +- spin_lock(&irq_2_ir_lock); ++ spin_lock_irqsave(&irq_2_ir_lock, flags); + do { + for (i = index; i < index + count; i++) + if (table->base[i].present) +@@ -181,7 +184,7 @@ int alloc_irte(struct intel_iommu *iommu + index = (index + count) % INTR_REMAP_TABLE_ENTRIES; + + if (index == start_index) { +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + printk(KERN_ERR "can't allocate an IRTE\n"); + return -1; + } +@@ -192,7 +195,7 @@ int alloc_irte(struct intel_iommu *iommu + + irq_iommu = irq_2_iommu_alloc(irq); + if (!irq_iommu) { +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + printk(KERN_ERR "can't allocate irq_2_iommu\n"); + return -1; + } +@@ -202,7 +205,7 @@ int alloc_irte(struct intel_iommu *iommu + irq_iommu->sub_handle = 0; + irq_iommu->irte_mask = mask; + +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + + return index; + } +@@ -222,30 +225,32 @@ int map_irq_to_irte_handle(int irq, u16 + { + int index; + struct irq_2_iommu *irq_iommu; ++ unsigned long flags; + +- spin_lock(&irq_2_ir_lock); ++ spin_lock_irqsave(&irq_2_ir_lock, flags); + irq_iommu = valid_irq_2_iommu(irq); + if (!irq_iommu) { +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + return -1; + } + + *sub_handle = irq_iommu->sub_handle; + index = irq_iommu->irte_index; +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + return index; + } + + int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle) + { + struct irq_2_iommu *irq_iommu; ++ unsigned long flags; + +- spin_lock(&irq_2_ir_lock); ++ spin_lock_irqsave(&irq_2_ir_lock, flags); + + irq_iommu = irq_2_iommu_alloc(irq); + + if (!irq_iommu) { +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + printk(KERN_ERR "can't allocate irq_2_iommu\n"); + return -1; + } +@@ -255,7 +260,7 @@ int set_irte_irq(int irq, struct intel_i + irq_iommu->sub_handle = 
subhandle; + irq_iommu->irte_mask = 0; + +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + + return 0; + } +@@ -263,11 +268,12 @@ int set_irte_irq(int irq, struct intel_i + int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index) + { + struct irq_2_iommu *irq_iommu; ++ unsigned long flags; + +- spin_lock(&irq_2_ir_lock); ++ spin_lock_irqsave(&irq_2_ir_lock, flags); + irq_iommu = valid_irq_2_iommu(irq); + if (!irq_iommu) { +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + return -1; + } + +@@ -276,7 +282,7 @@ int clear_irte_irq(int irq, struct intel + irq_iommu->sub_handle = 0; + irq_2_iommu(irq)->irte_mask = 0; + +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + + return 0; + } +@@ -288,11 +294,12 @@ int modify_irte(int irq, struct irte *ir + struct irte *irte; + struct intel_iommu *iommu; + struct irq_2_iommu *irq_iommu; ++ unsigned long flags; + +- spin_lock(&irq_2_ir_lock); ++ spin_lock_irqsave(&irq_2_ir_lock, flags); + irq_iommu = valid_irq_2_iommu(irq); + if (!irq_iommu) { +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + return -1; + } + +@@ -301,11 +308,11 @@ int modify_irte(int irq, struct irte *ir + index = irq_iommu->irte_index + irq_iommu->sub_handle; + irte = &iommu->ir_table->base[index]; + +- set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1)); ++ set_64bit((unsigned long *)irte, irte_modified->low); + __iommu_flush_cache(iommu, irte, sizeof(*irte)); + + rc = qi_flush_iec(iommu, index, 0); +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + + return rc; + } +@@ -316,11 +323,12 @@ int flush_irte(int irq) + int index; + struct intel_iommu *iommu; + struct irq_2_iommu *irq_iommu; ++ unsigned long flags; + +- spin_lock(&irq_2_ir_lock); ++ spin_lock_irqsave(&irq_2_ir_lock, flags); + irq_iommu = valid_irq_2_iommu(irq); + if (!irq_iommu) { +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + return -1; + } + +@@ -329,7 +337,7 @@ int flush_irte(int irq) + index = irq_iommu->irte_index + irq_iommu->sub_handle; + + rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask); +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + + return rc; + } +@@ -362,11 +370,12 @@ int free_irte(int irq) + struct irte *irte; + struct intel_iommu *iommu; + struct irq_2_iommu *irq_iommu; ++ unsigned long flags; + +- spin_lock(&irq_2_ir_lock); ++ spin_lock_irqsave(&irq_2_ir_lock, flags); + irq_iommu = valid_irq_2_iommu(irq); + if (!irq_iommu) { +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + return -1; + } + +@@ -377,7 +386,7 @@ int free_irte(int irq) + + if (!irq_iommu->sub_handle) { + for (i = 0; i < (1 << irq_iommu->irte_mask); i++) +- set_64bit((unsigned long *)irte, 0); ++ set_64bit((unsigned long *)(irte + i), 0); + rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask); + } + +@@ -386,7 +395,7 @@ int free_irte(int irq) + irq_iommu->sub_handle = 0; + irq_iommu->irte_mask = 0; + +- spin_unlock(&irq_2_ir_lock); ++ spin_unlock_irqrestore(&irq_2_ir_lock, flags); + + return rc; + } +@@ -438,12 +447,12 @@ static int setup_intr_remapping(struct i + struct page *pages; + + ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table), +- GFP_KERNEL); ++ GFP_ATOMIC); + + if (!iommu->ir_table) + return -ENOMEM; + +- pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER); ++ pages = alloc_pages(GFP_ATOMIC | __GFP_ZERO, 
INTR_REMAP_PAGE_ORDER); + + if (!pages) { + printk(KERN_ERR "failed to allocate pages of order %d\n", +@@ -458,11 +467,55 @@ static int setup_intr_remapping(struct i + return 0; + } + ++/* ++ * Disable Interrupt Remapping. ++ */ ++static void disable_intr_remapping(struct intel_iommu *iommu) ++{ ++ unsigned long flags; ++ u32 sts; ++ ++ if (!ecap_ir_support(iommu->ecap)) ++ return; ++ ++ spin_lock_irqsave(&iommu->register_lock, flags); ++ ++ sts = dmar_readq(iommu->reg + DMAR_GSTS_REG); ++ if (!(sts & DMA_GSTS_IRES)) ++ goto end; ++ ++ iommu->gcmd &= ~DMA_GCMD_IRE; ++ writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); ++ ++ IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, ++ readl, !(sts & DMA_GSTS_IRES), sts); ++ ++end: ++ spin_unlock_irqrestore(&iommu->register_lock, flags); ++} ++ + int __init enable_intr_remapping(int eim) + { + struct dmar_drhd_unit *drhd; + int setup = 0; + ++ for_each_drhd_unit(drhd) { ++ struct intel_iommu *iommu = drhd->iommu; ++ ++ /* ++ * Clear previous faults. ++ */ ++ dmar_fault(-1, iommu); ++ ++ /* ++ * Disable intr remapping and queued invalidation, if already ++ * enabled prior to OS handover. ++ */ ++ disable_intr_remapping(iommu); ++ ++ dmar_disable_qi(iommu); ++ } ++ + /* + * check for the Interrupt-remapping support + */ +Index: linux-2.6-tip/drivers/pci/pci-driver.c +=================================================================== +--- linux-2.6-tip.orig/drivers/pci/pci-driver.c ++++ linux-2.6-tip/drivers/pci/pci-driver.c +@@ -212,10 +212,9 @@ static int pci_call_probe(struct pci_dri + node = dev_to_node(&dev->dev); + if (node >= 0) { + int cpu; +- node_to_cpumask_ptr(nodecpumask, node); + + get_online_cpus(); +- cpu = cpumask_any_and(nodecpumask, cpu_online_mask); ++ cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask); + if (cpu < nr_cpu_ids) + error = work_on_cpu(cpu, local_pci_probe, &ddi); + else +Index: linux-2.6-tip/drivers/pci/search.c +=================================================================== +--- linux-2.6-tip.orig/drivers/pci/search.c ++++ linux-2.6-tip/drivers/pci/search.c +@@ -277,8 +277,12 @@ static struct pci_dev *pci_get_dev_by_id + match_pci_dev_by_id); + if (dev) + pdev = to_pci_dev(dev); ++ ++ /* ++ * FIXME: take the cast off, when pci_dev_put() is made const: ++ */ + if (from) +- pci_dev_put(from); ++ pci_dev_put((struct pci_dev *)from); + return pdev; + } + +Index: linux-2.6-tip/drivers/platform/x86/fujitsu-laptop.c +=================================================================== +--- linux-2.6-tip.orig/drivers/platform/x86/fujitsu-laptop.c ++++ linux-2.6-tip/drivers/platform/x86/fujitsu-laptop.c +@@ -1301,4 +1301,4 @@ static struct pnp_device_id pnp_ids[] = + {.id = ""} + }; + +-MODULE_DEVICE_TABLE(pnp, pnp_ids); ++MODULE_STATIC_DEVICE_TABLE(pnp, pnp_ids); +Index: linux-2.6-tip/drivers/platform/x86/toshiba_acpi.c +=================================================================== +--- linux-2.6-tip.orig/drivers/platform/x86/toshiba_acpi.c ++++ linux-2.6-tip/drivers/platform/x86/toshiba_acpi.c +@@ -729,8 +729,8 @@ static int __init toshiba_acpi_init(void + { + acpi_status status = AE_OK; + u32 hci_result; +- bool bt_present; +- bool bt_on; ++ bool uninitialized_var(bt_present); ++ bool uninitialized_var(bt_on); + bool radio_on; + int ret = 0; + +Index: linux-2.6-tip/drivers/pnp/pnpbios/core.c +=================================================================== +--- linux-2.6-tip.orig/drivers/pnp/pnpbios/core.c ++++ linux-2.6-tip/drivers/pnp/pnpbios/core.c +@@ -573,6 +573,8 @@ static int __init pnpbios_init(void) + + 
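/*
 * [Editorial note, not part of the patch hunks around it] The intr_remapping.c
 * changes above convert bare spin_lock()/spin_unlock() on irq_2_ir_lock into
 * the irqsave/irqrestore variants. A minimal sketch of why that matters when a
 * lock can also be taken from interrupt context; my_lock/my_table/my_* are
 * illustrative names, not taken from the patch:
 */
#include <linux/spinlock.h>

static DEFINE_SPINLOCK(my_lock);
static int my_table[16];

/* May run in hard-irq context. */
void my_update_from_irq(int idx, int val)
{
	unsigned long flags;

	spin_lock_irqsave(&my_lock, flags);
	my_table[idx] = val;
	spin_unlock_irqrestore(&my_lock, flags);
}

/* Runs in process context. */
int my_read(int idx)
{
	unsigned long flags;
	int val;

	/*
	 * A plain spin_lock() here could deadlock: if the interrupt fires on
	 * this CPU while the lock is held, the handler spins on a lock its
	 * own CPU owns. Disabling local interrupts for the critical section
	 * closes that window, which is what the irqsave conversion buys.
	 */
	spin_lock_irqsave(&my_lock, flags);
	val = my_table[idx];
	spin_unlock_irqrestore(&my_lock, flags);

	return val;
}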
fs_initcall(pnpbios_init); + ++#ifdef CONFIG_HOTPLUG ++ + static int __init pnpbios_thread_init(void) + { + struct task_struct *task; +@@ -583,16 +585,18 @@ static int __init pnpbios_thread_init(vo + #endif + if (pnpbios_disabled) + return 0; +-#ifdef CONFIG_HOTPLUG ++ + init_completion(&unload_sem); + task = kthread_run(pnp_dock_thread, NULL, "kpnpbiosd"); + if (!IS_ERR(task)) + unloading = 0; +-#endif ++ + return 0; + } + + /* Start the kernel thread later: */ + module_init(pnpbios_thread_init); + ++#endif ++ + EXPORT_SYMBOL(pnpbios_protocol); +Index: linux-2.6-tip/drivers/scsi/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/scsi/Kconfig ++++ linux-2.6-tip/drivers/scsi/Kconfig +@@ -608,6 +608,7 @@ config SCSI_FLASHPOINT + config LIBFC + tristate "LibFC module" + select SCSI_FC_ATTRS ++ select CRC32 + ---help--- + Fibre Channel library module + +Index: linux-2.6-tip/drivers/scsi/advansys.c +=================================================================== +--- linux-2.6-tip.orig/drivers/scsi/advansys.c ++++ linux-2.6-tip/drivers/scsi/advansys.c +@@ -68,7 +68,9 @@ + * 7. advansys_info is not safe against multiple simultaneous callers + * 8. Add module_param to override ISA/VLB ioport array + */ +-#warning this driver is still not properly converted to the DMA API ++#ifdef CONFIG_ALLOW_WARNINGS ++# warning this driver is still not properly converted to the DMA API ++#endif + + /* Enable driver /proc statistics. */ + #define ADVANSYS_STATS +@@ -10516,7 +10518,7 @@ AscSendScsiQueue(ASC_DVC_VAR *asc_dvc, A + { + PortAddr iop_base; + uchar free_q_head; +- uchar next_qp; ++ uchar uninitialized_var(next_qp); + uchar tid_no; + uchar target_ix; + int sta; +@@ -10945,7 +10947,7 @@ static int asc_execute_scsi_cmnd(struct + err_code = asc_dvc->err_code; + } else { + ADV_DVC_VAR *adv_dvc = &boardp->dvc_var.adv_dvc_var; +- ADV_SCSI_REQ_Q *adv_scsiqp; ++ ADV_SCSI_REQ_Q *uninitialized_var(adv_scsiqp); + + switch (adv_build_req(boardp, scp, &adv_scsiqp)) { + case ASC_NOERROR: +@@ -13877,7 +13879,9 @@ static int __devinit advansys_board_foun + #endif + err_free_proc: + kfree(boardp->prtbuf); ++#ifdef CONFIG_PROC_FS + err_unmap: ++#endif + if (boardp->ioremap_addr) + iounmap(boardp->ioremap_addr); + err_shost: +Index: linux-2.6-tip/drivers/scsi/dpt_i2o.c +=================================================================== +--- linux-2.6-tip.orig/drivers/scsi/dpt_i2o.c ++++ linux-2.6-tip/drivers/scsi/dpt_i2o.c +@@ -183,7 +183,7 @@ static struct pci_device_id dptids[] = { + { PCI_DPT_VENDOR_ID, PCI_DPT_RAPTOR_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID,}, + { 0, } + }; +-MODULE_DEVICE_TABLE(pci,dptids); ++MODULE_STATIC_DEVICE_TABLE(pci,dptids); + + static int adpt_detect(struct scsi_host_template* sht) + { +Index: linux-2.6-tip/drivers/scsi/dtc.c +=================================================================== +--- linux-2.6-tip.orig/drivers/scsi/dtc.c ++++ linux-2.6-tip/drivers/scsi/dtc.c +@@ -165,36 +165,6 @@ static const struct signature { + + #define NO_SIGNATURES ARRAY_SIZE(signatures) + +-#ifndef MODULE +-/* +- * Function : dtc_setup(char *str, int *ints) +- * +- * Purpose : LILO command line initialization of the overrides array, +- * +- * Inputs : str - unused, ints - array of integer parameters with ints[0] +- * equal to the number of ints. 
+- * +- */ +- +-static void __init dtc_setup(char *str, int *ints) +-{ +- static int commandline_current = 0; +- int i; +- if (ints[0] != 2) +- printk("dtc_setup: usage dtc=address,irq\n"); +- else if (commandline_current < NO_OVERRIDES) { +- overrides[commandline_current].address = ints[1]; +- overrides[commandline_current].irq = ints[2]; +- for (i = 0; i < NO_BASES; ++i) +- if (bases[i].address == ints[1]) { +- bases[i].noauto = 1; +- break; +- } +- ++commandline_current; +- } +-} +-#endif +- + /* + * Function : int dtc_detect(struct scsi_host_template * tpnt) + * +Index: linux-2.6-tip/drivers/scsi/fdomain.c +=================================================================== +--- linux-2.6-tip.orig/drivers/scsi/fdomain.c ++++ linux-2.6-tip/drivers/scsi/fdomain.c +@@ -1774,7 +1774,7 @@ static struct pci_device_id fdomain_pci_ + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { } + }; +-MODULE_DEVICE_TABLE(pci, fdomain_pci_tbl); ++MODULE_STATIC_DEVICE_TABLE(pci, fdomain_pci_tbl); + #endif + #define driver_template fdomain_driver_template + #include "scsi_module.c" +Index: linux-2.6-tip/drivers/scsi/g_NCR5380.c +=================================================================== +--- linux-2.6-tip.orig/drivers/scsi/g_NCR5380.c ++++ linux-2.6-tip/drivers/scsi/g_NCR5380.c +@@ -938,18 +938,6 @@ module_param(ncr_53c400a, int, 0); + module_param(dtc_3181e, int, 0); + MODULE_LICENSE("GPL"); + +- +-static struct isapnp_device_id id_table[] __devinitdata = { +- { +- ISAPNP_ANY_ID, ISAPNP_ANY_ID, +- ISAPNP_VENDOR('D', 'T', 'C'), ISAPNP_FUNCTION(0x436e), +- 0}, +- {0} +-}; +- +-MODULE_DEVICE_TABLE(isapnp, id_table); +- +- + __setup("ncr5380=", do_NCR5380_setup); + __setup("ncr53c400=", do_NCR53C400_setup); + __setup("ncr53c400a=", do_NCR53C400A_setup); +Index: linux-2.6-tip/drivers/scsi/initio.c +=================================================================== +--- linux-2.6-tip.orig/drivers/scsi/initio.c ++++ linux-2.6-tip/drivers/scsi/initio.c +@@ -136,7 +136,7 @@ static struct pci_device_id i91u_pci_dev + { PCI_VENDOR_ID_DOMEX, I920_DEVICE_ID, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0}, + { } + }; +-MODULE_DEVICE_TABLE(pci, i91u_pci_devices); ++MODULE_STATIC_DEVICE_TABLE(pci, i91u_pci_devices); + + #define DEBUG_INTERRUPT 0 + #define DEBUG_QUEUE 0 +Index: linux-2.6-tip/drivers/scsi/lpfc/lpfc_els.c +=================================================================== +--- linux-2.6-tip.orig/drivers/scsi/lpfc/lpfc_els.c ++++ linux-2.6-tip/drivers/scsi/lpfc/lpfc_els.c +@@ -3968,7 +3968,8 @@ lpfc_els_rcv_rscn(struct lpfc_vport *vpo + struct lpfc_dmabuf *pcmd; + uint32_t *lp, *datap; + IOCB_t *icmd; +- uint32_t payload_len, length, nportid, *cmd; ++ uint32_t payload_len, uninitialized_var(length), nportid, ++ *uninitialized_var(cmd); + int rscn_cnt; + int rscn_id = 0, hba_id = 0; + int i; +Index: linux-2.6-tip/drivers/scsi/megaraid/megaraid_mm.c +=================================================================== +--- linux-2.6-tip.orig/drivers/scsi/megaraid/megaraid_mm.c ++++ linux-2.6-tip/drivers/scsi/megaraid/megaraid_mm.c +@@ -117,7 +117,7 @@ mraid_mm_ioctl(struct inode *inode, stru + int rval; + mraid_mmadp_t *adp; + uint8_t old_ioctl; +- int drvrcmd_rval; ++ int uninitialized_var(drvrcmd_rval); + void __user *argp = (void __user *)arg; + + /* +Index: linux-2.6-tip/drivers/scsi/ncr53c8xx.c +=================================================================== +--- linux-2.6-tip.orig/drivers/scsi/ncr53c8xx.c ++++ linux-2.6-tip/drivers/scsi/ncr53c8xx.c +@@ -8295,7 +8295,7 @@ __setup("ncr53c8xx=", ncr53c8xx_setup); + 
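/*
 * [Editorial note, not part of the patch hunks around it] A large share of the
 * driver hunks in this series (advansys, lpfc, megaraid_mm, ncr53c8xx, the USB
 * serial drivers, and others) wrap locals in uninitialized_var(). In this
 * kernel generation the macro is defined in include/linux/compiler-gcc.h
 * roughly as "#define uninitialized_var(x) x = x": it silences a false
 * "may be used uninitialized" warning without emitting a real initializer.
 * A minimal sketch of the usual false-positive shape, with hypothetical names:
 */
#include <linux/compiler.h>
#include <linux/errno.h>

int my_find(const int *table, int n, int key)
{
	int uninitialized_var(val);
	int i, found = 0;

	for (i = 0; i < n; i++) {
		if (table[i] == key) {
			val = i;
			found = 1;
			break;
		}
	}
	if (!found)
		return -ENOENT;

	/*
	 * val is assigned whenever found is set, but gcc cannot always prove
	 * that, so without the annotation it may warn here.
	 */
	return val;
}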
struct Scsi_Host * __init ncr_attach(struct scsi_host_template *tpnt, + int unit, struct ncr_device *device) + { +- struct host_data *host_data; ++ struct host_data *uninitialized_var(host_data); + struct ncb *np = NULL; + struct Scsi_Host *instance = NULL; + u_long flags = 0; +Index: linux-2.6-tip/drivers/scsi/qla4xxx/ql4_mbx.c +=================================================================== +--- linux-2.6-tip.orig/drivers/scsi/qla4xxx/ql4_mbx.c ++++ linux-2.6-tip/drivers/scsi/qla4xxx/ql4_mbx.c +@@ -867,7 +867,7 @@ int qla4xxx_send_tgts(struct scsi_qla_ho + { + struct dev_db_entry *fw_ddb_entry; + dma_addr_t fw_ddb_entry_dma; +- uint32_t ddb_index; ++ uint32_t uninitialized_var(ddb_index); + int ret_val = QLA_SUCCESS; + + +Index: linux-2.6-tip/drivers/scsi/scsi_lib.c +=================================================================== +--- linux-2.6-tip.orig/drivers/scsi/scsi_lib.c ++++ linux-2.6-tip/drivers/scsi/scsi_lib.c +@@ -703,71 +703,6 @@ void scsi_run_host_queues(struct Scsi_Ho + + static void __scsi_release_buffers(struct scsi_cmnd *, int); + +-/* +- * Function: scsi_end_request() +- * +- * Purpose: Post-processing of completed commands (usually invoked at end +- * of upper level post-processing and scsi_io_completion). +- * +- * Arguments: cmd - command that is complete. +- * error - 0 if I/O indicates success, < 0 for I/O error. +- * bytes - number of bytes of completed I/O +- * requeue - indicates whether we should requeue leftovers. +- * +- * Lock status: Assumed that lock is not held upon entry. +- * +- * Returns: cmd if requeue required, NULL otherwise. +- * +- * Notes: This is called for block device requests in order to +- * mark some number of sectors as complete. +- * +- * We are guaranteeing that the request queue will be goosed +- * at some point during this call. +- * Notes: If cmd was requeued, upon return it will be a stale pointer. +- */ +-static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error, +- int bytes, int requeue) +-{ +- struct request_queue *q = cmd->device->request_queue; +- struct request *req = cmd->request; +- +- /* +- * If there are blocks left over at the end, set up the command +- * to queue the remainder of them. +- */ +- if (blk_end_request(req, error, bytes)) { +- int leftover = (req->hard_nr_sectors << 9); +- +- if (blk_pc_request(req)) +- leftover = req->data_len; +- +- /* kill remainder if no retrys */ +- if (error && scsi_noretry_cmd(cmd)) +- blk_end_request(req, error, leftover); +- else { +- if (requeue) { +- /* +- * Bleah. Leftovers again. Stick the +- * leftovers in the front of the +- * queue, and goose the queue again. +- */ +- scsi_release_buffers(cmd); +- scsi_requeue_command(q, cmd); +- cmd = NULL; +- } +- return cmd; +- } +- } +- +- /* +- * This will goose the queue request function at the end, so we don't +- * need to worry about launching another command. 
+- */ +- __scsi_release_buffers(cmd, 0); +- scsi_next_command(cmd); +- return NULL; +-} +- + static inline unsigned int scsi_sgtable_index(unsigned short nents) + { + unsigned int index; +@@ -929,7 +864,6 @@ static void scsi_end_bidi_request(struct + void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) + { + int result = cmd->result; +- int this_count; + struct request_queue *q = cmd->device->request_queue; + struct request *req = cmd->request; + int error = 0; +@@ -980,24 +914,37 @@ void scsi_io_completion(struct scsi_cmnd + SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, " + "%d bytes done.\n", + req->nr_sectors, good_bytes)); +- +- /* A number of bytes were successfully read. If there +- * are leftovers and there is some kind of error +- * (result != 0), retry the rest. +- */ +- if (scsi_end_request(cmd, error, good_bytes, result == 0) == NULL) ++ if (blk_end_request(req, error, good_bytes) == 0) { ++ /* This request is completely finished; start the next one */ ++ __scsi_release_buffers(cmd, 0); ++ scsi_next_command(cmd); + return; +- this_count = blk_rq_bytes(req); +- +- error = -EIO; ++ } + +- if (host_byte(result) == DID_RESET) { ++ /* The request isn't finished yet. Figure out what to do next. */ ++ if (result == 0) { ++ /* No error, so carry out the remainder of the request. ++ * Failure to make forward progress counts against the ++ * the number of retries. ++ */ ++ if (good_bytes > 0 || --req->retries >= 0) ++ action = ACTION_REPREP; ++ else { ++ description = "Retries exhausted"; ++ action = ACTION_FAIL; ++ error = -EIO; ++ } ++ } else if (error && scsi_noretry_cmd(cmd)) { ++ /* Retrys are disallowed, so kill the remainder. */ ++ action = ACTION_FAIL; ++ } else if (host_byte(result) == DID_RESET) { + /* Third party bus reset or reset for error recovery + * reasons. Just retry the command and see what + * happens. + */ + action = ACTION_RETRY; + } else if (sense_valid && !sense_deferred) { ++ error = -EIO; + switch (sshdr.sense_key) { + case UNIT_ATTENTION: + if (cmd->device->removable) { +@@ -1097,7 +1044,7 @@ void scsi_io_completion(struct scsi_cmnd + if (driver_byte(result) & DRIVER_SENSE) + scsi_print_sense("", cmd); + } +- blk_end_request(req, -EIO, blk_rq_bytes(req)); ++ blk_end_request(req, error, blk_rq_bytes(req)); + scsi_next_command(cmd); + break; + case ACTION_REPREP: +Index: linux-2.6-tip/drivers/telephony/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/telephony/Kconfig ++++ linux-2.6-tip/drivers/telephony/Kconfig +@@ -20,6 +20,8 @@ if PHONE + config PHONE_IXJ + tristate "QuickNet Internet LineJack/PhoneJack support" + depends on ISA || PCI ++ # build breakage, config-Sat_Jul_19_00_58_16_CEST_2008.bad ++ depends on 0 + ---help--- + Say M if you have a telephony card manufactured by Quicknet + Technologies, Inc. 
These include the Internet PhoneJACK and +Index: linux-2.6-tip/drivers/telephony/ixj.c +=================================================================== +--- linux-2.6-tip.orig/drivers/telephony/ixj.c ++++ linux-2.6-tip/drivers/telephony/ixj.c +@@ -288,7 +288,7 @@ static struct pci_device_id ixj_pci_tbl[ + { } + }; + +-MODULE_DEVICE_TABLE(pci, ixj_pci_tbl); ++MODULE_STATIC_DEVICE_TABLE(pci, ixj_pci_tbl); + + /************************************************************************ + * +Index: linux-2.6-tip/drivers/usb/atm/ueagle-atm.c +=================================================================== +--- linux-2.6-tip.orig/drivers/usb/atm/ueagle-atm.c ++++ linux-2.6-tip/drivers/usb/atm/ueagle-atm.c +@@ -1427,7 +1427,7 @@ static int uea_stat_e1(struct uea_softc + static int uea_stat_e4(struct uea_softc *sc) + { + u32 data; +- u32 tmp_arr[2]; ++ u32 tmp_arr[2] = { 0, }; + int ret; + + uea_enters(INS_TO_USBDEV(sc)); +Index: linux-2.6-tip/drivers/usb/gadget/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/usb/gadget/Kconfig ++++ linux-2.6-tip/drivers/usb/gadget/Kconfig +@@ -15,6 +15,9 @@ + + menuconfig USB_GADGET + tristate "USB Gadget Support" ++ # crashes on titan with: ++ # http://redhat.com/~mingo/misc/config-Tue_Jul_22_13_44_45_CEST_2008.bad ++ depends on 0 + help + USB is a master/slave protocol, organized with one master + host (such as a PC) controlling up to 127 peripheral devices. +Index: linux-2.6-tip/drivers/usb/host/Kconfig +=================================================================== +--- linux-2.6-tip.orig/drivers/usb/host/Kconfig ++++ linux-2.6-tip/drivers/usb/host/Kconfig +@@ -329,6 +329,8 @@ config USB_WHCI_HCD + tristate "Wireless USB Host Controller Interface (WHCI) driver (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on PCI && USB ++ depends on 0 ++ + select USB_WUSB + select UWB_WHCI + help +Index: linux-2.6-tip/drivers/usb/serial/io_edgeport.c +=================================================================== +--- linux-2.6-tip.orig/drivers/usb/serial/io_edgeport.c ++++ linux-2.6-tip/drivers/usb/serial/io_edgeport.c +@@ -293,7 +293,7 @@ static void update_edgeport_E2PROM(struc + __u16 BootBuildNumber; + __u32 Bootaddr; + const struct ihex_binrec *rec; +- const struct firmware *fw; ++ const struct firmware *uninitialized_var(fw); + const char *fw_name; + int response; + +@@ -2457,7 +2457,7 @@ static int send_cmd_write_baud_rate(stru + unsigned char *cmdBuffer; + unsigned char *currCmd; + int cmdLen = 0; +- int divisor; ++ int uninitialized_var(divisor); + int status; + unsigned char number = + edge_port->port->number - edge_port->port->serial->minor; +Index: linux-2.6-tip/drivers/usb/serial/keyspan.c +=================================================================== +--- linux-2.6-tip.orig/drivers/usb/serial/keyspan.c ++++ linux-2.6-tip/drivers/usb/serial/keyspan.c +@@ -1345,7 +1345,7 @@ static int keyspan_fake_startup(struct u + int response; + const struct ihex_binrec *record; + char *fw_name; +- const struct firmware *fw; ++ const struct firmware *uninitialized_var(fw); + + dbg("Keyspan startup version %04x product %04x", + le16_to_cpu(serial->dev->descriptor.bcdDevice), +Index: linux-2.6-tip/drivers/usb/serial/keyspan_pda.c +=================================================================== +--- linux-2.6-tip.orig/drivers/usb/serial/keyspan_pda.c ++++ linux-2.6-tip/drivers/usb/serial/keyspan_pda.c +@@ -456,7 +456,7 @@ static int keyspan_pda_tiocmget(struct t + struct usb_serial_port 
*port = tty->driver_data; + struct usb_serial *serial = port->serial; + int rc; +- unsigned char status; ++ unsigned char uninitialized_var(status); + int value; + + rc = keyspan_pda_get_modem_info(serial, &status); +@@ -478,7 +478,7 @@ static int keyspan_pda_tiocmset(struct t + struct usb_serial_port *port = tty->driver_data; + struct usb_serial *serial = port->serial; + int rc; +- unsigned char status; ++ unsigned char uninitialized_var(status); + + rc = keyspan_pda_get_modem_info(serial, &status); + if (rc < 0) +@@ -726,7 +726,7 @@ static int keyspan_pda_fake_startup(stru + int response; + const char *fw_name; + const struct ihex_binrec *record; +- const struct firmware *fw; ++ const struct firmware *uninitialized_var(fw); + + /* download the firmware here ... */ + response = ezusb_set_reset(serial, 1); +Index: linux-2.6-tip/drivers/usb/serial/mos7720.c +=================================================================== +--- linux-2.6-tip.orig/drivers/usb/serial/mos7720.c ++++ linux-2.6-tip/drivers/usb/serial/mos7720.c +@@ -959,7 +959,7 @@ static int send_cmd_write_baud_rate(stru + { + struct usb_serial_port *port; + struct usb_serial *serial; +- int divisor; ++ int uninitialized_var(divisor); + int status; + unsigned char data; + unsigned char number; +Index: linux-2.6-tip/drivers/uwb/i1480/i1480-est.c +=================================================================== +--- linux-2.6-tip.orig/drivers/uwb/i1480/i1480-est.c ++++ linux-2.6-tip/drivers/uwb/i1480/i1480-est.c +@@ -96,4 +96,4 @@ static struct usb_device_id i1480_est_id + { USB_DEVICE(0x8086, 0x0c3b), }, + { }, + }; +-MODULE_DEVICE_TABLE(usb, i1480_est_id_table); ++MODULE_STATIC_DEVICE_TABLE(usb, i1480_est_id_table); +Index: linux-2.6-tip/drivers/uwb/whc-rc.c +=================================================================== +--- linux-2.6-tip.orig/drivers/uwb/whc-rc.c ++++ linux-2.6-tip/drivers/uwb/whc-rc.c +@@ -452,7 +452,7 @@ static struct pci_device_id whcrc_id_tab + { PCI_DEVICE_CLASS(PCI_CLASS_WIRELESS_WHCI, ~0) }, + { /* empty last entry */ } + }; +-MODULE_DEVICE_TABLE(pci, whcrc_id_table); ++MODULE_STATIC_DEVICE_TABLE(pci, whcrc_id_table); + + static struct umc_driver whcrc_driver = { + .name = "whc-rc", +Index: linux-2.6-tip/drivers/uwb/wlp/messages.c +=================================================================== +--- linux-2.6-tip.orig/drivers/uwb/wlp/messages.c ++++ linux-2.6-tip/drivers/uwb/wlp/messages.c +@@ -903,7 +903,7 @@ int wlp_parse_f0(struct wlp *wlp, struct + size_t len = skb->len; + size_t used; + ssize_t result; +- struct wlp_nonce enonce, rnonce; ++ struct wlp_nonce uninitialized_var(enonce), uninitialized_var(rnonce); + enum wlp_assc_error assc_err; + char enonce_buf[WLP_WSS_NONCE_STRSIZE]; + char rnonce_buf[WLP_WSS_NONCE_STRSIZE]; +Index: linux-2.6-tip/drivers/video/aty/atyfb_base.c +=================================================================== +--- linux-2.6-tip.orig/drivers/video/aty/atyfb_base.c ++++ linux-2.6-tip/drivers/video/aty/atyfb_base.c +@@ -430,7 +430,7 @@ static int __devinit correct_chipset(str + u16 type; + u32 chip_id; + const char *name; +- int i; ++ long i; + + for (i = ARRAY_SIZE(aty_chips) - 1; i >= 0; i--) + if (par->pci_id == aty_chips[i].pci_id) +@@ -529,8 +529,10 @@ static int __devinit correct_chipset(str + return 0; + } + ++#if defined(CONFIG_FB_ATY_GX) || defined(CONFIG_FB_ATY_CT) + static char ram_dram[] __devinitdata = "DRAM"; + static char ram_resv[] __devinitdata = "RESV"; ++#endif + #ifdef CONFIG_FB_ATY_GX + static char ram_vram[] __devinitdata = "VRAM"; 
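/*
 * [Editorial note, not part of the patch hunks around it] The atyfb_base.c,
 * mb862xxfb.c and sis_main.c hunks nearby wrap data and helpers that are only
 * referenced from optional sub-drivers in a combined preprocessor guard, so a
 * configuration with every sub-option disabled does not trip "defined but not
 * used" warnings. The shape of the pattern, with hypothetical CONFIG symbols:
 */
#if defined(CONFIG_MY_FEATURE_A) || defined(CONFIG_MY_FEATURE_B)
/* Used only by the A and B paths below. */
static const char my_shared_label[] = "shared";

static int my_shared_helper(void)
{
	return 0;
}
#endif /* CONFIG_MY_FEATURE_A || CONFIG_MY_FEATURE_B */

#ifdef CONFIG_MY_FEATURE_A
static int my_a_init(void)
{
	return my_shared_helper();
}
#endif

#ifdef CONFIG_MY_FEATURE_B
static const char *my_b_name(void)
{
	return my_shared_label;
}
#endif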
+ #endif /* CONFIG_FB_ATY_GX */ +@@ -3860,3 +3862,4 @@ MODULE_PARM_DESC(mode, "Specify resoluti + module_param(nomtrr, bool, 0); + MODULE_PARM_DESC(nomtrr, "bool: disable use of MTRR registers"); + #endif ++ +Index: linux-2.6-tip/drivers/video/matrox/matroxfb_crtc2.c +=================================================================== +--- linux-2.6-tip.orig/drivers/video/matrox/matroxfb_crtc2.c ++++ linux-2.6-tip/drivers/video/matrox/matroxfb_crtc2.c +@@ -262,7 +262,7 @@ static int matroxfb_dh_open(struct fb_in + #define m2info (container_of(info, struct matroxfb_dh_fb_info, fbcon)) + MINFO_FROM(m2info->primary_dev); + +- if (MINFO) { ++ if (MINFO != NULL) { + int err; + + if (ACCESS_FBINFO(dead)) { +@@ -282,7 +282,7 @@ static int matroxfb_dh_release(struct fb + int err = 0; + MINFO_FROM(m2info->primary_dev); + +- if (MINFO) { ++ if (MINFO != NULL) { + err = ACCESS_FBINFO(fbops).fb_release(&ACCESS_FBINFO(fbcon), user); + } + return err; +Index: linux-2.6-tip/drivers/video/mb862xx/mb862xxfb.c +=================================================================== +--- linux-2.6-tip.orig/drivers/video/mb862xx/mb862xxfb.c ++++ linux-2.6-tip/drivers/video/mb862xx/mb862xxfb.c +@@ -85,6 +85,8 @@ static inline unsigned int chan_to_field + return chan << bf->offset; + } + ++#if defined(CONFIG_FB_MB862XX_PCI_GDC) || defined(CONFIG_FB_MB862XX_LIME) ++ + static int mb862xxfb_setcolreg(unsigned regno, + unsigned red, unsigned green, unsigned blue, + unsigned transp, struct fb_info *info) +@@ -458,6 +460,8 @@ static ssize_t mb862xxfb_show_dispregs(s + + static DEVICE_ATTR(dispregs, 0444, mb862xxfb_show_dispregs, NULL); + ++#endif ++ + irqreturn_t mb862xx_intr(int irq, void *dev_id) + { + struct mb862xxfb_par *par = (struct mb862xxfb_par *) dev_id; +Index: linux-2.6-tip/drivers/video/sis/init301.c +=================================================================== +--- linux-2.6-tip.orig/drivers/video/sis/init301.c ++++ linux-2.6-tip/drivers/video/sis/init301.c +@@ -6691,7 +6691,7 @@ SiS_SetGroup2(struct SiS_Private *SiS_Pr + bool newtvphase; + const unsigned char *TimingPoint; + #ifdef SIS315H +- unsigned short resindex, CRT2Index; ++ unsigned short uninitialized_var(resindex), uninitialized_var(CRT2Index); + const struct SiS_Part2PortTbl *CRT2Part2Ptr = NULL; + + if(SiS_Pr->SiS_VBInfo & SetCRT2ToLCDA) return; +Index: linux-2.6-tip/drivers/video/sis/sis_main.c +=================================================================== +--- linux-2.6-tip.orig/drivers/video/sis/sis_main.c ++++ linux-2.6-tip/drivers/video/sis/sis_main.c +@@ -4175,6 +4175,7 @@ sisfb_find_rom(struct pci_dev *pdev) + return myrombase; + } + ++#if defined(CONFIG_FB_SIS_300) || defined(CONFIG_FB_SIS_315) + static void __devinit + sisfb_post_map_vram(struct sis_video_info *ivideo, unsigned int *mapsize, + unsigned int min) +@@ -4197,6 +4198,7 @@ sisfb_post_map_vram(struct sis_video_inf + } + } + } ++#endif + + #ifdef CONFIG_FB_SIS_300 + static int __devinit +Index: linux-2.6-tip/drivers/watchdog/alim1535_wdt.c +=================================================================== +--- linux-2.6-tip.orig/drivers/watchdog/alim1535_wdt.c ++++ linux-2.6-tip/drivers/watchdog/alim1535_wdt.c +@@ -306,7 +306,7 @@ static struct pci_device_id ali_pci_tbl[ + { PCI_VENDOR_ID_AL, 0x1535, PCI_ANY_ID, PCI_ANY_ID,}, + { 0, }, + }; +-MODULE_DEVICE_TABLE(pci, ali_pci_tbl); ++MODULE_STATIC_DEVICE_TABLE(pci, ali_pci_tbl); + + /* + * ali_find_watchdog - find a 1535 and 7101 +Index: linux-2.6-tip/drivers/watchdog/alim7101_wdt.c 
+=================================================================== +--- linux-2.6-tip.orig/drivers/watchdog/alim7101_wdt.c ++++ linux-2.6-tip/drivers/watchdog/alim7101_wdt.c +@@ -427,7 +427,7 @@ static struct pci_device_id alim7101_pci + { } + }; + +-MODULE_DEVICE_TABLE(pci, alim7101_pci_tbl); ++MODULE_STATIC_DEVICE_TABLE(pci, alim7101_pci_tbl); + + MODULE_AUTHOR("Steve Hill"); + MODULE_DESCRIPTION("ALi M7101 PMU Computer Watchdog Timer driver"); +Index: linux-2.6-tip/drivers/watchdog/i6300esb.c +=================================================================== +--- linux-2.6-tip.orig/drivers/watchdog/i6300esb.c ++++ linux-2.6-tip/drivers/watchdog/i6300esb.c +@@ -355,20 +355,6 @@ static struct notifier_block esb_notifie + }; + + /* +- * Data for PCI driver interface +- * +- * This data only exists for exporting the supported +- * PCI ids via MODULE_DEVICE_TABLE. We do not actually +- * register a pci_driver, because someone else might one day +- * want to register another driver on the same PCI id. +- */ +-static struct pci_device_id esb_pci_tbl[] = { +- { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_9), }, +- { 0, }, /* End of list */ +-}; +-MODULE_DEVICE_TABLE(pci, esb_pci_tbl); +- +-/* + * Init & exit routines + */ + +Index: linux-2.6-tip/drivers/watchdog/rdc321x_wdt.c +=================================================================== +--- linux-2.6-tip.orig/drivers/watchdog/rdc321x_wdt.c ++++ linux-2.6-tip/drivers/watchdog/rdc321x_wdt.c +@@ -37,7 +37,7 @@ + #include + #include + +-#include ++#include + + #define RDC_WDT_MASK 0x80000000 /* Mask */ + #define RDC_WDT_EN 0x00800000 /* Enable bit */ +Index: linux-2.6-tip/drivers/watchdog/w83697ug_wdt.c +=================================================================== +--- linux-2.6-tip.orig/drivers/watchdog/w83697ug_wdt.c ++++ linux-2.6-tip/drivers/watchdog/w83697ug_wdt.c +@@ -79,7 +79,7 @@ MODULE_PARM_DESC(nowayout, + (same as EFER) */ + #define WDT_EFDR (WDT_EFIR+1) /* Extended Function Data Register */ + +-static void w83697ug_select_wd_register(void) ++static int w83697ug_select_wd_register(void) + { + unsigned char c; + unsigned char version; +@@ -102,7 +102,7 @@ static void w83697ug_select_wd_register( + + } else { + printk(KERN_ERR PFX "No W83697UG/UF could be found\n"); +- return; ++ return -EIO; + } + + outb_p(0x07, WDT_EFER); /* point to logical device number reg */ +@@ -110,6 +110,8 @@ static void w83697ug_select_wd_register( + outb_p(0x30, WDT_EFER); /* select CR30 */ + c = inb_p(WDT_EFDR); + outb_p(c || 0x01, WDT_EFDR); /* set bit 0 to activate GPIO2 */ ++ ++ return 0; + } + + static void w83697ug_unselect_wd_register(void) +@@ -117,11 +119,12 @@ static void w83697ug_unselect_wd_registe + outb_p(0xAA, WDT_EFER); /* Leave extended function mode */ + } + +-static void w83697ug_init(void) ++static int w83697ug_init(void) + { + unsigned char t; + +- w83697ug_select_wd_register(); ++ if (w83697ug_select_wd_register()) ++ return -EIO; + + outb_p(0xF6, WDT_EFER); /* Select CRF6 */ + t = inb_p(WDT_EFDR); /* read CRF6 */ +@@ -137,6 +140,8 @@ static void w83697ug_init(void) + outb_p(t, WDT_EFDR); /* Write back to CRF5 */ + + w83697ug_unselect_wd_register(); ++ ++ return 0; + } + + static void wdt_ctrl(int timeout) +@@ -347,7 +352,11 @@ static int __init wdt_init(void) + goto out; + } + +- w83697ug_init(); ++ ret = w83697ug_init(); ++ if (ret) { ++ printk(KERN_ERR PFX "init failed\n"); ++ goto unreg_regions; ++ } + + ret = register_reboot_notifier(&wdt_notifier); + if (ret != 0) { +Index: 
linux-2.6-tip/drivers/xen/cpu_hotplug.c +=================================================================== +--- linux-2.6-tip.orig/drivers/xen/cpu_hotplug.c ++++ linux-2.6-tip/drivers/xen/cpu_hotplug.c +@@ -10,7 +10,7 @@ static void enable_hotplug_cpu(int cpu) + if (!cpu_present(cpu)) + arch_register_cpu(cpu); + +- cpu_set(cpu, cpu_present_map); ++ set_cpu_present(cpu, true); + } + + static void disable_hotplug_cpu(int cpu) +@@ -18,7 +18,7 @@ static void disable_hotplug_cpu(int cpu) + if (cpu_present(cpu)) + arch_unregister_cpu(cpu); + +- cpu_clear(cpu, cpu_present_map); ++ set_cpu_present(cpu, false); + } + + static void vcpu_hotplug(unsigned int cpu) +Index: linux-2.6-tip/drivers/xen/events.c +=================================================================== +--- linux-2.6-tip.orig/drivers/xen/events.c ++++ linux-2.6-tip/drivers/xen/events.c +@@ -26,9 +26,11 @@ + #include + #include + #include ++#include + + #include + #include ++#include + #include + #include + #include +@@ -50,36 +52,55 @@ static DEFINE_PER_CPU(int, virq_to_irq[N + /* IRQ <-> IPI mapping */ + static DEFINE_PER_CPU(int, ipi_to_irq[XEN_NR_IPIS]) = {[0 ... XEN_NR_IPIS-1] = -1}; + +-/* Packed IRQ information: binding type, sub-type index, and event channel. */ +-struct packed_irq +-{ +- unsigned short evtchn; +- unsigned char index; +- unsigned char type; +-}; +- +-static struct packed_irq irq_info[NR_IRQS]; +- +-/* Binding types. */ +-enum { +- IRQT_UNBOUND, ++/* Interrupt types. */ ++enum xen_irq_type { ++ IRQT_UNBOUND = 0, + IRQT_PIRQ, + IRQT_VIRQ, + IRQT_IPI, + IRQT_EVTCHN + }; + +-/* Convenient shorthand for packed representation of an unbound IRQ. */ +-#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0) ++/* ++ * Packed IRQ information: ++ * type - enum xen_irq_type ++ * event channel - irq->event channel mapping ++ * cpu - cpu this event channel is bound to ++ * index - type-specific information: ++ * PIRQ - vector, with MSB being "needs EIO" ++ * VIRQ - virq number ++ * IPI - IPI vector ++ * EVTCHN - ++ */ ++struct irq_info ++{ ++ enum xen_irq_type type; /* type */ ++ unsigned short evtchn; /* event channel */ ++ unsigned short cpu; /* cpu bound */ ++ ++ union { ++ unsigned short virq; ++ enum ipi_vector ipi; ++ struct { ++ unsigned short gsi; ++ unsigned short vector; ++ } pirq; ++ } u; ++}; ++ ++static struct irq_info irq_info[NR_IRQS]; + + static int evtchn_to_irq[NR_EVENT_CHANNELS] = { + [0 ... NR_EVENT_CHANNELS-1] = -1 + }; +-static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG]; +-static u8 cpu_evtchn[NR_EVENT_CHANNELS]; +- +-/* Reference counts for bindings to IRQs. */ +-static int irq_bindcount[NR_IRQS]; ++struct cpu_evtchn_s { ++ unsigned long bits[NR_EVENT_CHANNELS/BITS_PER_LONG]; ++}; ++static struct cpu_evtchn_s *cpu_evtchn_mask_p; ++static inline unsigned long *cpu_evtchn_mask(int cpu) ++{ ++ return cpu_evtchn_mask_p[cpu].bits; ++} + + /* Xen will never allocate port zero for any purpose. */ + #define VALID_EVTCHN(chn) ((chn) != 0) +@@ -87,27 +108,108 @@ static int irq_bindcount[NR_IRQS]; + static struct irq_chip xen_dynamic_chip; + + /* Constructor for packed IRQ information. 
*/ +-static inline struct packed_irq mk_irq_info(u32 type, u32 index, u32 evtchn) ++static struct irq_info mk_unbound_info(void) ++{ ++ return (struct irq_info) { .type = IRQT_UNBOUND }; ++} ++ ++static struct irq_info mk_evtchn_info(unsigned short evtchn) ++{ ++ return (struct irq_info) { .type = IRQT_EVTCHN, .evtchn = evtchn, ++ .cpu = 0 }; ++} ++ ++static struct irq_info mk_ipi_info(unsigned short evtchn, enum ipi_vector ipi) + { +- return (struct packed_irq) { evtchn, index, type }; ++ return (struct irq_info) { .type = IRQT_IPI, .evtchn = evtchn, ++ .cpu = 0, .u.ipi = ipi }; ++} ++ ++static struct irq_info mk_virq_info(unsigned short evtchn, unsigned short virq) ++{ ++ return (struct irq_info) { .type = IRQT_VIRQ, .evtchn = evtchn, ++ .cpu = 0, .u.virq = virq }; ++} ++ ++static struct irq_info mk_pirq_info(unsigned short evtchn, ++ unsigned short gsi, unsigned short vector) ++{ ++ return (struct irq_info) { .type = IRQT_PIRQ, .evtchn = evtchn, ++ .cpu = 0, .u.pirq = { .gsi = gsi, .vector = vector } }; + } + + /* + * Accessors for packed IRQ information. + */ +-static inline unsigned int evtchn_from_irq(int irq) ++static struct irq_info *info_for_irq(unsigned irq) ++{ ++ return &irq_info[irq]; ++} ++ ++static unsigned int evtchn_from_irq(unsigned irq) ++{ ++ return info_for_irq(irq)->evtchn; ++} ++ ++static enum ipi_vector ipi_from_irq(unsigned irq) ++{ ++ struct irq_info *info = info_for_irq(irq); ++ ++ BUG_ON(info == NULL); ++ BUG_ON(info->type != IRQT_IPI); ++ ++ return info->u.ipi; ++} ++ ++static unsigned virq_from_irq(unsigned irq) + { +- return irq_info[irq].evtchn; ++ struct irq_info *info = info_for_irq(irq); ++ ++ BUG_ON(info == NULL); ++ BUG_ON(info->type != IRQT_VIRQ); ++ ++ return info->u.virq; ++} ++ ++static unsigned gsi_from_irq(unsigned irq) ++{ ++ struct irq_info *info = info_for_irq(irq); ++ ++ BUG_ON(info == NULL); ++ BUG_ON(info->type != IRQT_PIRQ); ++ ++ return info->u.pirq.gsi; ++} ++ ++static unsigned vector_from_irq(unsigned irq) ++{ ++ struct irq_info *info = info_for_irq(irq); ++ ++ BUG_ON(info == NULL); ++ BUG_ON(info->type != IRQT_PIRQ); ++ ++ return info->u.pirq.vector; + } + +-static inline unsigned int index_from_irq(int irq) ++static enum xen_irq_type type_from_irq(unsigned irq) + { +- return irq_info[irq].index; ++ return info_for_irq(irq)->type; + } + +-static inline unsigned int type_from_irq(int irq) ++static unsigned cpu_from_irq(unsigned irq) + { +- return irq_info[irq].type; ++ return info_for_irq(irq)->cpu; ++} ++ ++static unsigned int cpu_from_evtchn(unsigned int evtchn) ++{ ++ int irq = evtchn_to_irq[evtchn]; ++ unsigned ret = 0; ++ ++ if (irq != -1) ++ ret = cpu_from_irq(irq); ++ ++ return ret; + } + + static inline unsigned long active_evtchns(unsigned int cpu, +@@ -115,7 +217,7 @@ static inline unsigned long active_evtch + unsigned int idx) + { + return (sh->evtchn_pending[idx] & +- cpu_evtchn_mask[cpu][idx] & ++ cpu_evtchn_mask(cpu)[idx] & + ~sh->evtchn_mask[idx]); + } + +@@ -125,13 +227,13 @@ static void bind_evtchn_to_cpu(unsigned + + BUG_ON(irq == -1); + #ifdef CONFIG_SMP +- irq_to_desc(irq)->affinity = cpumask_of_cpu(cpu); ++ cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu)); + #endif + +- __clear_bit(chn, cpu_evtchn_mask[cpu_evtchn[chn]]); +- __set_bit(chn, cpu_evtchn_mask[cpu]); ++ __clear_bit(chn, cpu_evtchn_mask(cpu_from_irq(irq))); ++ __set_bit(chn, cpu_evtchn_mask(cpu)); + +- cpu_evtchn[chn] = cpu; ++ irq_info[irq].cpu = cpu; + } + + static void init_evtchn_cpu_bindings(void) +@@ -142,17 +244,11 @@ static void 
init_evtchn_cpu_bindings(voi + + /* By default all event channels notify CPU#0. */ + for_each_irq_desc(i, desc) { +- desc->affinity = cpumask_of_cpu(0); ++ cpumask_copy(desc->affinity, cpumask_of(0)); + } + #endif + +- memset(cpu_evtchn, 0, sizeof(cpu_evtchn)); +- memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0])); +-} +- +-static inline unsigned int cpu_from_evtchn(unsigned int evtchn) +-{ +- return cpu_evtchn[evtchn]; ++ memset(cpu_evtchn_mask(0), ~0, sizeof(cpu_evtchn_mask(0))); + } + + static inline void clear_evtchn(int port) +@@ -232,9 +328,8 @@ static int find_unbound_irq(void) + int irq; + struct irq_desc *desc; + +- /* Only allocate from dynirq range */ + for (irq = 0; irq < nr_irqs; irq++) +- if (irq_bindcount[irq] == 0) ++ if (irq_info[irq].type == IRQT_UNBOUND) + break; + + if (irq == nr_irqs) +@@ -244,6 +339,8 @@ static int find_unbound_irq(void) + if (WARN_ON(desc == NULL)) + return -1; + ++ dynamic_irq_init(irq); ++ + return irq; + } + +@@ -258,16 +355,13 @@ int bind_evtchn_to_irq(unsigned int evtc + if (irq == -1) { + irq = find_unbound_irq(); + +- dynamic_irq_init(irq); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "event"); + + evtchn_to_irq[evtchn] = irq; +- irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); ++ irq_info[irq] = mk_evtchn_info(evtchn); + } + +- irq_bindcount[irq]++; +- + spin_unlock(&irq_mapping_update_lock); + + return irq; +@@ -282,12 +376,12 @@ static int bind_ipi_to_irq(unsigned int + spin_lock(&irq_mapping_update_lock); + + irq = per_cpu(ipi_to_irq, cpu)[ipi]; ++ + if (irq == -1) { + irq = find_unbound_irq(); + if (irq < 0) + goto out; + +- dynamic_irq_init(irq); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "ipi"); + +@@ -298,15 +392,12 @@ static int bind_ipi_to_irq(unsigned int + evtchn = bind_ipi.port; + + evtchn_to_irq[evtchn] = irq; +- irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); +- ++ irq_info[irq] = mk_ipi_info(evtchn, ipi); + per_cpu(ipi_to_irq, cpu)[ipi] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + +- irq_bindcount[irq]++; +- + out: + spin_unlock(&irq_mapping_update_lock); + return irq; +@@ -332,20 +423,17 @@ static int bind_virq_to_irq(unsigned int + + irq = find_unbound_irq(); + +- dynamic_irq_init(irq); + set_irq_chip_and_handler_name(irq, &xen_dynamic_chip, + handle_level_irq, "virq"); + + evtchn_to_irq[evtchn] = irq; +- irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); ++ irq_info[irq] = mk_virq_info(evtchn, virq); + + per_cpu(virq_to_irq, cpu)[virq] = irq; + + bind_evtchn_to_cpu(evtchn, cpu); + } + +- irq_bindcount[irq]++; +- + spin_unlock(&irq_mapping_update_lock); + + return irq; +@@ -358,7 +446,7 @@ static void unbind_from_irq(unsigned int + + spin_lock(&irq_mapping_update_lock); + +- if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) { ++ if (VALID_EVTCHN(evtchn)) { + close.port = evtchn; + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) + BUG(); +@@ -366,11 +454,11 @@ static void unbind_from_irq(unsigned int + switch (type_from_irq(irq)) { + case IRQT_VIRQ: + per_cpu(virq_to_irq, cpu_from_evtchn(evtchn)) +- [index_from_irq(irq)] = -1; ++ [virq_from_irq(irq)] = -1; + break; + case IRQT_IPI: + per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn)) +- [index_from_irq(irq)] = -1; ++ [ipi_from_irq(irq)] = -1; + break; + default: + break; +@@ -380,7 +468,7 @@ static void unbind_from_irq(unsigned int + bind_evtchn_to_cpu(evtchn, 0); + + evtchn_to_irq[evtchn] = -1; +- irq_info[irq] = IRQ_UNBOUND; ++ irq_info[irq] = mk_unbound_info(); + + 
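/*
 * [Editorial note, not part of the patch hunks around it] The xen/events.c
 * changes replace the old packed_irq plus parallel cpu_evtchn[] and
 * irq_bindcount[] arrays with one typed irq_info record per IRQ: a type tag,
 * the bound event channel and CPU, and a union for the type-specific index,
 * read through accessors that BUG() on a type mismatch. A generic sketch of
 * that data-structure shape (my_* names are illustrative, not the patch's):
 */
#include <linux/bug.h>

enum my_rec_type { MY_UNBOUND = 0, MY_VIRQ, MY_IPI };

struct my_rec {
	enum my_rec_type type;		/* what this slot is bound to */
	unsigned short evtchn;		/* bound event channel */
	unsigned short cpu;		/* CPU the channel is bound to */
	union {
		unsigned short virq;
		unsigned short ipi;
	} u;
};

static struct my_rec my_recs[16];

static struct my_rec my_mk_virq(unsigned short evtchn, unsigned short virq)
{
	return (struct my_rec){ .type = MY_VIRQ, .evtchn = evtchn,
				.u.virq = virq };
}

static unsigned my_virq_from(unsigned idx)
{
	struct my_rec *rec = &my_recs[idx];

	/* Catch callers asking a non-VIRQ slot for its virq number. */
	BUG_ON(rec->type != MY_VIRQ);
	return rec->u.virq;
}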
dynamic_irq_cleanup(irq); + } +@@ -498,8 +586,8 @@ irqreturn_t xen_debug_interrupt(int irq, + for(i = 0; i < NR_EVENT_CHANNELS; i++) { + if (sync_test_bit(i, sh->evtchn_pending)) { + printk(" %d: event %d -> irq %d\n", +- cpu_evtchn[i], i, +- evtchn_to_irq[i]); ++ cpu_from_evtchn(i), i, ++ evtchn_to_irq[i]); + } + } + +@@ -508,7 +596,6 @@ irqreturn_t xen_debug_interrupt(int irq, + return IRQ_HANDLED; + } + +- + /* + * Search the CPUs pending events bitmasks. For each one found, map + * the event number to an irq, and feed it into do_IRQ() for +@@ -521,11 +608,15 @@ irqreturn_t xen_debug_interrupt(int irq, + void xen_evtchn_do_upcall(struct pt_regs *regs) + { + int cpu = get_cpu(); ++ struct pt_regs *old_regs = set_irq_regs(regs); + struct shared_info *s = HYPERVISOR_shared_info; + struct vcpu_info *vcpu_info = __get_cpu_var(xen_vcpu); + static DEFINE_PER_CPU(unsigned, nesting_count); + unsigned count; + ++ exit_idle(); ++ irq_enter(); ++ + do { + unsigned long pending_words; + +@@ -550,7 +641,7 @@ void xen_evtchn_do_upcall(struct pt_regs + int irq = evtchn_to_irq[port]; + + if (irq != -1) +- xen_do_IRQ(irq, regs); ++ handle_irq(irq, regs); + } + } + +@@ -561,12 +652,17 @@ void xen_evtchn_do_upcall(struct pt_regs + } while(count != 1); + + out: ++ irq_exit(); ++ set_irq_regs(old_regs); ++ + put_cpu(); + } + + /* Rebind a new event channel to an existing irq. */ + void rebind_evtchn_irq(int evtchn, int irq) + { ++ struct irq_info *info = info_for_irq(irq); ++ + /* Make sure the irq is masked, since the new event channel + will also be masked. */ + disable_irq(irq); +@@ -576,11 +672,11 @@ void rebind_evtchn_irq(int evtchn, int i + /* After resume the irq<->evtchn mappings are all cleared out */ + BUG_ON(evtchn_to_irq[evtchn] != -1); + /* Expect irq to have been bound before, +- so the bindcount should be non-0 */ +- BUG_ON(irq_bindcount[irq] == 0); ++ so there should be a proper type */ ++ BUG_ON(info->type == IRQT_UNBOUND); + + evtchn_to_irq[evtchn] = irq; +- irq_info[irq] = mk_irq_info(IRQT_EVTCHN, 0, evtchn); ++ irq_info[irq] = mk_evtchn_info(evtchn); + + spin_unlock(&irq_mapping_update_lock); + +@@ -690,8 +786,7 @@ static void restore_cpu_virqs(unsigned i + if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) + continue; + +- BUG_ON(irq_info[irq].type != IRQT_VIRQ); +- BUG_ON(irq_info[irq].index != virq); ++ BUG_ON(virq_from_irq(irq) != virq); + + /* Get a new binding from Xen. */ + bind_virq.virq = virq; +@@ -703,7 +798,7 @@ static void restore_cpu_virqs(unsigned i + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; +- irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); ++ irq_info[irq] = mk_virq_info(evtchn, virq); + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. */ +@@ -720,8 +815,7 @@ static void restore_cpu_ipis(unsigned in + if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) + continue; + +- BUG_ON(irq_info[irq].type != IRQT_IPI); +- BUG_ON(irq_info[irq].index != ipi); ++ BUG_ON(ipi_from_irq(irq) != ipi); + + /* Get a new binding from Xen. */ + bind_ipi.vcpu = cpu; +@@ -732,7 +826,7 @@ static void restore_cpu_ipis(unsigned in + + /* Record the new mapping. */ + evtchn_to_irq[evtchn] = irq; +- irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); ++ irq_info[irq] = mk_ipi_info(evtchn, ipi); + bind_evtchn_to_cpu(evtchn, cpu); + + /* Ready for use. 
*/ +@@ -812,8 +906,11 @@ void xen_irq_resume(void) + + static struct irq_chip xen_dynamic_chip __read_mostly = { + .name = "xen-dyn", ++ ++ .disable = disable_dynirq, + .mask = disable_dynirq, + .unmask = enable_dynirq, ++ + .ack = ack_dynirq, + .set_affinity = set_affinity_irq, + .retrigger = retrigger_dynirq, +@@ -822,6 +919,10 @@ static struct irq_chip xen_dynamic_chip + void __init xen_init_IRQ(void) + { + int i; ++ size_t size = nr_cpu_ids * sizeof(struct cpu_evtchn_s); ++ ++ cpu_evtchn_mask_p = alloc_bootmem(size); ++ BUG_ON(cpu_evtchn_mask_p == NULL); + + init_evtchn_cpu_bindings(); + +@@ -829,9 +930,5 @@ void __init xen_init_IRQ(void) + for (i = 0; i < NR_EVENT_CHANNELS; i++) + mask_evtchn(i); + +- /* Dynamic IRQ space is currently unbound. Zero the refcnts. */ +- for (i = 0; i < nr_irqs; i++) +- irq_bindcount[i] = 0; +- + irq_ctx_init(smp_processor_id()); + } +Index: linux-2.6-tip/drivers/xen/manage.c +=================================================================== +--- linux-2.6-tip.orig/drivers/xen/manage.c ++++ linux-2.6-tip/drivers/xen/manage.c +@@ -108,7 +108,7 @@ static void do_suspend(void) + /* XXX use normal device tree? */ + xenbus_suspend(); + +- err = stop_machine(xen_suspend, &cancelled, &cpumask_of_cpu(0)); ++ err = stop_machine(xen_suspend, &cancelled, cpumask_of(0)); + if (err) { + printk(KERN_ERR "failed to start xen_suspend: %d\n", err); + goto out; +Index: linux-2.6-tip/fs/Kconfig +=================================================================== +--- linux-2.6-tip.orig/fs/Kconfig ++++ linux-2.6-tip/fs/Kconfig +@@ -40,7 +40,7 @@ config FS_POSIX_ACL + default n + + config FILE_LOCKING +- bool "Enable POSIX file locking API" if EMBEDDED ++ bool "Enable POSIX file locking API" if BROKEN + default y + help + This option enables standard file locking support, required +Index: linux-2.6-tip/fs/afs/dir.c +=================================================================== +--- linux-2.6-tip.orig/fs/afs/dir.c ++++ linux-2.6-tip/fs/afs/dir.c +@@ -564,7 +564,7 @@ static struct dentry *afs_lookup(struct + static int afs_d_revalidate(struct dentry *dentry, struct nameidata *nd) + { + struct afs_vnode *vnode, *dir; +- struct afs_fid fid; ++ struct afs_fid fid = { 0, }; + struct dentry *parent; + struct key *key; + void *dir_version; +Index: linux-2.6-tip/fs/befs/debug.c +=================================================================== +--- linux-2.6-tip.orig/fs/befs/debug.c ++++ linux-2.6-tip/fs/befs/debug.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + + #endif /* __KERNEL__ */ + +Index: linux-2.6-tip/fs/befs/linuxvfs.c +=================================================================== +--- linux-2.6-tip.orig/fs/befs/linuxvfs.c ++++ linux-2.6-tip/fs/befs/linuxvfs.c +@@ -168,7 +168,7 @@ befs_lookup(struct inode *dir, struct de + befs_off_t offset; + int ret; + int utfnamelen; +- char *utfname; ++ char *uninitialized_var(utfname); + const char *name = dentry->d_name.name; + + befs_debug(sb, "---> befs_lookup() " +@@ -221,8 +221,8 @@ befs_readdir(struct file *filp, void *di + size_t keysize; + unsigned char d_type; + char keybuf[BEFS_NAME_LEN + 1]; +- char *nlsname; +- int nlsnamelen; ++ char *uninitialized_var(nlsname); ++ int uninitialized_var(nlsnamelen); + const char *dirname = filp->f_path.dentry->d_name.name; + + befs_debug(sb, "---> befs_readdir() " +Index: linux-2.6-tip/fs/cifs/cifssmb.c +=================================================================== +--- linux-2.6-tip.orig/fs/cifs/cifssmb.c ++++ linux-2.6-tip/fs/cifs/cifssmb.c +@@ 
-3118,7 +3118,7 @@ CIFSSMBGetCIFSACL(const int xid, struct + __u32 parm_len; + __u32 acl_len; + struct smb_com_ntransact_rsp *pSMBr; +- char *pdata; ++ char *uninitialized_var(pdata); + + /* validate_nttransact */ + rc = validate_ntransact(iov[0].iov_base, (char **)&parm, +Index: linux-2.6-tip/fs/cifs/readdir.c +=================================================================== +--- linux-2.6-tip.orig/fs/cifs/readdir.c ++++ linux-2.6-tip/fs/cifs/readdir.c +@@ -906,7 +906,7 @@ static int cifs_filldir(char *pfindEntry + __u64 inum; + struct cifs_sb_info *cifs_sb; + struct inode *tmp_inode; +- struct dentry *tmp_dentry; ++ struct dentry *uninitialized_var(tmp_dentry); + + /* get filename and len into qstring */ + /* get dentry */ +@@ -990,7 +990,7 @@ int cifs_readdir(struct file *file, void + struct cifs_sb_info *cifs_sb; + struct cifsTconInfo *pTcon; + struct cifsFileInfo *cifsFile = NULL; +- char *current_entry; ++ char *uninitialized_var(current_entry); + int num_to_fill = 0; + char *tmp_buf = NULL; + char *end_of_smb; +Index: linux-2.6-tip/fs/coda/Makefile +=================================================================== +--- linux-2.6-tip.orig/fs/coda/Makefile ++++ linux-2.6-tip/fs/coda/Makefile +@@ -5,7 +5,9 @@ + obj-$(CONFIG_CODA_FS) += coda.o + + coda-objs := psdev.o cache.o cnode.o inode.o dir.o file.o upcall.o \ +- coda_linux.o symlink.o pioctl.o sysctl.o ++ coda_linux.o symlink.o pioctl.o ++ ++coda-$(CONFIG_SYSCTL) += sysctl.o + + # If you want debugging output, please uncomment the following line. + +Index: linux-2.6-tip/fs/coda/coda_int.h +=================================================================== +--- linux-2.6-tip.orig/fs/coda/coda_int.h ++++ linux-2.6-tip/fs/coda/coda_int.h +@@ -12,8 +12,13 @@ void coda_destroy_inodecache(void); + int coda_init_inodecache(void); + int coda_fsync(struct file *coda_file, struct dentry *coda_dentry, + int datasync); ++#ifdef CONFIG_SYSCTL + void coda_sysctl_init(void); + void coda_sysctl_clean(void); ++#else ++static inline void coda_sysctl_init(void) { } ++static inline void coda_sysctl_clean(void) { } ++#endif + + #endif /* _CODA_INT_ */ + +Index: linux-2.6-tip/fs/coda/sysctl.c +=================================================================== +--- linux-2.6-tip.orig/fs/coda/sysctl.c ++++ linux-2.6-tip/fs/coda/sysctl.c +@@ -57,18 +57,14 @@ static ctl_table fs_table[] = { + + void coda_sysctl_init(void) + { +-#ifdef CONFIG_SYSCTL +- if ( !fs_table_header ) ++ if (!fs_table_header) + fs_table_header = register_sysctl_table(fs_table); +-#endif + } + + void coda_sysctl_clean(void) + { +-#ifdef CONFIG_SYSCTL +- if ( fs_table_header ) { ++ if (fs_table_header) { + unregister_sysctl_table(fs_table_header); + fs_table_header = NULL; + } +-#endif + } +Index: linux-2.6-tip/fs/compat_binfmt_elf.c +=================================================================== +--- linux-2.6-tip.orig/fs/compat_binfmt_elf.c ++++ linux-2.6-tip/fs/compat_binfmt_elf.c +@@ -42,6 +42,7 @@ + #define elf_prstatus compat_elf_prstatus + #define elf_prpsinfo compat_elf_prpsinfo + ++#if defined(USE_ELF_CORE_DUMP) && defined(CONFIG_ELF_CORE) + /* + * Compat version of cputime_to_compat_timeval, perhaps this + * should be an inline in . 
+@@ -55,8 +56,9 @@ static void cputime_to_compat_timeval(co + value->tv_usec = tv.tv_usec; + } + +-#undef cputime_to_timeval +-#define cputime_to_timeval cputime_to_compat_timeval ++# undef cputime_to_timeval ++# define cputime_to_timeval cputime_to_compat_timeval ++#endif + + + /* +Index: linux-2.6-tip/fs/configfs/symlink.c +=================================================================== +--- linux-2.6-tip.orig/fs/configfs/symlink.c ++++ linux-2.6-tip/fs/configfs/symlink.c +@@ -135,7 +135,7 @@ int configfs_symlink(struct inode *dir, + struct path path; + struct configfs_dirent *sd; + struct config_item *parent_item; +- struct config_item *target_item; ++ struct config_item *uninitialized_var(target_item); + struct config_item_type *type; + + ret = -EPERM; /* What lack-of-symlink returns */ +Index: linux-2.6-tip/fs/dcache.c +=================================================================== +--- linux-2.6-tip.orig/fs/dcache.c ++++ linux-2.6-tip/fs/dcache.c +@@ -726,8 +726,9 @@ void shrink_dcache_for_umount(struct sup + { + struct dentry *dentry; + +- if (down_read_trylock(&sb->s_umount)) +- BUG(); ++// -rt: this might succeed there ... ++// if (down_read_trylock(&sb->s_umount)) ++// BUG(); + + dentry = sb->s_root; + sb->s_root = NULL; +@@ -1877,6 +1878,8 @@ out_nolock: + shouldnt_be_hashed: + spin_unlock(&dcache_lock); + BUG(); ++ ++ return NULL; + } + + static int prepend(char **buffer, int *buflen, const char *str, int namelen) +Index: linux-2.6-tip/fs/debugfs/inode.c +=================================================================== +--- linux-2.6-tip.orig/fs/debugfs/inode.c ++++ linux-2.6-tip/fs/debugfs/inode.c +@@ -30,6 +30,7 @@ + + static struct vfsmount *debugfs_mount; + static int debugfs_mount_count; ++static bool debugfs_registered; + + static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t dev) + { +@@ -496,6 +497,16 @@ exit: + } + EXPORT_SYMBOL_GPL(debugfs_rename); + ++/** ++ * debugfs_initialized - Tells whether debugfs has been registered ++ */ ++bool debugfs_initialized(void) ++{ ++ return debugfs_registered; ++} ++EXPORT_SYMBOL_GPL(debugfs_initialized); ++ ++ + static struct kobject *debug_kobj; + + static int __init debugfs_init(void) +@@ -509,11 +520,16 @@ static int __init debugfs_init(void) + retval = register_filesystem(&debug_fs_type); + if (retval) + kobject_put(debug_kobj); ++ else ++ debugfs_registered = true; ++ + return retval; + } + + static void __exit debugfs_exit(void) + { ++ debugfs_registered = false; ++ + simple_release_fs(&debugfs_mount, &debugfs_mount_count); + unregister_filesystem(&debug_fs_type); + kobject_put(debug_kobj); +Index: linux-2.6-tip/fs/ecryptfs/keystore.c +=================================================================== +--- linux-2.6-tip.orig/fs/ecryptfs/keystore.c ++++ linux-2.6-tip/fs/ecryptfs/keystore.c +@@ -1013,7 +1013,7 @@ decrypt_pki_encrypted_session_key(struct + struct ecryptfs_message *msg = NULL; + char *auth_tok_sig; + char *payload; +- size_t payload_len; ++ size_t uninitialized_var(payload_len); + int rc; + + rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok); +@@ -1845,7 +1845,7 @@ pki_encrypt_session_key(struct ecryptfs_ + { + struct ecryptfs_msg_ctx *msg_ctx = NULL; + char *payload = NULL; +- size_t payload_len; ++ size_t uninitialized_var(payload_len); + struct ecryptfs_message *msg; + int rc; + +Index: linux-2.6-tip/fs/eventpoll.c +=================================================================== +--- linux-2.6-tip.orig/fs/eventpoll.c ++++ linux-2.6-tip/fs/eventpoll.c +@@ -1098,7 
+1098,7 @@ retry: + SYSCALL_DEFINE1(epoll_create1, int, flags) + { + int error, fd = -1; +- struct eventpoll *ep; ++ struct eventpoll *uninitialized_var(ep); + + /* Check the EPOLL_* constant for consistency. */ + BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC); +Index: linux-2.6-tip/fs/exec.c +=================================================================== +--- linux-2.6-tip.orig/fs/exec.c ++++ linux-2.6-tip/fs/exec.c +@@ -33,6 +33,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -46,6 +47,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -509,7 +511,7 @@ static int shift_arg_pages(struct vm_are + unsigned long length = old_end - old_start; + unsigned long new_start = old_start - shift; + unsigned long new_end = old_end - shift; +- struct mmu_gather *tlb; ++ struct mmu_gather tlb; + + BUG_ON(new_start > new_end); + +@@ -534,12 +536,12 @@ static int shift_arg_pages(struct vm_are + return -ENOMEM; + + lru_add_drain(); +- tlb = tlb_gather_mmu(mm, 0); ++ tlb_gather_mmu(&tlb, mm, 0); + if (new_end > old_start) { + /* + * when the old and new regions overlap clear from new_end. + */ +- free_pgd_range(tlb, new_end, old_end, new_end, ++ free_pgd_range(&tlb, new_end, old_end, new_end, + vma->vm_next ? vma->vm_next->vm_start : 0); + } else { + /* +@@ -548,10 +550,10 @@ static int shift_arg_pages(struct vm_are + * have constraints on va-space that make this illegal (IA64) - + * for the others its just a little faster. + */ +- free_pgd_range(tlb, old_start, old_end, new_end, ++ free_pgd_range(&tlb, old_start, old_end, new_end, + vma->vm_next ? vma->vm_next->vm_start : 0); + } +- tlb_finish_mmu(tlb, new_end, old_end); ++ tlb_finish_mmu(&tlb, new_end, old_end); + + /* + * shrink the vma to just the new range. +@@ -738,10 +740,12 @@ static int exec_mmap(struct mm_struct *m + } + } + task_lock(tsk); ++ local_irq_disable(); + active_mm = tsk->active_mm; ++ activate_mm(active_mm, mm); + tsk->mm = mm; + tsk->active_mm = mm; +- activate_mm(active_mm, mm); ++ local_irq_enable(); + task_unlock(tsk); + arch_pick_mmap_layout(mm); + if (old_mm) { +@@ -1010,6 +1014,13 @@ int flush_old_exec(struct linux_binprm * + + current->personality &= ~bprm->per_clear; + ++ /* ++ * Flush performance counters when crossing a ++ * security domain: ++ */ ++ if (!get_dumpable(current->mm)) ++ perf_counter_exit_task(current); ++ + /* An exec changes our domain. 
We are no longer part of the thread + group */ + +Index: linux-2.6-tip/fs/ext4/extents.c +=================================================================== +--- linux-2.6-tip.orig/fs/ext4/extents.c ++++ linux-2.6-tip/fs/ext4/extents.c +@@ -1159,6 +1159,7 @@ ext4_ext_search_right(struct inode *inod + return 0; + } + ++ ix = NULL; /* avoid gcc false positive warning */ + /* go up and search for index to the right */ + while (--depth >= 0) { + ix = path[depth].p_idx; +Index: linux-2.6-tip/fs/fat/namei_vfat.c +=================================================================== +--- linux-2.6-tip.orig/fs/fat/namei_vfat.c ++++ linux-2.6-tip/fs/fat/namei_vfat.c +@@ -595,12 +595,12 @@ static int vfat_build_slots(struct inode + struct fat_mount_options *opts = &sbi->options; + struct msdos_dir_slot *ps; + struct msdos_dir_entry *de; +- unsigned char cksum, lcase; ++ unsigned char cksum, uninitialized_var(lcase); + unsigned char msdos_name[MSDOS_NAME]; + wchar_t *uname; + __le16 time, date; + u8 time_cs; +- int err, ulen, usize, i; ++ int err, uninitialized_var(ulen), usize, i; + loff_t offset; + + *nr_slots = 0; +Index: linux-2.6-tip/fs/jfs/jfs_dmap.c +=================================================================== +--- linux-2.6-tip.orig/fs/jfs/jfs_dmap.c ++++ linux-2.6-tip/fs/jfs/jfs_dmap.c +@@ -1618,7 +1618,7 @@ static int dbAllocAny(struct bmap * bmp, + */ + static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno) + { +- int rc, leafidx, lev; ++ int rc, uninitialized_var(leafidx), lev; + s64 b, lblkno; + struct dmapctl *dcp; + int budmin; +Index: linux-2.6-tip/fs/libfs.c +=================================================================== +--- linux-2.6-tip.orig/fs/libfs.c ++++ linux-2.6-tip/fs/libfs.c +@@ -574,6 +574,21 @@ ssize_t memory_read_from_buffer(void *to + * possibly a read which collects the result - which is stored in a + * file-local buffer. + */ ++ ++void simple_transaction_set(struct file *file, size_t n) ++{ ++ struct simple_transaction_argresp *ar = file->private_data; ++ ++ BUG_ON(n > SIMPLE_TRANSACTION_LIMIT); ++ ++ /* ++ * The barrier ensures that ar->size will really remain zero until ++ * ar->data is ready for reading. 
++ */ ++ smp_mb(); ++ ar->size = n; ++} ++ + char *simple_transaction_get(struct file *file, const char __user *buf, size_t size) + { + struct simple_transaction_argresp *ar; +@@ -819,6 +834,7 @@ EXPORT_SYMBOL(simple_sync_file); + EXPORT_SYMBOL(simple_unlink); + EXPORT_SYMBOL(simple_read_from_buffer); + EXPORT_SYMBOL(memory_read_from_buffer); ++EXPORT_SYMBOL(simple_transaction_set); + EXPORT_SYMBOL(simple_transaction_get); + EXPORT_SYMBOL(simple_transaction_read); + EXPORT_SYMBOL(simple_transaction_release); +Index: linux-2.6-tip/fs/locks.c +=================================================================== +--- linux-2.6-tip.orig/fs/locks.c ++++ linux-2.6-tip/fs/locks.c +@@ -1567,7 +1567,7 @@ EXPORT_SYMBOL(flock_lock_file_wait); + SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd) + { + struct file *filp; +- struct file_lock *lock; ++ struct file_lock *uninitialized_var(lock); + int can_sleep, unlock; + int error; + +Index: linux-2.6-tip/fs/ocfs2/aops.c +=================================================================== +--- linux-2.6-tip.orig/fs/ocfs2/aops.c ++++ linux-2.6-tip/fs/ocfs2/aops.c +@@ -1646,7 +1646,7 @@ int ocfs2_write_begin_nolock(struct addr + { + int ret, credits = OCFS2_INODE_UPDATE_CREDITS; + unsigned int clusters_to_alloc, extents_to_split; +- struct ocfs2_write_ctxt *wc; ++ struct ocfs2_write_ctxt *uninitialized_var(wc); + struct inode *inode = mapping->host; + struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); + struct ocfs2_dinode *di; +Index: linux-2.6-tip/fs/ocfs2/cluster/heartbeat.c +=================================================================== +--- linux-2.6-tip.orig/fs/ocfs2/cluster/heartbeat.c ++++ linux-2.6-tip/fs/ocfs2/cluster/heartbeat.c +@@ -1026,8 +1026,8 @@ static ssize_t o2hb_region_block_bytes_w + size_t count) + { + int status; +- unsigned long block_bytes; +- unsigned int block_bits; ++ unsigned long uninitialized_var(block_bytes); ++ unsigned int uninitialized_var(block_bits); + + if (reg->hr_bdev) + return -EINVAL; +Index: linux-2.6-tip/fs/ocfs2/ioctl.c +=================================================================== +--- linux-2.6-tip.orig/fs/ocfs2/ioctl.c ++++ linux-2.6-tip/fs/ocfs2/ioctl.c +@@ -111,7 +111,7 @@ bail: + long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) + { + struct inode *inode = filp->f_path.dentry->d_inode; +- unsigned int flags; ++ unsigned int uninitialized_var(flags); + int new_clusters; + int status; + struct ocfs2_space_resv sr; +Index: linux-2.6-tip/fs/ocfs2/slot_map.c +=================================================================== +--- linux-2.6-tip.orig/fs/ocfs2/slot_map.c ++++ linux-2.6-tip/fs/ocfs2/slot_map.c +@@ -357,7 +357,7 @@ static int ocfs2_map_slot_buffers(struct + { + int status = 0; + u64 blkno; +- unsigned long long blocks, bytes; ++ unsigned long long blocks, uninitialized_var(bytes); + unsigned int i; + struct buffer_head *bh; + +Index: linux-2.6-tip/fs/ocfs2/stack_user.c +=================================================================== +--- linux-2.6-tip.orig/fs/ocfs2/stack_user.c ++++ linux-2.6-tip/fs/ocfs2/stack_user.c +@@ -807,7 +807,7 @@ static int fs_protocol_compare(struct oc + static int user_cluster_connect(struct ocfs2_cluster_connection *conn) + { + dlm_lockspace_t *fsdlm; +- struct ocfs2_live_connection *control; ++ struct ocfs2_live_connection *uninitialized_var(control); + int rc = 0; + + BUG_ON(conn == NULL); +Index: linux-2.6-tip/fs/omfs/file.c +=================================================================== +--- 
linux-2.6-tip.orig/fs/omfs/file.c ++++ linux-2.6-tip/fs/omfs/file.c +@@ -237,14 +237,14 @@ static int omfs_get_block(struct inode * + struct buffer_head *bh; + sector_t next, offset; + int ret; +- u64 new_block; ++ u64 uninitialized_var(new_block); + u32 max_extents; + int extent_count; + struct omfs_extent *oe; + struct omfs_extent_entry *entry; + struct omfs_sb_info *sbi = OMFS_SB(inode->i_sb); + int max_blocks = bh_result->b_size >> inode->i_blkbits; +- int remain; ++ int uninitialized_var(remain); + + ret = -EIO; + bh = sb_bread(inode->i_sb, clus_to_blk(sbi, inode->i_ino)); +Index: linux-2.6-tip/fs/partitions/check.c +=================================================================== +--- linux-2.6-tip.orig/fs/partitions/check.c ++++ linux-2.6-tip/fs/partitions/check.c +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + + #include "check.h" + +@@ -294,6 +295,9 @@ static struct attribute_group part_attr_ + + static struct attribute_group *part_attr_groups[] = { + &part_attr_group, ++#ifdef CONFIG_BLK_DEV_IO_TRACE ++ &blk_trace_attr_group, ++#endif + NULL + }; + +Index: linux-2.6-tip/fs/proc/loadavg.c +=================================================================== +--- linux-2.6-tip.orig/fs/proc/loadavg.c ++++ linux-2.6-tip/fs/proc/loadavg.c +@@ -12,20 +12,14 @@ + + static int loadavg_proc_show(struct seq_file *m, void *v) + { +- int a, b, c; +- unsigned long seq; ++ unsigned long avnrun[3]; + +- do { +- seq = read_seqbegin(&xtime_lock); +- a = avenrun[0] + (FIXED_1/200); +- b = avenrun[1] + (FIXED_1/200); +- c = avenrun[2] + (FIXED_1/200); +- } while (read_seqretry(&xtime_lock, seq)); +- +- seq_printf(m, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n", +- LOAD_INT(a), LOAD_FRAC(a), +- LOAD_INT(b), LOAD_FRAC(b), +- LOAD_INT(c), LOAD_FRAC(c), ++ get_avenrun(avnrun, FIXED_1/200, 0); ++ ++ seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu %ld/%d %d\n", ++ LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]), ++ LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]), ++ LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]), + nr_running(), nr_threads, + task_active_pid_ns(current)->last_pid); + return 0; +Index: linux-2.6-tip/fs/reiserfs/do_balan.c +=================================================================== +--- linux-2.6-tip.orig/fs/reiserfs/do_balan.c ++++ linux-2.6-tip/fs/reiserfs/do_balan.c +@@ -1295,9 +1295,8 @@ static int balance_leaf(struct tree_bala + + RFALSE(ih, "PAP-12210: ih must be 0"); + +- if (is_direntry_le_ih +- (aux_ih = +- B_N_PITEM_HEAD(tbS0, item_pos))) { ++ aux_ih = B_N_PITEM_HEAD(tbS0, item_pos); ++ if (is_direntry_le_ih(aux_ih)) { + /* we append to directory item */ + + int entry_count; +Index: linux-2.6-tip/fs/reiserfs/lbalance.c +=================================================================== +--- linux-2.6-tip.orig/fs/reiserfs/lbalance.c ++++ linux-2.6-tip/fs/reiserfs/lbalance.c +@@ -389,7 +389,8 @@ static void leaf_item_bottle(struct buff + + if (last_first == FIRST_TO_LAST) { + /* if ( if item in position item_num in buffer SOURCE is directory item ) */ +- if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num))) ++ ih = B_N_PITEM_HEAD(src, item_num); ++ if (is_direntry_le_ih(ih)) + leaf_copy_dir_entries(dest_bi, src, FIRST_TO_LAST, + item_num, 0, cpy_bytes); + else { +@@ -417,7 +418,8 @@ static void leaf_item_bottle(struct buff + } + } else { + /* if ( if item in position item_num in buffer SOURCE is directory item ) */ +- if (is_direntry_le_ih(ih = B_N_PITEM_HEAD(src, item_num))) ++ ih = B_N_PITEM_HEAD(src, item_num); ++ if (is_direntry_le_ih(ih)) + 
leaf_copy_dir_entries(dest_bi, src, LAST_TO_FIRST, + item_num, + I_ENTRY_COUNT(ih) - cpy_bytes, +@@ -774,8 +776,8 @@ void leaf_delete_items(struct buffer_inf + leaf_delete_items_entirely(cur_bi, first + 1, + del_num - 1); + +- if (is_direntry_le_ih +- (ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1))) ++ ih = B_N_PITEM_HEAD(bh, B_NR_ITEMS(bh) - 1); ++ if (is_direntry_le_ih(ih)) + /* the last item is directory */ + /* len = numbers of directory entries in this item */ + len = ih_entry_count(ih); +Index: linux-2.6-tip/fs/squashfs/export.c +=================================================================== +--- linux-2.6-tip.orig/fs/squashfs/export.c ++++ linux-2.6-tip/fs/squashfs/export.c +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + + #include "squashfs_fs.h" + #include "squashfs_fs_sb.h" +Index: linux-2.6-tip/fs/udf/truncate.c +=================================================================== +--- linux-2.6-tip.orig/fs/udf/truncate.c ++++ linux-2.6-tip/fs/udf/truncate.c +@@ -87,7 +87,7 @@ void udf_truncate_tail_extent(struct ino + else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) + adsize = sizeof(long_ad); + else +- BUG(); ++ panic("udf_truncate_tail_extent: unknown alloc type!"); + + /* Find the last extent in the file */ + while ((netype = udf_next_aext(inode, &epos, &eloc, &elen, 1)) != -1) { +@@ -214,7 +214,7 @@ void udf_truncate_extents(struct inode * + else if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_LONG) + adsize = sizeof(long_ad); + else +- BUG(); ++ panic("udf_truncate_extents: unknown alloc type!"); + + etype = inode_bmap(inode, first_block, &epos, &eloc, &elen, &offset); + byte_offset = (offset << sb->s_blocksize_bits) + +Index: linux-2.6-tip/fs/xfs/linux-2.6/xfs_xattr.c +=================================================================== +--- linux-2.6-tip.orig/fs/xfs/linux-2.6/xfs_xattr.c ++++ linux-2.6-tip/fs/xfs/linux-2.6/xfs_xattr.c +@@ -30,20 +30,6 @@ + + + /* +- * ACL handling. Should eventually be moved into xfs_acl.c +- */ +- +-static int +-xfs_decode_acl(const char *name) +-{ +- if (strcmp(name, "posix_acl_access") == 0) +- return _ACL_TYPE_ACCESS; +- else if (strcmp(name, "posix_acl_default") == 0) +- return _ACL_TYPE_DEFAULT; +- return -EINVAL; +-} +- +-/* + * Get system extended attributes which at the moment only + * includes Posix ACLs. + */ +Index: linux-2.6-tip/fs/xfs/xfs_acl.c +=================================================================== +--- linux-2.6-tip.orig/fs/xfs/xfs_acl.c ++++ linux-2.6-tip/fs/xfs/xfs_acl.c +@@ -51,6 +51,19 @@ kmem_zone_t *xfs_acl_zone; + + + /* ++ * ACL handling. ++ */ ++int ++xfs_decode_acl(const char *name) ++{ ++ if (strcmp(name, "posix_acl_access") == 0) ++ return _ACL_TYPE_ACCESS; ++ else if (strcmp(name, "posix_acl_default") == 0) ++ return _ACL_TYPE_DEFAULT; ++ return -EINVAL; ++} ++ ++/* + * Test for existence of access ACL attribute as efficiently as possible. 
+ */ + int +Index: linux-2.6-tip/fs/xfs/xfs_acl.h +=================================================================== +--- linux-2.6-tip.orig/fs/xfs/xfs_acl.h ++++ linux-2.6-tip/fs/xfs/xfs_acl.h +@@ -58,6 +58,7 @@ extern struct kmem_zone *xfs_acl_zone; + (zone) = kmem_zone_init(sizeof(xfs_acl_t), (name)) + #define xfs_acl_zone_destroy(zone) kmem_zone_destroy(zone) + ++extern int xfs_decode_acl(const char *); + extern int xfs_acl_inherit(struct inode *, mode_t mode, xfs_acl_t *); + extern int xfs_acl_iaccess(struct xfs_inode *, mode_t, cred_t *); + extern int xfs_acl_vtoacl(struct inode *, xfs_acl_t *, xfs_acl_t *); +@@ -79,6 +80,7 @@ extern int xfs_acl_vremove(struct inode + #define _ACL_FREE(a) ((a)? kmem_zone_free(xfs_acl_zone, (a)):(void)0) + + #else ++#define xfs_decode_acl(name) (-EINVAL) + #define xfs_acl_zone_init(zone,name) + #define xfs_acl_zone_destroy(zone) + #define xfs_acl_vset(v,p,sz,t) (-EOPNOTSUPP) +Index: linux-2.6-tip/fs/xfs/xfs_mount.c +=================================================================== +--- linux-2.6-tip.orig/fs/xfs/xfs_mount.c ++++ linux-2.6-tip/fs/xfs/xfs_mount.c +@@ -1424,6 +1424,8 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fi + /* find modified range */ + + f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); ++ if ((long)f < 0) /* work around gcc warning */ ++ return; + ASSERT((1LL << f) & XFS_SB_MOD_BITS); + first = xfs_sb_info[f].offset; + +Index: linux-2.6-tip/include/acpi/acpiosxf.h +=================================================================== +--- linux-2.6-tip.orig/include/acpi/acpiosxf.h ++++ linux-2.6-tip/include/acpi/acpiosxf.h +@@ -61,7 +61,7 @@ typedef enum { + OSL_EC_BURST_HANDLER + } acpi_execute_type; + +-#define ACPI_NO_UNIT_LIMIT ((u32) -1) ++#define ACPI_NO_UNIT_LIMIT (INT_MAX/2) + #define ACPI_MUTEX_SEM 1 + + /* Functions for acpi_os_signal */ +@@ -144,6 +144,7 @@ void __iomem *acpi_os_map_memory(acpi_ph + acpi_size length); + + void acpi_os_unmap_memory(void __iomem * logical_address, acpi_size size); ++void early_acpi_os_unmap_memory(void __iomem * virt, acpi_size size); + + #ifdef ACPI_FUTURE_USAGE + acpi_status +Index: linux-2.6-tip/include/acpi/acpixf.h +=================================================================== +--- linux-2.6-tip.orig/include/acpi/acpixf.h ++++ linux-2.6-tip/include/acpi/acpixf.h +@@ -130,6 +130,10 @@ acpi_get_table_header(acpi_string signat + struct acpi_table_header *out_table_header); + + acpi_status ++acpi_get_table_with_size(acpi_string signature, ++ u32 instance, struct acpi_table_header **out_table, ++ acpi_size *tbl_size); ++acpi_status + acpi_get_table(acpi_string signature, + u32 instance, struct acpi_table_header **out_table); + +Index: linux-2.6-tip/include/asm-frv/ftrace.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/asm-frv/ftrace.h +@@ -0,0 +1 @@ ++/* empty */ +Index: linux-2.6-tip/include/asm-frv/swab.h +=================================================================== +--- linux-2.6-tip.orig/include/asm-frv/swab.h ++++ linux-2.6-tip/include/asm-frv/swab.h +@@ -1,7 +1,7 @@ + #ifndef _ASM_SWAB_H + #define _ASM_SWAB_H + +-#include ++#include + + #if defined(__GNUC__) && !defined(__STRICT_ANSI__) || defined(__KERNEL__) + # define __SWAB_64_THRU_32__ +Index: linux-2.6-tip/include/asm-generic/bug.h +=================================================================== +--- linux-2.6-tip.orig/include/asm-generic/bug.h ++++ linux-2.6-tip/include/asm-generic/bug.h +@@ -3,6 +3,10 @@ + + #include + ++#ifndef __ASSEMBLY__ 
++extern void __WARN_ON(const char *func, const char *file, const int line); ++#endif /* __ASSEMBLY__ */ ++ + #ifdef CONFIG_BUG + + #ifdef CONFIG_GENERIC_BUG +@@ -103,10 +107,9 @@ extern void warn_slowpath(const char *fi + #endif + + #ifndef WARN +-#define WARN(condition, format...) ({ \ +- int __ret_warn_on = !!(condition); \ +- unlikely(__ret_warn_on); \ +-}) ++static inline int __attribute__ ((format(printf, 2, 3))) ++__WARN(int condition, const char *fmt, ...) { return condition; } ++#define WARN(condition, format...) __WARN(!!(condition), format) + #endif + + #endif +@@ -140,4 +143,18 @@ extern void warn_slowpath(const char *fi + # define WARN_ON_SMP(x) do { } while (0) + #endif + ++#ifdef CONFIG_PREEMPT_RT ++# define BUG_ON_RT(c) BUG_ON(c) ++# define BUG_ON_NONRT(c) do { } while (0) ++# define WARN_ON_RT(condition) WARN_ON(condition) ++# define WARN_ON_NONRT(condition) do { } while (0) ++# define WARN_ON_ONCE_NONRT(condition) do { } while (0) ++#else ++# define BUG_ON_RT(c) do { } while (0) ++# define BUG_ON_NONRT(c) BUG_ON(c) ++# define WARN_ON_RT(condition) do { } while (0) ++# define WARN_ON_NONRT(condition) WARN_ON(condition) ++# define WARN_ON_ONCE_NONRT(condition) WARN_ON_ONCE(condition) ++#endif ++ + #endif +Index: linux-2.6-tip/include/asm-generic/fcntl.h +=================================================================== +--- linux-2.6-tip.orig/include/asm-generic/fcntl.h ++++ linux-2.6-tip/include/asm-generic/fcntl.h +@@ -117,9 +117,9 @@ + struct flock { + short l_type; + short l_whence; +- off_t l_start; +- off_t l_len; +- pid_t l_pid; ++ __kernel_off_t l_start; ++ __kernel_off_t l_len; ++ __kernel_pid_t l_pid; + __ARCH_FLOCK_PAD + }; + #endif +@@ -140,9 +140,9 @@ struct flock { + struct flock64 { + short l_type; + short l_whence; +- loff_t l_start; +- loff_t l_len; +- pid_t l_pid; ++ __kernel_loff_t l_start; ++ __kernel_loff_t l_len; ++ __kernel_pid_t l_pid; + __ARCH_FLOCK64_PAD + }; + #endif +Index: linux-2.6-tip/include/asm-generic/percpu.h +=================================================================== +--- linux-2.6-tip.orig/include/asm-generic/percpu.h ++++ linux-2.6-tip/include/asm-generic/percpu.h +@@ -9,6 +9,9 @@ + */ + #define per_cpu_var(var) per_cpu__##var + ++#define __per_cpu_var_lock(var) per_cpu__lock_##var##_locked ++#define __per_cpu_var_lock_var(var) per_cpu__##var##_locked ++ + #ifdef CONFIG_SMP + + /* +@@ -60,6 +63,14 @@ extern unsigned long __per_cpu_offset[NR + #define __raw_get_cpu_var(var) \ + (*SHIFT_PERCPU_PTR(&per_cpu_var(var), __my_cpu_offset)) + ++#define per_cpu_lock(var, cpu) \ ++ (*SHIFT_PERCPU_PTR(&__per_cpu_var_lock(var), per_cpu_offset(cpu))) ++#define per_cpu_var_locked(var, cpu) \ ++ (*SHIFT_PERCPU_PTR(&__per_cpu_var_lock_var(var), per_cpu_offset(cpu))) ++#define __get_cpu_lock(var, cpu) \ ++ per_cpu_lock(var, cpu) ++#define __get_cpu_var_locked(var, cpu) \ ++ per_cpu_var_locked(var, cpu) + + #ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA + extern void setup_per_cpu_areas(void); +@@ -68,9 +79,11 @@ extern void setup_per_cpu_areas(void); + #else /* ! 
SMP */ + + #define per_cpu(var, cpu) (*((void)(cpu), &per_cpu_var(var))) ++#define per_cpu_var_locked(var, cpu) (*((void)(cpu), &__per_cpu_var_lock_var(var))) + #define __get_cpu_var(var) per_cpu_var(var) + #define __raw_get_cpu_var(var) per_cpu_var(var) +- ++#define __get_cpu_lock(var, cpu) __per_cpu_var_lock(var) ++#define __get_cpu_var_locked(var, cpu) __per_cpu_var_lock_var(var) + #endif /* SMP */ + + #ifndef PER_CPU_ATTRIBUTES +@@ -79,5 +92,60 @@ extern void setup_per_cpu_areas(void); + + #define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \ + __typeof__(type) per_cpu_var(name) ++#define DECLARE_PER_CPU_LOCKED(type, name) \ ++ extern PER_CPU_ATTRIBUTES spinlock_t __per_cpu_var_lock(name); \ ++ extern PER_CPU_ATTRIBUTES __typeof__(type) __per_cpu_var_lock_var(name) ++ ++/* ++ * Optional methods for optimized non-lvalue per-cpu variable access. ++ * ++ * @var can be a percpu variable or a field of it and its size should ++ * equal char, int or long. percpu_read() evaluates to a lvalue and ++ * all others to void. ++ * ++ * These operations are guaranteed to be atomic w.r.t. preemption. ++ * The generic versions use plain get/put_cpu_var(). Archs are ++ * encouraged to implement single-instruction alternatives which don't ++ * require preemption protection. ++ */ ++#ifndef percpu_read ++# define percpu_read(var) \ ++ ({ \ ++ typeof(per_cpu_var(var)) __tmp_var__; \ ++ __tmp_var__ = get_cpu_var(var); \ ++ put_cpu_var(var); \ ++ __tmp_var__; \ ++ }) ++#endif ++ ++#define __percpu_generic_to_op(var, val, op) \ ++do { \ ++ get_cpu_var(var) op val; \ ++ put_cpu_var(var); \ ++} while (0) ++ ++#ifndef percpu_write ++# define percpu_write(var, val) __percpu_generic_to_op(var, (val), =) ++#endif ++ ++#ifndef percpu_add ++# define percpu_add(var, val) __percpu_generic_to_op(var, (val), +=) ++#endif ++ ++#ifndef percpu_sub ++# define percpu_sub(var, val) __percpu_generic_to_op(var, (val), -=) ++#endif ++ ++#ifndef percpu_and ++# define percpu_and(var, val) __percpu_generic_to_op(var, (val), &=) ++#endif ++ ++#ifndef percpu_or ++# define percpu_or(var, val) __percpu_generic_to_op(var, (val), |=) ++#endif ++ ++#ifndef percpu_xor ++# define percpu_xor(var, val) __percpu_generic_to_op(var, (val), ^=) ++#endif + + #endif /* _ASM_GENERIC_PERCPU_H_ */ +Index: linux-2.6-tip/include/asm-generic/sections.h +=================================================================== +--- linux-2.6-tip.orig/include/asm-generic/sections.h ++++ linux-2.6-tip/include/asm-generic/sections.h +@@ -9,7 +9,7 @@ extern char __bss_start[], __bss_stop[]; + extern char __init_begin[], __init_end[]; + extern char _sinittext[], _einittext[]; + extern char _end[]; +-extern char __per_cpu_start[], __per_cpu_end[]; ++extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[]; + extern char __kprobes_text_start[], __kprobes_text_end[]; + extern char __initdata_begin[], __initdata_end[]; + extern char __start_rodata[], __end_rodata[]; +Index: linux-2.6-tip/include/asm-generic/siginfo.h +=================================================================== +--- linux-2.6-tip.orig/include/asm-generic/siginfo.h ++++ linux-2.6-tip/include/asm-generic/siginfo.h +@@ -23,7 +23,7 @@ typedef union sigval { + #endif + + #ifndef __ARCH_SI_UID_T +-#define __ARCH_SI_UID_T uid_t ++#define __ARCH_SI_UID_T __kernel_uid32_t + #endif + + /* +@@ -47,13 +47,13 @@ typedef struct siginfo { + + /* kill() */ + struct { +- pid_t _pid; /* sender's pid */ ++ __kernel_pid_t _pid; /* sender's pid */ + __ARCH_SI_UID_T _uid; /* sender's uid */ + } _kill; 
+ + /* POSIX.1b timers */ + struct { +- timer_t _tid; /* timer id */ ++ __kernel_timer_t _tid; /* timer id */ + int _overrun; /* overrun count */ + char _pad[sizeof( __ARCH_SI_UID_T) - sizeof(int)]; + sigval_t _sigval; /* same as below */ +@@ -62,18 +62,18 @@ typedef struct siginfo { + + /* POSIX.1b signals */ + struct { +- pid_t _pid; /* sender's pid */ ++ __kernel_pid_t _pid; /* sender's pid */ + __ARCH_SI_UID_T _uid; /* sender's uid */ + sigval_t _sigval; + } _rt; + + /* SIGCHLD */ + struct { +- pid_t _pid; /* which child */ ++ __kernel_pid_t _pid; /* which child */ + __ARCH_SI_UID_T _uid; /* sender's uid */ + int _status; /* exit code */ +- clock_t _utime; +- clock_t _stime; ++ __kernel_clock_t _utime; ++ __kernel_clock_t _stime; + } _sigchld; + + /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ +Index: linux-2.6-tip/include/asm-generic/statfs.h +=================================================================== +--- linux-2.6-tip.orig/include/asm-generic/statfs.h ++++ linux-2.6-tip/include/asm-generic/statfs.h +@@ -1,8 +1,9 @@ + #ifndef _GENERIC_STATFS_H + #define _GENERIC_STATFS_H + +-#ifndef __KERNEL_STRICT_NAMES +-# include ++#include ++ ++#ifdef __KERNEL__ + typedef __kernel_fsid_t fsid_t; + #endif + +Index: linux-2.6-tip/include/asm-generic/vmlinux.lds.h +=================================================================== +--- linux-2.6-tip.orig/include/asm-generic/vmlinux.lds.h ++++ linux-2.6-tip/include/asm-generic/vmlinux.lds.h +@@ -61,6 +61,30 @@ + #define BRANCH_PROFILE() + #endif + ++#ifdef CONFIG_EVENT_TRACER ++#define FTRACE_EVENTS() VMLINUX_SYMBOL(__start_ftrace_events) = .; \ ++ *(_ftrace_events) \ ++ VMLINUX_SYMBOL(__stop_ftrace_events) = .; ++#else ++#define FTRACE_EVENTS() ++#endif ++ ++#ifdef CONFIG_TRACING ++#define TRACE_PRINTKS() VMLINUX_SYMBOL(__start___trace_bprintk_fmt) = .; \ ++ *(__trace_printk_fmt) /* Trace_printk fmt' pointer */ \ ++ VMLINUX_SYMBOL(__stop___trace_bprintk_fmt) = .; ++#else ++#define TRACE_PRINTKS() ++#endif ++ ++#ifdef CONFIG_FTRACE_SYSCALLS ++#define TRACE_SYSCALLS() VMLINUX_SYMBOL(__start_syscalls_metadata) = .; \ ++ *(__syscalls_metadata) \ ++ VMLINUX_SYMBOL(__stop_syscalls_metadata) = .; ++#else ++#define TRACE_SYSCALLS() ++#endif ++ + /* .data section */ + #define DATA_DATA \ + *(.data) \ +@@ -81,7 +105,10 @@ + *(__tracepoints) \ + VMLINUX_SYMBOL(__stop___tracepoints) = .; \ + LIKELY_PROFILE() \ +- BRANCH_PROFILE() ++ BRANCH_PROFILE() \ ++ TRACE_PRINTKS() \ ++ FTRACE_EVENTS() \ ++ TRACE_SYSCALLS() + + #define RO_DATA(align) \ + . = ALIGN((align)); \ +@@ -430,12 +457,59 @@ + *(.initcall7.init) \ + *(.initcall7s.init) + ++/** ++ * PERCPU_VADDR - define output section for percpu area ++ * @vaddr: explicit base address (optional) ++ * @phdr: destination PHDR (optional) ++ * ++ * Macro which expands to output section for percpu area. If @vaddr ++ * is not blank, it specifies explicit base address and all percpu ++ * symbols will be offset from the given address. If blank, @vaddr ++ * always equals @laddr + LOAD_OFFSET. ++ * ++ * @phdr defines the output PHDR to use if not blank. Be warned that ++ * output PHDR is sticky. If @phdr is specified, the next output ++ * section in the linker script will go there too. @phdr should have ++ * a leading colon. ++ * ++ * Note that this macros defines __per_cpu_load as an absolute symbol. ++ * If there is no need to put the percpu section at a predetermined ++ * address, use PERCPU(). 
++ */ ++#define PERCPU_VADDR(vaddr, phdr) \ ++ VMLINUX_SYMBOL(__per_cpu_load) = .; \ ++ .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \ ++ - LOAD_OFFSET) { \ ++ VMLINUX_SYMBOL(__per_cpu_start) = .; \ ++ *(.data.percpu.first) \ ++ *(.data.percpu.page_aligned) \ ++ *(.data.percpu) \ ++ *(.data.percpu.shared_aligned) \ ++ VMLINUX_SYMBOL(__per_cpu_end) = .; \ ++ } phdr \ ++ . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu); ++ ++/** ++ * PERCPU - define output section for percpu area, simple version ++ * @align: required alignment ++ * ++ * Align to @align and outputs output section for percpu area. This ++ * macro doesn't maniuplate @vaddr or @phdr and __per_cpu_load and ++ * __per_cpu_start will be identical. ++ * ++ * This macro is equivalent to ALIGN(align); PERCPU_VADDR( , ) except ++ * that __per_cpu_load is defined as a relative symbol against ++ * .data.percpu which is required for relocatable x86_32 ++ * configuration. ++ */ + #define PERCPU(align) \ + . = ALIGN(align); \ +- VMLINUX_SYMBOL(__per_cpu_start) = .; \ +- .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ ++ .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ ++ VMLINUX_SYMBOL(__per_cpu_load) = .; \ ++ VMLINUX_SYMBOL(__per_cpu_start) = .; \ ++ *(.data.percpu.first) \ + *(.data.percpu.page_aligned) \ + *(.data.percpu) \ + *(.data.percpu.shared_aligned) \ +- } \ +- VMLINUX_SYMBOL(__per_cpu_end) = .; ++ VMLINUX_SYMBOL(__per_cpu_end) = .; \ ++ } +Index: linux-2.6-tip/include/asm-m32r/ftrace.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/asm-m32r/ftrace.h +@@ -0,0 +1 @@ ++/* empty */ +Index: linux-2.6-tip/include/asm-m32r/swab.h +=================================================================== +--- linux-2.6-tip.orig/include/asm-m32r/swab.h ++++ linux-2.6-tip/include/asm-m32r/swab.h +@@ -1,7 +1,7 @@ + #ifndef _ASM_M32R_SWAB_H + #define _ASM_M32R_SWAB_H + +-#include ++#include + + #if !defined(__STRICT_ANSI__) || defined(__KERNEL__) + # define __SWAB_64_THRU_32__ +Index: linux-2.6-tip/include/asm-mn10300/ftrace.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/asm-mn10300/ftrace.h +@@ -0,0 +1 @@ ++/* empty */ +Index: linux-2.6-tip/include/asm-mn10300/swab.h +=================================================================== +--- linux-2.6-tip.orig/include/asm-mn10300/swab.h ++++ linux-2.6-tip/include/asm-mn10300/swab.h +@@ -11,7 +11,7 @@ + #ifndef _ASM_SWAB_H + #define _ASM_SWAB_H + +-#include ++#include + + #ifdef __GNUC__ + +Index: linux-2.6-tip/include/drm/drm.h +=================================================================== +--- linux-2.6-tip.orig/include/drm/drm.h ++++ linux-2.6-tip/include/drm/drm.h +@@ -36,8 +36,7 @@ + #ifndef _DRM_H_ + #define _DRM_H_ + +-#if defined(__KERNEL__) +-#endif ++#include + #include /* For _IO* macros */ + #define DRM_IOCTL_NR(n) _IOC_NR(n) + #define DRM_IOC_VOID _IOC_NONE +@@ -497,8 +496,8 @@ union drm_wait_vblank { + * \sa drmModesetCtl(). + */ + struct drm_modeset_ctl { +- uint32_t crtc; +- uint32_t cmd; ++ __u32 crtc; ++ __u32 cmd; + }; + + /** +@@ -574,29 +573,29 @@ struct drm_set_version { + /** DRM_IOCTL_GEM_CLOSE ioctl argument type */ + struct drm_gem_close { + /** Handle of the object to be closed. 
*/ +- uint32_t handle; +- uint32_t pad; ++ __u32 handle; ++ __u32 pad; + }; + + /** DRM_IOCTL_GEM_FLINK ioctl argument type */ + struct drm_gem_flink { + /** Handle for the object being named */ +- uint32_t handle; ++ __u32 handle; + + /** Returned global name */ +- uint32_t name; ++ __u32 name; + }; + + /** DRM_IOCTL_GEM_OPEN ioctl argument type */ + struct drm_gem_open { + /** Name of object being opened */ +- uint32_t name; ++ __u32 name; + + /** Returned handle for the object */ +- uint32_t handle; ++ __u32 handle; + + /** Returned size of the object */ +- uint64_t size; ++ __u64 size; + }; + + #include "drm_mode.h" +Index: linux-2.6-tip/include/drm/drm_mode.h +=================================================================== +--- linux-2.6-tip.orig/include/drm/drm_mode.h ++++ linux-2.6-tip/include/drm/drm_mode.h +@@ -27,11 +27,8 @@ + #ifndef _DRM_MODE_H + #define _DRM_MODE_H + +-#if !defined(__KERNEL__) && !defined(_KERNEL) +-#include +-#else + #include +-#endif ++#include + + #define DRM_DISPLAY_INFO_LEN 32 + #define DRM_CONNECTOR_NAME_LEN 32 +@@ -81,41 +78,41 @@ + #define DRM_MODE_DITHERING_ON 1 + + struct drm_mode_modeinfo { +- uint32_t clock; +- uint16_t hdisplay, hsync_start, hsync_end, htotal, hskew; +- uint16_t vdisplay, vsync_start, vsync_end, vtotal, vscan; ++ __u32 clock; ++ __u16 hdisplay, hsync_start, hsync_end, htotal, hskew; ++ __u16 vdisplay, vsync_start, vsync_end, vtotal, vscan; + +- uint32_t vrefresh; /* vertical refresh * 1000 */ ++ __u32 vrefresh; /* vertical refresh * 1000 */ + +- uint32_t flags; +- uint32_t type; ++ __u32 flags; ++ __u32 type; + char name[DRM_DISPLAY_MODE_LEN]; + }; + + struct drm_mode_card_res { +- uint64_t fb_id_ptr; +- uint64_t crtc_id_ptr; +- uint64_t connector_id_ptr; +- uint64_t encoder_id_ptr; +- uint32_t count_fbs; +- uint32_t count_crtcs; +- uint32_t count_connectors; +- uint32_t count_encoders; +- uint32_t min_width, max_width; +- uint32_t min_height, max_height; ++ __u64 fb_id_ptr; ++ __u64 crtc_id_ptr; ++ __u64 connector_id_ptr; ++ __u64 encoder_id_ptr; ++ __u32 count_fbs; ++ __u32 count_crtcs; ++ __u32 count_connectors; ++ __u32 count_encoders; ++ __u32 min_width, max_width; ++ __u32 min_height, max_height; + }; + + struct drm_mode_crtc { +- uint64_t set_connectors_ptr; +- uint32_t count_connectors; ++ __u64 set_connectors_ptr; ++ __u32 count_connectors; + +- uint32_t crtc_id; /**< Id */ +- uint32_t fb_id; /**< Id of framebuffer */ ++ __u32 crtc_id; /**< Id */ ++ __u32 fb_id; /**< Id of framebuffer */ + +- uint32_t x, y; /**< Position on the frameuffer */ ++ __u32 x, y; /**< Position on the frameuffer */ + +- uint32_t gamma_size; +- uint32_t mode_valid; ++ __u32 gamma_size; ++ __u32 mode_valid; + struct drm_mode_modeinfo mode; + }; + +@@ -126,13 +123,13 @@ struct drm_mode_crtc { + #define DRM_MODE_ENCODER_TVDAC 4 + + struct drm_mode_get_encoder { +- uint32_t encoder_id; +- uint32_t encoder_type; ++ __u32 encoder_id; ++ __u32 encoder_type; + +- uint32_t crtc_id; /**< Id of crtc */ ++ __u32 crtc_id; /**< Id of crtc */ + +- uint32_t possible_crtcs; +- uint32_t possible_clones; ++ __u32 possible_crtcs; ++ __u32 possible_clones; + }; + + /* This is for connectors with multiple signal types. 
*/ +@@ -161,23 +158,23 @@ struct drm_mode_get_encoder { + + struct drm_mode_get_connector { + +- uint64_t encoders_ptr; +- uint64_t modes_ptr; +- uint64_t props_ptr; +- uint64_t prop_values_ptr; +- +- uint32_t count_modes; +- uint32_t count_props; +- uint32_t count_encoders; +- +- uint32_t encoder_id; /**< Current Encoder */ +- uint32_t connector_id; /**< Id */ +- uint32_t connector_type; +- uint32_t connector_type_id; +- +- uint32_t connection; +- uint32_t mm_width, mm_height; /**< HxW in millimeters */ +- uint32_t subpixel; ++ __u64 encoders_ptr; ++ __u64 modes_ptr; ++ __u64 props_ptr; ++ __u64 prop_values_ptr; ++ ++ __u32 count_modes; ++ __u32 count_props; ++ __u32 count_encoders; ++ ++ __u32 encoder_id; /**< Current Encoder */ ++ __u32 connector_id; /**< Id */ ++ __u32 connector_type; ++ __u32 connector_type_id; ++ ++ __u32 connection; ++ __u32 mm_width, mm_height; /**< HxW in millimeters */ ++ __u32 subpixel; + }; + + #define DRM_MODE_PROP_PENDING (1<<0) +@@ -187,46 +184,46 @@ struct drm_mode_get_connector { + #define DRM_MODE_PROP_BLOB (1<<4) + + struct drm_mode_property_enum { +- uint64_t value; ++ __u64 value; + char name[DRM_PROP_NAME_LEN]; + }; + + struct drm_mode_get_property { +- uint64_t values_ptr; /* values and blob lengths */ +- uint64_t enum_blob_ptr; /* enum and blob id ptrs */ ++ __u64 values_ptr; /* values and blob lengths */ ++ __u64 enum_blob_ptr; /* enum and blob id ptrs */ + +- uint32_t prop_id; +- uint32_t flags; ++ __u32 prop_id; ++ __u32 flags; + char name[DRM_PROP_NAME_LEN]; + +- uint32_t count_values; +- uint32_t count_enum_blobs; ++ __u32 count_values; ++ __u32 count_enum_blobs; + }; + + struct drm_mode_connector_set_property { +- uint64_t value; +- uint32_t prop_id; +- uint32_t connector_id; ++ __u64 value; ++ __u32 prop_id; ++ __u32 connector_id; + }; + + struct drm_mode_get_blob { +- uint32_t blob_id; +- uint32_t length; +- uint64_t data; ++ __u32 blob_id; ++ __u32 length; ++ __u64 data; + }; + + struct drm_mode_fb_cmd { +- uint32_t fb_id; +- uint32_t width, height; +- uint32_t pitch; +- uint32_t bpp; +- uint32_t depth; ++ __u32 fb_id; ++ __u32 width, height; ++ __u32 pitch; ++ __u32 bpp; ++ __u32 depth; + /* driver specific handle */ +- uint32_t handle; ++ __u32 handle; + }; + + struct drm_mode_mode_cmd { +- uint32_t connector_id; ++ __u32 connector_id; + struct drm_mode_modeinfo mode; + }; + +@@ -248,24 +245,24 @@ struct drm_mode_mode_cmd { + * y + */ + struct drm_mode_cursor { +- uint32_t flags; +- uint32_t crtc_id; +- int32_t x; +- int32_t y; +- uint32_t width; +- uint32_t height; ++ __u32 flags; ++ __u32 crtc_id; ++ __s32 x; ++ __s32 y; ++ __u32 width; ++ __u32 height; + /* driver specific handle */ +- uint32_t handle; ++ __u32 handle; + }; + + struct drm_mode_crtc_lut { +- uint32_t crtc_id; +- uint32_t gamma_size; ++ __u32 crtc_id; ++ __u32 gamma_size; + + /* pointers to arrays */ +- uint64_t red; +- uint64_t green; +- uint64_t blue; ++ __u64 red; ++ __u64 green; ++ __u64 blue; + }; + + #endif +Index: linux-2.6-tip/include/drm/i915_drm.h +=================================================================== +--- linux-2.6-tip.orig/include/drm/i915_drm.h ++++ linux-2.6-tip/include/drm/i915_drm.h +@@ -30,7 +30,7 @@ + /* Please note that modifications to all structs defined here are + * subject to backwards-compatibility constraints. + */ +- ++#include + #include "drm.h" + + /* Each region is a minimum of 16k, and there are at most 255 of them. 
+@@ -116,15 +116,15 @@ typedef struct _drm_i915_sarea { + + /* fill out some space for old userspace triple buffer */ + drm_handle_t unused_handle; +- uint32_t unused1, unused2, unused3; ++ __u32 unused1, unused2, unused3; + + /* buffer object handles for static buffers. May change + * over the lifetime of the client. + */ +- uint32_t front_bo_handle; +- uint32_t back_bo_handle; +- uint32_t unused_bo_handle; +- uint32_t depth_bo_handle; ++ __u32 front_bo_handle; ++ __u32 back_bo_handle; ++ __u32 unused_bo_handle; ++ __u32 depth_bo_handle; + + } drm_i915_sarea_t; + +@@ -327,7 +327,7 @@ typedef struct drm_i915_vblank_swap { + } drm_i915_vblank_swap_t; + + typedef struct drm_i915_hws_addr { +- uint64_t addr; ++ __u64 addr; + } drm_i915_hws_addr_t; + + struct drm_i915_gem_init { +@@ -335,12 +335,12 @@ struct drm_i915_gem_init { + * Beginning offset in the GTT to be managed by the DRM memory + * manager. + */ +- uint64_t gtt_start; ++ __u64 gtt_start; + /** + * Ending offset in the GTT to be managed by the DRM memory + * manager. + */ +- uint64_t gtt_end; ++ __u64 gtt_end; + }; + + struct drm_i915_gem_create { +@@ -349,94 +349,94 @@ struct drm_i915_gem_create { + * + * The (page-aligned) allocated size for the object will be returned. + */ +- uint64_t size; ++ __u64 size; + /** + * Returned handle for the object. + * + * Object handles are nonzero. + */ +- uint32_t handle; +- uint32_t pad; ++ __u32 handle; ++ __u32 pad; + }; + + struct drm_i915_gem_pread { + /** Handle for the object being read. */ +- uint32_t handle; +- uint32_t pad; ++ __u32 handle; ++ __u32 pad; + /** Offset into the object to read from */ +- uint64_t offset; ++ __u64 offset; + /** Length of data to read */ +- uint64_t size; ++ __u64 size; + /** + * Pointer to write the data into. + * + * This is a fixed-size type for 32/64 compatibility. + */ +- uint64_t data_ptr; ++ __u64 data_ptr; + }; + + struct drm_i915_gem_pwrite { + /** Handle for the object being written to. */ +- uint32_t handle; +- uint32_t pad; ++ __u32 handle; ++ __u32 pad; + /** Offset into the object to write to */ +- uint64_t offset; ++ __u64 offset; + /** Length of data to write */ +- uint64_t size; ++ __u64 size; + /** + * Pointer to read the data from. + * + * This is a fixed-size type for 32/64 compatibility. + */ +- uint64_t data_ptr; ++ __u64 data_ptr; + }; + + struct drm_i915_gem_mmap { + /** Handle for the object being mapped. */ +- uint32_t handle; +- uint32_t pad; ++ __u32 handle; ++ __u32 pad; + /** Offset in the object to map. */ +- uint64_t offset; ++ __u64 offset; + /** + * Length of data to map. + * + * The value will be page-aligned. + */ +- uint64_t size; ++ __u64 size; + /** + * Returned pointer the data was mapped at. + * + * This is a fixed-size type for 32/64 compatibility. + */ +- uint64_t addr_ptr; ++ __u64 addr_ptr; + }; + + struct drm_i915_gem_mmap_gtt { + /** Handle for the object being mapped. */ +- uint32_t handle; +- uint32_t pad; ++ __u32 handle; ++ __u32 pad; + /** + * Fake offset to use for subsequent mmap call + * + * This is a fixed-size type for 32/64 compatibility. 
+ */ +- uint64_t offset; ++ __u64 offset; + }; + + struct drm_i915_gem_set_domain { + /** Handle for the object */ +- uint32_t handle; ++ __u32 handle; + + /** New read domains */ +- uint32_t read_domains; ++ __u32 read_domains; + + /** New write domain */ +- uint32_t write_domain; ++ __u32 write_domain; + }; + + struct drm_i915_gem_sw_finish { + /** Handle for the object */ +- uint32_t handle; ++ __u32 handle; + }; + + struct drm_i915_gem_relocation_entry { +@@ -448,16 +448,16 @@ struct drm_i915_gem_relocation_entry { + * a relocation list for state buffers and not re-write it per + * exec using the buffer. + */ +- uint32_t target_handle; ++ __u32 target_handle; + + /** + * Value to be added to the offset of the target buffer to make up + * the relocation entry. + */ +- uint32_t delta; ++ __u32 delta; + + /** Offset in the buffer the relocation entry will be written into */ +- uint64_t offset; ++ __u64 offset; + + /** + * Offset value of the target buffer that the relocation entry was last +@@ -467,12 +467,12 @@ struct drm_i915_gem_relocation_entry { + * and writing the relocation. This value is written back out by + * the execbuffer ioctl when the relocation is written. + */ +- uint64_t presumed_offset; ++ __u64 presumed_offset; + + /** + * Target memory domains read by this operation. + */ +- uint32_t read_domains; ++ __u32 read_domains; + + /** + * Target memory domains written by this operation. +@@ -481,7 +481,7 @@ struct drm_i915_gem_relocation_entry { + * execbuffer operation, so that where there are conflicts, + * the application will get -EINVAL back. + */ +- uint32_t write_domain; ++ __u32 write_domain; + }; + + /** @{ +@@ -512,24 +512,24 @@ struct drm_i915_gem_exec_object { + * User's handle for a buffer to be bound into the GTT for this + * operation. + */ +- uint32_t handle; ++ __u32 handle; + + /** Number of relocations to be performed on this buffer */ +- uint32_t relocation_count; ++ __u32 relocation_count; + /** + * Pointer to array of struct drm_i915_gem_relocation_entry containing + * the relocations to be performed in this buffer. + */ +- uint64_t relocs_ptr; ++ __u64 relocs_ptr; + + /** Required alignment in graphics aperture */ +- uint64_t alignment; ++ __u64 alignment; + + /** + * Returned value of the updated offset of the object, for future + * presumed_offset writes. + */ +- uint64_t offset; ++ __u64 offset; + }; + + struct drm_i915_gem_execbuffer { +@@ -543,44 +543,44 @@ struct drm_i915_gem_execbuffer { + * a buffer is performing refer to buffers that have already appeared + * in the validate list. + */ +- uint64_t buffers_ptr; +- uint32_t buffer_count; ++ __u64 buffers_ptr; ++ __u32 buffer_count; + + /** Offset in the batchbuffer to start execution from. */ +- uint32_t batch_start_offset; ++ __u32 batch_start_offset; + /** Bytes used in batchbuffer from batch_start_offset */ +- uint32_t batch_len; +- uint32_t DR1; +- uint32_t DR4; +- uint32_t num_cliprects; ++ __u32 batch_len; ++ __u32 DR1; ++ __u32 DR4; ++ __u32 num_cliprects; + /** This is a struct drm_clip_rect *cliprects */ +- uint64_t cliprects_ptr; ++ __u64 cliprects_ptr; + }; + + struct drm_i915_gem_pin { + /** Handle of the buffer to be pinned. */ +- uint32_t handle; +- uint32_t pad; ++ __u32 handle; ++ __u32 pad; + + /** alignment required within the aperture */ +- uint64_t alignment; ++ __u64 alignment; + + /** Returned GTT offset of the buffer. */ +- uint64_t offset; ++ __u64 offset; + }; + + struct drm_i915_gem_unpin { + /** Handle of the buffer to be unpinned. 
*/ +- uint32_t handle; +- uint32_t pad; ++ __u32 handle; ++ __u32 pad; + }; + + struct drm_i915_gem_busy { + /** Handle of the buffer to check for busy */ +- uint32_t handle; ++ __u32 handle; + + /** Return busy status (1 if busy, 0 if idle) */ +- uint32_t busy; ++ __u32 busy; + }; + + #define I915_TILING_NONE 0 +@@ -597,7 +597,7 @@ struct drm_i915_gem_busy { + + struct drm_i915_gem_set_tiling { + /** Handle of the buffer to have its tiling state updated */ +- uint32_t handle; ++ __u32 handle; + + /** + * Tiling mode for the object (I915_TILING_NONE, I915_TILING_X, +@@ -611,47 +611,47 @@ struct drm_i915_gem_set_tiling { + * + * Buffer contents become undefined when changing tiling_mode. + */ +- uint32_t tiling_mode; ++ __u32 tiling_mode; + + /** + * Stride in bytes for the object when in I915_TILING_X or + * I915_TILING_Y. + */ +- uint32_t stride; ++ __u32 stride; + + /** + * Returned address bit 6 swizzling required for CPU access through + * mmap mapping. + */ +- uint32_t swizzle_mode; ++ __u32 swizzle_mode; + }; + + struct drm_i915_gem_get_tiling { + /** Handle of the buffer to get tiling state for. */ +- uint32_t handle; ++ __u32 handle; + + /** + * Current tiling mode for the object (I915_TILING_NONE, I915_TILING_X, + * I915_TILING_Y). + */ +- uint32_t tiling_mode; ++ __u32 tiling_mode; + + /** + * Returned address bit 6 swizzling required for CPU access through + * mmap mapping. + */ +- uint32_t swizzle_mode; ++ __u32 swizzle_mode; + }; + + struct drm_i915_gem_get_aperture { + /** Total size of the aperture used by i915_gem_execbuffer, in bytes */ +- uint64_t aper_size; ++ __u64 aper_size; + + /** + * Available space in the aperture used by i915_gem_execbuffer, in + * bytes + */ +- uint64_t aper_available_size; ++ __u64 aper_available_size; + }; + + #endif /* _I915_DRM_H_ */ +Index: linux-2.6-tip/include/drm/mga_drm.h +=================================================================== +--- linux-2.6-tip.orig/include/drm/mga_drm.h ++++ linux-2.6-tip/include/drm/mga_drm.h +@@ -35,6 +35,8 @@ + #ifndef __MGA_DRM_H__ + #define __MGA_DRM_H__ + ++#include ++ + /* WARNING: If you change any of these defines, make sure to change the + * defines in the Xserver file (mga_sarea.h) + */ +@@ -255,8 +257,8 @@ typedef struct _drm_mga_sarea { + #define DRM_IOCTL_MGA_ILOAD DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_ILOAD, drm_mga_iload_t) + #define DRM_IOCTL_MGA_BLIT DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_BLIT, drm_mga_blit_t) + #define DRM_IOCTL_MGA_GETPARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_MGA_GETPARAM, drm_mga_getparam_t) +-#define DRM_IOCTL_MGA_SET_FENCE DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_SET_FENCE, uint32_t) +-#define DRM_IOCTL_MGA_WAIT_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_MGA_WAIT_FENCE, uint32_t) ++#define DRM_IOCTL_MGA_SET_FENCE DRM_IOW( DRM_COMMAND_BASE + DRM_MGA_SET_FENCE, __u32) ++#define DRM_IOCTL_MGA_WAIT_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_MGA_WAIT_FENCE, __u32) + #define DRM_IOCTL_MGA_DMA_BOOTSTRAP DRM_IOWR(DRM_COMMAND_BASE + DRM_MGA_DMA_BOOTSTRAP, drm_mga_dma_bootstrap_t) + + typedef struct _drm_mga_warp_index { +@@ -310,7 +312,7 @@ typedef struct drm_mga_dma_bootstrap { + */ + /*@{ */ + unsigned long texture_handle; /**< Handle used to map AGP textures. */ +- uint32_t texture_size; /**< Size of the AGP texture region. */ ++ __u32 texture_size; /**< Size of the AGP texture region. */ + /*@} */ + + /** +@@ -319,7 +321,7 @@ typedef struct drm_mga_dma_bootstrap { + * On return from the DRM_MGA_DMA_BOOTSTRAP ioctl, this field will be + * filled in with the actual AGP mode. 
If AGP was not available + */ +- uint32_t primary_size; ++ __u32 primary_size; + + /** + * Requested number of secondary DMA buffers. +@@ -329,7 +331,7 @@ typedef struct drm_mga_dma_bootstrap { + * allocated. Particularly when PCI DMA is used, this may be + * (subtantially) less than the number requested. + */ +- uint32_t secondary_bin_count; ++ __u32 secondary_bin_count; + + /** + * Requested size of each secondary DMA buffer. +@@ -338,7 +340,7 @@ typedef struct drm_mga_dma_bootstrap { + * dma_mga_dma_bootstrap::secondary_bin_count, it is \b not allowed + * to reduce dma_mga_dma_bootstrap::secondary_bin_size. + */ +- uint32_t secondary_bin_size; ++ __u32 secondary_bin_size; + + /** + * Bit-wise mask of AGPSTAT2_* values. Currently only \c AGPSTAT2_1X, +@@ -350,12 +352,12 @@ typedef struct drm_mga_dma_bootstrap { + * filled in with the actual AGP mode. If AGP was not available + * (i.e., PCI DMA was used), this value will be zero. + */ +- uint32_t agp_mode; ++ __u32 agp_mode; + + /** + * Desired AGP GART size, measured in megabytes. + */ +- uint8_t agp_size; ++ __u8 agp_size; + } drm_mga_dma_bootstrap_t; + + typedef struct drm_mga_clear { +Index: linux-2.6-tip/include/drm/radeon_drm.h +=================================================================== +--- linux-2.6-tip.orig/include/drm/radeon_drm.h ++++ linux-2.6-tip/include/drm/radeon_drm.h +@@ -33,6 +33,8 @@ + #ifndef __RADEON_DRM_H__ + #define __RADEON_DRM_H__ + ++#include ++ + /* WARNING: If you change any of these defines, make sure to change the + * defines in the X server file (radeon_sarea.h) + */ +@@ -722,7 +724,7 @@ typedef struct drm_radeon_irq_wait { + + typedef struct drm_radeon_setparam { + unsigned int param; +- int64_t value; ++ __s64 value; + } drm_radeon_setparam_t; + + #define RADEON_SETPARAM_FB_LOCATION 1 /* determined framebuffer location */ +Index: linux-2.6-tip/include/drm/via_drm.h +=================================================================== +--- linux-2.6-tip.orig/include/drm/via_drm.h ++++ linux-2.6-tip/include/drm/via_drm.h +@@ -24,6 +24,8 @@ + #ifndef _VIA_DRM_H_ + #define _VIA_DRM_H_ + ++#include ++ + /* WARNING: These defines must be the same as what the Xserver uses. + * if you change them, you must change the defines in the Xserver. 
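The drm_mga_dma_bootstrap_t comments above describe a request/acknowledge pattern: userspace fills in the sizes it would like and the driver writes back what it actually granted (agp_mode comes back zero when it had to fall back to PCI DMA). A rough sketch of that round trip, assuming DRM master rights on a hypothetical /dev/dri/card0 and using the DRM_IOCTL_MGA_DMA_BOOTSTRAP macro quoted earlier in this header; the requested sizes are arbitrary example values:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <drm/drm.h>
#include <drm/mga_drm.h>

int main(void)
{
	drm_mga_dma_bootstrap_t bs = { 0 };
	int fd = open("/dev/dri/card0", O_RDWR);

	if (fd < 0) {
		perror("open /dev/dri/card0");
		return 1;
	}
	bs.primary_size       = 1024 * 1024; /* requested primary DMA region */
	bs.secondary_bin_count = 16;         /* requested secondary buffers */
	bs.secondary_bin_size  = 64 * 1024;
	bs.agp_size            = 32;         /* desired AGP GART size in MB */

	if (ioctl(fd, DRM_IOCTL_MGA_DMA_BOOTSTRAP, &bs) == 0)
		/* On return the same fields hold what was really allocated. */
		printf("granted %u bins of %u bytes, agp_mode=%u\n",
		       bs.secondary_bin_count, bs.secondary_bin_size,
		       bs.agp_mode);
	else
		perror("DRM_IOCTL_MGA_DMA_BOOTSTRAP");
	close(fd);
	return 0;
}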
+ */ +@@ -114,19 +116,19 @@ + #define VIA_MEM_UNKNOWN 4 + + typedef struct { +- uint32_t offset; +- uint32_t size; ++ __u32 offset; ++ __u32 size; + } drm_via_agp_t; + + typedef struct { +- uint32_t offset; +- uint32_t size; ++ __u32 offset; ++ __u32 size; + } drm_via_fb_t; + + typedef struct { +- uint32_t context; +- uint32_t type; +- uint32_t size; ++ __u32 context; ++ __u32 type; ++ __u32 size; + unsigned long index; + unsigned long offset; + } drm_via_mem_t; +@@ -148,9 +150,9 @@ typedef struct _drm_via_futex { + VIA_FUTEX_WAIT = 0x00, + VIA_FUTEX_WAKE = 0X01 + } func; +- uint32_t ms; +- uint32_t lock; +- uint32_t val; ++ __u32 ms; ++ __u32 lock; ++ __u32 val; + } drm_via_futex_t; + + typedef struct _drm_via_dma_init { +@@ -211,7 +213,7 @@ typedef struct _drm_via_cmdbuf_size { + VIA_CMDBUF_LAG = 0x02 + } func; + int wait; +- uint32_t size; ++ __u32 size; + } drm_via_cmdbuf_size_t; + + typedef enum { +@@ -236,8 +238,8 @@ enum drm_via_irqs { + struct drm_via_wait_irq_request { + unsigned irq; + via_irq_seq_type_t type; +- uint32_t sequence; +- uint32_t signal; ++ __u32 sequence; ++ __u32 signal; + }; + + typedef union drm_via_irqwait { +@@ -246,7 +248,7 @@ typedef union drm_via_irqwait { + } drm_via_irqwait_t; + + typedef struct drm_via_blitsync { +- uint32_t sync_handle; ++ __u32 sync_handle; + unsigned engine; + } drm_via_blitsync_t; + +@@ -257,16 +259,16 @@ typedef struct drm_via_blitsync { + */ + + typedef struct drm_via_dmablit { +- uint32_t num_lines; +- uint32_t line_length; ++ __u32 num_lines; ++ __u32 line_length; + +- uint32_t fb_addr; +- uint32_t fb_stride; ++ __u32 fb_addr; ++ __u32 fb_stride; + + unsigned char *mem_addr; +- uint32_t mem_stride; ++ __u32 mem_stride; + +- uint32_t flags; ++ __u32 flags; + int to_fb; + + drm_via_blitsync_t sync; +Index: linux-2.6-tip/include/linux/acpi.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/acpi.h ++++ linux-2.6-tip/include/linux/acpi.h +@@ -79,6 +79,7 @@ typedef int (*acpi_table_handler) (struc + typedef int (*acpi_table_entry_handler) (struct acpi_subtable_header *header, const unsigned long end); + + char * __acpi_map_table (unsigned long phys_addr, unsigned long size); ++void __acpi_unmap_table(char *map, unsigned long size); + int early_acpi_boot_init(void); + int acpi_boot_init (void); + int acpi_boot_table_init (void); +Index: linux-2.6-tip/include/linux/agpgart.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/agpgart.h ++++ linux-2.6-tip/include/linux/agpgart.h +@@ -77,20 +77,20 @@ typedef struct _agp_setup { + * The "prot" down below needs still a "sleep" flag somehow ... 
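The new __acpi_unmap_table() declaration pairs with the existing __acpi_map_table(): early boot code maps a table by physical address, reads it, and now has a symmetric way to hand the mapping back. A kernel-context sketch of that pairing (the helper name and its caller are hypothetical, not code from this patch):

#include <linux/acpi.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/kernel.h>

/* Hypothetical early-boot helper: peek at a table's 4-byte signature. */
static int __init show_table_signature(unsigned long phys_addr)
{
	char *hdr = __acpi_map_table(phys_addr,
				     sizeof(struct acpi_table_header));

	if (!hdr)
		return -ENOMEM;
	/* The signature is the first four bytes of every ACPI table header. */
	pr_info("ACPI table at %#lx: %.4s\n", phys_addr, hdr);
	__acpi_unmap_table(hdr, sizeof(struct acpi_table_header));
	return 0;
}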
+ */ + typedef struct _agp_segment { +- off_t pg_start; /* starting page to populate */ +- size_t pg_count; /* number of pages */ +- int prot; /* prot flags for mmap */ ++ __kernel_off_t pg_start; /* starting page to populate */ ++ __kernel_size_t pg_count; /* number of pages */ ++ int prot; /* prot flags for mmap */ + } agp_segment; + + typedef struct _agp_region { +- pid_t pid; /* pid of process */ +- size_t seg_count; /* number of segments */ ++ __kernel_pid_t pid; /* pid of process */ ++ __kernel_size_t seg_count; /* number of segments */ + struct _agp_segment *seg_list; + } agp_region; + + typedef struct _agp_allocate { + int key; /* tag of allocation */ +- size_t pg_count; /* number of pages */ ++ __kernel_size_t pg_count;/* number of pages */ + __u32 type; /* 0 == normal, other devspec */ + __u32 physical; /* device specific (some devices + * need a phys address of the +@@ -100,7 +100,7 @@ typedef struct _agp_allocate { + + typedef struct _agp_bind { + int key; /* tag of allocation */ +- off_t pg_start; /* starting page to populate */ ++ __kernel_off_t pg_start;/* starting page to populate */ + } agp_bind; + + typedef struct _agp_unbind { +Index: linux-2.6-tip/include/linux/atmlec.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/atmlec.h ++++ linux-2.6-tip/include/linux/atmlec.h +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + + /* ATM lec daemon control socket */ + #define ATMLEC_CTRL _IO('a', ATMIOC_LANE) +@@ -78,8 +79,8 @@ struct atmlec_msg { + } normal; + struct atmlec_config_msg config; + struct { +- uint16_t lec_id; /* requestor lec_id */ +- uint32_t tran_id; /* transaction id */ ++ __u16 lec_id; /* requestor lec_id */ ++ __u32 tran_id; /* transaction id */ + unsigned char mac_addr[ETH_ALEN]; /* dst mac addr */ + unsigned char atm_addr[ATM_ESA_LEN]; /* reqestor ATM addr */ + } proxy; /* +Index: linux-2.6-tip/include/linux/atmmpc.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/atmmpc.h ++++ linux-2.6-tip/include/linux/atmmpc.h +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + + #define ATMMPC_CTRL _IO('a', ATMIOC_MPOA) + #define ATMMPC_DATA _IO('a', ATMIOC_MPOA+1) +@@ -18,39 +19,39 @@ struct atmmpc_ioc { + }; + + typedef struct in_ctrl_info { +- uint8_t Last_NHRP_CIE_code; +- uint8_t Last_Q2931_cause_value; +- uint8_t eg_MPC_ATM_addr[ATM_ESA_LEN]; ++ __u8 Last_NHRP_CIE_code; ++ __u8 Last_Q2931_cause_value; ++ __u8 eg_MPC_ATM_addr[ATM_ESA_LEN]; + __be32 tag; + __be32 in_dst_ip; /* IP address this ingress MPC sends packets to */ +- uint16_t holding_time; +- uint32_t request_id; ++ __u16 holding_time; ++ __u32 request_id; + } in_ctrl_info; + + typedef struct eg_ctrl_info { +- uint8_t DLL_header[256]; +- uint8_t DH_length; ++ __u8 DLL_header[256]; ++ __u8 DH_length; + __be32 cache_id; + __be32 tag; + __be32 mps_ip; + __be32 eg_dst_ip; /* IP address to which ingress MPC sends packets */ +- uint8_t in_MPC_data_ATM_addr[ATM_ESA_LEN]; +- uint16_t holding_time; ++ __u8 in_MPC_data_ATM_addr[ATM_ESA_LEN]; ++ __u16 holding_time; + } eg_ctrl_info; + + struct mpc_parameters { +- uint16_t mpc_p1; /* Shortcut-Setup Frame Count */ +- uint16_t mpc_p2; /* Shortcut-Setup Frame Time */ +- uint8_t mpc_p3[8]; /* Flow-detection Protocols */ +- uint16_t mpc_p4; /* MPC Initial Retry Time */ +- uint16_t mpc_p5; /* MPC Retry Time Maximum */ +- uint16_t mpc_p6; /* Hold Down Time */ ++ __u16 mpc_p1; /* Shortcut-Setup Frame Count */ ++ __u16 mpc_p2; /* 
Shortcut-Setup Frame Time */ ++ __u8 mpc_p3[8]; /* Flow-detection Protocols */ ++ __u16 mpc_p4; /* MPC Initial Retry Time */ ++ __u16 mpc_p5; /* MPC Retry Time Maximum */ ++ __u16 mpc_p6; /* Hold Down Time */ + } ; + + struct k_message { +- uint16_t type; ++ __u16 type; + __be32 ip_mask; +- uint8_t MPS_ctrl[ATM_ESA_LEN]; ++ __u8 MPS_ctrl[ATM_ESA_LEN]; + union { + in_ctrl_info in_info; + eg_ctrl_info eg_info; +@@ -61,11 +62,11 @@ struct k_message { + + struct llc_snap_hdr { + /* RFC 1483 LLC/SNAP encapsulation for routed IP PDUs */ +- uint8_t dsap; /* Destination Service Access Point (0xAA) */ +- uint8_t ssap; /* Source Service Access Point (0xAA) */ +- uint8_t ui; /* Unnumbered Information (0x03) */ +- uint8_t org[3]; /* Organizational identification (0x000000) */ +- uint8_t type[2]; /* Ether type (for IP) (0x0800) */ ++ __u8 dsap; /* Destination Service Access Point (0xAA) */ ++ __u8 ssap; /* Source Service Access Point (0xAA) */ ++ __u8 ui; /* Unnumbered Information (0x03) */ ++ __u8 org[3]; /* Organizational identification (0x000000) */ ++ __u8 type[2]; /* Ether type (for IP) (0x0800) */ + }; + + /* TLVs this MPC recognizes */ +Index: linux-2.6-tip/include/linux/audit.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/audit.h ++++ linux-2.6-tip/include/linux/audit.h +@@ -606,7 +606,8 @@ extern int audit_enabled; + #define audit_log(c,g,t,f,...) do { ; } while (0) + #define audit_log_start(c,g,t) ({ NULL; }) + #define audit_log_vformat(b,f,a) do { ; } while (0) +-#define audit_log_format(b,f,...) do { ; } while (0) ++static inline void __attribute__ ((format(printf, 2, 3))) ++audit_log_format(struct audit_buffer *ab, const char *fmt, ...) { } + #define audit_log_end(b) do { ; } while (0) + #define audit_log_n_hex(a,b,l) do { ; } while (0) + #define audit_log_n_string(a,c,l) do { ; } while (0) +Index: linux-2.6-tip/include/linux/blktrace_api.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/blktrace_api.h ++++ linux-2.6-tip/include/linux/blktrace_api.h +@@ -144,6 +144,9 @@ struct blk_user_trace_setup { + + #ifdef __KERNEL__ + #if defined(CONFIG_BLK_DEV_IO_TRACE) ++ ++#include ++ + struct blk_trace { + int trace_state; + struct rchan *rchan; +@@ -194,6 +197,8 @@ extern int blk_trace_setup(struct reques + extern int blk_trace_startstop(struct request_queue *q, int start); + extern int blk_trace_remove(struct request_queue *q); + ++extern struct attribute_group blk_trace_attr_group; ++ + #else /* !CONFIG_BLK_DEV_IO_TRACE */ + #define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) + #define blk_trace_shutdown(q) do { } while (0) +Index: linux-2.6-tip/include/linux/bootmem.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/bootmem.h ++++ linux-2.6-tip/include/linux/bootmem.h +@@ -65,23 +65,20 @@ extern void free_bootmem(unsigned long a + #define BOOTMEM_DEFAULT 0 + #define BOOTMEM_EXCLUSIVE (1<<0) + ++extern int reserve_bootmem(unsigned long addr, ++ unsigned long size, ++ int flags); + extern int reserve_bootmem_node(pg_data_t *pgdat, +- unsigned long physaddr, +- unsigned long size, +- int flags); +-#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE +-extern int reserve_bootmem(unsigned long addr, unsigned long size, int flags); +-#endif ++ unsigned long physaddr, ++ unsigned long size, ++ int flags); + +-extern void *__alloc_bootmem_nopanic(unsigned long size, ++extern void *__alloc_bootmem(unsigned long size, + unsigned long align, + 
unsigned long goal); +-extern void *__alloc_bootmem(unsigned long size, ++extern void *__alloc_bootmem_nopanic(unsigned long size, + unsigned long align, + unsigned long goal); +-extern void *__alloc_bootmem_low(unsigned long size, +- unsigned long align, +- unsigned long goal); + extern void *__alloc_bootmem_node(pg_data_t *pgdat, + unsigned long size, + unsigned long align, +@@ -90,30 +87,35 @@ extern void *__alloc_bootmem_node_nopani + unsigned long size, + unsigned long align, + unsigned long goal); ++extern void *__alloc_bootmem_low(unsigned long size, ++ unsigned long align, ++ unsigned long goal); + extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, + unsigned long size, + unsigned long align, + unsigned long goal); +-#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE ++ + #define alloc_bootmem(x) \ + __alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + #define alloc_bootmem_nopanic(x) \ + __alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +-#define alloc_bootmem_low(x) \ +- __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) + #define alloc_bootmem_pages(x) \ + __alloc_bootmem(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) + #define alloc_bootmem_pages_nopanic(x) \ + __alloc_bootmem_nopanic(x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +-#define alloc_bootmem_low_pages(x) \ +- __alloc_bootmem_low(x, PAGE_SIZE, 0) + #define alloc_bootmem_node(pgdat, x) \ + __alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) + #define alloc_bootmem_pages_node(pgdat, x) \ + __alloc_bootmem_node(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) ++#define alloc_bootmem_pages_node_nopanic(pgdat, x) \ ++ __alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) ++ ++#define alloc_bootmem_low(x) \ ++ __alloc_bootmem_low(x, SMP_CACHE_BYTES, 0) ++#define alloc_bootmem_low_pages(x) \ ++ __alloc_bootmem_low(x, PAGE_SIZE, 0) + #define alloc_bootmem_low_pages_node(pgdat, x) \ + __alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0) +-#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ + + extern int reserve_bootmem_generic(unsigned long addr, unsigned long size, + int flags); +Index: linux-2.6-tip/include/linux/c2port.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/c2port.h ++++ linux-2.6-tip/include/linux/c2port.h +@@ -10,6 +10,7 @@ + */ + + #include ++#include + + #define C2PORT_NAME_LEN 32 + +@@ -20,8 +21,10 @@ + /* Main struct */ + struct c2port_ops; + struct c2port_device { +- unsigned int access:1; +- unsigned int flash_access:1; ++ kmemcheck_define_bitfield(flags, { ++ unsigned int access:1; ++ unsigned int flash_access:1; ++ }); + + int id; + char name[C2PORT_NAME_LEN]; +Index: linux-2.6-tip/include/linux/cm4000_cs.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/cm4000_cs.h ++++ linux-2.6-tip/include/linux/cm4000_cs.h +@@ -1,6 +1,8 @@ + #ifndef _CM4000_H_ + #define _CM4000_H_ + ++#include ++ + #define MAX_ATR 33 + + #define CM4000_MAX_DEV 4 +@@ -10,9 +12,9 @@ + * not to break compilation of userspace apps. -HW */ + + typedef struct atreq { +- int32_t atr_len; ++ __s32 atr_len; + unsigned char atr[64]; +- int32_t power_act; ++ __s32 power_act; + unsigned char bIFSD; + unsigned char bIFSC; + } atreq_t; +@@ -22,13 +24,13 @@ typedef struct atreq { + * member sizes. This leads to CONFIG_COMPAT breakage, since 32bit userspace + * will lay out the structure members differently than the 64bit kernel. + * +- * I've changed "ptsreq.protocol" from "unsigned long" to "u_int32_t". 
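The cm4000 comment above states the rationale behind this whole series of type changes: a bare unsigned long (or size_t, off_t, pid_t) inside an ioctl structure is 4 bytes for 32-bit userspace but 8 bytes for a 64-bit kernel, so the two sides disagree about member offsets under CONFIG_COMPAT, while fixed-width types keep the layout identical everywhere. A small userspace demonstration of the effect (illustrative only; the two structs simply mirror the ptsreq members discussed above):

#include <stdio.h>
#include <stddef.h>
#include <linux/types.h>

/* Same structure written with a native long and with a fixed-width type. */
struct pts_long  { unsigned long protocol; unsigned char flags; };
struct pts_fixed { __u32         protocol; unsigned char flags; };

int main(void)
{
	/* On i386 both offsets are 4; on x86_64 the 'long' variant jumps to 8,
	 * which is exactly the 32-bit-app-on-64-bit-kernel breakage the
	 * comment describes.  The __u32 variant is the same on both. */
	printf("offsetof(flags): long=%zu fixed=%zu\n",
	       offsetof(struct pts_long, flags),
	       offsetof(struct pts_fixed, flags));
	printf("sizeof: long=%zu fixed=%zu\n",
	       sizeof(struct pts_long), sizeof(struct pts_fixed));
	return 0;
}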
++ * I've changed "ptsreq.protocol" from "unsigned long" to "__u32". + * On 32bit this will make no difference. With 64bit kernels, it will make + * 32bit apps work, too. + */ + + typedef struct ptsreq { +- u_int32_t protocol; /*T=0: 2^0, T=1: 2^1*/ ++ __u32 protocol; /*T=0: 2^0, T=1: 2^1*/ + unsigned char flags; + unsigned char pts1; + unsigned char pts2; +Index: linux-2.6-tip/include/linux/cn_proc.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/cn_proc.h ++++ linux-2.6-tip/include/linux/cn_proc.h +@@ -65,20 +65,20 @@ struct proc_event { + } ack; + + struct fork_proc_event { +- pid_t parent_pid; +- pid_t parent_tgid; +- pid_t child_pid; +- pid_t child_tgid; ++ __kernel_pid_t parent_pid; ++ __kernel_pid_t parent_tgid; ++ __kernel_pid_t child_pid; ++ __kernel_pid_t child_tgid; + } fork; + + struct exec_proc_event { +- pid_t process_pid; +- pid_t process_tgid; ++ __kernel_pid_t process_pid; ++ __kernel_pid_t process_tgid; + } exec; + + struct id_proc_event { +- pid_t process_pid; +- pid_t process_tgid; ++ __kernel_pid_t process_pid; ++ __kernel_pid_t process_tgid; + union { + __u32 ruid; /* task uid */ + __u32 rgid; /* task gid */ +@@ -90,8 +90,8 @@ struct proc_event { + } id; + + struct exit_proc_event { +- pid_t process_pid; +- pid_t process_tgid; ++ __kernel_pid_t process_pid; ++ __kernel_pid_t process_tgid; + __u32 exit_code, exit_signal; + } exit; + } event_data; +Index: linux-2.6-tip/include/linux/coda_linux.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/coda_linux.h ++++ linux-2.6-tip/include/linux/coda_linux.h +@@ -51,10 +51,6 @@ void coda_vattr_to_iattr(struct inode *, + void coda_iattr_to_vattr(struct iattr *, struct coda_vattr *); + unsigned short coda_flags_to_cflags(unsigned short); + +-/* sysctl.h */ +-void coda_sysctl_init(void); +-void coda_sysctl_clean(void); +- + #define CODA_ALLOC(ptr, cast, size) do { \ + if (size < PAGE_SIZE) \ + ptr = kmalloc((unsigned long) size, GFP_KERNEL); \ +Index: linux-2.6-tip/include/linux/coda_psdev.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/coda_psdev.h ++++ linux-2.6-tip/include/linux/coda_psdev.h +@@ -6,6 +6,7 @@ + #define CODA_PSDEV_MAJOR 67 + #define MAX_CODADEVS 5 /* how many do we allow */ + ++#ifdef __KERNEL__ + struct kstatfs; + + /* communication pending/processing queues */ +@@ -24,7 +25,6 @@ static inline struct venus_comm *coda_vc + return (struct venus_comm *)((sb)->s_fs_info); + } + +- + /* upcalls */ + int venus_rootfid(struct super_block *sb, struct CodaFid *fidp); + int venus_getattr(struct super_block *sb, struct CodaFid *fid, +@@ -64,6 +64,12 @@ int coda_downcall(int opcode, union outp + int venus_fsync(struct super_block *sb, struct CodaFid *fid); + int venus_statfs(struct dentry *dentry, struct kstatfs *sfs); + ++/* ++ * Statistics ++ */ ++ ++extern struct venus_comm coda_comms[]; ++#endif /* __KERNEL__ */ + + /* messages between coda filesystem in kernel and Venus */ + struct upc_req { +@@ -82,11 +88,4 @@ struct upc_req { + #define REQ_WRITE 0x4 + #define REQ_ABORT 0x8 + +- +-/* +- * Statistics +- */ +- +-extern struct venus_comm coda_comms[]; +- + #endif +Index: linux-2.6-tip/include/linux/compat.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/compat.h ++++ linux-2.6-tip/include/linux/compat.h +@@ -208,6 +208,8 @@ int copy_siginfo_from_user32(siginfo_t * + int 
copy_siginfo_to_user32(struct compat_siginfo __user *to, siginfo_t *from); + int get_compat_sigevent(struct sigevent *event, + const struct compat_sigevent __user *u_event); ++long compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, ++ struct compat_siginfo __user *uinfo); + + static inline int compat_timeval_compare(struct compat_timeval *lhs, + struct compat_timeval *rhs) +Index: linux-2.6-tip/include/linux/compiler-gcc4.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/compiler-gcc4.h ++++ linux-2.6-tip/include/linux/compiler-gcc4.h +@@ -3,8 +3,10 @@ + #endif + + /* GCC 4.1.[01] miscompiles __weak */ +-#if __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ <= 1 +-# error Your version of gcc miscompiles the __weak directive ++#ifdef __KERNEL__ ++# if __GNUC_MINOR__ == 1 && __GNUC_PATCHLEVEL__ <= 1 ++# error Your version of gcc miscompiles the __weak directive ++# endif + #endif + + #define __used __attribute__((__used__)) +Index: linux-2.6-tip/include/linux/compiler.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/compiler.h ++++ linux-2.6-tip/include/linux/compiler.h +@@ -68,6 +68,7 @@ struct ftrace_branch_data { + unsigned long miss; + unsigned long hit; + }; ++ unsigned long miss_hit[2]; + }; + }; + +@@ -126,10 +127,7 @@ void ftrace_likely_update(struct ftrace_ + .line = __LINE__, \ + }; \ + ______r = !!(cond); \ +- if (______r) \ +- ______f.hit++; \ +- else \ +- ______f.miss++; \ ++ ______f.miss_hit[______r]++; \ + ______r; \ + })) + #endif /* CONFIG_PROFILE_ALL_BRANCHES */ +Index: linux-2.6-tip/include/linux/cyclades.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/cyclades.h ++++ linux-2.6-tip/include/linux/cyclades.h +@@ -82,9 +82,9 @@ struct cyclades_monitor { + * open) + */ + struct cyclades_idle_stats { +- time_t in_use; /* Time device has been in use (secs) */ +- time_t recv_idle; /* Time since last char received (secs) */ +- time_t xmit_idle; /* Time since last char transmitted (secs) */ ++ __kernel_time_t in_use; /* Time device has been in use (secs) */ ++ __kernel_time_t recv_idle; /* Time since last char received (secs) */ ++ __kernel_time_t xmit_idle; /* Time since last char transmitted (secs) */ + unsigned long recv_bytes; /* Bytes received */ + unsigned long xmit_bytes; /* Bytes transmitted */ + unsigned long overruns; /* Input overruns */ +Index: linux-2.6-tip/include/linux/debugfs.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/debugfs.h ++++ linux-2.6-tip/include/linux/debugfs.h +@@ -71,6 +71,9 @@ struct dentry *debugfs_create_bool(const + struct dentry *debugfs_create_blob(const char *name, mode_t mode, + struct dentry *parent, + struct debugfs_blob_wrapper *blob); ++ ++bool debugfs_initialized(void); ++ + #else + + #include +@@ -183,6 +186,11 @@ static inline struct dentry *debugfs_cre + return ERR_PTR(-ENODEV); + } + ++static inline bool debugfs_initialized(void) ++{ ++ return false; ++} ++ + #endif + + #endif +Index: linux-2.6-tip/include/linux/decompress/bunzip2.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/linux/decompress/bunzip2.h +@@ -0,0 +1,10 @@ ++#ifndef DECOMPRESS_BUNZIP2_H ++#define DECOMPRESS_BUNZIP2_H ++ ++int bunzip2(unsigned char *inbuf, int len, ++ int(*fill)(void*, unsigned int), ++ int(*flush)(void*, unsigned int), ++ unsigned 
char *output, ++ int *pos, ++ void(*error)(char *x)); ++#endif +Index: linux-2.6-tip/include/linux/decompress/generic.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/linux/decompress/generic.h +@@ -0,0 +1,33 @@ ++#ifndef DECOMPRESS_GENERIC_H ++#define DECOMPRESS_GENERIC_H ++ ++/* Minimal chunksize to be read. ++ *Bzip2 prefers at least 4096 ++ *Lzma prefers 0x10000 */ ++#define COMPR_IOBUF_SIZE 4096 ++ ++typedef int (*decompress_fn) (unsigned char *inbuf, int len, ++ int(*fill)(void*, unsigned int), ++ int(*writebb)(void*, unsigned int), ++ unsigned char *output, ++ int *posp, ++ void(*error)(char *x)); ++ ++/* inbuf - input buffer ++ *len - len of pre-read data in inbuf ++ *fill - function to fill inbuf if empty ++ *writebb - function to write out outbug ++ *posp - if non-null, input position (number of bytes read) will be ++ * returned here ++ * ++ *If len != 0, the inbuf is initialized (with as much data), and fill ++ *should not be called ++ *If len = 0, the inbuf is allocated, but empty. Its size is IOBUF_SIZE ++ *fill should be called (repeatedly...) to read data, at most IOBUF_SIZE ++ */ ++ ++/* Utility routine to detect the decompression method */ ++decompress_fn decompress_method(const unsigned char *inbuf, int len, ++ const char **name); ++ ++#endif +Index: linux-2.6-tip/include/linux/decompress/inflate.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/linux/decompress/inflate.h +@@ -0,0 +1,13 @@ ++#ifndef INFLATE_H ++#define INFLATE_H ++ ++/* Other housekeeping constants */ ++#define INBUFSIZ 4096 ++ ++int gunzip(unsigned char *inbuf, int len, ++ int(*fill)(void*, unsigned int), ++ int(*flush)(void*, unsigned int), ++ unsigned char *output, ++ int *pos, ++ void(*error_fn)(char *x)); ++#endif +Index: linux-2.6-tip/include/linux/decompress/mm.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/linux/decompress/mm.h +@@ -0,0 +1,87 @@ ++/* ++ * linux/compr_mm.h ++ * ++ * Memory management for pre-boot and ramdisk uncompressors ++ * ++ * Authors: Alain Knaff ++ * ++ */ ++ ++#ifndef DECOMPR_MM_H ++#define DECOMPR_MM_H ++ ++#ifdef STATIC ++ ++/* Code active when included from pre-boot environment: */ ++ ++/* A trivial malloc implementation, adapted from ++ * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994 ++ */ ++static unsigned long malloc_ptr; ++static int malloc_count; ++ ++static void *malloc(int size) ++{ ++ void *p; ++ ++ if (size < 0) ++ error("Malloc error"); ++ if (!malloc_ptr) ++ malloc_ptr = free_mem_ptr; ++ ++ malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */ ++ ++ p = (void *)malloc_ptr; ++ malloc_ptr += size; ++ ++ if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr) ++ error("Out of memory"); ++ ++ malloc_count++; ++ return p; ++} ++ ++static void free(void *where) ++{ ++ malloc_count--; ++ if (!malloc_count) ++ malloc_ptr = free_mem_ptr; ++} ++ ++#define large_malloc(a) malloc(a) ++#define large_free(a) free(a) ++ ++#define set_error_fn(x) ++ ++#define INIT ++ ++#else /* STATIC */ ++ ++/* Code active when compiled standalone for use when loading ramdisk: */ ++ ++#include ++#include ++#include ++#include ++ ++/* Use defines rather than static inline in order to avoid spurious ++ * warnings when not needed (indeed large_malloc / large_free are not ++ * needed by inflate */ ++ ++#define malloc(a) kmalloc(a, GFP_KERNEL) ++#define free(a) kfree(a) ++ ++#define large_malloc(a) 
vmalloc(a) ++#define large_free(a) vfree(a) ++ ++static void(*error)(char *m); ++#define set_error_fn(x) error = x; ++ ++#define INIT __init ++#define STATIC ++ ++#include ++ ++#endif /* STATIC */ ++ ++#endif /* DECOMPR_MM_H */ +Index: linux-2.6-tip/include/linux/decompress/unlzma.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/linux/decompress/unlzma.h +@@ -0,0 +1,12 @@ ++#ifndef DECOMPRESS_UNLZMA_H ++#define DECOMPRESS_UNLZMA_H ++ ++int unlzma(unsigned char *, int, ++ int(*fill)(void*, unsigned int), ++ int(*flush)(void*, unsigned int), ++ unsigned char *output, ++ int *posp, ++ void(*error)(char *x) ++ ); ++ ++#endif +Index: linux-2.6-tip/include/linux/dlm_netlink.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/dlm_netlink.h ++++ linux-2.6-tip/include/linux/dlm_netlink.h +@@ -9,6 +9,8 @@ + #ifndef _DLM_NETLINK_H + #define _DLM_NETLINK_H + ++#include ++ + enum { + DLM_STATUS_WAITING = 1, + DLM_STATUS_GRANTED = 2, +@@ -18,16 +20,16 @@ enum { + #define DLM_LOCK_DATA_VERSION 1 + + struct dlm_lock_data { +- uint16_t version; +- uint32_t lockspace_id; ++ __u16 version; ++ __u32 lockspace_id; + int nodeid; + int ownpid; +- uint32_t id; +- uint32_t remid; +- uint64_t xid; +- int8_t status; +- int8_t grmode; +- int8_t rqmode; ++ __u32 id; ++ __u32 remid; ++ __u64 xid; ++ __s8 status; ++ __s8 grmode; ++ __s8 rqmode; + unsigned long timestamp; + int resource_namelen; + char resource_name[DLM_RESNAME_MAXLEN]; +Index: linux-2.6-tip/include/linux/dm-ioctl.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/dm-ioctl.h ++++ linux-2.6-tip/include/linux/dm-ioctl.h +@@ -113,20 +113,20 @@ struct dm_ioctl { + * return -ENOTTY) fill out this field, even if the + * command failed. + */ +- uint32_t version[3]; /* in/out */ +- uint32_t data_size; /* total size of data passed in ++ __u32 version[3]; /* in/out */ ++ __u32 data_size; /* total size of data passed in + * including this struct */ + +- uint32_t data_start; /* offset to start of data ++ __u32 data_start; /* offset to start of data + * relative to start of this struct */ + +- uint32_t target_count; /* in/out */ +- int32_t open_count; /* out */ +- uint32_t flags; /* in/out */ +- uint32_t event_nr; /* in/out */ +- uint32_t padding; ++ __u32 target_count; /* in/out */ ++ __s32 open_count; /* out */ ++ __u32 flags; /* in/out */ ++ __u32 event_nr; /* in/out */ ++ __u32 padding; + +- uint64_t dev; /* in/out */ ++ __u64 dev; /* in/out */ + + char name[DM_NAME_LEN]; /* device name */ + char uuid[DM_UUID_LEN]; /* unique identifier for +@@ -139,9 +139,9 @@ struct dm_ioctl { + * dm_ioctl. + */ + struct dm_target_spec { +- uint64_t sector_start; +- uint64_t length; +- int32_t status; /* used when reading from kernel only */ ++ __u64 sector_start; ++ __u64 length; ++ __s32 status; /* used when reading from kernel only */ + + /* + * Location of the next dm_target_spec. +@@ -153,7 +153,7 @@ struct dm_target_spec { + * (that follows the dm_ioctl struct) to the start of the "next" + * dm_target_spec. + */ +- uint32_t next; ++ __u32 next; + + char target_type[DM_MAX_TYPE_NAME]; + +@@ -168,17 +168,17 @@ struct dm_target_spec { + * Used to retrieve the target dependencies. 
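The decompress/*.h headers added above all share the calling convention spelled out in generic.h: pick a decompressor with decompress_method() and feed it either a fully pre-read buffer or fill/flush callbacks. A kernel-context sketch of a caller, using only the prototypes introduced by this patch (the unpack_image() wrapper, the write_out() sink and the decomp_error() handler are made up for illustration):

#include <linux/decompress/generic.h>

/* Hypothetical sink: consume 'len' decompressed bytes from 'buf'. */
static int write_out(void *buf, unsigned int len)
{
	/* ... copy the data wherever it needs to go ... */
	return len;
}

static void decomp_error(char *msg)
{
	/* report and bail out; the decompressors call this on failure */
}

/* 'image' holds a complete compressed blob of 'image_len' bytes. */
static int unpack_image(unsigned char *image, int image_len,
			unsigned char *out)
{
	const char *name;
	int pos = 0;
	decompress_fn decomp = decompress_method(image, image_len, &name);

	if (!decomp)
		return -1;	/* unknown or unsupported format */

	/* len != 0, so no fill callback is needed (see generic.h). */
	return decomp(image, image_len, NULL, write_out, out, &pos,
		      decomp_error);
}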
+ */ + struct dm_target_deps { +- uint32_t count; /* Array size */ +- uint32_t padding; /* unused */ +- uint64_t dev[0]; /* out */ ++ __u32 count; /* Array size */ ++ __u32 padding; /* unused */ ++ __u64 dev[0]; /* out */ + }; + + /* + * Used to get a list of all dm devices. + */ + struct dm_name_list { +- uint64_t dev; +- uint32_t next; /* offset to the next record from ++ __u64 dev; ++ __u32 next; /* offset to the next record from + the _start_ of this */ + char name[0]; + }; +@@ -187,8 +187,8 @@ struct dm_name_list { + * Used to retrieve the target versions + */ + struct dm_target_versions { +- uint32_t next; +- uint32_t version[3]; ++ __u32 next; ++ __u32 version[3]; + + char name[0]; + }; +@@ -197,7 +197,7 @@ struct dm_target_versions { + * Used to pass message to a target + */ + struct dm_target_msg { +- uint64_t sector; /* Device sector */ ++ __u64 sector; /* Device sector */ + + char message[0]; + }; +Index: linux-2.6-tip/include/linux/dma-debug.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/linux/dma-debug.h +@@ -0,0 +1,174 @@ ++/* ++ * Copyright (C) 2008 Advanced Micro Devices, Inc. ++ * ++ * Author: Joerg Roedel ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#ifndef __DMA_DEBUG_H ++#define __DMA_DEBUG_H ++ ++#include ++ ++struct device; ++struct scatterlist; ++struct bus_type; ++ ++#ifdef CONFIG_DMA_API_DEBUG ++ ++extern void dma_debug_add_bus(struct bus_type *bus); ++ ++extern void dma_debug_init(u32 num_entries); ++ ++extern void debug_dma_map_page(struct device *dev, struct page *page, ++ size_t offset, size_t size, ++ int direction, dma_addr_t dma_addr, ++ bool map_single); ++ ++extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, ++ size_t size, int direction, bool map_single); ++ ++extern void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, ++ int nents, int mapped_ents, int direction); ++ ++extern void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, ++ int nelems, int dir); ++ ++extern void debug_dma_alloc_coherent(struct device *dev, size_t size, ++ dma_addr_t dma_addr, void *virt); ++ ++extern void debug_dma_free_coherent(struct device *dev, size_t size, ++ void *virt, dma_addr_t addr); ++ ++extern void debug_dma_sync_single_for_cpu(struct device *dev, ++ dma_addr_t dma_handle, size_t size, ++ int direction); ++ ++extern void debug_dma_sync_single_for_device(struct device *dev, ++ dma_addr_t dma_handle, ++ size_t size, int direction); ++ ++extern void debug_dma_sync_single_range_for_cpu(struct device *dev, ++ dma_addr_t dma_handle, ++ unsigned long offset, ++ size_t size, ++ int direction); ++ ++extern void debug_dma_sync_single_range_for_device(struct device *dev, ++ dma_addr_t dma_handle, ++ unsigned long offset, ++ size_t size, int direction); ++ ++extern void debug_dma_sync_sg_for_cpu(struct device *dev, ++ struct scatterlist 
*sg, ++ int nelems, int direction); ++ ++extern void debug_dma_sync_sg_for_device(struct device *dev, ++ struct scatterlist *sg, ++ int nelems, int direction); ++ ++extern void debug_dma_dump_mappings(struct device *dev); ++ ++#else /* CONFIG_DMA_API_DEBUG */ ++ ++static inline void dma_debug_add_bus(struct bus_type *bus) ++{ ++} ++ ++static inline void dma_debug_init(u32 num_entries) ++{ ++} ++ ++static inline void debug_dma_map_page(struct device *dev, struct page *page, ++ size_t offset, size_t size, ++ int direction, dma_addr_t dma_addr, ++ bool map_single) ++{ ++} ++ ++static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, ++ size_t size, int direction, ++ bool map_single) ++{ ++} ++ ++static inline void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, ++ int nents, int mapped_ents, int direction) ++{ ++} ++ ++static inline void debug_dma_unmap_sg(struct device *dev, ++ struct scatterlist *sglist, ++ int nelems, int dir) ++{ ++} ++ ++static inline void debug_dma_alloc_coherent(struct device *dev, size_t size, ++ dma_addr_t dma_addr, void *virt) ++{ ++} ++ ++static inline void debug_dma_free_coherent(struct device *dev, size_t size, ++ void *virt, dma_addr_t addr) ++{ ++} ++ ++static inline void debug_dma_sync_single_for_cpu(struct device *dev, ++ dma_addr_t dma_handle, ++ size_t size, int direction) ++{ ++} ++ ++static inline void debug_dma_sync_single_for_device(struct device *dev, ++ dma_addr_t dma_handle, ++ size_t size, int direction) ++{ ++} ++ ++static inline void debug_dma_sync_single_range_for_cpu(struct device *dev, ++ dma_addr_t dma_handle, ++ unsigned long offset, ++ size_t size, ++ int direction) ++{ ++} ++ ++static inline void debug_dma_sync_single_range_for_device(struct device *dev, ++ dma_addr_t dma_handle, ++ unsigned long offset, ++ size_t size, ++ int direction) ++{ ++} ++ ++static inline void debug_dma_sync_sg_for_cpu(struct device *dev, ++ struct scatterlist *sg, ++ int nelems, int direction) ++{ ++} ++ ++static inline void debug_dma_sync_sg_for_device(struct device *dev, ++ struct scatterlist *sg, ++ int nelems, int direction) ++{ ++} ++ ++static inline void debug_dma_dump_mappings(struct device *dev) ++{ ++} ++ ++#endif /* CONFIG_DMA_API_DEBUG */ ++ ++#endif /* __DMA_DEBUG_H */ +Index: linux-2.6-tip/include/linux/dma-mapping.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/dma-mapping.h ++++ linux-2.6-tip/include/linux/dma-mapping.h +@@ -3,6 +3,8 @@ + + #include + #include ++#include ++#include + + /* These definitions mirror those in pci.h, so they can be used + * interchangeably with their PCI_ counterparts */ +@@ -13,6 +15,52 @@ enum dma_data_direction { + DMA_NONE = 3, + }; + ++struct dma_map_ops { ++ void* (*alloc_coherent)(struct device *dev, size_t size, ++ dma_addr_t *dma_handle, gfp_t gfp); ++ void (*free_coherent)(struct device *dev, size_t size, ++ void *vaddr, dma_addr_t dma_handle); ++ dma_addr_t (*map_page)(struct device *dev, struct page *page, ++ unsigned long offset, size_t size, ++ enum dma_data_direction dir, ++ struct dma_attrs *attrs); ++ void (*unmap_page)(struct device *dev, dma_addr_t dma_handle, ++ size_t size, enum dma_data_direction dir, ++ struct dma_attrs *attrs); ++ int (*map_sg)(struct device *dev, struct scatterlist *sg, ++ int nents, enum dma_data_direction dir, ++ struct dma_attrs *attrs); ++ void (*unmap_sg)(struct device *dev, ++ struct scatterlist *sg, int nents, ++ enum dma_data_direction dir, ++ struct dma_attrs *attrs); ++ void 
(*sync_single_for_cpu)(struct device *dev, ++ dma_addr_t dma_handle, size_t size, ++ enum dma_data_direction dir); ++ void (*sync_single_for_device)(struct device *dev, ++ dma_addr_t dma_handle, size_t size, ++ enum dma_data_direction dir); ++ void (*sync_single_range_for_cpu)(struct device *dev, ++ dma_addr_t dma_handle, ++ unsigned long offset, ++ size_t size, ++ enum dma_data_direction dir); ++ void (*sync_single_range_for_device)(struct device *dev, ++ dma_addr_t dma_handle, ++ unsigned long offset, ++ size_t size, ++ enum dma_data_direction dir); ++ void (*sync_sg_for_cpu)(struct device *dev, ++ struct scatterlist *sg, int nents, ++ enum dma_data_direction dir); ++ void (*sync_sg_for_device)(struct device *dev, ++ struct scatterlist *sg, int nents, ++ enum dma_data_direction dir); ++ int (*mapping_error)(struct device *dev, dma_addr_t dma_addr); ++ int (*dma_supported)(struct device *dev, u64 mask); ++ int is_phys; ++}; ++ + #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) + + /* +Index: linux-2.6-tip/include/linux/dmar.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/dmar.h ++++ linux-2.6-tip/include/linux/dmar.h +@@ -24,10 +24,10 @@ + #include + #include + #include ++#include + +-#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) + struct intel_iommu; +- ++#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP) + struct dmar_drhd_unit { + struct list_head list; /* list of drhd units */ + struct acpi_dmar_header *hdr; /* ACPI header */ +@@ -49,7 +49,7 @@ extern int dmar_dev_scope_init(void); + + /* Intel IOMMU detection */ + extern void detect_intel_iommu(void); +- ++extern int enable_drhd_fault_handling(void); + + extern int parse_ioapics_under_ir(void); + extern int alloc_iommu(struct dmar_drhd_unit *); +@@ -63,12 +63,12 @@ static inline int dmar_table_init(void) + { + return -ENODEV; + } ++static inline int enable_drhd_fault_handling(void) ++{ ++ return -1; ++} + #endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */ + +-#ifdef CONFIG_INTR_REMAP +-extern int intr_remapping_enabled; +-extern int enable_intr_remapping(int); +- + struct irte { + union { + struct { +@@ -97,6 +97,10 @@ struct irte { + __u64 high; + }; + }; ++#ifdef CONFIG_INTR_REMAP ++extern int intr_remapping_enabled; ++extern int enable_intr_remapping(int); ++ + extern int get_irte(int irq, struct irte *entry); + extern int modify_irte(int irq, struct irte *irte_modified); + extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count); +@@ -111,14 +115,40 @@ extern int irq_remapped(int irq); + extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev); + extern struct intel_iommu *map_ioapic_to_ir(int apic); + #else ++static inline int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) ++{ ++ return -1; ++} ++static inline int modify_irte(int irq, struct irte *irte_modified) ++{ ++ return -1; ++} ++static inline int free_irte(int irq) ++{ ++ return -1; ++} ++static inline int map_irq_to_irte_handle(int irq, u16 *sub_handle) ++{ ++ return -1; ++} ++static inline int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, ++ u16 sub_handle) ++{ ++ return -1; ++} ++static inline struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) ++{ ++ return NULL; ++} ++static inline struct intel_iommu *map_ioapic_to_ir(int apic) ++{ ++ return NULL; ++} + #define irq_remapped(irq) (0) + #define enable_intr_remapping(mode) (-1) + #define intr_remapping_enabled (0) + #endif + +-#ifdef CONFIG_DMAR +-extern const char *dmar_get_fault_reason(u8 
fault_reason); +- + /* Can't use the common MSI interrupt functions + * since DMAR is not a pci device + */ +@@ -127,8 +157,10 @@ extern void dmar_msi_mask(unsigned int i + extern void dmar_msi_read(int irq, struct msi_msg *msg); + extern void dmar_msi_write(int irq, struct msi_msg *msg); + extern int dmar_set_interrupt(struct intel_iommu *iommu); ++extern irqreturn_t dmar_fault(int irq, void *dev_id); + extern int arch_setup_dmar_msi(unsigned int irq); + ++#ifdef CONFIG_DMAR + extern int iommu_detected, no_iommu; + extern struct list_head dmar_rmrr_units; + struct dmar_rmrr_unit { +Index: linux-2.6-tip/include/linux/dvb/audio.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/dvb/audio.h ++++ linux-2.6-tip/include/linux/dvb/audio.h +@@ -76,7 +76,7 @@ struct audio_karaoke{ /* if Vocal1 or V + } audio_karaoke_t; /* into left and right */ + + +-typedef uint16_t audio_attributes_t; ++typedef __u16 audio_attributes_t; + /* bits: descr. */ + /* 15-13 audio coding mode (0=ac3, 2=mpeg1, 3=mpeg2ext, 4=LPCM, 6=DTS, */ + /* 12 multichannel extension */ +Index: linux-2.6-tip/include/linux/dvb/video.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/dvb/video.h ++++ linux-2.6-tip/include/linux/dvb/video.h +@@ -132,12 +132,12 @@ struct video_command { + #define VIDEO_VSYNC_FIELD_PROGRESSIVE (3) + + struct video_event { +- int32_t type; ++ __s32 type; + #define VIDEO_EVENT_SIZE_CHANGED 1 + #define VIDEO_EVENT_FRAME_RATE_CHANGED 2 + #define VIDEO_EVENT_DECODER_STOPPED 3 + #define VIDEO_EVENT_VSYNC 4 +- time_t timestamp; ++ __kernel_time_t timestamp; + union { + video_size_t size; + unsigned int frame_rate; /* in frames per 1000sec */ +@@ -157,25 +157,25 @@ struct video_status { + + struct video_still_picture { + char __user *iFrame; /* pointer to a single iframe in memory */ +- int32_t size; ++ __s32 size; + }; + + + typedef + struct video_highlight { + int active; /* 1=show highlight, 0=hide highlight */ +- uint8_t contrast1; /* 7- 4 Pattern pixel contrast */ ++ __u8 contrast1; /* 7- 4 Pattern pixel contrast */ + /* 3- 0 Background pixel contrast */ +- uint8_t contrast2; /* 7- 4 Emphasis pixel-2 contrast */ ++ __u8 contrast2; /* 7- 4 Emphasis pixel-2 contrast */ + /* 3- 0 Emphasis pixel-1 contrast */ +- uint8_t color1; /* 7- 4 Pattern pixel color */ ++ __u8 color1; /* 7- 4 Pattern pixel color */ + /* 3- 0 Background pixel color */ +- uint8_t color2; /* 7- 4 Emphasis pixel-2 color */ ++ __u8 color2; /* 7- 4 Emphasis pixel-2 color */ + /* 3- 0 Emphasis pixel-1 color */ +- uint32_t ypos; /* 23-22 auto action mode */ ++ __u32 ypos; /* 23-22 auto action mode */ + /* 21-12 start y */ + /* 9- 0 end y */ +- uint32_t xpos; /* 23-22 button color number */ ++ __u32 xpos; /* 23-22 button color number */ + /* 21-12 start x */ + /* 9- 0 end x */ + } video_highlight_t; +@@ -189,17 +189,17 @@ typedef struct video_spu { + + typedef struct video_spu_palette { /* SPU Palette information */ + int length; +- uint8_t __user *palette; ++ __u8 __user *palette; + } video_spu_palette_t; + + + typedef struct video_navi_pack { + int length; /* 0 ... 1024 */ +- uint8_t data[1024]; ++ __u8 data[1024]; + } video_navi_pack_t; + + +-typedef uint16_t video_attributes_t; ++typedef __u16 video_attributes_t; + /* bits: descr. 
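struct video_event above now uses __s32 and __kernel_time_t but is consumed exactly as before. A hedged userspace sketch (the /dev/dvb/adapter0/video0 node is the conventional DVB device path and is an assumption here, as is having a decoder driver that actually raises these events):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/dvb/video.h>

int main(void)
{
	struct video_event ev;
	int fd = open("/dev/dvb/adapter0/video0", O_RDONLY | O_NONBLOCK);

	if (fd < 0) {
		perror("open video0");
		return 1;
	}
	/* Fetch one pending decoder event, if any. */
	if (ioctl(fd, VIDEO_GET_EVENT, &ev) == 0) {
		if (ev.type == VIDEO_EVENT_SIZE_CHANGED)
			printf("new size: %dx%d\n",
			       (int)ev.u.size.w, (int)ev.u.size.h);
		else
			printf("event type %d\n", (int)ev.type);
	} else {
		perror("VIDEO_GET_EVENT");
	}
	close(fd);
	return 0;
}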
*/ + /* 15-14 Video compression mode (0=MPEG-1, 1=MPEG-2) */ + /* 13-12 TV system (0=525/60, 1=625/50) */ +Index: linux-2.6-tip/include/linux/elfcore.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/elfcore.h ++++ linux-2.6-tip/include/linux/elfcore.h +@@ -111,6 +111,15 @@ static inline void elf_core_copy_regs(el + #endif + } + ++static inline void elf_core_copy_kernel_regs(elf_gregset_t *elfregs, struct pt_regs *regs) ++{ ++#ifdef ELF_CORE_COPY_KERNEL_REGS ++ ELF_CORE_COPY_KERNEL_REGS((*elfregs), regs); ++#else ++ elf_core_copy_regs(elfregs, regs); ++#endif ++} ++ + static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs) + { + #ifdef ELF_CORE_COPY_TASK_REGS +Index: linux-2.6-tip/include/linux/fdtable.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/fdtable.h ++++ linux-2.6-tip/include/linux/fdtable.h +@@ -5,12 +5,14 @@ + #ifndef __LINUX_FDTABLE_H + #define __LINUX_FDTABLE_H + +-#include + #include + #include + #include + #include + #include ++#include ++ ++#include + + /* + * The default fd array needs to be at least BITS_PER_LONG, +Index: linux-2.6-tip/include/linux/fs.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/fs.h ++++ linux-2.6-tip/include/linux/fs.h +@@ -671,7 +671,7 @@ struct inode { + umode_t i_mode; + spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ + struct mutex i_mutex; +- struct rw_semaphore i_alloc_sem; ++ struct compat_rw_semaphore i_alloc_sem; + const struct inode_operations *i_op; + const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ + struct super_block *i_sb; +@@ -1081,13 +1081,25 @@ extern int lock_may_write(struct inode * + #define posix_lock_file_wait(a, b) ({ -ENOLCK; }) + #define posix_unblock_lock(a, b) (-ENOENT) + #define vfs_test_lock(a, b) ({ 0; }) +-#define vfs_lock_file(a, b, c, d) (-ENOLCK) ++static inline int ++vfs_lock_file(struct file *filp, unsigned int cmd, ++ struct file_lock *fl, struct file_lock *conf) ++{ ++ return -ENOLCK; ++} + #define vfs_cancel_lock(a, b) ({ 0; }) + #define flock_lock_file_wait(a, b) ({ -ENOLCK; }) + #define __break_lease(a, b) ({ 0; }) +-#define lease_get_mtime(a, b) ({ }) ++static inline void lease_get_mtime(struct inode *inode, struct timespec *time) ++{ ++ *time = (struct timespec) { 0, }; ++} + #define generic_setlease(a, b, c) ({ -EINVAL; }) +-#define vfs_setlease(a, b, c) ({ -EINVAL; }) ++static inline int ++vfs_setlease(struct file *filp, long arg, struct file_lock **lease) ++{ ++ return -EINVAL; ++} + #define lease_modify(a, b) ({ -EINVAL; }) + #define lock_may_read(a, b, c) ({ 1; }) + #define lock_may_write(a, b, c) ({ 1; }) +@@ -1611,9 +1623,9 @@ int __put_super_and_need_restart(struct + + /* Alas, no aliases. Too much hassle with bringing module.h everywhere */ + #define fops_get(fops) \ +- (((fops) && try_module_get((fops)->owner) ? (fops) : NULL)) ++ (((fops != NULL) && try_module_get((fops)->owner) ? 
(fops) : NULL)) + #define fops_put(fops) \ +- do { if (fops) module_put((fops)->owner); } while(0) ++ do { if (fops != NULL) module_put((fops)->owner); } while(0) + + extern int register_filesystem(struct file_system_type *); + extern int unregister_filesystem(struct file_system_type *); +@@ -1689,7 +1701,7 @@ static inline int break_lease(struct ino + #else /* !CONFIG_FILE_LOCKING */ + #define locks_mandatory_locked(a) ({ 0; }) + #define locks_mandatory_area(a, b, c, d, e) ({ 0; }) +-#define __mandatory_lock(a) ({ 0; }) ++static inline int __mandatory_lock(struct inode *ino) { return 0; } + #define mandatory_lock(a) ({ 0; }) + #define locks_verify_locked(a) ({ 0; }) + #define locks_verify_truncate(a, b, c) ({ 0; }) +@@ -2171,19 +2183,7 @@ ssize_t simple_transaction_read(struct f + size_t size, loff_t *pos); + int simple_transaction_release(struct inode *inode, struct file *file); + +-static inline void simple_transaction_set(struct file *file, size_t n) +-{ +- struct simple_transaction_argresp *ar = file->private_data; +- +- BUG_ON(n > SIMPLE_TRANSACTION_LIMIT); +- +- /* +- * The barrier ensures that ar->size will really remain zero until +- * ar->data is ready for reading. +- */ +- smp_mb(); +- ar->size = n; +-} ++void simple_transaction_set(struct file *file, size_t n); + + /* + * simple attribute files +@@ -2230,27 +2230,6 @@ ssize_t simple_attr_read(struct file *fi + ssize_t simple_attr_write(struct file *file, const char __user *buf, + size_t len, loff_t *ppos); + +- +-#ifdef CONFIG_SECURITY +-static inline char *alloc_secdata(void) +-{ +- return (char *)get_zeroed_page(GFP_KERNEL); +-} +- +-static inline void free_secdata(void *secdata) +-{ +- free_page((unsigned long)secdata); +-} +-#else +-static inline char *alloc_secdata(void) +-{ +- return (char *)1; +-} +- +-static inline void free_secdata(void *secdata) +-{ } +-#endif /* CONFIG_SECURITY */ +- + struct ctl_table; + int proc_nr_files(struct ctl_table *table, int write, struct file *filp, + void __user *buffer, size_t *lenp, loff_t *ppos); +Index: linux-2.6-tip/include/linux/ftrace.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/ftrace.h ++++ linux-2.6-tip/include/linux/ftrace.h +@@ -1,15 +1,18 @@ + #ifndef _LINUX_FTRACE_H + #define _LINUX_FTRACE_H + +-#include +-#include +-#include +-#include +-#include +-#include ++#include + #include ++#include + #include ++#include ++#include + #include ++#include ++#include ++#include ++ ++#include + + #ifdef CONFIG_FUNCTION_TRACER + +@@ -95,9 +98,41 @@ stack_trace_sysctl(struct ctl_table *tab + loff_t *ppos); + #endif + ++struct ftrace_func_command { ++ struct list_head list; ++ char *name; ++ int (*func)(char *func, char *cmd, ++ char *params, int enable); ++}; ++ + #ifdef CONFIG_DYNAMIC_FTRACE +-/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ +-#include ++ ++int ftrace_arch_code_modify_prepare(void); ++int ftrace_arch_code_modify_post_process(void); ++ ++struct seq_file; ++ ++struct ftrace_probe_ops { ++ void (*func)(unsigned long ip, ++ unsigned long parent_ip, ++ void **data); ++ int (*callback)(unsigned long ip, void **data); ++ void (*free)(void **data); ++ int (*print)(struct seq_file *m, ++ unsigned long ip, ++ struct ftrace_probe_ops *ops, ++ void *data); ++}; ++ ++extern int ++register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, ++ void *data); ++extern void ++unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, ++ void *data); ++extern void 
++unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops); ++extern void unregister_ftrace_function_probe_all(char *glob); + + enum { + FTRACE_FL_FREE = (1 << 0), +@@ -110,15 +145,23 @@ enum { + }; + + struct dyn_ftrace { +- struct list_head list; +- unsigned long ip; /* address of mcount call-site */ +- unsigned long flags; +- struct dyn_arch_ftrace arch; ++ union { ++ unsigned long ip; /* address of mcount call-site */ ++ struct dyn_ftrace *freelist; ++ }; ++ union { ++ unsigned long flags; ++ struct dyn_ftrace *newlist; ++ }; ++ struct dyn_arch_ftrace arch; + }; + + int ftrace_force_update(void); + void ftrace_set_filter(unsigned char *buf, int len, int reset); + ++int register_ftrace_command(struct ftrace_func_command *cmd); ++int unregister_ftrace_command(struct ftrace_func_command *cmd); ++ + /* defined in arch */ + extern int ftrace_ip_converted(unsigned long ip); + extern int ftrace_dyn_arch_init(void *data); +@@ -126,6 +169,10 @@ extern int ftrace_update_ftrace_func(ftr + extern void ftrace_caller(void); + extern void ftrace_call(void); + extern void mcount_call(void); ++ ++#ifndef FTRACE_ADDR ++#define FTRACE_ADDR ((unsigned long)ftrace_caller) ++#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER + extern void ftrace_graph_caller(void); + extern int ftrace_enable_ftrace_graph_caller(void); +@@ -136,7 +183,7 @@ static inline int ftrace_disable_ftrace_ + #endif + + /** +- * ftrace_make_nop - convert code into top ++ * ftrace_make_nop - convert code into nop + * @mod: module structure if called by module load initialization + * @rec: the mcount call site record + * @addr: the address that the call site should be calling +@@ -181,7 +228,6 @@ extern int ftrace_make_nop(struct module + */ + extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); + +- + /* May be defined in arch */ + extern int ftrace_arch_read_dyn_info(char *buf, int size); + +@@ -198,6 +244,14 @@ extern void ftrace_enable_daemon(void); + # define ftrace_disable_daemon() do { } while (0) + # define ftrace_enable_daemon() do { } while (0) + static inline void ftrace_release(void *start, unsigned long size) { } ++static inline int register_ftrace_command(struct ftrace_func_command *cmd) ++{ ++ return -EINVAL; ++} ++static inline int unregister_ftrace_command(char *cmd_name) ++{ ++ return -EINVAL; ++} + #endif /* CONFIG_DYNAMIC_FTRACE */ + + /* totally disable ftrace - can not re-enable after this */ +@@ -233,24 +287,25 @@ static inline void __ftrace_enabled_rest + #endif + } + +-#ifdef CONFIG_FRAME_POINTER +-/* TODO: need to fix this for ARM */ +-# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +-# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) +-# define CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) +-# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) +-# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) +-# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) +-# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6)) +-#else +-# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) +-# define CALLER_ADDR1 0UL +-# define CALLER_ADDR2 0UL +-# define CALLER_ADDR3 0UL +-# define CALLER_ADDR4 0UL +-# define CALLER_ADDR5 0UL +-# define CALLER_ADDR6 0UL +-#endif ++#ifndef HAVE_ARCH_CALLER_ADDR ++# ifdef CONFIG_FRAME_POINTER ++# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) ++# define CALLER_ADDR1 ((unsigned long)__builtin_return_address(1)) ++# define 
CALLER_ADDR2 ((unsigned long)__builtin_return_address(2)) ++# define CALLER_ADDR3 ((unsigned long)__builtin_return_address(3)) ++# define CALLER_ADDR4 ((unsigned long)__builtin_return_address(4)) ++# define CALLER_ADDR5 ((unsigned long)__builtin_return_address(5)) ++# define CALLER_ADDR6 ((unsigned long)__builtin_return_address(6)) ++# else ++# define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) ++# define CALLER_ADDR1 0UL ++# define CALLER_ADDR2 0UL ++# define CALLER_ADDR3 0UL ++# define CALLER_ADDR4 0UL ++# define CALLER_ADDR5 0UL ++# define CALLER_ADDR6 0UL ++# endif ++#endif /* ifndef HAVE_ARCH_CALLER_ADDR */ + + #ifdef CONFIG_IRQSOFF_TRACER + extern void time_hardirqs_on(unsigned long a0, unsigned long a1); +@@ -268,54 +323,6 @@ static inline void __ftrace_enabled_rest + # define trace_preempt_off(a0, a1) do { } while (0) + #endif + +-#ifdef CONFIG_TRACING +-extern int ftrace_dump_on_oops; +- +-extern void tracing_start(void); +-extern void tracing_stop(void); +-extern void ftrace_off_permanent(void); +- +-extern void +-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); +- +-/** +- * ftrace_printk - printf formatting in the ftrace buffer +- * @fmt: the printf format for printing +- * +- * Note: __ftrace_printk is an internal function for ftrace_printk and +- * the @ip is passed in via the ftrace_printk macro. +- * +- * This function allows a kernel developer to debug fast path sections +- * that printk is not appropriate for. By scattering in various +- * printk like tracing in the code, a developer can quickly see +- * where problems are occurring. +- * +- * This is intended as a debugging tool for the developer only. +- * Please refrain from leaving ftrace_printks scattered around in +- * your code. +- */ +-# define ftrace_printk(fmt...) __ftrace_printk(_THIS_IP_, fmt) +-extern int +-__ftrace_printk(unsigned long ip, const char *fmt, ...) +- __attribute__ ((format (printf, 2, 3))); +-extern void ftrace_dump(void); +-#else +-static inline void +-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } +-static inline int +-ftrace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); +- +-static inline void tracing_start(void) { } +-static inline void tracing_stop(void) { } +-static inline void ftrace_off_permanent(void) { } +-static inline int +-ftrace_printk(const char *fmt, ...) 
+-{ +- return 0; +-} +-static inline void ftrace_dump(void) { } +-#endif +- + #ifdef CONFIG_FTRACE_MCOUNT_RECORD + extern void ftrace_init(void); + extern void ftrace_init_module(struct module *mod, +@@ -327,36 +334,6 @@ ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end) { } + #endif + +-enum { +- POWER_NONE = 0, +- POWER_CSTATE = 1, +- POWER_PSTATE = 2, +-}; +- +-struct power_trace { +-#ifdef CONFIG_POWER_TRACER +- ktime_t stamp; +- ktime_t end; +- int type; +- int state; +-#endif +-}; +- +-#ifdef CONFIG_POWER_TRACER +-extern void trace_power_start(struct power_trace *it, unsigned int type, +- unsigned int state); +-extern void trace_power_mark(struct power_trace *it, unsigned int type, +- unsigned int state); +-extern void trace_power_end(struct power_trace *it); +-#else +-static inline void trace_power_start(struct power_trace *it, unsigned int type, +- unsigned int state) { } +-static inline void trace_power_mark(struct power_trace *it, unsigned int type, +- unsigned int state) { } +-static inline void trace_power_end(struct power_trace *it) { } +-#endif +- +- + /* + * Structure that defines an entry function trace. + */ +@@ -380,6 +357,28 @@ struct ftrace_graph_ret { + #ifdef CONFIG_FUNCTION_GRAPH_TRACER + + /* ++ * Stack of return addresses for functions ++ * of a thread. ++ * Used in struct thread_info ++ */ ++struct ftrace_ret_stack { ++ unsigned long ret; ++ unsigned long func; ++ unsigned long long calltime; ++ unsigned long long subtime; ++}; ++ ++/* ++ * Primary handler of a function return. ++ * It relays on ftrace_return_to_handler. ++ * Defined in entry_32/64.S ++ */ ++extern void return_to_handler(void); ++ ++extern int ++ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth); ++ ++/* + * Sometimes we don't want to trace a function with the function + * graph tracer but we want them to keep traced by the usual function + * tracer if the function graph tracer is not configured. +@@ -490,6 +489,50 @@ static inline int test_tsk_trace_graph(s + return tsk->trace & TSK_TRACE_FL_GRAPH; + } + ++extern int ftrace_dump_on_oops; ++ + #endif /* CONFIG_TRACING */ + ++ ++#ifdef CONFIG_HW_BRANCH_TRACER ++ ++void trace_hw_branch(u64 from, u64 to); ++void trace_hw_branch_oops(void); ++ ++#else /* CONFIG_HW_BRANCH_TRACER */ ++ ++static inline void trace_hw_branch(u64 from, u64 to) {} ++static inline void trace_hw_branch_oops(void) {} ++ ++#endif /* CONFIG_HW_BRANCH_TRACER */ ++ ++/* ++ * A syscall entry in the ftrace syscalls array. 
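struct syscall_metadata introduced below is just a static description of one syscall's signature, with args[i] naming the parameter whose type is types[i]. Illustratively, a record for sys_close(unsigned int fd) could look like the following (a hypothetical hand-written entry; in practice the ftrace syscalls support builds these tables per architecture rather than by hand):

#include <linux/ftrace.h>

/* What a metadata record for sys_close(unsigned int fd) could look like. */
static const char *close_types[] = { "unsigned int" };
static const char *close_args[]  = { "fd" };

static struct syscall_metadata close_meta = {
	.name    = "sys_close",
	.nb_args = 1,
	.types   = close_types,
	.args    = close_args,
};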
++ * ++ * @name: name of the syscall ++ * @nb_args: number of parameters it takes ++ * @types: list of types as strings ++ * @args: list of args as strings (args[i] matches types[i]) ++ */ ++struct syscall_metadata { ++ const char *name; ++ int nb_args; ++ const char **types; ++ const char **args; ++}; ++ ++#ifdef CONFIG_FTRACE_SYSCALLS ++extern void arch_init_ftrace_syscalls(void); ++extern struct syscall_metadata *syscall_nr_to_meta(int nr); ++extern void start_ftrace_syscalls(void); ++extern void stop_ftrace_syscalls(void); ++extern void ftrace_syscall_enter(struct pt_regs *regs); ++extern void ftrace_syscall_exit(struct pt_regs *regs); ++#else ++static inline void start_ftrace_syscalls(void) { } ++static inline void stop_ftrace_syscalls(void) { } ++static inline void ftrace_syscall_enter(struct pt_regs *regs) { } ++static inline void ftrace_syscall_exit(struct pt_regs *regs) { } ++#endif ++ + #endif /* _LINUX_FTRACE_H */ +Index: linux-2.6-tip/include/linux/ftrace_irq.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/ftrace_irq.h ++++ linux-2.6-tip/include/linux/ftrace_irq.h +@@ -2,7 +2,7 @@ + #define _LINUX_FTRACE_IRQ_H + + +-#if defined(CONFIG_DYNAMIC_FTRACE) || defined(CONFIG_FUNCTION_GRAPH_TRACER) ++#ifdef CONFIG_FTRACE_NMI_ENTER + extern void ftrace_nmi_enter(void); + extern void ftrace_nmi_exit(void); + #else +Index: linux-2.6-tip/include/linux/futex.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/futex.h ++++ linux-2.6-tip/include/linux/futex.h +@@ -23,6 +23,8 @@ union ktime; + #define FUTEX_TRYLOCK_PI 8 + #define FUTEX_WAIT_BITSET 9 + #define FUTEX_WAKE_BITSET 10 ++#define FUTEX_WAIT_REQUEUE_PI 11 ++#define FUTEX_CMP_REQUEUE_PI 12 + + #define FUTEX_PRIVATE_FLAG 128 + #define FUTEX_CLOCK_REALTIME 256 +@@ -38,6 +40,10 @@ union ktime; + #define FUTEX_TRYLOCK_PI_PRIVATE (FUTEX_TRYLOCK_PI | FUTEX_PRIVATE_FLAG) + #define FUTEX_WAIT_BITSET_PRIVATE (FUTEX_WAIT_BITS | FUTEX_PRIVATE_FLAG) + #define FUTEX_WAKE_BITSET_PRIVATE (FUTEX_WAKE_BITS | FUTEX_PRIVATE_FLAG) ++#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ ++ FUTEX_PRIVATE_FLAG) ++#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ ++ FUTEX_PRIVATE_FLAG) + + /* + * Support for robust futexes: the kernel cleans up held futexes at +Index: linux-2.6-tip/include/linux/gfp.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/gfp.h ++++ linux-2.6-tip/include/linux/gfp.h +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + + struct vm_area_struct; + +@@ -51,7 +52,13 @@ struct vm_area_struct; + #define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */ + #define __GFP_MOVABLE ((__force gfp_t)0x100000u) /* Page is movable */ + +-#define __GFP_BITS_SHIFT 21 /* Room for 21 __GFP_FOO bits */ ++#ifdef CONFIG_KMEMCHECK ++#define __GFP_NOTRACK ((__force gfp_t)0x200000u) /* Don't track with kmemcheck */ ++#else ++#define __GFP_NOTRACK ((__force gfp_t)0) ++#endif ++ ++#define __GFP_BITS_SHIFT 22 /* Room for 22 __GFP_FOO bits */ + #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) + + /* This equals 0, but use constants in case they ever change */ +Index: linux-2.6-tip/include/linux/hardirq.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/hardirq.h ++++ linux-2.6-tip/include/linux/hardirq.h +@@ -15,71 +15,74 @@ + * - bits 0-7 are the preemption 
count (max preemption depth: 256) + * - bits 8-15 are the softirq count (max # of softirqs: 256) + * +- * The hardirq count can be overridden per architecture, the default is: ++ * The hardirq count can in theory reach the same as NR_IRQS. ++ * In reality, the number of nested IRQS is limited to the stack ++ * size as well. For archs with over 1000 IRQS it is not practical ++ * to expect that they will all nest. We give a max of 10 bits for ++ * hardirq nesting. An arch may choose to give less than 10 bits. ++ * m68k expects it to be 8. + * +- * - bits 16-27 are the hardirq count (max # of hardirqs: 4096) +- * - ( bit 28 is the PREEMPT_ACTIVE flag. ) ++ * - bits 16-25 are the hardirq count (max # of nested hardirqs: 1024) ++ * - bit 26 is the NMI_MASK ++ * - bit 28 is the PREEMPT_ACTIVE flag + * + * PREEMPT_MASK: 0x000000ff + * SOFTIRQ_MASK: 0x0000ff00 +- * HARDIRQ_MASK: 0x0fff0000 ++ * HARDIRQ_MASK: 0x03ff0000 ++ * NMI_MASK: 0x04000000 + */ + #define PREEMPT_BITS 8 + #define SOFTIRQ_BITS 8 ++#define NMI_BITS 1 + +-#ifndef HARDIRQ_BITS +-#define HARDIRQ_BITS 12 ++#define MAX_HARDIRQ_BITS 10 + +-#ifndef MAX_HARDIRQS_PER_CPU +-#define MAX_HARDIRQS_PER_CPU NR_IRQS ++#ifndef HARDIRQ_BITS ++# define HARDIRQ_BITS MAX_HARDIRQ_BITS + #endif + +-/* +- * The hardirq mask has to be large enough to have space for potentially +- * all IRQ sources in the system nesting on a single CPU. +- */ +-#if (1 << HARDIRQ_BITS) < MAX_HARDIRQS_PER_CPU +-# error HARDIRQ_BITS is too low! +-#endif ++#if HARDIRQ_BITS > MAX_HARDIRQ_BITS ++#error HARDIRQ_BITS too high! + #endif + + #define PREEMPT_SHIFT 0 + #define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS) + #define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS) ++#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS) + + #define __IRQ_MASK(x) ((1UL << (x))-1) + + #define PREEMPT_MASK (__IRQ_MASK(PREEMPT_BITS) << PREEMPT_SHIFT) + #define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT) + #define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT) ++#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT) + + #define PREEMPT_OFFSET (1UL << PREEMPT_SHIFT) + #define SOFTIRQ_OFFSET (1UL << SOFTIRQ_SHIFT) + #define HARDIRQ_OFFSET (1UL << HARDIRQ_SHIFT) ++#define NMI_OFFSET (1UL << NMI_SHIFT) + +-#if PREEMPT_ACTIVE < (1 << (HARDIRQ_SHIFT + HARDIRQ_BITS)) ++#if PREEMPT_ACTIVE < (1 << (NMI_SHIFT + NMI_BITS)) + #error PREEMPT_ACTIVE is too low! + #endif + + #define hardirq_count() (preempt_count() & HARDIRQ_MASK) + #define softirq_count() (preempt_count() & SOFTIRQ_MASK) +-#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK)) ++#define irq_count() (preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \ ++ | NMI_MASK)) + + /* + * Are we doing bottom half or hardware interrupt processing? + * Are we in a softirq context? Interrupt context? + */ +-#define in_irq() (hardirq_count()) +-#define in_softirq() (softirq_count()) +-#define in_interrupt() (irq_count()) +- +-#if defined(CONFIG_PREEMPT) +-# define PREEMPT_INATOMIC_BASE kernel_locked() +-# define PREEMPT_CHECK_OFFSET 1 +-#else +-# define PREEMPT_INATOMIC_BASE 0 +-# define PREEMPT_CHECK_OFFSET 0 +-#endif ++#define in_irq() (hardirq_count() || (current->flags & PF_HARDIRQ)) ++#define in_softirq() (softirq_count() || (current->flags & PF_SOFTIRQ)) ++#define in_interrupt() (irq_count()) ++ ++/* ++ * Are we in NMI context? ++ */ ++#define in_nmi() (preempt_count() & NMI_MASK) + + /* + * Are we running in atomic context? 
WARNING: this macro cannot +@@ -88,14 +91,7 @@ + * used in the general case to determine whether sleeping is possible. + * Do not use in_atomic() in driver code. + */ +-#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_INATOMIC_BASE) +- +-/* +- * Check whether we were atomic before we did preempt_disable(): +- * (used by the scheduler, *after* releasing the kernel lock) +- */ +-#define in_atomic_preempt_off() \ +- ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET) ++#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0) + + #ifdef CONFIG_PREEMPT + # define preemptible() (preempt_count() == 0 && !irqs_disabled()) +@@ -164,20 +160,24 @@ extern void irq_enter(void); + */ + extern void irq_exit(void); + +-#define nmi_enter() \ +- do { \ +- ftrace_nmi_enter(); \ +- lockdep_off(); \ +- rcu_nmi_enter(); \ +- __irq_enter(); \ ++#define nmi_enter() \ ++ do { \ ++ ftrace_nmi_enter(); \ ++ BUG_ON(in_nmi()); \ ++ add_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ ++ lockdep_off(); \ ++ rcu_nmi_enter(); \ ++ trace_hardirq_enter(); \ + } while (0) + +-#define nmi_exit() \ +- do { \ +- __irq_exit(); \ +- rcu_nmi_exit(); \ +- lockdep_on(); \ +- ftrace_nmi_exit(); \ ++#define nmi_exit() \ ++ do { \ ++ trace_hardirq_exit(); \ ++ rcu_nmi_exit(); \ ++ lockdep_on(); \ ++ BUG_ON(!in_nmi()); \ ++ sub_preempt_count(NMI_OFFSET + HARDIRQ_OFFSET); \ ++ ftrace_nmi_exit(); \ + } while (0) + + #endif /* LINUX_HARDIRQ_H */ +Index: linux-2.6-tip/include/linux/if_arcnet.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/if_arcnet.h ++++ linux-2.6-tip/include/linux/if_arcnet.h +@@ -16,6 +16,7 @@ + #ifndef _LINUX_IF_ARCNET_H + #define _LINUX_IF_ARCNET_H + ++#include + #include + + +@@ -57,10 +58,10 @@ + */ + struct arc_rfc1201 + { +- uint8_t proto; /* protocol ID field - varies */ +- uint8_t split_flag; /* for use with split packets */ ++ __u8 proto; /* protocol ID field - varies */ ++ __u8 split_flag; /* for use with split packets */ + __be16 sequence; /* sequence number */ +- uint8_t payload[0]; /* space remaining in packet (504 bytes)*/ ++ __u8 payload[0]; /* space remaining in packet (504 bytes)*/ + }; + #define RFC1201_HDR_SIZE 4 + +@@ -70,8 +71,8 @@ struct arc_rfc1201 + */ + struct arc_rfc1051 + { +- uint8_t proto; /* ARC_P_RFC1051_ARP/RFC1051_IP */ +- uint8_t payload[0]; /* 507 bytes */ ++ __u8 proto; /* ARC_P_RFC1051_ARP/RFC1051_IP */ ++ __u8 payload[0]; /* 507 bytes */ + }; + #define RFC1051_HDR_SIZE 1 + +@@ -82,20 +83,20 @@ struct arc_rfc1051 + */ + struct arc_eth_encap + { +- uint8_t proto; /* Always ARC_P_ETHER */ ++ __u8 proto; /* Always ARC_P_ETHER */ + struct ethhdr eth; /* standard ethernet header (yuck!) 
*/ +- uint8_t payload[0]; /* 493 bytes */ ++ __u8 payload[0]; /* 493 bytes */ + }; + #define ETH_ENCAP_HDR_SIZE 14 + + + struct arc_cap + { +- uint8_t proto; +- uint8_t cookie[sizeof(int)]; /* Actually NOT sent over the network */ ++ __u8 proto; ++ __u8 cookie[sizeof(int)]; /* Actually NOT sent over the network */ + union { +- uint8_t ack; +- uint8_t raw[0]; /* 507 bytes */ ++ __u8 ack; ++ __u8 raw[0]; /* 507 bytes */ + } mes; + }; + +@@ -109,7 +110,7 @@ struct arc_cap + */ + struct arc_hardware + { +- uint8_t source, /* source ARCnet - filled in automagically */ ++ __u8 source, /* source ARCnet - filled in automagically */ + dest, /* destination ARCnet - 0 for broadcast */ + offset[2]; /* offset bytes (some weird semantics) */ + }; +@@ -130,7 +131,7 @@ struct archdr + struct arc_rfc1051 rfc1051; + struct arc_eth_encap eth_encap; + struct arc_cap cap; +- uint8_t raw[0]; /* 508 bytes */ ++ __u8 raw[0]; /* 508 bytes */ + } soft; + }; + +Index: linux-2.6-tip/include/linux/if_pppol2tp.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/if_pppol2tp.h ++++ linux-2.6-tip/include/linux/if_pppol2tp.h +@@ -26,7 +26,7 @@ + */ + struct pppol2tp_addr + { +- pid_t pid; /* pid that owns the fd. ++ __kernel_pid_t pid; /* pid that owns the fd. + * 0 => current */ + int fd; /* FD of UDP socket to use */ + +Index: linux-2.6-tip/include/linux/in6.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/in6.h ++++ linux-2.6-tip/include/linux/in6.h +@@ -44,11 +44,11 @@ struct in6_addr + * NOTE: Be aware the IN6ADDR_* constants and in6addr_* externals are defined + * in network byte order, not in host byte order as are the IPv4 equivalents + */ ++#ifdef __KERNEL__ + extern const struct in6_addr in6addr_any; + #define IN6ADDR_ANY_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } } } + extern const struct in6_addr in6addr_loopback; + #define IN6ADDR_LOOPBACK_INIT { { { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } } +-#ifdef __KERNEL__ + extern const struct in6_addr in6addr_linklocal_allnodes; + #define IN6ADDR_LINKLOCAL_ALLNODES_INIT \ + { { { 0xff,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1 } } } +Index: linux-2.6-tip/include/linux/init.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/init.h ++++ linux-2.6-tip/include/linux/init.h +@@ -313,16 +313,20 @@ void __init parse_early_param(void); + #define __initdata_or_module __initdata + #endif /*CONFIG_MODULES*/ + +-/* Functions marked as __devexit may be discarded at kernel link time, depending +- on config options. Newer versions of binutils detect references from +- retained sections to discarded sections and flag an error. Pointers to +- __devexit functions must use __devexit_p(function_name), the wrapper will +- insert either the function_name or NULL, depending on the config options. ++/* ++ * Functions marked as __devexit may be discarded at kernel link time, ++ * depending on config options. Newer versions of binutils detect ++ * references from retained sections to discarded sections and flag an ++ * error. ++ * ++ * Pointers to __devexit functions must use __devexit_p(function_name), ++ * the wrapper will insert either the function_name or NULL, depending on ++ * the config options. 
+ */ + #if defined(MODULE) || defined(CONFIG_HOTPLUG) +-#define __devexit_p(x) x ++# define __devexit_p(x) x + #else +-#define __devexit_p(x) NULL ++# define __devexit_p(x) ((void *)((long)(x) & 0) /* NULL */) + #endif + + #ifdef MODULE +Index: linux-2.6-tip/include/linux/init_task.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/init_task.h ++++ linux-2.6-tip/include/linux/init_task.h +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + extern struct files_struct init_files; + extern struct fs_struct init_fs; +@@ -51,7 +52,7 @@ extern struct fs_struct init_fs; + .cputimer = { \ + .cputime = INIT_CPUTIME, \ + .running = 0, \ +- .lock = __SPIN_LOCK_UNLOCKED(sig.cputimer.lock), \ ++ .lock = RAW_SPIN_LOCK_UNLOCKED(sig.cputimer.lock), \ + }, \ + } + +@@ -120,6 +121,18 @@ extern struct group_info init_groups; + + extern struct cred init_cred; + ++#ifdef CONFIG_PERF_COUNTERS ++# define INIT_PERF_COUNTERS(tsk) \ ++ .perf_counter_ctx.counter_list = \ ++ LIST_HEAD_INIT(tsk.perf_counter_ctx.counter_list), \ ++ .perf_counter_ctx.event_list = \ ++ LIST_HEAD_INIT(tsk.perf_counter_ctx.event_list), \ ++ .perf_counter_ctx.lock = \ ++ RAW_SPIN_LOCK_UNLOCKED(tsk.perf_counter_ctx.lock), ++#else ++# define INIT_PERF_COUNTERS(tsk) ++#endif ++ + /* + * INIT_TASK is used to set up the first task table, touch at + * your own risk!. Base=0, limit=0x1fffff (=2MB) +@@ -147,6 +160,7 @@ extern struct cred init_cred; + .nr_cpus_allowed = NR_CPUS, \ + }, \ + .tasks = LIST_HEAD_INIT(tsk.tasks), \ ++ .pushable_tasks = PLIST_NODE_INIT(tsk.pushable_tasks, MAX_PRIO), \ + .ptraced = LIST_HEAD_INIT(tsk.ptraced), \ + .ptrace_entry = LIST_HEAD_INIT(tsk.ptrace_entry), \ + .real_parent = &tsk, \ +@@ -173,8 +187,9 @@ extern struct cred init_cred; + .journal_info = NULL, \ + .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ + .fs_excl = ATOMIC_INIT(0), \ +- .pi_lock = __SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ + .timer_slack_ns = 50000, /* 50 usec default slack */ \ ++ .posix_timer_list = NULL, \ ++ .pi_lock = RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ + .pids = { \ + [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), \ + [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), \ +@@ -184,6 +199,7 @@ extern struct cred init_cred; + INIT_IDS \ + INIT_TRACE_IRQFLAGS \ + INIT_LOCKDEP \ ++ INIT_PERF_COUNTERS(tsk) \ + } + + +Index: linux-2.6-tip/include/linux/intel-iommu.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/intel-iommu.h ++++ linux-2.6-tip/include/linux/intel-iommu.h +@@ -292,6 +292,8 @@ struct intel_iommu { + spinlock_t register_lock; /* protect register handling */ + int seq_id; /* sequence id of the iommu */ + int agaw; /* agaw of this iommu */ ++ unsigned int irq; ++ unsigned char name[13]; /* Device Name */ + + #ifdef CONFIG_DMAR + unsigned long *domain_ids; /* bitmap of domains */ +@@ -299,8 +301,6 @@ struct intel_iommu { + spinlock_t lock; /* protect context, domain ids */ + struct root_entry *root_entry; /* virtual address */ + +- unsigned int irq; +- unsigned char name[7]; /* Device Name */ + struct iommu_flush flush; + #endif + struct q_inval *qi; /* Queued invalidation info */ +@@ -321,6 +321,7 @@ extern struct dmar_drhd_unit * dmar_find + extern int alloc_iommu(struct dmar_drhd_unit *drhd); + extern void free_iommu(struct intel_iommu *iommu); + extern int dmar_enable_qi(struct intel_iommu *iommu); ++extern void dmar_disable_qi(struct intel_iommu *iommu); + extern void qi_global_iec(struct intel_iommu *iommu); + 
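[Editor's note, not part of the patch: to make the rewrapped linux/init.h comment above concrete, here is a minimal hedged sketch of the intended __devexit_p() usage in a driver. The example_* identifiers and the vendor/device IDs are invented for illustration.]

#include <linux/init.h>
#include <linux/pci.h>

static const struct pci_device_id example_ids[] = {
	{ PCI_DEVICE(0x1234, 0x5678) },	/* made-up vendor/device IDs */
	{ }
};

static int __devinit example_probe(struct pci_dev *pdev,
				   const struct pci_device_id *id)
{
	/* ... enable the device, map resources ... */
	return 0;
}

static void __devexit example_remove(struct pci_dev *pdev)
{
	/* ... undo whatever the probe routine set up ... */
}

static struct pci_driver example_driver = {
	.name     = "example",
	.id_table = example_ids,
	.probe    = example_probe,
	/*
	 * Per the comment above: becomes NULL when __devexit code is
	 * discarded, instead of a dangling pointer into a dropped section.
	 */
	.remove   = __devexit_p(example_remove),
};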
+ extern int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, +@@ -331,11 +332,4 @@ extern int qi_flush_iotlb(struct intel_i + + extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); + +-extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); +-extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t); +-extern dma_addr_t intel_map_single(struct device *, phys_addr_t, size_t, int); +-extern void intel_unmap_single(struct device *, dma_addr_t, size_t, int); +-extern int intel_map_sg(struct device *, struct scatterlist *, int, int); +-extern void intel_unmap_sg(struct device *, struct scatterlist *, int, int); +- + #endif +Index: linux-2.6-tip/include/linux/interrupt.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/interrupt.h ++++ linux-2.6-tip/include/linux/interrupt.h +@@ -54,13 +54,26 @@ + #define IRQF_SAMPLE_RANDOM 0x00000040 + #define IRQF_SHARED 0x00000080 + #define IRQF_PROBE_SHARED 0x00000100 +-#define IRQF_TIMER 0x00000200 ++#define __IRQF_TIMER 0x00000200 + #define IRQF_PERCPU 0x00000400 + #define IRQF_NOBALANCING 0x00000800 + #define IRQF_IRQPOLL 0x00001000 ++#define IRQF_NODELAY 0x00002000 ++#define IRQF_TIMER (__IRQF_TIMER | IRQF_NODELAY) + + typedef irqreturn_t (*irq_handler_t)(int, void *); + ++/** ++ * struct irqaction - per interrupt action descriptor ++ * @handler: interrupt handler function ++ * @flags: flags (see IRQF_* above) ++ * @mask: no comment as it is useless and about to be removed ++ * @name: name of the device ++ * @dev_id: cookie to identify the device ++ * @next: pointer to the next irqaction for shared interrupts ++ * @irq: interrupt number ++ * @dir: pointer to the proc/irq/NN/name entry ++ */ + struct irqaction { + irq_handler_t handler; + unsigned long flags; +@@ -69,19 +82,23 @@ struct irqaction { + void *dev_id; + struct irqaction *next; + int irq; +- struct proc_dir_entry *dir; ++ struct proc_dir_entry *dir, *threaded; + }; + + extern irqreturn_t no_action(int cpl, void *dev_id); +-extern int __must_check request_irq(unsigned int, irq_handler_t handler, +- unsigned long, const char *, void *); ++ ++extern int __must_check ++request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, ++ const char *name, void *dev); ++ + extern void free_irq(unsigned int, void *); + + struct device; + +-extern int __must_check devm_request_irq(struct device *dev, unsigned int irq, +- irq_handler_t handler, unsigned long irqflags, +- const char *devname, void *dev_id); ++extern int __must_check ++devm_request_irq(struct device *dev, unsigned int irq, irq_handler_t handler, ++ unsigned long irqflags, const char *devname, void *dev_id); ++ + extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); + + /* +@@ -99,7 +116,7 @@ extern void devm_free_irq(struct device + #ifdef CONFIG_LOCKDEP + # define local_irq_enable_in_hardirq() do { } while (0) + #else +-# define local_irq_enable_in_hardirq() local_irq_enable() ++# define local_irq_enable_in_hardirq() local_irq_enable_nort() + #endif + + extern void disable_irq_nosync(unsigned int irq); +@@ -224,6 +241,7 @@ static inline int disable_irq_wake(unsig + + #ifndef __ARCH_SET_SOFTIRQ_PENDING + #define set_softirq_pending(x) (local_softirq_pending() = (x)) ++// FIXME: PREEMPT_RT: set_bit()? 
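[Editor's note, not part of the patch: as a reference point for the reflowed request_irq()/free_irq() prototypes and the struct irqaction kerneldoc in the linux/interrupt.h hunk above, a minimal hedged sketch of the usual calling pattern. struct example_device and the example_* names are invented for illustration.]

#include <linux/interrupt.h>

struct example_device {
	int irq;
};

static irqreturn_t example_isr(int irq, void *dev_id)
{
	struct example_device *dev = dev_id;

	/*
	 * On a shared line, check the device's own status register and
	 * return IRQ_NONE when it did not raise the interrupt, so the core
	 * can try the other handlers registered on this line.
	 */
	(void)dev;

	/* ... acknowledge and handle the interrupt ... */
	return IRQ_HANDLED;
}

static int example_setup_irq(struct example_device *dev)
{
	/*
	 * dev_id must be unique (and non-NULL for IRQF_SHARED): it is passed
	 * back to the handler and used as the key for free_irq().
	 */
	return request_irq(dev->irq, example_isr, IRQF_SHARED,
			   "example", dev);
}

static void example_release_irq(struct example_device *dev)
{
	free_irq(dev->irq, dev);
}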
+ #define or_softirq_pending(x) (local_softirq_pending() |= (x)) + #endif + +@@ -254,10 +272,17 @@ enum + SCHED_SOFTIRQ, + HRTIMER_SOFTIRQ, + RCU_SOFTIRQ, /* Preferable RCU should always be the last softirq */ ++ /* Entries after this are ignored in split softirq mode */ ++ MAX_SOFTIRQ, + + NR_SOFTIRQS + }; + ++/* map softirq index to softirq name. update 'softirq_to_name' in ++ * kernel/softirq.c when adding a new softirq. ++ */ ++extern char *softirq_to_name[NR_SOFTIRQS]; ++ + /* softirq mask and active fields moved to irq_cpustat_t in + * asm/hardirq.h to get better cache usage. KAO + */ +@@ -267,14 +292,21 @@ struct softirq_action + void (*action)(struct softirq_action *); + }; + ++#ifdef CONFIG_PREEMPT_HARDIRQS ++# define __raise_softirq_irqoff(nr) raise_softirq_irqoff(nr) ++# define __do_raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) ++#else ++# define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) ++# define __do_raise_softirq_irqoff(nr) __raise_softirq_irqoff(nr) ++#endif ++ + asmlinkage void do_softirq(void); + asmlinkage void __do_softirq(void); + extern void open_softirq(int nr, void (*action)(struct softirq_action *)); + extern void softirq_init(void); +-#define __raise_softirq_irqoff(nr) do { or_softirq_pending(1UL << (nr)); } while (0) + extern void raise_softirq_irqoff(unsigned int nr); + extern void raise_softirq(unsigned int nr); +-extern void wakeup_softirqd(void); ++extern void softirq_check_pending_idle(void); + + /* This is the worklist that queues up per-cpu softirq work. + * +@@ -284,6 +316,11 @@ extern void wakeup_softirqd(void); + * only be accessed by the local cpu that they are for. + */ + DECLARE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list); ++#ifdef CONFIG_PREEMPT_SOFTIRQS ++extern void wait_for_softirq(int softirq); ++#else ++# define wait_for_softirq(x) do {} while(0) ++#endif + + /* Try to send a softirq to a remote cpu. If this cannot be done, the + * work will be queued to the local cpu. +@@ -309,8 +346,9 @@ extern void __send_remote_softirq(struct + to be executed on some cpu at least once after this. + * If the tasklet is already scheduled, but its excecution is still not + started, it will be executed only once. +- * If this tasklet is already running on another CPU (or schedule is called +- from tasklet itself), it is rescheduled for later. ++ * If this tasklet is already running on another CPU, it is rescheduled ++ for later. ++ * Schedule must not be called from the tasklet itself (a lockup occurs) + * Tasklet is strictly serialized wrt itself, but not + wrt another tasklets. If client needs some intertask synchronization, + he makes it with spinlocks. 
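[Editor's note, not part of the patch: for readers unfamiliar with the tasklet API whose serialization rules the comment above spells out, a minimal usage sketch follows. It is illustrative only; the example_* identifiers are invented.]

#include <linux/interrupt.h>

/* Deferred work: runs in softirq context, so it must not sleep. */
static void example_tasklet_fn(unsigned long data)
{
	/* ... process whatever the interrupt handler queued up ... */
}

static DECLARE_TASKLET(example_tasklet, example_tasklet_fn, 0);

/* Top half: defer the heavier work and return quickly. */
static irqreturn_t example_isr(int irq, void *dev_id)
{
	tasklet_schedule(&example_tasklet);
	return IRQ_HANDLED;
}

/*
 * Teardown: per the rules above, never reschedule from inside the tasklet
 * itself, and kill it before freeing any data it touches.
 */
static void example_teardown(void)
{
	tasklet_kill(&example_tasklet);
}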
+@@ -335,27 +373,36 @@ struct tasklet_struct name = { NULL, 0, + enum + { + TASKLET_STATE_SCHED, /* Tasklet is scheduled for execution */ +- TASKLET_STATE_RUN /* Tasklet is running (SMP only) */ ++ TASKLET_STATE_RUN, /* Tasklet is running (SMP only) */ ++ TASKLET_STATE_PENDING /* Tasklet is pending */ + }; + +-#ifdef CONFIG_SMP ++#define TASKLET_STATEF_SCHED (1 << TASKLET_STATE_SCHED) ++#define TASKLET_STATEF_RUN (1 << TASKLET_STATE_RUN) ++#define TASKLET_STATEF_PENDING (1 << TASKLET_STATE_PENDING) ++ ++#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) + static inline int tasklet_trylock(struct tasklet_struct *t) + { + return !test_and_set_bit(TASKLET_STATE_RUN, &(t)->state); + } + ++static inline int tasklet_tryunlock(struct tasklet_struct *t) ++{ ++ return cmpxchg(&t->state, TASKLET_STATEF_RUN, 0) == TASKLET_STATEF_RUN; ++} ++ + static inline void tasklet_unlock(struct tasklet_struct *t) + { + smp_mb__before_clear_bit(); + clear_bit(TASKLET_STATE_RUN, &(t)->state); + } + +-static inline void tasklet_unlock_wait(struct tasklet_struct *t) +-{ +- while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); } +-} ++extern void tasklet_unlock_wait(struct tasklet_struct *t); ++ + #else + #define tasklet_trylock(t) 1 ++#define tasklet_tryunlock(t) 1 + #define tasklet_unlock_wait(t) do { } while (0) + #define tasklet_unlock(t) do { } while (0) + #endif +@@ -376,6 +423,20 @@ static inline void tasklet_hi_schedule(s + __tasklet_hi_schedule(t); + } + ++extern void __tasklet_hi_schedule_first(struct tasklet_struct *t); ++ ++/* ++ * This version avoids touching any other tasklets. Needed for kmemcheck ++ * in order not to take any page faults while enqueueing this tasklet; ++ * consider VERY carefully whether you really need this or ++ * tasklet_hi_schedule()... 
++ */ ++static inline void tasklet_hi_schedule_first(struct tasklet_struct *t) ++{ ++ if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) ++ __tasklet_hi_schedule_first(t); ++} ++ + + static inline void tasklet_disable_nosync(struct tasklet_struct *t) + { +@@ -390,22 +451,14 @@ static inline void tasklet_disable(struc + smp_mb(); + } + +-static inline void tasklet_enable(struct tasklet_struct *t) +-{ +- smp_mb__before_atomic_dec(); +- atomic_dec(&t->count); +-} +- +-static inline void tasklet_hi_enable(struct tasklet_struct *t) +-{ +- smp_mb__before_atomic_dec(); +- atomic_dec(&t->count); +-} ++extern void tasklet_enable(struct tasklet_struct *t); ++extern void tasklet_hi_enable(struct tasklet_struct *t); + + extern void tasklet_kill(struct tasklet_struct *t); + extern void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu); + extern void tasklet_init(struct tasklet_struct *t, + void (*func)(unsigned long), unsigned long data); ++void takeover_tasklets(unsigned int cpu); + + /* + * Autoprobing for irqs: +@@ -463,12 +516,52 @@ static inline void init_irq_proc(void) + } + #endif + ++#if defined(CONFIG_GENERIC_HARDIRQS) && defined(CONFIG_DEBUG_SHIRQ) ++extern void debug_poll_all_shared_irqs(void); ++#else ++static inline void debug_poll_all_shared_irqs(void) { } ++#endif ++ + int show_interrupts(struct seq_file *p, void *v); + + struct irq_desc; + + extern int early_irq_init(void); ++extern int arch_probe_nr_irqs(void); + extern int arch_early_irq_init(void); + extern int arch_init_chip_data(struct irq_desc *desc, int cpu); + ++#ifdef CONFIG_PREEMPT_RT ++# define local_irq_disable_nort() do { } while (0) ++# define local_irq_enable_nort() do { } while (0) ++# define local_irq_enable_rt() local_irq_enable() ++# define local_irq_save_nort(flags) do { local_save_flags(flags); } while (0) ++# define local_irq_restore_nort(flags) do { (void)(flags); } while (0) ++# define spin_lock_nort(lock) do { } while (0) ++# define spin_unlock_nort(lock) do { } while (0) ++# define spin_lock_bh_nort(lock) do { } while (0) ++# define spin_unlock_bh_nort(lock) do { } while (0) ++# define spin_lock_rt(lock) spin_lock(lock) ++# define spin_unlock_rt(lock) spin_unlock(lock) ++# define smp_processor_id_rt(cpu) (cpu) ++# define in_atomic_rt() (!oops_in_progress && \ ++ (in_atomic() || irqs_disabled())) ++# define read_trylock_rt(lock) ({read_lock(lock); 1; }) ++#else ++# define local_irq_disable_nort() local_irq_disable() ++# define local_irq_enable_nort() local_irq_enable() ++# define local_irq_enable_rt() do { } while (0) ++# define local_irq_save_nort(flags) local_irq_save(flags) ++# define local_irq_restore_nort(flags) local_irq_restore(flags) ++# define spin_lock_rt(lock) do { } while (0) ++# define spin_unlock_rt(lock) do { } while (0) ++# define spin_lock_nort(lock) spin_lock(lock) ++# define spin_unlock_nort(lock) spin_unlock(lock) ++# define spin_lock_bh_nort(lock) spin_lock_bh(lock) ++# define spin_unlock_bh_nort(lock) spin_unlock_bh(lock) ++# define smp_processor_id_rt(cpu) smp_processor_id() ++# define in_atomic_rt() 0 ++# define read_trylock_rt(lock) read_trylock(lock) ++#endif ++ + #endif +Index: linux-2.6-tip/include/linux/ip_vs.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/ip_vs.h ++++ linux-2.6-tip/include/linux/ip_vs.h +@@ -96,10 +96,10 @@ + */ + struct ip_vs_service_user { + /* virtual service addresses */ +- u_int16_t protocol; ++ __u16 protocol; + __be32 addr; /* virtual ip address */ + __be16 port; +- u_int32_t 
fwmark; /* firwall mark of service */ ++ __u32 fwmark; /* firwall mark of service */ + + /* virtual service options */ + char sched_name[IP_VS_SCHEDNAME_MAXLEN]; +@@ -119,8 +119,8 @@ struct ip_vs_dest_user { + int weight; /* destination weight */ + + /* thresholds for active connections */ +- u_int32_t u_threshold; /* upper threshold */ +- u_int32_t l_threshold; /* lower threshold */ ++ __u32 u_threshold; /* upper threshold */ ++ __u32 l_threshold; /* lower threshold */ + }; + + +@@ -159,10 +159,10 @@ struct ip_vs_getinfo { + /* The argument to IP_VS_SO_GET_SERVICE */ + struct ip_vs_service_entry { + /* which service: user fills in these */ +- u_int16_t protocol; ++ __u16 protocol; + __be32 addr; /* virtual address */ + __be16 port; +- u_int32_t fwmark; /* firwall mark of service */ ++ __u32 fwmark; /* firwall mark of service */ + + /* service options */ + char sched_name[IP_VS_SCHEDNAME_MAXLEN]; +@@ -184,12 +184,12 @@ struct ip_vs_dest_entry { + unsigned conn_flags; /* connection flags */ + int weight; /* destination weight */ + +- u_int32_t u_threshold; /* upper threshold */ +- u_int32_t l_threshold; /* lower threshold */ ++ __u32 u_threshold; /* upper threshold */ ++ __u32 l_threshold; /* lower threshold */ + +- u_int32_t activeconns; /* active connections */ +- u_int32_t inactconns; /* inactive connections */ +- u_int32_t persistconns; /* persistent connections */ ++ __u32 activeconns; /* active connections */ ++ __u32 inactconns; /* inactive connections */ ++ __u32 persistconns; /* persistent connections */ + + /* statistics */ + struct ip_vs_stats_user stats; +@@ -199,10 +199,10 @@ struct ip_vs_dest_entry { + /* The argument to IP_VS_SO_GET_DESTS */ + struct ip_vs_get_dests { + /* which service: user fills in these */ +- u_int16_t protocol; ++ __u16 protocol; + __be32 addr; /* virtual address */ + __be16 port; +- u_int32_t fwmark; /* firwall mark of service */ ++ __u32 fwmark; /* firwall mark of service */ + + /* number of real servers */ + unsigned int num_dests; +Index: linux-2.6-tip/include/linux/irq.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/irq.h ++++ linux-2.6-tip/include/linux/irq.h +@@ -20,10 +20,12 @@ + #include + #include + #include ++#include + + #include + #include + #include ++#include + + struct irq_desc; + typedef void (*irq_flow_handler_t)(unsigned int irq, +@@ -65,6 +67,7 @@ typedef void (*irq_flow_handler_t)(unsig + #define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ + #define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */ + #define IRQ_AFFINITY_SET 0x02000000 /* IRQ affinity was set from userspace*/ ++#define IRQ_NODELAY 0x40000000 /* IRQ must run immediately */ + + #ifdef CONFIG_IRQ_PER_CPU + # define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) +@@ -151,6 +154,8 @@ struct irq_2_iommu; + * @irq_count: stats field to detect stalled irqs + * @last_unhandled: aging timer for unhandled count + * @irqs_unhandled: stats field for spurious unhandled interrupts ++ * @thread: Thread pointer for threaded preemptible irq handling ++ * @wait_for_handler: Waitqueue to wait for a running preemptible handler + * @lock: locking for SMP + * @affinity: IRQ affinity on SMP + * @cpu: cpu index useful for balancing +@@ -160,12 +165,10 @@ struct irq_2_iommu; + */ + struct irq_desc { + unsigned int irq; +-#ifdef CONFIG_SPARSE_IRQ + struct timer_rand_state *timer_rand_state; + unsigned int *kstat_irqs; +-# ifdef CONFIG_INTR_REMAP ++#ifdef CONFIG_INTR_REMAP + struct 
irq_2_iommu *irq_2_iommu; +-# endif + #endif + irq_flow_handler_t handle_irq; + struct irq_chip *chip; +@@ -180,13 +183,16 @@ struct irq_desc { + unsigned int irq_count; /* For detecting broken IRQs */ + unsigned long last_unhandled; /* Aging timer for unhandled count */ + unsigned int irqs_unhandled; +- spinlock_t lock; ++ struct task_struct *thread; ++ wait_queue_head_t wait_for_handler; ++ cycles_t timestamp; ++ raw_spinlock_t lock; + #ifdef CONFIG_SMP +- cpumask_t affinity; ++ cpumask_var_t affinity; + unsigned int cpu; +-#endif + #ifdef CONFIG_GENERIC_PENDING_IRQ +- cpumask_t pending_mask; ++ cpumask_var_t pending_mask; ++#endif + #endif + #ifdef CONFIG_PROC_FS + struct proc_dir_entry *dir; +@@ -202,12 +208,6 @@ extern void arch_free_chip_data(struct i + extern struct irq_desc irq_desc[NR_IRQS]; + #else /* CONFIG_SPARSE_IRQ */ + extern struct irq_desc *move_irq_desc(struct irq_desc *old_desc, int cpu); +- +-#define kstat_irqs_this_cpu(DESC) \ +- ((DESC)->kstat_irqs[smp_processor_id()]) +-#define kstat_incr_irqs_this_cpu(irqno, DESC) \ +- ((DESC)->kstat_irqs[smp_processor_id()]++) +- + #endif /* CONFIG_SPARSE_IRQ */ + + extern struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu); +@@ -226,7 +226,6 @@ irq_remap_to_desc(unsigned int irq, stru + * Migration helpers for obsolete names, they will go away: + */ + #define hw_interrupt_type irq_chip +-typedef struct irq_chip hw_irq_controller; + #define no_irq_type no_irq_chip + typedef struct irq_desc irq_desc_t; + +@@ -236,6 +235,7 @@ typedef struct irq_desc irq_desc_t; + #include + + extern int setup_irq(unsigned int irq, struct irqaction *new); ++extern void remove_irq(unsigned int irq, struct irqaction *act); + + #ifdef CONFIG_GENERIC_HARDIRQS + +@@ -280,7 +280,7 @@ static inline int irq_balancing_disabled + } + + /* Handle irq action chains: */ +-extern int handle_IRQ_event(unsigned int irq, struct irqaction *action); ++extern irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action); + + /* + * Built-in IRQ handlers for various IRQ types, +@@ -325,7 +325,7 @@ static inline void generic_handle_irq(un + + /* Handling of unhandled and spurious interrupts: */ + extern void note_interrupt(unsigned int irq, struct irq_desc *desc, +- int action_ret); ++ irqreturn_t action_ret); + + /* Resending of interrupts :*/ + void check_irq_resend(struct irq_desc *desc, unsigned int irq); +@@ -418,8 +418,102 @@ extern int set_irq_msi(unsigned int irq, + #define get_irq_desc_data(desc) ((desc)->handler_data) + #define get_irq_desc_msi(desc) ((desc)->msi_desc) + +-#endif /* CONFIG_GENERIC_HARDIRQS */ ++/* Early initialization of irqs */ ++extern void early_init_hardirqs(void); ++ ++#if defined(CONFIG_PREEMPT_HARDIRQS) ++extern void init_hardirqs(void); ++#else ++static inline void init_hardirqs(void) { } ++#endif ++ ++#else /* end GENERIC HARDIRQS */ ++ ++static inline void early_init_hardirqs(void) { } ++static inline void init_hardirqs(void) { } ++ ++#endif /* !CONFIG_GENERIC_HARDIRQS */ + + #endif /* !CONFIG_S390 */ + ++#ifdef CONFIG_SMP ++/** ++ * init_alloc_desc_masks - allocate cpumasks for irq_desc ++ * @desc: pointer to irq_desc struct ++ * @cpu: cpu which will be handling the cpumasks ++ * @boot: true if need bootmem ++ * ++ * Allocates affinity and pending_mask cpumask if required. ++ * Returns true if successful (or not required). ++ * Side effect: affinity has all bits set, pending_mask has all bits clear. 
++ */ ++static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, ++ bool boot) ++{ ++ int node; ++ ++ if (boot) { ++ alloc_bootmem_cpumask_var(&desc->affinity); ++ cpumask_setall(desc->affinity); ++ ++#ifdef CONFIG_GENERIC_PENDING_IRQ ++ alloc_bootmem_cpumask_var(&desc->pending_mask); ++ cpumask_clear(desc->pending_mask); ++#endif ++ return true; ++ } ++ ++ node = cpu_to_node(cpu); ++ ++ if (!alloc_cpumask_var_node(&desc->affinity, GFP_ATOMIC, node)) ++ return false; ++ cpumask_setall(desc->affinity); ++ ++#ifdef CONFIG_GENERIC_PENDING_IRQ ++ if (!alloc_cpumask_var_node(&desc->pending_mask, GFP_ATOMIC, node)) { ++ free_cpumask_var(desc->affinity); ++ return false; ++ } ++ cpumask_clear(desc->pending_mask); ++#endif ++ return true; ++} ++ ++/** ++ * init_copy_desc_masks - copy cpumasks for irq_desc ++ * @old_desc: pointer to old irq_desc struct ++ * @new_desc: pointer to new irq_desc struct ++ * ++ * Insures affinity and pending_masks are copied to new irq_desc. ++ * If !CONFIG_CPUMASKS_OFFSTACK the cpumasks are embedded in the ++ * irq_desc struct so the copy is redundant. ++ */ ++ ++static inline void init_copy_desc_masks(struct irq_desc *old_desc, ++ struct irq_desc *new_desc) ++{ ++#ifdef CONFIG_CPUMASKS_OFFSTACK ++ cpumask_copy(new_desc->affinity, old_desc->affinity); ++ ++#ifdef CONFIG_GENERIC_PENDING_IRQ ++ cpumask_copy(new_desc->pending_mask, old_desc->pending_mask); ++#endif ++#endif ++} ++ ++#else /* !CONFIG_SMP */ ++ ++static inline bool init_alloc_desc_masks(struct irq_desc *desc, int cpu, ++ bool boot) ++{ ++ return true; ++} ++ ++static inline void init_copy_desc_masks(struct irq_desc *old_desc, ++ struct irq_desc *new_desc) ++{ ++} ++ ++#endif /* CONFIG_SMP */ ++ + #endif /* _LINUX_IRQ_H */ +Index: linux-2.6-tip/include/linux/irqflags.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/irqflags.h ++++ linux-2.6-tip/include/linux/irqflags.h +@@ -13,6 +13,9 @@ + + #include + ++/* dummy wrapper for now: */ ++#define BUILD_CHECK_IRQ_FLAGS(flags) ++ + #ifdef CONFIG_TRACE_IRQFLAGS + extern void trace_softirqs_on(unsigned long ip); + extern void trace_softirqs_off(unsigned long ip); +@@ -24,8 +27,8 @@ + # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) + # define trace_hardirq_enter() do { current->hardirq_context++; } while (0) + # define trace_hardirq_exit() do { current->hardirq_context--; } while (0) +-# define trace_softirq_enter() do { current->softirq_context++; } while (0) +-# define trace_softirq_exit() do { current->softirq_context--; } while (0) ++# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) ++# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) + # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, + #else + # define trace_hardirqs_on() do { } while (0) +@@ -38,8 +41,8 @@ + # define trace_softirqs_enabled(p) 0 + # define trace_hardirq_enter() do { } while (0) + # define trace_hardirq_exit() do { } while (0) +-# define trace_softirq_enter() do { } while (0) +-# define trace_softirq_exit() do { } while (0) ++# define lockdep_softirq_enter() do { } while (0) ++# define lockdep_softirq_exit() do { } while (0) + # define INIT_TRACE_IRQFLAGS + #endif + +Index: linux-2.6-tip/include/linux/irqnr.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/irqnr.h ++++ linux-2.6-tip/include/linux/irqnr.h +@@ -20,6 +20,7 @@ + + # define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs - 
1; irq >= 0; irq--) ++ + #else /* CONFIG_GENERIC_HARDIRQS */ + + extern int nr_irqs; +@@ -28,13 +29,17 @@ extern struct irq_desc *irq_to_desc(unsi + # define for_each_irq_desc(irq, desc) \ + for (irq = 0, desc = irq_to_desc(irq); irq < nr_irqs; \ + irq++, desc = irq_to_desc(irq)) \ +- if (desc) ++ if (!desc) \ ++ ; \ ++ else + + + # define for_each_irq_desc_reverse(irq, desc) \ + for (irq = nr_irqs - 1, desc = irq_to_desc(irq); irq >= 0; \ + irq--, desc = irq_to_desc(irq)) \ +- if (desc) ++ if (!desc) \ ++ ; \ ++ else + + #endif /* CONFIG_GENERIC_HARDIRQS */ + +Index: linux-2.6-tip/include/linux/irqreturn.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/irqreturn.h ++++ linux-2.6-tip/include/linux/irqreturn.h +@@ -1,25 +1,17 @@ +-/* irqreturn.h */ + #ifndef _LINUX_IRQRETURN_H + #define _LINUX_IRQRETURN_H + +-/* +- * For 2.4.x compatibility, 2.4.x can use +- * +- * typedef void irqreturn_t; +- * #define IRQ_NONE +- * #define IRQ_HANDLED +- * #define IRQ_RETVAL(x) +- * +- * To mix old-style and new-style irq handler returns. +- * +- * IRQ_NONE means we didn't handle it. +- * IRQ_HANDLED means that we did have a valid interrupt and handled it. +- * IRQ_RETVAL(x) selects on the two depending on x being non-zero (for handled) ++/** ++ * enum irqreturn ++ * @IRQ_NONE interrupt was not from this device ++ * @IRQ_HANDLED interrupt was handled by this device + */ +-typedef int irqreturn_t; ++enum irqreturn { ++ IRQ_NONE, ++ IRQ_HANDLED, ++}; + +-#define IRQ_NONE (0) +-#define IRQ_HANDLED (1) +-#define IRQ_RETVAL(x) ((x) != 0) ++typedef enum irqreturn irqreturn_t; ++#define IRQ_RETVAL(x) ((x) != IRQ_NONE) + + #endif +Index: linux-2.6-tip/include/linux/ivtvfb.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/ivtvfb.h ++++ linux-2.6-tip/include/linux/ivtvfb.h +@@ -33,6 +33,6 @@ struct ivtvfb_dma_frame { + }; + + #define IVTVFB_IOC_DMA_FRAME _IOW('V', BASE_VIDIOC_PRIVATE+0, struct ivtvfb_dma_frame) +-#define FBIO_WAITFORVSYNC _IOW('F', 0x20, u_int32_t) ++#define FBIO_WAITFORVSYNC _IOW('F', 0x20, __u32) + + #endif +Index: linux-2.6-tip/include/linux/jffs2.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/jffs2.h ++++ linux-2.6-tip/include/linux/jffs2.h +@@ -12,6 +12,7 @@ + #ifndef __LINUX_JFFS2_H__ + #define __LINUX_JFFS2_H__ + ++#include + #include + + /* You must include something which defines the C99 uintXX_t types. +@@ -91,15 +92,15 @@ + byteswapping */ + + typedef struct { +- uint32_t v32; ++ __u32 v32; + } __attribute__((packed)) jint32_t; + + typedef struct { +- uint32_t m; ++ __u32 m; + } __attribute__((packed)) jmode_t; + + typedef struct { +- uint16_t v16; ++ __u16 v16; + } __attribute__((packed)) jint16_t; + + struct jffs2_unknown_node +@@ -121,12 +122,12 @@ struct jffs2_raw_dirent + jint32_t version; + jint32_t ino; /* == zero for unlink */ + jint32_t mctime; +- uint8_t nsize; +- uint8_t type; +- uint8_t unused[2]; ++ __u8 nsize; ++ __u8 type; ++ __u8 unused[2]; + jint32_t node_crc; + jint32_t name_crc; +- uint8_t name[0]; ++ __u8 name[0]; + }; + + /* The JFFS2 raw inode structure: Used for storage on physical media. */ +@@ -153,12 +154,12 @@ struct jffs2_raw_inode + jint32_t offset; /* Where to begin to write. */ + jint32_t csize; /* (Compressed) data size */ + jint32_t dsize; /* Size of the node's data. 
(after decompression) */ +- uint8_t compr; /* Compression algorithm used */ +- uint8_t usercompr; /* Compression algorithm requested by the user */ ++ __u8 compr; /* Compression algorithm used */ ++ __u8 usercompr; /* Compression algorithm requested by the user */ + jint16_t flags; /* See JFFS2_INO_FLAG_* */ + jint32_t data_crc; /* CRC for the (compressed) data. */ + jint32_t node_crc; /* CRC for the raw inode (excluding data) */ +- uint8_t data[0]; ++ __u8 data[0]; + }; + + struct jffs2_raw_xattr { +@@ -168,12 +169,12 @@ struct jffs2_raw_xattr { + jint32_t hdr_crc; + jint32_t xid; /* XATTR identifier number */ + jint32_t version; +- uint8_t xprefix; +- uint8_t name_len; ++ __u8 xprefix; ++ __u8 name_len; + jint16_t value_len; + jint32_t data_crc; + jint32_t node_crc; +- uint8_t data[0]; ++ __u8 data[0]; + } __attribute__((packed)); + + struct jffs2_raw_xref +Index: linux-2.6-tip/include/linux/kernel.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/kernel.h ++++ linux-2.6-tip/include/linux/kernel.h +@@ -122,7 +122,7 @@ extern int _cond_resched(void); + # define might_resched() do { } while (0) + #endif + +-#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP ++#if defined(CONFIG_DEBUG_SPINLOCK_SLEEP) || defined(CONFIG_DEBUG_PREEMPT) + void __might_sleep(char *file, int line); + /** + * might_sleep - annotation for functions that can sleep +@@ -242,6 +242,19 @@ extern struct ratelimit_state printk_rat + extern int printk_ratelimit(void); + extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, + unsigned int interval_msec); ++ ++/* ++ * Print a one-time message (analogous to WARN_ONCE() et al): ++ */ ++#define printk_once(x...) ({ \ ++ static int __print_once = 1; \ ++ \ ++ if (__print_once) { \ ++ __print_once = 0; \ ++ printk(x); \ ++ } \ ++}) ++ + #else + static inline int vprintk(const char *s, va_list args) + __attribute__ ((format (printf, 1, 0))); +@@ -253,6 +266,10 @@ static inline int printk_ratelimit(void) + static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies, \ + unsigned int interval_msec) \ + { return false; } ++ ++/* No effect, but we still get type checking even in the !PRINTK case: */ ++#define printk_once(x...) printk(x) ++ + #endif + + extern int printk_needs_cpu(int cpu); +@@ -261,6 +278,12 @@ extern void printk_tick(void); + extern void asmlinkage __attribute__((format(printf, 1, 2))) + early_printk(const char *fmt, ...); + ++#ifdef CONFIG_PREEMPT_RT ++extern void zap_rt_locks(void); ++#else ++# define zap_rt_locks() do { } while (0) ++#endif ++ + unsigned long int_sqrt(unsigned long); + + static inline void console_silent(void) +@@ -289,6 +312,7 @@ extern int root_mountflags; + /* Values used for system_state */ + extern enum system_states { + SYSTEM_BOOTING, ++ SYSTEM_BOOTING_SCHEDULER_OK, + SYSTEM_RUNNING, + SYSTEM_HALT, + SYSTEM_POWER_OFF, +@@ -368,6 +392,139 @@ static inline char *pack_hex_byte(char * + #endif + + /* ++ * General tracing related utility functions - trace_printk(), ++ * tracing_on/tracing_off and tracing_start()/tracing_stop ++ * ++ * Use tracing_on/tracing_off when you want to quickly turn on or off ++ * tracing. It simply enables or disables the recording of the trace events. ++ * This also corresponds to the user space debugfs/tracing/tracing_on ++ * file, which gives a means for the kernel and userspace to interact. ++ * Place a tracing_off() in the kernel where you want tracing to end. 
++ * From user space, examine the trace, and then echo 1 > tracing_on ++ * to continue tracing. ++ * ++ * tracing_stop/tracing_start has slightly more overhead. It is used ++ * by things like suspend to ram where disabling the recording of the ++ * trace is not enough, but tracing must actually stop because things ++ * like calling smp_processor_id() may crash the system. ++ * ++ * Most likely, you want to use tracing_on/tracing_off. ++ */ ++#ifdef CONFIG_RING_BUFFER ++void tracing_on(void); ++void tracing_off(void); ++/* trace_off_permanent stops recording with no way to bring it back */ ++void tracing_off_permanent(void); ++int tracing_is_on(void); ++#else ++static inline void tracing_on(void) { } ++static inline void tracing_off(void) { } ++static inline void tracing_off_permanent(void) { } ++static inline int tracing_is_on(void) { return 0; } ++#endif ++#ifdef CONFIG_TRACING ++extern void tracing_start(void); ++extern void tracing_stop(void); ++extern void ftrace_off_permanent(void); ++ ++extern void ++ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3); ++ ++static inline void __attribute__ ((format (printf, 1, 2))) ++____trace_printk_check_format(const char *fmt, ...) ++{ ++} ++#define __trace_printk_check_format(fmt, args...) \ ++do { \ ++ if (0) \ ++ ____trace_printk_check_format(fmt, ##args); \ ++} while (0) ++ ++/** ++ * trace_printk - printf formatting in the ftrace buffer ++ * @fmt: the printf format for printing ++ * ++ * Note: __trace_printk is an internal function for trace_printk and ++ * the @ip is passed in via the trace_printk macro. ++ * ++ * This function allows a kernel developer to debug fast path sections ++ * that printk is not appropriate for. By scattering in various ++ * printk like tracing in the code, a developer can quickly see ++ * where problems are occurring. ++ * ++ * This is intended as a debugging tool for the developer only. ++ * Please refrain from leaving trace_printks scattered around in ++ * your code. ++ */ ++ ++#define trace_printk(fmt, args...) \ ++do { \ ++ __trace_printk_check_format(fmt, ##args); \ ++ if (__builtin_constant_p(fmt)) { \ ++ static const char *trace_printk_fmt \ ++ __attribute__((section("__trace_printk_fmt"))) = \ ++ __builtin_constant_p(fmt) ? fmt : NULL; \ ++ \ ++ __trace_bprintk(_THIS_IP_, trace_printk_fmt, ##args); \ ++ } else \ ++ __trace_printk(_THIS_IP_, fmt, ##args); \ ++} while (0) ++ ++extern int ++__trace_bprintk(unsigned long ip, const char *fmt, ...) ++ __attribute__ ((format (printf, 2, 3))); ++ ++extern int ++__trace_printk(unsigned long ip, const char *fmt, ...) ++ __attribute__ ((format (printf, 2, 3))); ++ ++/* ++ * The double __builtin_constant_p is because gcc will give us an error ++ * if we try to allocate the static variable to fmt if it is not a ++ * constant. Even with the outer if statement. ++ */ ++#define ftrace_vprintk(fmt, vargs) \ ++do { \ ++ if (__builtin_constant_p(fmt)) { \ ++ static const char *trace_printk_fmt \ ++ __attribute__((section("__trace_printk_fmt"))) = \ ++ __builtin_constant_p(fmt) ? 
fmt : NULL; \ ++ \ ++ __ftrace_vbprintk(_THIS_IP_, trace_printk_fmt, vargs); \ ++ } else \ ++ __ftrace_vprintk(_THIS_IP_, fmt, vargs); \ ++} while (0) ++ ++extern int ++__ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap); ++ ++extern int ++__ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap); ++ ++extern void ftrace_dump(void); ++#else ++static inline void ++ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { } ++static inline int ++trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); ++ ++static inline void tracing_start(void) { } ++static inline void tracing_stop(void) { } ++static inline void ftrace_off_permanent(void) { } ++static inline int ++trace_printk(const char *fmt, ...) ++{ ++ return 0; ++} ++static inline int ++ftrace_vprintk(const char *fmt, va_list ap) ++{ ++ return 0; ++} ++static inline void ftrace_dump(void) { } ++#endif /* CONFIG_TRACING */ ++ ++/* + * Display an IP address in readable format. + */ + +Index: linux-2.6-tip/include/linux/kernel_stat.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/kernel_stat.h ++++ linux-2.6-tip/include/linux/kernel_stat.h +@@ -23,12 +23,14 @@ struct cpu_usage_stat { + cputime64_t idle; + cputime64_t iowait; + cputime64_t steal; ++ cputime64_t user_rt; ++ cputime64_t system_rt; + cputime64_t guest; + }; + + struct kernel_stat { + struct cpu_usage_stat cpustat; +-#ifndef CONFIG_SPARSE_IRQ ++#ifndef CONFIG_GENERIC_HARDIRQS + unsigned int irqs[NR_IRQS]; + #endif + }; +@@ -41,7 +43,7 @@ DECLARE_PER_CPU(struct kernel_stat, ksta + + extern unsigned long long nr_context_switches(void); + +-#ifndef CONFIG_SPARSE_IRQ ++#ifndef CONFIG_GENERIC_HARDIRQS + #define kstat_irqs_this_cpu(irq) \ + (kstat_this_cpu.irqs[irq]) + +@@ -52,16 +54,19 @@ static inline void kstat_incr_irqs_this_ + { + kstat_this_cpu.irqs[irq]++; + } +-#endif +- + +-#ifndef CONFIG_SPARSE_IRQ + static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) + { + return kstat_cpu(cpu).irqs[irq]; + } + #else ++#include + extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu); ++#define kstat_irqs_this_cpu(DESC) \ ++ ((DESC)->kstat_irqs[smp_processor_id()]) ++#define kstat_incr_irqs_this_cpu(irqno, DESC) \ ++ ((DESC)->kstat_irqs[smp_processor_id()]++) ++ + #endif + + /* +@@ -78,7 +83,15 @@ static inline unsigned int kstat_irqs(un + return sum; + } + ++ ++/* ++ * Lock/unlock the current runqueue - to extract task statistics: ++ */ ++extern void curr_rq_lock_irq_save(unsigned long *flags); ++extern void curr_rq_unlock_irq_restore(unsigned long *flags); ++extern unsigned long long __task_delta_exec(struct task_struct *tsk, int update); + extern unsigned long long task_delta_exec(struct task_struct *); ++ + extern void account_user_time(struct task_struct *, cputime_t, cputime_t); + extern void account_system_time(struct task_struct *, int, cputime_t, cputime_t); + extern void account_steal_time(cputime_t); +Index: linux-2.6-tip/include/linux/key.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/key.h ++++ linux-2.6-tip/include/linux/key.h +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include + + #ifdef __KERNEL__ +Index: linux-2.6-tip/include/linux/kmemcheck.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/linux/kmemcheck.h +@@ -0,0 +1,154 @@ ++#ifndef LINUX_KMEMCHECK_H ++#define LINUX_KMEMCHECK_H ++ 
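[Editor's note, not part of the patch: stepping back to the linux/kernel.h hunk above, a short hedged sketch of how the newly added printk_once() and trace_printk() helpers are meant to be used in a hot path. example_hot_path() is an invented name.]

#include <linux/kernel.h>

static void example_hot_path(int budget)
{
	/* Emitted to the console only the first time this path runs. */
	printk_once(KERN_INFO "example: fast path entered\n");

	/*
	 * Lightweight debugging: this goes into the ftrace ring buffer
	 * rather than the console. With a constant format string the macro
	 * above picks the cheaper __trace_bprintk() variant automatically.
	 */
	trace_printk("budget=%d\n", budget);
}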
++#include ++#include ++ ++/* ++ * How to use: If you have a struct using bitfields, for example ++ * ++ * struct a { ++ * int x:8, y:8; ++ * }; ++ * ++ * then this should be rewritten as ++ * ++ * struct a { ++ * kmemcheck_define_bitfield(flags, { ++ * int x:8, y:8; ++ * }); ++ * }; ++ * ++ * Now the "flags" member may be used to refer to the bitfield (and things ++ * like &x.flags is allowed). As soon as the struct is allocated, the bit- ++ * fields should be annotated: ++ * ++ * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); ++ * if (a) ++ * kmemcheck_annotate_bitfield(a->flags); ++ * ++ * Note: We provide the same definitions for both kmemcheck and non- ++ * kmemcheck kernels. This makes it harder to introduce accidental errors. ++ */ ++#define kmemcheck_define_bitfield(name, fields...) \ ++ union { \ ++ struct fields name; \ ++ struct fields; \ ++ }; \ ++ \ ++ /* \ ++ * Erk. Due to gcc bug, we'll get a "error: \ ++ * flexible array member in otherwise empty \ ++ * struct without this. \ ++ */ \ ++ int kmemcheck_dummy_##name##_[0]; ++ ++#ifdef CONFIG_KMEMCHECK ++extern int kmemcheck_enabled; ++ ++void kmemcheck_init(void); ++ ++/* The slab-related functions. */ ++void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node); ++void kmemcheck_free_shadow(struct page *page, int order); ++void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, ++ size_t size); ++void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size); ++ ++void kmemcheck_pagealloc_alloc(struct page *p, unsigned int order, ++ gfp_t gfpflags); ++ ++void kmemcheck_show_pages(struct page *p, unsigned int n); ++void kmemcheck_hide_pages(struct page *p, unsigned int n); ++ ++bool kmemcheck_page_is_tracked(struct page *p); ++ ++void kmemcheck_mark_unallocated(void *address, unsigned int n); ++void kmemcheck_mark_uninitialized(void *address, unsigned int n); ++void kmemcheck_mark_initialized(void *address, unsigned int n); ++void kmemcheck_mark_freed(void *address, unsigned int n); ++ ++void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n); ++void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n); ++void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n); ++ ++int kmemcheck_show_addr(unsigned long address); ++int kmemcheck_hide_addr(unsigned long address); ++ ++#define kmemcheck_annotate_bitfield(field) \ ++ do { \ ++ kmemcheck_mark_initialized(&(field), sizeof(field)); \ ++ } while (0) ++#else ++#define kmemcheck_enabled 0 ++ ++static inline void kmemcheck_init(void) ++{ ++} ++ ++static inline void ++kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node) ++{ ++} ++ ++static inline void ++kmemcheck_free_shadow(struct page *page, int order) ++{ ++} ++ ++static inline void ++kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, ++ size_t size) ++{ ++} ++ ++static inline void kmemcheck_slab_free(struct kmem_cache *s, void *object, ++ size_t size) ++{ ++} ++ ++static inline void kmemcheck_pagealloc_alloc(struct page *p, ++ unsigned int order, gfp_t gfpflags) ++{ ++} ++ ++static inline bool kmemcheck_page_is_tracked(struct page *p) ++{ ++ return false; ++} ++ ++static inline void kmemcheck_mark_unallocated(void *address, unsigned int n) ++{ ++} ++ ++static inline void kmemcheck_mark_uninitialized(void *address, unsigned int n) ++{ ++} ++ ++static inline void kmemcheck_mark_initialized(void *address, unsigned int n) ++{ ++} ++ ++static inline void kmemcheck_mark_freed(void *address, 
unsigned int n) ++{ ++} ++ ++static inline void kmemcheck_mark_unallocated_pages(struct page *p, ++ unsigned int n) ++{ ++} ++ ++static inline void kmemcheck_mark_uninitialized_pages(struct page *p, ++ unsigned int n) ++{ ++} ++ ++static inline void kmemcheck_mark_initialized_pages(struct page *p, ++ unsigned int n) ++{ ++} ++ ++#define kmemcheck_annotate_bitfield(field) do { } while (0) ++#endif /* CONFIG_KMEMCHECK */ ++ ++#endif /* LINUX_KMEMCHECK_H */ +Index: linux-2.6-tip/include/linux/kprobes.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/kprobes.h ++++ linux-2.6-tip/include/linux/kprobes.h +@@ -156,7 +156,7 @@ struct kretprobe { + int nmissed; + size_t data_size; + struct hlist_head free_instances; +- spinlock_t lock; ++ raw_spinlock_t lock; + }; + + struct kretprobe_instance { +@@ -182,6 +182,14 @@ struct kprobe_blackpoint { + DECLARE_PER_CPU(struct kprobe *, current_kprobe); + DECLARE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + ++/* ++ * For #ifdef avoidance: ++ */ ++static inline int kprobes_built_in(void) ++{ ++ return 1; ++} ++ + #ifdef CONFIG_KRETPROBES + extern void arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs); +@@ -271,8 +279,16 @@ void unregister_kretprobes(struct kretpr + void kprobe_flush_task(struct task_struct *tk); + void recycle_rp_inst(struct kretprobe_instance *ri, struct hlist_head *head); + +-#else /* CONFIG_KPROBES */ ++#else /* !CONFIG_KPROBES: */ + ++static inline int kprobes_built_in(void) ++{ ++ return 0; ++} ++static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) ++{ ++ return 0; ++} + static inline struct kprobe *get_kprobe(void *addr) + { + return NULL; +@@ -329,5 +345,5 @@ static inline void unregister_kretprobes + static inline void kprobe_flush_task(struct task_struct *tk) + { + } +-#endif /* CONFIG_KPROBES */ +-#endif /* _LINUX_KPROBES_H */ ++#endif /* CONFIG_KPROBES */ ++#endif /* _LINUX_KPROBES_H */ +Index: linux-2.6-tip/include/linux/latencytop.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/latencytop.h ++++ linux-2.6-tip/include/linux/latencytop.h +@@ -9,6 +9,7 @@ + #ifndef _INCLUDE_GUARD_LATENCYTOP_H_ + #define _INCLUDE_GUARD_LATENCYTOP_H_ + ++#include + #ifdef CONFIG_LATENCYTOP + + #define LT_SAVECOUNT 32 +@@ -24,7 +25,14 @@ struct latency_record { + + struct task_struct; + +-void account_scheduler_latency(struct task_struct *task, int usecs, int inter); ++extern int latencytop_enabled; ++void __account_scheduler_latency(struct task_struct *task, int usecs, int inter); ++static inline void ++account_scheduler_latency(struct task_struct *task, int usecs, int inter) ++{ ++ if (unlikely(latencytop_enabled)) ++ __account_scheduler_latency(task, usecs, inter); ++} + + void clear_all_latency_tracing(struct task_struct *p); + +Index: linux-2.6-tip/include/linux/lockdep.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/lockdep.h ++++ linux-2.6-tip/include/linux/lockdep.h +@@ -20,43 +20,10 @@ struct lockdep_map; + #include + + /* +- * Lock-class usage-state bits: ++ * We'd rather not expose kernel/lockdep_states.h this wide, but we do need ++ * the total number of states... 
:-( + */ +-enum lock_usage_bit +-{ +- LOCK_USED = 0, +- LOCK_USED_IN_HARDIRQ, +- LOCK_USED_IN_SOFTIRQ, +- LOCK_ENABLED_SOFTIRQS, +- LOCK_ENABLED_HARDIRQS, +- LOCK_USED_IN_HARDIRQ_READ, +- LOCK_USED_IN_SOFTIRQ_READ, +- LOCK_ENABLED_SOFTIRQS_READ, +- LOCK_ENABLED_HARDIRQS_READ, +- LOCK_USAGE_STATES +-}; +- +-/* +- * Usage-state bitmasks: +- */ +-#define LOCKF_USED (1 << LOCK_USED) +-#define LOCKF_USED_IN_HARDIRQ (1 << LOCK_USED_IN_HARDIRQ) +-#define LOCKF_USED_IN_SOFTIRQ (1 << LOCK_USED_IN_SOFTIRQ) +-#define LOCKF_ENABLED_HARDIRQS (1 << LOCK_ENABLED_HARDIRQS) +-#define LOCKF_ENABLED_SOFTIRQS (1 << LOCK_ENABLED_SOFTIRQS) +- +-#define LOCKF_ENABLED_IRQS (LOCKF_ENABLED_HARDIRQS | LOCKF_ENABLED_SOFTIRQS) +-#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) +- +-#define LOCKF_USED_IN_HARDIRQ_READ (1 << LOCK_USED_IN_HARDIRQ_READ) +-#define LOCKF_USED_IN_SOFTIRQ_READ (1 << LOCK_USED_IN_SOFTIRQ_READ) +-#define LOCKF_ENABLED_HARDIRQS_READ (1 << LOCK_ENABLED_HARDIRQS_READ) +-#define LOCKF_ENABLED_SOFTIRQS_READ (1 << LOCK_ENABLED_SOFTIRQS_READ) +- +-#define LOCKF_ENABLED_IRQS_READ \ +- (LOCKF_ENABLED_HARDIRQS_READ | LOCKF_ENABLED_SOFTIRQS_READ) +-#define LOCKF_USED_IN_IRQ_READ \ +- (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) ++#define XXX_LOCK_USAGE_STATES (1+3*4) + + #define MAX_LOCKDEP_SUBCLASSES 8UL + +@@ -97,7 +64,7 @@ struct lock_class { + * IRQ/softirq usage tracking bits: + */ + unsigned long usage_mask; +- struct stack_trace usage_traces[LOCK_USAGE_STATES]; ++ struct stack_trace usage_traces[XXX_LOCK_USAGE_STATES]; + + /* + * These fields represent a directed graph of lock dependencies, +@@ -324,7 +291,11 @@ static inline void lock_set_subclass(str + lock_set_class(lock, lock->name, lock->key, subclass, ip); + } + +-# define INIT_LOCKDEP .lockdep_recursion = 0, ++extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask); ++extern void lockdep_clear_current_reclaim_state(void); ++extern void lockdep_trace_alloc(gfp_t mask); ++ ++# define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0, + + #define lockdep_depth(tsk) (debug_locks ? 
(tsk)->lockdep_depth : 0) + +@@ -342,6 +313,9 @@ static inline void lockdep_on(void) + # define lock_release(l, n, i) do { } while (0) + # define lock_set_class(l, n, k, s, i) do { } while (0) + # define lock_set_subclass(l, s, i) do { } while (0) ++# define lockdep_set_current_reclaim_state(g) do { } while (0) ++# define lockdep_clear_current_reclaim_state() do { } while (0) ++# define lockdep_trace_alloc(g) do { } while (0) + # define lockdep_init() do { } while (0) + # define lockdep_info() do { } while (0) + # define lockdep_init_map(lock, name, key, sub) \ +Index: linux-2.6-tip/include/linux/magic.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/magic.h ++++ linux-2.6-tip/include/linux/magic.h +@@ -49,4 +49,5 @@ + #define FUTEXFS_SUPER_MAGIC 0xBAD1DEA + #define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA + ++#define STACK_END_MAGIC 0x57AC6E9D + #endif /* __LINUX_MAGIC_H__ */ +Index: linux-2.6-tip/include/linux/matroxfb.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/matroxfb.h ++++ linux-2.6-tip/include/linux/matroxfb.h +@@ -37,7 +37,7 @@ enum matroxfb_ctrl_id { + MATROXFB_CID_LAST + }; + +-#define FBIO_WAITFORVSYNC _IOW('F', 0x20, u_int32_t) ++#define FBIO_WAITFORVSYNC _IOW('F', 0x20, __u32) + + #endif + +Index: linux-2.6-tip/include/linux/mca-legacy.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/mca-legacy.h ++++ linux-2.6-tip/include/linux/mca-legacy.h +@@ -9,7 +9,7 @@ + + #include + +-#warning "MCA legacy - please move your driver to the new sysfs api" ++/* #warning "MCA legacy - please move your driver to the new sysfs api" */ + + /* MCA_NOTFOUND is an error condition. The other two indicate + * motherboard POS registers contain the adapter. They might be +Index: linux-2.6-tip/include/linux/memory.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/memory.h ++++ linux-2.6-tip/include/linux/memory.h +@@ -99,4 +99,10 @@ enum mem_add_context { BOOT, HOTPLUG }; + #define hotplug_memory_notifier(fn, pri) do { } while (0) + #endif + ++/* ++ * Kernel text modification mutex, used for code patching. Users of this lock ++ * can sleep. ++ */ ++extern struct mutex text_mutex; ++ + #endif /* _LINUX_MEMORY_H_ */ +Index: linux-2.6-tip/include/linux/mm.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/mm.h ++++ linux-2.6-tip/include/linux/mm.h +@@ -98,12 +98,13 @@ extern unsigned int kobjsize(const void + #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ + #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ + #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ +-#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it. 
Refer note in VM_PFNMAP_AT_MMAP below */ ++#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ + #define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */ + + #define VM_CAN_NONLINEAR 0x08000000 /* Has ->fault & does nonlinear pages */ + #define VM_MIXEDMAP 0x10000000 /* Can contain "struct page" and pure PFN pages */ + #define VM_SAO 0x20000000 /* Strong Access Ordering (powerpc) */ ++#define VM_PFN_AT_MMAP 0x40000000 /* PFNMAP vma that is fully mapped at mmap time */ + + #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ + #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS +@@ -127,17 +128,6 @@ extern unsigned int kobjsize(const void + #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP) + + /* +- * pfnmap vmas that are fully mapped at mmap time (not mapped on fault). +- * Used by x86 PAT to identify such PFNMAP mappings and optimize their handling. +- * Note VM_INSERTPAGE flag is overloaded here. i.e, +- * VM_INSERTPAGE && !VM_PFNMAP implies +- * The vma has had "vm_insert_page()" done on it +- * VM_INSERTPAGE && VM_PFNMAP implies +- * The vma is PFNMAP with full mapping at mmap time +- */ +-#define VM_PFNMAP_AT_MMAP (VM_INSERTPAGE | VM_PFNMAP) +- +-/* + * mapping from the currently active vm_flags protection bits (the + * low four bits) to a page protection mask.. + */ +@@ -157,7 +147,7 @@ extern pgprot_t protection_map[16]; + */ + static inline int is_linear_pfn_mapping(struct vm_area_struct *vma) + { +- return ((vma->vm_flags & VM_PFNMAP_AT_MMAP) == VM_PFNMAP_AT_MMAP); ++ return (vma->vm_flags & VM_PFN_AT_MMAP); + } + + static inline int is_pfn_mapping(struct vm_area_struct *vma) +@@ -614,23 +604,39 @@ static __always_inline void *lowmem_page + #endif + + #if defined(WANT_PAGE_VIRTUAL) +-#define page_address(page) ((page)->virtual) +-#define set_page_address(page, address) \ +- do { \ +- (page)->virtual = (address); \ +- } while(0) +-#define page_address_init() do { } while(0) ++/* ++ * wrap page->virtual so it is safe to set/read locklessly ++ */ ++#define page_address(page) \ ++ ({ typeof((page)->virtual) v = (page)->virtual; \ ++ smp_read_barrier_depends(); \ ++ v; }) ++ ++static inline int set_page_address(struct page *page, void *address) ++{ ++ if (address) ++ return cmpxchg(&page->virtual, NULL, address) == NULL; ++ else { ++ /* ++ * cmpxchg is a bit abused because it is not guaranteed ++ * safe wrt direct assignment on all platforms. 
++ */ ++ void *virt = page->virtual; ++ return cmpxchg(&page->virtual, virt, NULL) == virt; ++ } ++} ++void page_address_init(void); + #endif + + #if defined(HASHED_PAGE_VIRTUAL) + void *page_address(struct page *page); +-void set_page_address(struct page *page, void *virtual); ++int set_page_address(struct page *page, void *virtual); + void page_address_init(void); + #endif + + #if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL) + #define page_address(page) lowmem_page_address(page) +-#define set_page_address(page, address) do { } while(0) ++#define set_page_address(page, address) (0) + #define page_address_init() do { } while(0) + #endif + +@@ -771,7 +777,7 @@ int zap_vma_ptes(struct vm_area_struct * + unsigned long size); + unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, + unsigned long size, struct zap_details *); +-unsigned long unmap_vmas(struct mmu_gather **tlb, ++unsigned long unmap_vmas(struct mmu_gather *tlb, + struct vm_area_struct *start_vma, unsigned long start_addr, + unsigned long end_addr, unsigned long *nr_accounted, + struct zap_details *); +@@ -951,27 +957,85 @@ static inline pmd_t *pmd_alloc(struct mm + * overflow into the next struct page (as it might with DEBUG_SPINLOCK). + * When freeing, reset page->mapping so free_pages_check won't complain. + */ ++#ifndef CONFIG_PREEMPT_RT ++ + #define __pte_lockptr(page) &((page)->ptl) +-#define pte_lock_init(_page) do { \ +- spin_lock_init(__pte_lockptr(_page)); \ +-} while (0) ++ ++static inline struct page *pte_lock_init(struct page *page) ++{ ++ spin_lock_init(__pte_lockptr(page)); ++ return page; ++} ++ + #define pte_lock_deinit(page) ((page)->mapping = NULL) ++ ++#else /* PREEMPT_RT */ ++ ++/* ++ * On PREEMPT_RT the spinlock_t's are too large to embed in the ++ * page frame, hence it only has a pointer and we need to dynamically ++ * allocate the lock when we allocate PTE-pages. ++ * ++ * This is an overall win, since only a small fraction of the pages ++ * will be PTE pages under normal circumstances. ++ */ ++ ++#define __pte_lockptr(page) ((page)->ptl) ++ ++/* ++ * Heinous hack, relies on the caller doing something like: ++ * ++ * pte = alloc_pages(PGALLOC_GFP, 0); ++ * if (pte) ++ * pgtable_page_ctor(pte); ++ * return pte; ++ * ++ * This ensures we release the page and return NULL when the ++ * lock allocation fails. ++ */ ++static inline struct page *pte_lock_init(struct page *page) ++{ ++ page->ptl = kmalloc(sizeof(spinlock_t), GFP_KERNEL); ++ if (page->ptl) { ++ spin_lock_init(__pte_lockptr(page)); ++ } else { ++ __free_page(page); ++ page = NULL; ++ } ++ return page; ++} ++ ++static inline void pte_lock_deinit(struct page *page) ++{ ++ kfree(page->ptl); ++ page->mapping = NULL; ++} ++ ++#endif /* PREEMPT_RT */ ++ + #define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));}) + #else /* !USE_SPLIT_PTLOCKS */ + /* + * We use mm->page_table_lock to guard all pagetable pages of the mm.
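Since the PREEMPT_RT pte_lock_init() above may free the page and return NULL, pgtable_page_ctor() is turned into a macro (in the hunk that follows) which can NULL out the caller's variable. A sketch of the arch-side caller shape that the "Heinous hack" comment assumes; pgtable_t and PGALLOC_GFP are arch-specific, and this is illustrative rather than taken from the patch:

	/* Sketch: arch pte_alloc_one() under the ctor-may-fail contract. */
	pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
	{
		struct page *pte;

		pte = alloc_pages(PGALLOC_GFP, 0);
		if (pte)
			pgtable_page_ctor(pte);	/* on -rt this may free pte and leave it NULL */
		return pte;
	}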
+ */ +-#define pte_lock_init(page) do {} while (0) ++static inline struct page *pte_lock_init(struct page *page) { return page; } + #define pte_lock_deinit(page) do {} while (0) + #define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;}) + #endif /* USE_SPLIT_PTLOCKS */ + +-static inline void pgtable_page_ctor(struct page *page) ++static inline struct page *__pgtable_page_ctor(struct page *page) + { +- pte_lock_init(page); +- inc_zone_page_state(page, NR_PAGETABLE); ++ page = pte_lock_init(page); ++ if (page) ++ inc_zone_page_state(page, NR_PAGETABLE); ++ return page; + } + ++#define pgtable_page_ctor(page) \ ++do { \ ++ page = __pgtable_page_ctor(page); \ ++} while (0) ++ + static inline void pgtable_page_dtor(struct page *page) + { + pte_lock_deinit(page); +Index: linux-2.6-tip/include/linux/mm_types.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/mm_types.h ++++ linux-2.6-tip/include/linux/mm_types.h +@@ -68,7 +68,11 @@ struct page { + */ + }; + #if USE_SPLIT_PTLOCKS ++#ifndef CONFIG_PREEMPT_RT + spinlock_t ptl; ++#else ++ spinlock_t *ptl; ++#endif + #endif + struct kmem_cache *slab; /* SLUB: Pointer to slab */ + struct page *first_page; /* Compound tail pages */ +@@ -94,6 +98,14 @@ struct page { + void *virtual; /* Kernel virtual address (NULL if + not kmapped, ie. highmem) */ + #endif /* WANT_PAGE_VIRTUAL */ ++ ++#ifdef CONFIG_KMEMCHECK ++ /* ++ * kmemcheck wants to track the status of each byte in a page; this ++ * is a pointer to such a status block. NULL if not tracked. ++ */ ++ void *shadow; ++#endif + }; + + /* +@@ -233,6 +245,9 @@ struct mm_struct { + /* Architecture-specific MM context */ + mm_context_t context; + ++ /* realtime bits */ ++ struct list_head delayed_drop; ++ + /* Swap token stuff */ + /* + * Last value of global fault stamp as seen by this process. +Index: linux-2.6-tip/include/linux/mmiotrace.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/mmiotrace.h ++++ linux-2.6-tip/include/linux/mmiotrace.h +@@ -1,5 +1,5 @@ +-#ifndef MMIOTRACE_H +-#define MMIOTRACE_H ++#ifndef _LINUX_MMIOTRACE_H ++#define _LINUX_MMIOTRACE_H + + #include + #include +@@ -13,28 +13,34 @@ typedef void (*kmmio_post_handler_t)(str + unsigned long condition, struct pt_regs *); + + struct kmmio_probe { +- struct list_head list; /* kmmio internal list */ +- unsigned long addr; /* start location of the probe point */ +- unsigned long len; /* length of the probe region */ +- kmmio_pre_handler_t pre_handler; /* Called before addr is executed. */ +- kmmio_post_handler_t post_handler; /* Called after addr is executed */ +- void *private; ++ /* kmmio internal list: */ ++ struct list_head list; ++ /* start location of the probe point: */ ++ unsigned long addr; ++ /* length of the probe region: */ ++ unsigned long len; ++ /* Called before addr is executed: */ ++ kmmio_pre_handler_t pre_handler; ++ /* Called after addr is executed: */ ++ kmmio_post_handler_t post_handler; ++ void *private; + }; + ++extern unsigned int kmmio_count; ++ ++extern int register_kmmio_probe(struct kmmio_probe *p); ++extern void unregister_kmmio_probe(struct kmmio_probe *p); ++ ++#ifdef CONFIG_MMIOTRACE + /* kmmio is active by some kmmio_probes? 
*/ + static inline int is_kmmio_active(void) + { +- extern unsigned int kmmio_count; + return kmmio_count; + } + +-extern int register_kmmio_probe(struct kmmio_probe *p); +-extern void unregister_kmmio_probe(struct kmmio_probe *p); +- + /* Called from page fault handler. */ + extern int kmmio_handler(struct pt_regs *regs, unsigned long addr); + +-#ifdef CONFIG_MMIOTRACE + /* Called from ioremap.c */ + extern void mmiotrace_ioremap(resource_size_t offset, unsigned long size, + void __iomem *addr); +@@ -43,7 +49,17 @@ extern void mmiotrace_iounmap(volatile v + /* For anyone to insert markers. Remember trailing newline. */ + extern int mmiotrace_printk(const char *fmt, ...) + __attribute__ ((format (printf, 1, 2))); +-#else ++#else /* !CONFIG_MMIOTRACE: */ ++static inline int is_kmmio_active(void) ++{ ++ return 0; ++} ++ ++static inline int kmmio_handler(struct pt_regs *regs, unsigned long addr) ++{ ++ return 0; ++} ++ + static inline void mmiotrace_ioremap(resource_size_t offset, + unsigned long size, void __iomem *addr) + { +@@ -63,28 +79,28 @@ static inline int mmiotrace_printk(const + #endif /* CONFIG_MMIOTRACE */ + + enum mm_io_opcode { +- MMIO_READ = 0x1, /* struct mmiotrace_rw */ +- MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ +- MMIO_PROBE = 0x3, /* struct mmiotrace_map */ +- MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ +- MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */ ++ MMIO_READ = 0x1, /* struct mmiotrace_rw */ ++ MMIO_WRITE = 0x2, /* struct mmiotrace_rw */ ++ MMIO_PROBE = 0x3, /* struct mmiotrace_map */ ++ MMIO_UNPROBE = 0x4, /* struct mmiotrace_map */ ++ MMIO_UNKNOWN_OP = 0x5, /* struct mmiotrace_rw */ + }; + + struct mmiotrace_rw { +- resource_size_t phys; /* PCI address of register */ +- unsigned long value; +- unsigned long pc; /* optional program counter */ +- int map_id; +- unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */ +- unsigned char width; /* size of register access in bytes */ ++ resource_size_t phys; /* PCI address of register */ ++ unsigned long value; ++ unsigned long pc; /* optional program counter */ ++ int map_id; ++ unsigned char opcode; /* one of MMIO_{READ,WRITE,UNKNOWN_OP} */ ++ unsigned char width; /* size of register access in bytes */ + }; + + struct mmiotrace_map { +- resource_size_t phys; /* base address in PCI space */ +- unsigned long virt; /* base virtual address */ +- unsigned long len; /* mapping size */ +- int map_id; +- unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */ ++ resource_size_t phys; /* base address in PCI space */ ++ unsigned long virt; /* base virtual address */ ++ unsigned long len; /* mapping size */ ++ int map_id; ++ unsigned char opcode; /* MMIO_PROBE or MMIO_UNPROBE */ + }; + + /* in kernel/trace/trace_mmiotrace.c */ +@@ -94,4 +110,4 @@ extern void mmio_trace_rw(struct mmiotra + extern void mmio_trace_mapping(struct mmiotrace_map *map); + extern int mmio_trace_printk(const char *fmt, va_list args); + +-#endif /* MMIOTRACE_H */ ++#endif /* _LINUX_MMIOTRACE_H */ +Index: linux-2.6-tip/include/linux/mmzone.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/mmzone.h ++++ linux-2.6-tip/include/linux/mmzone.h +@@ -764,12 +764,6 @@ extern int numa_zonelist_order_handler(s + extern char numa_zonelist_order[]; + #define NUMA_ZONELIST_ORDER_LEN 16 /* string buffer size */ + +-#include +-/* Returns the number of the current Node. 
*/ +-#ifndef numa_node_id +-#define numa_node_id() (cpu_to_node(raw_smp_processor_id())) +-#endif +- + #ifndef CONFIG_NEED_MULTIPLE_NODES + + extern struct pglist_data contig_page_data; +Index: linux-2.6-tip/include/linux/module.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/module.h ++++ linux-2.6-tip/include/linux/module.h +@@ -78,18 +78,34 @@ void sort_extable(struct exception_table + struct exception_table_entry *finish); + void sort_main_extable(void); + ++/* ++ * Return a pointer to the current module, but only if within a module ++ */ + #ifdef MODULE +-#define MODULE_GENERIC_TABLE(gtype,name) \ +-extern const struct gtype##_id __mod_##gtype##_table \ +- __attribute__ ((unused, alias(__stringify(name)))) +- + extern struct module __this_module; + #define THIS_MODULE (&__this_module) + #else /* !MODULE */ +-#define MODULE_GENERIC_TABLE(gtype,name) + #define THIS_MODULE ((struct module *)0) + #endif + ++/* ++ * Declare a module table ++ * - this suppresses "'name' defined but not used" warnings from the compiler ++ * as the table may not actually be used by the code within the module ++ */ ++#ifdef MODULE ++#define MODULE_GENERIC_TABLE(gtype,name) \ ++extern const struct gtype##_id __mod_##gtype##_table \ ++ __attribute__ ((unused, alias(__stringify(name)))) ++#define MODULE_STATIC_GENERIC_TABLE(gtype,name) \ ++extern const struct gtype##_id __mod_##gtype##_table \ ++ __attribute__ ((unused, alias(__stringify(name)))) ++#else ++#define MODULE_GENERIC_TABLE(gtype,name) ++#define MODULE_STATIC_GENERIC_TABLE(gtype,name) \ ++static __typeof__((name)) name __attribute__((unused)); ++#endif ++ + /* Generic info of form tag = "info" */ + #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) + +@@ -139,6 +155,8 @@ extern struct module __this_module; + + #define MODULE_DEVICE_TABLE(type,name) \ + MODULE_GENERIC_TABLE(type##_device,name) ++#define MODULE_STATIC_DEVICE_TABLE(type,name) \ ++ MODULE_STATIC_GENERIC_TABLE(type##_device,name) + + /* Version of form [:][-]. + Or for CVS/RCS ID version, everything but the number is stripped. +@@ -329,6 +347,11 @@ struct module + unsigned int num_tracepoints; + #endif + ++#ifdef CONFIG_TRACING ++ const char **trace_bprintk_fmt_start; ++ unsigned int num_trace_bprintk_fmt; ++#endif ++ + #ifdef CONFIG_MODULE_UNLOAD + /* What modules depend on me? 
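The MODULE_STATIC_GENERIC_TABLE()/MODULE_STATIC_DEVICE_TABLE() variants added above are meant for ID tables that nothing else references directly, so a built-in (non-modular) build does not warn about an unused static table. A sketch of the intended use; the table name and PCI IDs are made up:

	/* Sketch: hypothetical ID table referenced only through the macro. */
	static const struct pci_device_id example_pci_ids[] = {
		{ PCI_DEVICE(0x1234, 0x5678) },	/* made-up vendor/device */
		{ }
	};
	MODULE_STATIC_DEVICE_TABLE(pci, example_pci_ids);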
*/ + struct list_head modules_which_use_me; +Index: linux-2.6-tip/include/linux/mroute6.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/mroute6.h ++++ linux-2.6-tip/include/linux/mroute6.h +@@ -65,7 +65,7 @@ struct mif6ctl { + mifi_t mif6c_mifi; /* Index of MIF */ + unsigned char mif6c_flags; /* MIFF_ flags */ + unsigned char vifc_threshold; /* ttl limit */ +- u_short mif6c_pifi; /* the index of the physical IF */ ++ __u16 mif6c_pifi; /* the index of the physical IF */ + unsigned int vifc_rate_limit; /* Rate limiter values (NI) */ + }; + +Index: linux-2.6-tip/include/linux/mutex.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/mutex.h ++++ linux-2.6-tip/include/linux/mutex.h +@@ -12,11 +12,83 @@ + + #include + #include ++#include + #include + #include + + #include + ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ ++ , .dep_map = { .name = #lockname } ++#else ++# define __DEP_MAP_MUTEX_INITIALIZER(lockname) ++#endif ++ ++#ifdef CONFIG_PREEMPT_RT ++ ++#include ++ ++struct mutex { ++ struct rt_mutex lock; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++}; ++ ++ ++#define __MUTEX_INITIALIZER(mutexname) \ ++ { \ ++ .lock = __RT_MUTEX_INITIALIZER(mutexname.lock) \ ++ __DEP_MAP_MUTEX_INITIALIZER(mutexname) \ ++ } ++ ++#define DEFINE_MUTEX(mutexname) \ ++ struct mutex mutexname = __MUTEX_INITIALIZER(mutexname) ++ ++extern void ++__mutex_init(struct mutex *lock, char *name, struct lock_class_key *key); ++ ++extern void __lockfunc _mutex_lock(struct mutex *lock); ++extern int __lockfunc _mutex_lock_interruptible(struct mutex *lock); ++extern int __lockfunc _mutex_lock_killable(struct mutex *lock); ++extern void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass); ++extern int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass); ++extern int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass); ++extern int __lockfunc _mutex_trylock(struct mutex *lock); ++extern void __lockfunc _mutex_unlock(struct mutex *lock); ++ ++#define mutex_is_locked(l) rt_mutex_is_locked(&(l)->lock) ++#define mutex_lock(l) _mutex_lock(l) ++#define mutex_lock_interruptible(l) _mutex_lock_interruptible(l) ++#define mutex_lock_killable(l) _mutex_lock_killable(l) ++#define mutex_trylock(l) _mutex_trylock(l) ++#define mutex_unlock(l) _mutex_unlock(l) ++#define mutex_destroy(l) rt_mutex_destroy(&(l)->lock) ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++# define mutex_lock_nested(l, s) _mutex_lock_nested(l, s) ++# define mutex_lock_interruptible_nested(l, s) \ ++ _mutex_lock_interruptible_nested(l, s) ++# define mutex_lock_killable_nested(l, s) \ ++ _mutex_lock_killable_nested(l, s) ++#else ++# define mutex_lock_nested(l, s) _mutex_lock(l) ++# define mutex_lock_interruptible_nested(l, s) \ ++ _mutex_lock_interruptible(l) ++# define mutex_lock_killable_nested(l, s) \ ++ _mutex_lock_killable(l) ++#endif ++ ++# define mutex_init(mutex) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ __mutex_init((mutex), #mutex, &__key); \ ++} while (0) ++ ++#else /* PREEMPT_RT */ ++ + /* + * Simple, straightforward mutexes with strict semantics: + * +@@ -50,8 +122,10 @@ struct mutex { + atomic_t count; + spinlock_t wait_lock; + struct list_head wait_list; +-#ifdef CONFIG_DEBUG_MUTEXES ++#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) + struct thread_info *owner; ++#endif ++#ifdef CONFIG_DEBUG_MUTEXES + 
const char *name; + void *magic; + #endif +@@ -68,7 +142,6 @@ struct mutex_waiter { + struct list_head list; + struct task_struct *task; + #ifdef CONFIG_DEBUG_MUTEXES +- struct mutex *lock; + void *magic; + #endif + }; +@@ -86,13 +159,6 @@ do { \ + # define mutex_destroy(mutex) do { } while (0) + #endif + +-#ifdef CONFIG_DEBUG_LOCK_ALLOC +-# define __DEP_MAP_MUTEX_INITIALIZER(lockname) \ +- , .dep_map = { .name = #lockname } +-#else +-# define __DEP_MAP_MUTEX_INITIALIZER(lockname) +-#endif +- + #define __MUTEX_INITIALIZER(lockname) \ + { .count = ATOMIC_INIT(1) \ + , .wait_lock = __SPIN_LOCK_UNLOCKED(lockname.wait_lock) \ +@@ -150,4 +216,29 @@ extern int __must_check mutex_lock_killa + extern int mutex_trylock(struct mutex *lock); + extern void mutex_unlock(struct mutex *lock); + ++#endif /* !PREEMPT_RT */ ++ ++/** ++ * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 ++ * @cnt: the atomic which we are to dec ++ * @lock: the mutex to return holding if we dec to 0 ++ * ++ * return true and hold lock if we dec to 0, return false otherwise ++ */ ++static inline int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock) ++{ ++ /* dec if we can't possibly hit 0 */ ++ if (atomic_add_unless(cnt, -1, 1)) ++ return 0; ++ /* we might hit 0, so take the lock */ ++ mutex_lock(lock); ++ if (!atomic_dec_and_test(cnt)) { ++ /* when we actually did the dec, we didn't hit 0 */ ++ mutex_unlock(lock); ++ return 0; ++ } ++ /* we hit 0, and we hold the lock */ ++ return 1; ++} ++ + #endif +Index: linux-2.6-tip/include/linux/netfilter/nf_conntrack_tcp.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/nf_conntrack_tcp.h ++++ linux-2.6-tip/include/linux/netfilter/nf_conntrack_tcp.h +@@ -2,6 +2,8 @@ + #define _NF_CONNTRACK_TCP_H + /* TCP tracking. */ + ++#include ++ + /* This is exposed to userspace (ctnetlink) */ + enum tcp_conntrack { + TCP_CONNTRACK_NONE, +@@ -34,8 +36,8 @@ enum tcp_conntrack { + #define IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED 0x10 + + struct nf_ct_tcp_flags { +- u_int8_t flags; +- u_int8_t mask; ++ __u8 flags; ++ __u8 mask; + }; + + #ifdef __KERNEL__ +Index: linux-2.6-tip/include/linux/netfilter/nfnetlink.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/nfnetlink.h ++++ linux-2.6-tip/include/linux/netfilter/nfnetlink.h +@@ -25,8 +25,8 @@ enum nfnetlink_groups { + /* General form of address family dependent message. 
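The atomic_dec_and_mutex_lock() helper added to mutex.h above only takes the mutex when the count may actually reach zero, and returns with the lock held in that case. A short usage sketch; struct widget, widget_list_lock and the field names are made up:

	/* Sketch: tear an object down only on the final reference drop. */
	static void put_widget(struct widget *w)
	{
		if (!atomic_dec_and_mutex_lock(&w->refcnt, &widget_list_lock))
			return;		/* not the last reference */

		/* the count hit zero and widget_list_lock is held */
		list_del(&w->node);
		mutex_unlock(&widget_list_lock);
		kfree(w);
	}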
+ */ + struct nfgenmsg { +- u_int8_t nfgen_family; /* AF_xxx */ +- u_int8_t version; /* nfnetlink version */ ++ __u8 nfgen_family; /* AF_xxx */ ++ __u8 version; /* nfnetlink version */ + __be16 res_id; /* resource id */ + }; + +Index: linux-2.6-tip/include/linux/netfilter/nfnetlink_compat.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/nfnetlink_compat.h ++++ linux-2.6-tip/include/linux/netfilter/nfnetlink_compat.h +@@ -1,5 +1,8 @@ + #ifndef _NFNETLINK_COMPAT_H + #define _NFNETLINK_COMPAT_H ++ ++#include ++ + #ifndef __KERNEL__ + /* Old nfnetlink macros for userspace */ + +@@ -20,8 +23,8 @@ + + struct nfattr + { +- u_int16_t nfa_len; +- u_int16_t nfa_type; /* we use 15 bits for the type, and the highest ++ __u16 nfa_len; ++ __u16 nfa_type; /* we use 15 bits for the type, and the highest + * bit to indicate whether the payload is nested */ + }; + +Index: linux-2.6-tip/include/linux/netfilter/nfnetlink_log.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/nfnetlink_log.h ++++ linux-2.6-tip/include/linux/netfilter/nfnetlink_log.h +@@ -17,14 +17,14 @@ enum nfulnl_msg_types { + + struct nfulnl_msg_packet_hdr { + __be16 hw_protocol; /* hw protocol (network order) */ +- u_int8_t hook; /* netfilter hook */ +- u_int8_t _pad; ++ __u8 hook; /* netfilter hook */ ++ __u8 _pad; + }; + + struct nfulnl_msg_packet_hw { + __be16 hw_addrlen; +- u_int16_t _pad; +- u_int8_t hw_addr[8]; ++ __u16 _pad; ++ __u8 hw_addr[8]; + }; + + struct nfulnl_msg_packet_timestamp { +@@ -35,12 +35,12 @@ struct nfulnl_msg_packet_timestamp { + enum nfulnl_attr_type { + NFULA_UNSPEC, + NFULA_PACKET_HDR, +- NFULA_MARK, /* u_int32_t nfmark */ ++ NFULA_MARK, /* __u32 nfmark */ + NFULA_TIMESTAMP, /* nfulnl_msg_packet_timestamp */ +- NFULA_IFINDEX_INDEV, /* u_int32_t ifindex */ +- NFULA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ +- NFULA_IFINDEX_PHYSINDEV, /* u_int32_t ifindex */ +- NFULA_IFINDEX_PHYSOUTDEV, /* u_int32_t ifindex */ ++ NFULA_IFINDEX_INDEV, /* __u32 ifindex */ ++ NFULA_IFINDEX_OUTDEV, /* __u32 ifindex */ ++ NFULA_IFINDEX_PHYSINDEV, /* __u32 ifindex */ ++ NFULA_IFINDEX_PHYSOUTDEV, /* __u32 ifindex */ + NFULA_HWADDR, /* nfulnl_msg_packet_hw */ + NFULA_PAYLOAD, /* opaque data payload */ + NFULA_PREFIX, /* string prefix */ +@@ -65,23 +65,23 @@ enum nfulnl_msg_config_cmds { + }; + + struct nfulnl_msg_config_cmd { +- u_int8_t command; /* nfulnl_msg_config_cmds */ ++ __u8 command; /* nfulnl_msg_config_cmds */ + } __attribute__ ((packed)); + + struct nfulnl_msg_config_mode { + __be32 copy_range; +- u_int8_t copy_mode; +- u_int8_t _pad; ++ __u8 copy_mode; ++ __u8 _pad; + } __attribute__ ((packed)); + + enum nfulnl_attr_config { + NFULA_CFG_UNSPEC, + NFULA_CFG_CMD, /* nfulnl_msg_config_cmd */ + NFULA_CFG_MODE, /* nfulnl_msg_config_mode */ +- NFULA_CFG_NLBUFSIZ, /* u_int32_t buffer size */ +- NFULA_CFG_TIMEOUT, /* u_int32_t in 1/100 s */ +- NFULA_CFG_QTHRESH, /* u_int32_t */ +- NFULA_CFG_FLAGS, /* u_int16_t */ ++ NFULA_CFG_NLBUFSIZ, /* __u32 buffer size */ ++ NFULA_CFG_TIMEOUT, /* __u32 in 1/100 s */ ++ NFULA_CFG_QTHRESH, /* __u32 */ ++ NFULA_CFG_FLAGS, /* __u16 */ + __NFULA_CFG_MAX + }; + #define NFULA_CFG_MAX (__NFULA_CFG_MAX -1) +Index: linux-2.6-tip/include/linux/netfilter/nfnetlink_queue.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/nfnetlink_queue.h ++++ linux-2.6-tip/include/linux/netfilter/nfnetlink_queue.h +@@ -15,13 
+15,13 @@ enum nfqnl_msg_types { + struct nfqnl_msg_packet_hdr { + __be32 packet_id; /* unique ID of packet in queue */ + __be16 hw_protocol; /* hw protocol (network order) */ +- u_int8_t hook; /* netfilter hook */ ++ __u8 hook; /* netfilter hook */ + } __attribute__ ((packed)); + + struct nfqnl_msg_packet_hw { + __be16 hw_addrlen; +- u_int16_t _pad; +- u_int8_t hw_addr[8]; ++ __u16 _pad; ++ __u8 hw_addr[8]; + }; + + struct nfqnl_msg_packet_timestamp { +@@ -33,12 +33,12 @@ enum nfqnl_attr_type { + NFQA_UNSPEC, + NFQA_PACKET_HDR, + NFQA_VERDICT_HDR, /* nfqnl_msg_verdict_hrd */ +- NFQA_MARK, /* u_int32_t nfmark */ ++ NFQA_MARK, /* __u32 nfmark */ + NFQA_TIMESTAMP, /* nfqnl_msg_packet_timestamp */ +- NFQA_IFINDEX_INDEV, /* u_int32_t ifindex */ +- NFQA_IFINDEX_OUTDEV, /* u_int32_t ifindex */ +- NFQA_IFINDEX_PHYSINDEV, /* u_int32_t ifindex */ +- NFQA_IFINDEX_PHYSOUTDEV, /* u_int32_t ifindex */ ++ NFQA_IFINDEX_INDEV, /* __u32 ifindex */ ++ NFQA_IFINDEX_OUTDEV, /* __u32 ifindex */ ++ NFQA_IFINDEX_PHYSINDEV, /* __u32 ifindex */ ++ NFQA_IFINDEX_PHYSOUTDEV, /* __u32 ifindex */ + NFQA_HWADDR, /* nfqnl_msg_packet_hw */ + NFQA_PAYLOAD, /* opaque data payload */ + +@@ -61,8 +61,8 @@ enum nfqnl_msg_config_cmds { + }; + + struct nfqnl_msg_config_cmd { +- u_int8_t command; /* nfqnl_msg_config_cmds */ +- u_int8_t _pad; ++ __u8 command; /* nfqnl_msg_config_cmds */ ++ __u8 _pad; + __be16 pf; /* AF_xxx for PF_[UN]BIND */ + }; + +@@ -74,7 +74,7 @@ enum nfqnl_config_mode { + + struct nfqnl_msg_config_params { + __be32 copy_range; +- u_int8_t copy_mode; /* enum nfqnl_config_mode */ ++ __u8 copy_mode; /* enum nfqnl_config_mode */ + } __attribute__ ((packed)); + + +@@ -82,7 +82,7 @@ enum nfqnl_attr_config { + NFQA_CFG_UNSPEC, + NFQA_CFG_CMD, /* nfqnl_msg_config_cmd */ + NFQA_CFG_PARAMS, /* nfqnl_msg_config_params */ +- NFQA_CFG_QUEUE_MAXLEN, /* u_int32_t */ ++ NFQA_CFG_QUEUE_MAXLEN, /* __u32 */ + __NFQA_CFG_MAX + }; + #define NFQA_CFG_MAX (__NFQA_CFG_MAX-1) +Index: linux-2.6-tip/include/linux/netfilter/x_tables.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/x_tables.h ++++ linux-2.6-tip/include/linux/netfilter/x_tables.h +@@ -1,6 +1,8 @@ + #ifndef _X_TABLES_H + #define _X_TABLES_H + ++#include ++ + #define XT_FUNCTION_MAXNAMELEN 30 + #define XT_TABLE_MAXNAMELEN 32 + +@@ -8,22 +10,22 @@ struct xt_entry_match + { + union { + struct { +- u_int16_t match_size; ++ __u16 match_size; + + /* Used by userspace */ + char name[XT_FUNCTION_MAXNAMELEN-1]; + +- u_int8_t revision; ++ __u8 revision; + } user; + struct { +- u_int16_t match_size; ++ __u16 match_size; + + /* Used inside the kernel */ + struct xt_match *match; + } kernel; + + /* Total length */ +- u_int16_t match_size; ++ __u16 match_size; + } u; + + unsigned char data[0]; +@@ -33,22 +35,22 @@ struct xt_entry_target + { + union { + struct { +- u_int16_t target_size; ++ __u16 target_size; + + /* Used by userspace */ + char name[XT_FUNCTION_MAXNAMELEN-1]; + +- u_int8_t revision; ++ __u8 revision; + } user; + struct { +- u_int16_t target_size; ++ __u16 target_size; + + /* Used inside the kernel */ + struct xt_target *target; + } kernel; + + /* Total length */ +- u_int16_t target_size; ++ __u16 target_size; + } u; + + unsigned char data[0]; +@@ -74,7 +76,7 @@ struct xt_get_revision + { + char name[XT_FUNCTION_MAXNAMELEN-1]; + +- u_int8_t revision; ++ __u8 revision; + }; + + /* CONTINUE verdict for targets */ +@@ -90,10 +92,10 @@ struct xt_get_revision + */ + struct _xt_align + { +- u_int8_t u8; +- 
u_int16_t u16; +- u_int32_t u32; +- u_int64_t u64; ++ __u8 u8; ++ __u16 u16; ++ __u32 u32; ++ __u64 u64; + }; + + #define XT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) \ +@@ -109,7 +111,7 @@ struct _xt_align + + struct xt_counters + { +- u_int64_t pcnt, bcnt; /* Packet and byte counters */ ++ __u64 pcnt, bcnt; /* Packet and byte counters */ + }; + + /* The argument to IPT_SO_ADD_COUNTERS. */ +Index: linux-2.6-tip/include/linux/netfilter/xt_CLASSIFY.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_CLASSIFY.h ++++ linux-2.6-tip/include/linux/netfilter/xt_CLASSIFY.h +@@ -1,8 +1,10 @@ + #ifndef _XT_CLASSIFY_H + #define _XT_CLASSIFY_H + ++#include ++ + struct xt_classify_target_info { +- u_int32_t priority; ++ __u32 priority; + }; + + #endif /*_XT_CLASSIFY_H */ +Index: linux-2.6-tip/include/linux/netfilter/xt_CONNMARK.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_CONNMARK.h ++++ linux-2.6-tip/include/linux/netfilter/xt_CONNMARK.h +@@ -1,6 +1,8 @@ + #ifndef _XT_CONNMARK_H_target + #define _XT_CONNMARK_H_target + ++#include ++ + /* Copyright (C) 2002,2004 MARA Systems AB + * by Henrik Nordstrom + * +@@ -19,12 +21,12 @@ enum { + struct xt_connmark_target_info { + unsigned long mark; + unsigned long mask; +- u_int8_t mode; ++ __u8 mode; + }; + + struct xt_connmark_tginfo1 { +- u_int32_t ctmark, ctmask, nfmask; +- u_int8_t mode; ++ __u32 ctmark, ctmask, nfmask; ++ __u8 mode; + }; + + #endif /*_XT_CONNMARK_H_target*/ +Index: linux-2.6-tip/include/linux/netfilter/xt_CONNSECMARK.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_CONNSECMARK.h ++++ linux-2.6-tip/include/linux/netfilter/xt_CONNSECMARK.h +@@ -1,13 +1,15 @@ + #ifndef _XT_CONNSECMARK_H_target + #define _XT_CONNSECMARK_H_target + ++#include ++ + enum { + CONNSECMARK_SAVE = 1, + CONNSECMARK_RESTORE, + }; + + struct xt_connsecmark_target_info { +- u_int8_t mode; ++ __u8 mode; + }; + + #endif /*_XT_CONNSECMARK_H_target */ +Index: linux-2.6-tip/include/linux/netfilter/xt_DSCP.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_DSCP.h ++++ linux-2.6-tip/include/linux/netfilter/xt_DSCP.h +@@ -11,15 +11,16 @@ + #ifndef _XT_DSCP_TARGET_H + #define _XT_DSCP_TARGET_H + #include ++#include + + /* target info */ + struct xt_DSCP_info { +- u_int8_t dscp; ++ __u8 dscp; + }; + + struct xt_tos_target_info { +- u_int8_t tos_value; +- u_int8_t tos_mask; ++ __u8 tos_value; ++ __u8 tos_mask; + }; + + #endif /* _XT_DSCP_TARGET_H */ +Index: linux-2.6-tip/include/linux/netfilter/xt_MARK.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_MARK.h ++++ linux-2.6-tip/include/linux/netfilter/xt_MARK.h +@@ -1,6 +1,8 @@ + #ifndef _XT_MARK_H_target + #define _XT_MARK_H_target + ++#include ++ + /* Version 0 */ + struct xt_mark_target_info { + unsigned long mark; +@@ -15,11 +17,11 @@ enum { + + struct xt_mark_target_info_v1 { + unsigned long mark; +- u_int8_t mode; ++ __u8 mode; + }; + + struct xt_mark_tginfo2 { +- u_int32_t mark, mask; ++ __u32 mark, mask; + }; + + #endif /*_XT_MARK_H_target */ +Index: linux-2.6-tip/include/linux/netfilter/xt_NFLOG.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_NFLOG.h ++++ 
linux-2.6-tip/include/linux/netfilter/xt_NFLOG.h +@@ -1,17 +1,19 @@ + #ifndef _XT_NFLOG_TARGET + #define _XT_NFLOG_TARGET + ++#include ++ + #define XT_NFLOG_DEFAULT_GROUP 0x1 + #define XT_NFLOG_DEFAULT_THRESHOLD 0 + + #define XT_NFLOG_MASK 0x0 + + struct xt_nflog_info { +- u_int32_t len; +- u_int16_t group; +- u_int16_t threshold; +- u_int16_t flags; +- u_int16_t pad; ++ __u32 len; ++ __u16 group; ++ __u16 threshold; ++ __u16 flags; ++ __u16 pad; + char prefix[64]; + }; + +Index: linux-2.6-tip/include/linux/netfilter/xt_NFQUEUE.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_NFQUEUE.h ++++ linux-2.6-tip/include/linux/netfilter/xt_NFQUEUE.h +@@ -8,9 +8,11 @@ + #ifndef _XT_NFQ_TARGET_H + #define _XT_NFQ_TARGET_H + ++#include ++ + /* target info */ + struct xt_NFQ_info { +- u_int16_t queuenum; ++ __u16 queuenum; + }; + + #endif /* _XT_NFQ_TARGET_H */ +Index: linux-2.6-tip/include/linux/netfilter/xt_RATEEST.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_RATEEST.h ++++ linux-2.6-tip/include/linux/netfilter/xt_RATEEST.h +@@ -1,10 +1,12 @@ + #ifndef _XT_RATEEST_TARGET_H + #define _XT_RATEEST_TARGET_H + ++#include ++ + struct xt_rateest_target_info { + char name[IFNAMSIZ]; +- int8_t interval; +- u_int8_t ewma_log; ++ __s8 interval; ++ __u8 ewma_log; + + /* Used internally by the kernel */ + struct xt_rateest *est __attribute__((aligned(8))); +Index: linux-2.6-tip/include/linux/netfilter/xt_SECMARK.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_SECMARK.h ++++ linux-2.6-tip/include/linux/netfilter/xt_SECMARK.h +@@ -1,6 +1,8 @@ + #ifndef _XT_SECMARK_H_target + #define _XT_SECMARK_H_target + ++#include ++ + /* + * This is intended for use by various security subsystems (but not + * at the same time). 
+@@ -12,12 +14,12 @@ + #define SECMARK_SELCTX_MAX 256 + + struct xt_secmark_target_selinux_info { +- u_int32_t selsid; ++ __u32 selsid; + char selctx[SECMARK_SELCTX_MAX]; + }; + + struct xt_secmark_target_info { +- u_int8_t mode; ++ __u8 mode; + union { + struct xt_secmark_target_selinux_info sel; + } u; +Index: linux-2.6-tip/include/linux/netfilter/xt_TCPMSS.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_TCPMSS.h ++++ linux-2.6-tip/include/linux/netfilter/xt_TCPMSS.h +@@ -1,8 +1,10 @@ + #ifndef _XT_TCPMSS_H + #define _XT_TCPMSS_H + ++#include ++ + struct xt_tcpmss_info { +- u_int16_t mss; ++ __u16 mss; + }; + + #define XT_TCPMSS_CLAMP_PMTU 0xffff +Index: linux-2.6-tip/include/linux/netfilter/xt_connbytes.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_connbytes.h ++++ linux-2.6-tip/include/linux/netfilter/xt_connbytes.h +@@ -1,6 +1,8 @@ + #ifndef _XT_CONNBYTES_H + #define _XT_CONNBYTES_H + ++#include ++ + enum xt_connbytes_what { + XT_CONNBYTES_PKTS, + XT_CONNBYTES_BYTES, +@@ -19,7 +21,7 @@ struct xt_connbytes_info + aligned_u64 from; /* count to be matched */ + aligned_u64 to; /* count to be matched */ + } count; +- u_int8_t what; /* ipt_connbytes_what */ +- u_int8_t direction; /* ipt_connbytes_direction */ ++ __u8 what; /* ipt_connbytes_what */ ++ __u8 direction; /* ipt_connbytes_direction */ + }; + #endif +Index: linux-2.6-tip/include/linux/netfilter/xt_connmark.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_connmark.h ++++ linux-2.6-tip/include/linux/netfilter/xt_connmark.h +@@ -1,6 +1,8 @@ + #ifndef _XT_CONNMARK_H + #define _XT_CONNMARK_H + ++#include ++ + /* Copyright (C) 2002,2004 MARA Systems AB + * by Henrik Nordstrom + * +@@ -12,12 +14,12 @@ + + struct xt_connmark_info { + unsigned long mark, mask; +- u_int8_t invert; ++ __u8 invert; + }; + + struct xt_connmark_mtinfo1 { +- u_int32_t mark, mask; +- u_int8_t invert; ++ __u32 mark, mask; ++ __u8 invert; + }; + + #endif /*_XT_CONNMARK_H*/ +Index: linux-2.6-tip/include/linux/netfilter/xt_conntrack.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_conntrack.h ++++ linux-2.6-tip/include/linux/netfilter/xt_conntrack.h +@@ -63,9 +63,9 @@ struct xt_conntrack_info + unsigned long expires_min, expires_max; + + /* Flags word */ +- u_int8_t flags; ++ __u8 flags; + /* Inverse flags */ +- u_int8_t invflags; ++ __u8 invflags; + }; + + struct xt_conntrack_mtinfo1 { +@@ -73,12 +73,12 @@ struct xt_conntrack_mtinfo1 { + union nf_inet_addr origdst_addr, origdst_mask; + union nf_inet_addr replsrc_addr, replsrc_mask; + union nf_inet_addr repldst_addr, repldst_mask; +- u_int32_t expires_min, expires_max; +- u_int16_t l4proto; ++ __u32 expires_min, expires_max; ++ __u16 l4proto; + __be16 origsrc_port, origdst_port; + __be16 replsrc_port, repldst_port; +- u_int16_t match_flags, invert_flags; +- u_int8_t state_mask, status_mask; ++ __u16 match_flags, invert_flags; ++ __u8 state_mask, status_mask; + }; + + #endif /*_XT_CONNTRACK_H*/ +Index: linux-2.6-tip/include/linux/netfilter/xt_dccp.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_dccp.h ++++ linux-2.6-tip/include/linux/netfilter/xt_dccp.h +@@ -1,6 +1,8 @@ + #ifndef _XT_DCCP_H_ + #define _XT_DCCP_H_ + ++#include ++ + #define 
XT_DCCP_SRC_PORTS 0x01 + #define XT_DCCP_DEST_PORTS 0x02 + #define XT_DCCP_TYPE 0x04 +@@ -9,14 +11,14 @@ + #define XT_DCCP_VALID_FLAGS 0x0f + + struct xt_dccp_info { +- u_int16_t dpts[2]; /* Min, Max */ +- u_int16_t spts[2]; /* Min, Max */ ++ __u16 dpts[2]; /* Min, Max */ ++ __u16 spts[2]; /* Min, Max */ + +- u_int16_t flags; +- u_int16_t invflags; ++ __u16 flags; ++ __u16 invflags; + +- u_int16_t typemask; +- u_int8_t option; ++ __u16 typemask; ++ __u8 option; + }; + + #endif /* _XT_DCCP_H_ */ +Index: linux-2.6-tip/include/linux/netfilter/xt_dscp.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_dscp.h ++++ linux-2.6-tip/include/linux/netfilter/xt_dscp.h +@@ -10,20 +10,22 @@ + #ifndef _XT_DSCP_H + #define _XT_DSCP_H + ++#include ++ + #define XT_DSCP_MASK 0xfc /* 11111100 */ + #define XT_DSCP_SHIFT 2 + #define XT_DSCP_MAX 0x3f /* 00111111 */ + + /* match info */ + struct xt_dscp_info { +- u_int8_t dscp; +- u_int8_t invert; ++ __u8 dscp; ++ __u8 invert; + }; + + struct xt_tos_match_info { +- u_int8_t tos_mask; +- u_int8_t tos_value; +- u_int8_t invert; ++ __u8 tos_mask; ++ __u8 tos_value; ++ __u8 invert; + }; + + #endif /* _XT_DSCP_H */ +Index: linux-2.6-tip/include/linux/netfilter/xt_esp.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_esp.h ++++ linux-2.6-tip/include/linux/netfilter/xt_esp.h +@@ -1,10 +1,12 @@ + #ifndef _XT_ESP_H + #define _XT_ESP_H + ++#include ++ + struct xt_esp + { +- u_int32_t spis[2]; /* Security Parameter Index */ +- u_int8_t invflags; /* Inverse flags */ ++ __u32 spis[2]; /* Security Parameter Index */ ++ __u8 invflags; /* Inverse flags */ + }; + + /* Values for "invflags" field in struct xt_esp. */ +Index: linux-2.6-tip/include/linux/netfilter/xt_hashlimit.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_hashlimit.h ++++ linux-2.6-tip/include/linux/netfilter/xt_hashlimit.h +@@ -1,6 +1,8 @@ + #ifndef _XT_HASHLIMIT_H + #define _XT_HASHLIMIT_H + ++#include ++ + /* timings are in milliseconds. */ + #define XT_HASHLIMIT_SCALE 10000 + /* 1/10,000 sec period => max of 10,000/sec. Min rate is then 429490 +@@ -18,15 +20,15 @@ enum { + }; + + struct hashlimit_cfg { +- u_int32_t mode; /* bitmask of XT_HASHLIMIT_HASH_* */ +- u_int32_t avg; /* Average secs between packets * scale */ +- u_int32_t burst; /* Period multiplier for upper limit. */ ++ __u32 mode; /* bitmask of XT_HASHLIMIT_HASH_* */ ++ __u32 avg; /* Average secs between packets * scale */ ++ __u32 burst; /* Period multiplier for upper limit. */ + + /* user specified */ +- u_int32_t size; /* how many buckets */ +- u_int32_t max; /* max number of entries */ +- u_int32_t gc_interval; /* gc interval */ +- u_int32_t expire; /* when do entries expire? */ ++ __u32 size; /* how many buckets */ ++ __u32 max; /* max number of entries */ ++ __u32 gc_interval; /* gc interval */ ++ __u32 expire; /* when do entries expire? */ + }; + + struct xt_hashlimit_info { +@@ -42,17 +44,17 @@ struct xt_hashlimit_info { + }; + + struct hashlimit_cfg1 { +- u_int32_t mode; /* bitmask of XT_HASHLIMIT_HASH_* */ +- u_int32_t avg; /* Average secs between packets * scale */ +- u_int32_t burst; /* Period multiplier for upper limit. */ ++ __u32 mode; /* bitmask of XT_HASHLIMIT_HASH_* */ ++ __u32 avg; /* Average secs between packets * scale */ ++ __u32 burst; /* Period multiplier for upper limit. 
*/ + + /* user specified */ +- u_int32_t size; /* how many buckets */ +- u_int32_t max; /* max number of entries */ +- u_int32_t gc_interval; /* gc interval */ +- u_int32_t expire; /* when do entries expire? */ ++ __u32 size; /* how many buckets */ ++ __u32 max; /* max number of entries */ ++ __u32 gc_interval; /* gc interval */ ++ __u32 expire; /* when do entries expire? */ + +- u_int8_t srcmask, dstmask; ++ __u8 srcmask, dstmask; + }; + + struct xt_hashlimit_mtinfo1 { +Index: linux-2.6-tip/include/linux/netfilter/xt_iprange.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_iprange.h ++++ linux-2.6-tip/include/linux/netfilter/xt_iprange.h +@@ -1,6 +1,8 @@ + #ifndef _LINUX_NETFILTER_XT_IPRANGE_H + #define _LINUX_NETFILTER_XT_IPRANGE_H 1 + ++#include ++ + enum { + IPRANGE_SRC = 1 << 0, /* match source IP address */ + IPRANGE_DST = 1 << 1, /* match destination IP address */ +@@ -11,7 +13,7 @@ enum { + struct xt_iprange_mtinfo { + union nf_inet_addr src_min, src_max; + union nf_inet_addr dst_min, dst_max; +- u_int8_t flags; ++ __u8 flags; + }; + + #endif /* _LINUX_NETFILTER_XT_IPRANGE_H */ +Index: linux-2.6-tip/include/linux/netfilter/xt_length.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_length.h ++++ linux-2.6-tip/include/linux/netfilter/xt_length.h +@@ -1,9 +1,11 @@ + #ifndef _XT_LENGTH_H + #define _XT_LENGTH_H + ++#include ++ + struct xt_length_info { +- u_int16_t min, max; +- u_int8_t invert; ++ __u16 min, max; ++ __u8 invert; + }; + + #endif /*_XT_LENGTH_H*/ +Index: linux-2.6-tip/include/linux/netfilter/xt_limit.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_limit.h ++++ linux-2.6-tip/include/linux/netfilter/xt_limit.h +@@ -1,19 +1,21 @@ + #ifndef _XT_RATE_H + #define _XT_RATE_H + ++#include ++ + /* timings are in milliseconds. */ + #define XT_LIMIT_SCALE 10000 + + /* 1/10,000 sec period => max of 10,000/sec. Min rate is then 429490 + seconds, or one every 59 hours. */ + struct xt_rateinfo { +- u_int32_t avg; /* Average secs between packets * scale */ +- u_int32_t burst; /* Period multiplier for upper limit. */ ++ __u32 avg; /* Average secs between packets * scale */ ++ __u32 burst; /* Period multiplier for upper limit. */ + + /* Used internally by the kernel */ + unsigned long prev; +- u_int32_t credit; +- u_int32_t credit_cap, cost; ++ __u32 credit; ++ __u32 credit_cap, cost; + + /* Ugly, ugly fucker. 
*/ + struct xt_rateinfo *master; +Index: linux-2.6-tip/include/linux/netfilter/xt_mark.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_mark.h ++++ linux-2.6-tip/include/linux/netfilter/xt_mark.h +@@ -1,14 +1,16 @@ + #ifndef _XT_MARK_H + #define _XT_MARK_H + ++#include ++ + struct xt_mark_info { + unsigned long mark, mask; +- u_int8_t invert; ++ __u8 invert; + }; + + struct xt_mark_mtinfo1 { +- u_int32_t mark, mask; +- u_int8_t invert; ++ __u32 mark, mask; ++ __u8 invert; + }; + + #endif /*_XT_MARK_H*/ +Index: linux-2.6-tip/include/linux/netfilter/xt_multiport.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_multiport.h ++++ linux-2.6-tip/include/linux/netfilter/xt_multiport.h +@@ -1,6 +1,8 @@ + #ifndef _XT_MULTIPORT_H + #define _XT_MULTIPORT_H + ++#include ++ + enum xt_multiport_flags + { + XT_MULTIPORT_SOURCE, +@@ -13,18 +15,18 @@ enum xt_multiport_flags + /* Must fit inside union xt_matchinfo: 16 bytes */ + struct xt_multiport + { +- u_int8_t flags; /* Type of comparison */ +- u_int8_t count; /* Number of ports */ +- u_int16_t ports[XT_MULTI_PORTS]; /* Ports */ ++ __u8 flags; /* Type of comparison */ ++ __u8 count; /* Number of ports */ ++ __u16 ports[XT_MULTI_PORTS]; /* Ports */ + }; + + struct xt_multiport_v1 + { +- u_int8_t flags; /* Type of comparison */ +- u_int8_t count; /* Number of ports */ +- u_int16_t ports[XT_MULTI_PORTS]; /* Ports */ +- u_int8_t pflags[XT_MULTI_PORTS]; /* Port flags */ +- u_int8_t invert; /* Invert flag */ ++ __u8 flags; /* Type of comparison */ ++ __u8 count; /* Number of ports */ ++ __u16 ports[XT_MULTI_PORTS]; /* Ports */ ++ __u8 pflags[XT_MULTI_PORTS]; /* Port flags */ ++ __u8 invert; /* Invert flag */ + }; + + #endif /*_XT_MULTIPORT_H*/ +Index: linux-2.6-tip/include/linux/netfilter/xt_owner.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_owner.h ++++ linux-2.6-tip/include/linux/netfilter/xt_owner.h +@@ -1,6 +1,8 @@ + #ifndef _XT_OWNER_MATCH_H + #define _XT_OWNER_MATCH_H + ++#include ++ + enum { + XT_OWNER_UID = 1 << 0, + XT_OWNER_GID = 1 << 1, +@@ -8,9 +10,9 @@ enum { + }; + + struct xt_owner_match_info { +- u_int32_t uid_min, uid_max; +- u_int32_t gid_min, gid_max; +- u_int8_t match, invert; ++ __u32 uid_min, uid_max; ++ __u32 gid_min, gid_max; ++ __u8 match, invert; + }; + + #endif /* _XT_OWNER_MATCH_H */ +Index: linux-2.6-tip/include/linux/netfilter/xt_physdev.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_physdev.h ++++ linux-2.6-tip/include/linux/netfilter/xt_physdev.h +@@ -1,6 +1,8 @@ + #ifndef _XT_PHYSDEV_H + #define _XT_PHYSDEV_H + ++#include ++ + #ifdef __KERNEL__ + #include + #endif +@@ -17,8 +19,8 @@ struct xt_physdev_info { + char in_mask[IFNAMSIZ]; + char physoutdev[IFNAMSIZ]; + char out_mask[IFNAMSIZ]; +- u_int8_t invert; +- u_int8_t bitmask; ++ __u8 invert; ++ __u8 bitmask; + }; + + #endif /*_XT_PHYSDEV_H*/ +Index: linux-2.6-tip/include/linux/netfilter/xt_policy.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_policy.h ++++ linux-2.6-tip/include/linux/netfilter/xt_policy.h +@@ -1,6 +1,8 @@ + #ifndef _XT_POLICY_H + #define _XT_POLICY_H + ++#include ++ + #define XT_POLICY_MAX_ELEM 4 + + enum xt_policy_flags +@@ -19,7 +21,7 @@ enum xt_policy_modes + + struct xt_policy_spec + { +- 
u_int8_t saddr:1, ++ __u8 saddr:1, + daddr:1, + proto:1, + mode:1, +@@ -55,9 +57,9 @@ struct xt_policy_elem + #endif + }; + __be32 spi; +- u_int32_t reqid; +- u_int8_t proto; +- u_int8_t mode; ++ __u32 reqid; ++ __u8 proto; ++ __u8 mode; + + struct xt_policy_spec match; + struct xt_policy_spec invert; +@@ -66,8 +68,8 @@ struct xt_policy_elem + struct xt_policy_info + { + struct xt_policy_elem pol[XT_POLICY_MAX_ELEM]; +- u_int16_t flags; +- u_int16_t len; ++ __u16 flags; ++ __u16 len; + }; + + #endif /* _XT_POLICY_H */ +Index: linux-2.6-tip/include/linux/netfilter/xt_rateest.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_rateest.h ++++ linux-2.6-tip/include/linux/netfilter/xt_rateest.h +@@ -1,6 +1,8 @@ + #ifndef _XT_RATEEST_MATCH_H + #define _XT_RATEEST_MATCH_H + ++#include ++ + enum xt_rateest_match_flags { + XT_RATEEST_MATCH_INVERT = 1<<0, + XT_RATEEST_MATCH_ABS = 1<<1, +@@ -20,12 +22,12 @@ enum xt_rateest_match_mode { + struct xt_rateest_match_info { + char name1[IFNAMSIZ]; + char name2[IFNAMSIZ]; +- u_int16_t flags; +- u_int16_t mode; +- u_int32_t bps1; +- u_int32_t pps1; +- u_int32_t bps2; +- u_int32_t pps2; ++ __u16 flags; ++ __u16 mode; ++ __u32 bps1; ++ __u32 pps1; ++ __u32 bps2; ++ __u32 pps2; + + /* Used internally by the kernel */ + struct xt_rateest *est1 __attribute__((aligned(8))); +Index: linux-2.6-tip/include/linux/netfilter/xt_realm.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_realm.h ++++ linux-2.6-tip/include/linux/netfilter/xt_realm.h +@@ -1,10 +1,12 @@ + #ifndef _XT_REALM_H + #define _XT_REALM_H + ++#include ++ + struct xt_realm_info { +- u_int32_t id; +- u_int32_t mask; +- u_int8_t invert; ++ __u32 id; ++ __u32 mask; ++ __u8 invert; + }; + + #endif /* _XT_REALM_H */ +Index: linux-2.6-tip/include/linux/netfilter/xt_recent.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_recent.h ++++ linux-2.6-tip/include/linux/netfilter/xt_recent.h +@@ -1,6 +1,8 @@ + #ifndef _LINUX_NETFILTER_XT_RECENT_H + #define _LINUX_NETFILTER_XT_RECENT_H 1 + ++#include ++ + enum { + XT_RECENT_CHECK = 1 << 0, + XT_RECENT_SET = 1 << 1, +@@ -15,12 +17,12 @@ enum { + }; + + struct xt_recent_mtinfo { +- u_int32_t seconds; +- u_int32_t hit_count; +- u_int8_t check_set; +- u_int8_t invert; ++ __u32 seconds; ++ __u32 hit_count; ++ __u8 check_set; ++ __u8 invert; + char name[XT_RECENT_NAME_LEN]; +- u_int8_t side; ++ __u8 side; + }; + + #endif /* _LINUX_NETFILTER_XT_RECENT_H */ +Index: linux-2.6-tip/include/linux/netfilter/xt_sctp.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_sctp.h ++++ linux-2.6-tip/include/linux/netfilter/xt_sctp.h +@@ -1,6 +1,8 @@ + #ifndef _XT_SCTP_H_ + #define _XT_SCTP_H_ + ++#include ++ + #define XT_SCTP_SRC_PORTS 0x01 + #define XT_SCTP_DEST_PORTS 0x02 + #define XT_SCTP_CHUNK_TYPES 0x04 +@@ -8,49 +10,49 @@ + #define XT_SCTP_VALID_FLAGS 0x07 + + struct xt_sctp_flag_info { +- u_int8_t chunktype; +- u_int8_t flag; +- u_int8_t flag_mask; ++ __u8 chunktype; ++ __u8 flag; ++ __u8 flag_mask; + }; + + #define XT_NUM_SCTP_FLAGS 4 + + struct xt_sctp_info { +- u_int16_t dpts[2]; /* Min, Max */ +- u_int16_t spts[2]; /* Min, Max */ ++ __u16 dpts[2]; /* Min, Max */ ++ __u16 spts[2]; /* Min, Max */ + +- u_int32_t chunkmap[256 / sizeof (u_int32_t)]; /* Bit mask of chunks to be matched according to RFC 
2960 */ ++ __u32 chunkmap[256 / sizeof (__u32)]; /* Bit mask of chunks to be matched according to RFC 2960 */ + + #define SCTP_CHUNK_MATCH_ANY 0x01 /* Match if any of the chunk types are present */ + #define SCTP_CHUNK_MATCH_ALL 0x02 /* Match if all of the chunk types are present */ + #define SCTP_CHUNK_MATCH_ONLY 0x04 /* Match if these are the only chunk types present */ + +- u_int32_t chunk_match_type; ++ __u32 chunk_match_type; + struct xt_sctp_flag_info flag_info[XT_NUM_SCTP_FLAGS]; + int flag_count; + +- u_int32_t flags; +- u_int32_t invflags; ++ __u32 flags; ++ __u32 invflags; + }; + + #define bytes(type) (sizeof(type) * 8) + + #define SCTP_CHUNKMAP_SET(chunkmap, type) \ + do { \ +- (chunkmap)[type / bytes(u_int32_t)] |= \ +- 1 << (type % bytes(u_int32_t)); \ ++ (chunkmap)[type / bytes(__u32)] |= \ ++ 1 << (type % bytes(__u32)); \ + } while (0) + + #define SCTP_CHUNKMAP_CLEAR(chunkmap, type) \ + do { \ +- (chunkmap)[type / bytes(u_int32_t)] &= \ +- ~(1 << (type % bytes(u_int32_t))); \ ++ (chunkmap)[type / bytes(__u32)] &= \ ++ ~(1 << (type % bytes(__u32))); \ + } while (0) + + #define SCTP_CHUNKMAP_IS_SET(chunkmap, type) \ + ({ \ +- ((chunkmap)[type / bytes (u_int32_t)] & \ +- (1 << (type % bytes (u_int32_t)))) ? 1: 0; \ ++ ((chunkmap)[type / bytes (__u32)] & \ ++ (1 << (type % bytes (__u32)))) ? 1: 0; \ + }) + + #define SCTP_CHUNKMAP_RESET(chunkmap) \ +@@ -65,7 +67,7 @@ struct xt_sctp_info { + #define SCTP_CHUNKMAP_IS_CLEAR(chunkmap) \ + __sctp_chunkmap_is_clear((chunkmap), ARRAY_SIZE(chunkmap)) + static inline bool +-__sctp_chunkmap_is_clear(const u_int32_t *chunkmap, unsigned int n) ++__sctp_chunkmap_is_clear(const __u32 *chunkmap, unsigned int n) + { + unsigned int i; + for (i = 0; i < n; ++i) +@@ -77,7 +79,7 @@ __sctp_chunkmap_is_clear(const u_int32_t + #define SCTP_CHUNKMAP_IS_ALL_SET(chunkmap) \ + __sctp_chunkmap_is_all_set((chunkmap), ARRAY_SIZE(chunkmap)) + static inline bool +-__sctp_chunkmap_is_all_set(const u_int32_t *chunkmap, unsigned int n) ++__sctp_chunkmap_is_all_set(const __u32 *chunkmap, unsigned int n) + { + unsigned int i; + for (i = 0; i < n; ++i) +Index: linux-2.6-tip/include/linux/netfilter/xt_statistic.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_statistic.h ++++ linux-2.6-tip/include/linux/netfilter/xt_statistic.h +@@ -1,6 +1,8 @@ + #ifndef _XT_STATISTIC_H + #define _XT_STATISTIC_H + ++#include ++ + enum xt_statistic_mode { + XT_STATISTIC_MODE_RANDOM, + XT_STATISTIC_MODE_NTH, +@@ -14,17 +16,17 @@ enum xt_statistic_flags { + #define XT_STATISTIC_MASK 0x1 + + struct xt_statistic_info { +- u_int16_t mode; +- u_int16_t flags; ++ __u16 mode; ++ __u16 flags; + union { + struct { +- u_int32_t probability; ++ __u32 probability; + } random; + struct { +- u_int32_t every; +- u_int32_t packet; ++ __u32 every; ++ __u32 packet; + /* Used internally by the kernel */ +- u_int32_t count; ++ __u32 count; + } nth; + } u; + struct xt_statistic_info *master __attribute__((aligned(8))); +Index: linux-2.6-tip/include/linux/netfilter/xt_string.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_string.h ++++ linux-2.6-tip/include/linux/netfilter/xt_string.h +@@ -1,6 +1,8 @@ + #ifndef _XT_STRING_H + #define _XT_STRING_H + ++#include ++ + #define XT_STRING_MAX_PATTERN_SIZE 128 + #define XT_STRING_MAX_ALGO_NAME_SIZE 16 + +@@ -11,18 +13,18 @@ enum { + + struct xt_string_info + { +- u_int16_t from_offset; +- u_int16_t to_offset; ++ __u16 
from_offset; ++ __u16 to_offset; + char algo[XT_STRING_MAX_ALGO_NAME_SIZE]; + char pattern[XT_STRING_MAX_PATTERN_SIZE]; +- u_int8_t patlen; ++ __u8 patlen; + union { + struct { +- u_int8_t invert; ++ __u8 invert; + } v0; + + struct { +- u_int8_t flags; ++ __u8 flags; + } v1; + } u; + +Index: linux-2.6-tip/include/linux/netfilter/xt_tcpmss.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_tcpmss.h ++++ linux-2.6-tip/include/linux/netfilter/xt_tcpmss.h +@@ -1,9 +1,11 @@ + #ifndef _XT_TCPMSS_MATCH_H + #define _XT_TCPMSS_MATCH_H + ++#include ++ + struct xt_tcpmss_match_info { +- u_int16_t mss_min, mss_max; +- u_int8_t invert; ++ __u16 mss_min, mss_max; ++ __u8 invert; + }; + + #endif /*_XT_TCPMSS_MATCH_H*/ +Index: linux-2.6-tip/include/linux/netfilter/xt_tcpudp.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter/xt_tcpudp.h ++++ linux-2.6-tip/include/linux/netfilter/xt_tcpudp.h +@@ -1,15 +1,17 @@ + #ifndef _XT_TCPUDP_H + #define _XT_TCPUDP_H + ++#include ++ + /* TCP matching stuff */ + struct xt_tcp + { +- u_int16_t spts[2]; /* Source port range. */ +- u_int16_t dpts[2]; /* Destination port range. */ +- u_int8_t option; /* TCP Option iff non-zero*/ +- u_int8_t flg_mask; /* TCP flags mask byte */ +- u_int8_t flg_cmp; /* TCP flags compare byte */ +- u_int8_t invflags; /* Inverse flags */ ++ __u16 spts[2]; /* Source port range. */ ++ __u16 dpts[2]; /* Destination port range. */ ++ __u8 option; /* TCP Option iff non-zero*/ ++ __u8 flg_mask; /* TCP flags mask byte */ ++ __u8 flg_cmp; /* TCP flags compare byte */ ++ __u8 invflags; /* Inverse flags */ + }; + + /* Values for "inv" field in struct ipt_tcp. */ +@@ -22,9 +24,9 @@ struct xt_tcp + /* UDP matching stuff */ + struct xt_udp + { +- u_int16_t spts[2]; /* Source port range. */ +- u_int16_t dpts[2]; /* Destination port range. */ +- u_int8_t invflags; /* Inverse flags */ ++ __u16 spts[2]; /* Source port range. */ ++ __u16 dpts[2]; /* Destination port range. */ ++ __u8 invflags; /* Inverse flags */ + }; + + /* Values for "invflags" field in struct ipt_udp. 
*/ +Index: linux-2.6-tip/include/linux/netfilter_ipv4/ipt_owner.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter_ipv4/ipt_owner.h ++++ linux-2.6-tip/include/linux/netfilter_ipv4/ipt_owner.h +@@ -9,10 +9,10 @@ + #define IPT_OWNER_COMM 0x10 + + struct ipt_owner_info { +- uid_t uid; +- gid_t gid; +- pid_t pid; +- pid_t sid; ++ __kernel_uid32_t uid; ++ __kernel_gid32_t gid; ++ __kernel_pid_t pid; ++ __kernel_pid_t sid; + char comm[16]; + u_int8_t match, invert; /* flags */ + }; +Index: linux-2.6-tip/include/linux/netfilter_ipv6/ip6t_owner.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netfilter_ipv6/ip6t_owner.h ++++ linux-2.6-tip/include/linux/netfilter_ipv6/ip6t_owner.h +@@ -8,10 +8,10 @@ + #define IP6T_OWNER_SID 0x08 + + struct ip6t_owner_info { +- uid_t uid; +- gid_t gid; +- pid_t pid; +- pid_t sid; ++ __kernel_uid32_t uid; ++ __kernel_gid32_t gid; ++ __kernel_pid_t pid; ++ __kernel_pid_t sid; + u_int8_t match, invert; /* flags */ + }; + +Index: linux-2.6-tip/include/linux/nubus.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/nubus.h ++++ linux-2.6-tip/include/linux/nubus.h +@@ -237,6 +237,7 @@ struct nubus_dirent + int mask; + }; + ++#ifdef __KERNEL__ + struct nubus_board { + struct nubus_board* next; + struct nubus_dev* first_dev; +@@ -351,6 +352,7 @@ void nubus_get_rsrc_mem(void* dest, + void nubus_get_rsrc_str(void* dest, + const struct nubus_dirent *dirent, + int maxlen); ++#endif /* __KERNEL__ */ + + /* We'd like to get rid of this eventually. Only daynaport.c uses it now. */ + static inline void *nubus_slot_addr(int slot) +Index: linux-2.6-tip/include/linux/pci.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/pci.h ++++ linux-2.6-tip/include/linux/pci.h +@@ -923,7 +923,10 @@ static inline struct pci_dev *pci_get_cl + return NULL; + } + +-#define pci_dev_present(ids) (0) ++static inline int pci_dev_present(const struct pci_device_id *ids) ++{ ++ return 0; ++} + #define no_pci_devices() (1) + #define pci_dev_put(dev) do { } while (0) + +Index: linux-2.6-tip/include/linux/percpu.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/percpu.h ++++ linux-2.6-tip/include/linux/percpu.h +@@ -5,53 +5,76 @@ + #include /* For kmalloc() */ + #include + #include ++#include + + #include + ++#ifndef PER_CPU_BASE_SECTION ++#ifdef CONFIG_SMP ++#define PER_CPU_BASE_SECTION ".data.percpu" ++#else ++#define PER_CPU_BASE_SECTION ".data" ++#endif ++#endif ++ + #ifdef CONFIG_SMP +-#define DEFINE_PER_CPU(type, name) \ +- __attribute__((__section__(".data.percpu"))) \ +- PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name + + #ifdef MODULE +-#define SHARED_ALIGNED_SECTION ".data.percpu" ++#define PER_CPU_SHARED_ALIGNED_SECTION "" + #else +-#define SHARED_ALIGNED_SECTION ".data.percpu.shared_aligned" ++#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned" + #endif ++#define PER_CPU_FIRST_SECTION ".first" + +-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ +- __attribute__((__section__(SHARED_ALIGNED_SECTION))) \ +- PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ +- ____cacheline_aligned_in_smp ++#else ++ ++#define PER_CPU_SHARED_ALIGNED_SECTION "" ++#define PER_CPU_FIRST_SECTION "" ++ ++#endif + +-#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ +- 
__attribute__((__section__(".data.percpu.page_aligned"))) \ ++#define DEFINE_PER_CPU_SECTION(type, name, section) \ ++ __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ + PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name +-#else ++ ++#define DEFINE_PER_CPU_SPINLOCK(name, section) \ ++ __attribute__((__section__(PER_CPU_BASE_SECTION section))) \ ++ PER_CPU_ATTRIBUTES __DEFINE_SPINLOCK(per_cpu__lock_##name##_locked); ++ + #define DEFINE_PER_CPU(type, name) \ +- PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name ++ DEFINE_PER_CPU_SECTION(type, name, "") + +-#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ +- DEFINE_PER_CPU(type, name) ++#define DEFINE_PER_CPU_LOCKED(type, name) \ ++ DEFINE_PER_CPU_SPINLOCK(name, "") \ ++ DEFINE_PER_CPU_SECTION(type, name##_locked, "") + +-#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ +- DEFINE_PER_CPU(type, name) +-#endif ++#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ ++ DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \ ++ ____cacheline_aligned_in_smp ++ ++#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ ++ DEFINE_PER_CPU_SECTION(type, name, ".page_aligned") ++ ++#define DEFINE_PER_CPU_FIRST(type, name) \ ++ DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION) + + #define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) ++#define EXPORT_PER_CPU_LOCKED_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var##_locked) + #define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) ++#define EXPORT_PER_CPU_LOCKED_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var##_locked) + +-/* Enough to cover all DEFINE_PER_CPUs in kernel, including modules. */ +-#ifndef PERCPU_ENOUGH_ROOM ++/* enough to cover all DEFINE_PER_CPUs in modules */ + #ifdef CONFIG_MODULES +-#define PERCPU_MODULE_RESERVE 8192 ++#define PERCPU_MODULE_RESERVE (8 << 10) + #else +-#define PERCPU_MODULE_RESERVE 0 ++#define PERCPU_MODULE_RESERVE 0 + #endif + ++#ifndef PERCPU_ENOUGH_ROOM + #define PERCPU_ENOUGH_ROOM \ +- (__per_cpu_end - __per_cpu_start + PERCPU_MODULE_RESERVE) +-#endif /* PERCPU_ENOUGH_ROOM */ ++ (ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES) + \ ++ PERCPU_MODULE_RESERVE) ++#endif + + /* + * Must be an lvalue. Since @var must be a simple identifier, +@@ -63,54 +86,141 @@ + &__get_cpu_var(var); })) + #define put_cpu_var(var) preempt_enable() + ++/* ++ * Per-CPU data structures with an additional lock - useful for ++ * PREEMPT_RT code that wants to reschedule but also wants ++ * per-CPU data structures. ++ * ++ * 'cpu' gets updated with the CPU the task is currently executing on. ++ * ++ * NOTE: on normal !PREEMPT_RT kernels these per-CPU variables ++ * are the same as the normal per-CPU variables, so there no ++ * runtime overhead. 
++ */ ++#ifdef CONFIG_PREEMPT_RT ++#define get_cpu_var_locked(var, cpuptr) \ ++(*({ \ ++ spinlock_t *__lock; \ ++ int __cpu; \ ++ \ ++again: \ ++ __cpu = raw_smp_processor_id(); \ ++ __lock = &__get_cpu_lock(var, __cpu); \ ++ spin_lock(__lock); \ ++ if (!cpu_online(__cpu)) { \ ++ spin_unlock(__lock); \ ++ goto again; \ ++ } \ ++ *(cpuptr) = __cpu; \ ++ &__get_cpu_var_locked(var, __cpu); \ ++})) ++#else ++#define get_cpu_var_locked(var, cpuptr) \ ++(*({ \ ++ int __cpu; \ ++ \ ++ preempt_disable(); \ ++ __cpu = smp_processor_id(); \ ++ spin_lock(&__get_cpu_lock(var, __cpu)); \ ++ preempt_enable(); \ ++ *(cpuptr) = __cpu; \ ++ &__get_cpu_var_locked(var, __cpu); \ ++})) ++#endif ++ ++#define put_cpu_var_locked(var, cpu) \ ++ do { (void)cpu; spin_unlock(&__get_cpu_lock(var, cpu)); } while (0) ++ + #ifdef CONFIG_SMP + ++#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA ++ ++/* minimum unit size, also is the maximum supported allocation size */ ++#define PCPU_MIN_UNIT_SIZE PFN_ALIGN(64 << 10) ++ ++/* ++ * PERCPU_DYNAMIC_RESERVE indicates the amount of free area to piggy ++ * back on the first chunk for dynamic percpu allocation if arch is ++ * manually allocating and mapping it for faster access (as a part of ++ * large page mapping for example). ++ * ++ * The following values give between one and two pages of free space ++ * after typical minimal boot (2-way SMP, single disk and NIC) with ++ * both defconfig and a distro config on x86_64 and 32. More ++ * intelligent way to determine this would be nice. ++ */ ++#if BITS_PER_LONG > 32 ++#define PERCPU_DYNAMIC_RESERVE (20 << 10) ++#else ++#define PERCPU_DYNAMIC_RESERVE (12 << 10) ++#endif ++ ++extern void *pcpu_base_addr; ++ ++typedef struct page * (*pcpu_get_page_fn_t)(unsigned int cpu, int pageno); ++typedef void (*pcpu_populate_pte_fn_t)(unsigned long addr); ++ ++extern size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, ++ size_t static_size, size_t reserved_size, ++ ssize_t dyn_size, ssize_t unit_size, ++ void *base_addr, ++ pcpu_populate_pte_fn_t populate_pte_fn); ++ ++extern ssize_t __init pcpu_embed_first_chunk( ++ size_t static_size, size_t reserved_size, ++ ssize_t dyn_size, ssize_t unit_size); ++ ++/* ++ * Use this to get to a cpu's version of the per-cpu object ++ * dynamically allocated. Non-atomic access to the current CPU's ++ * version should probably be combined with get_cpu()/put_cpu(). ++ */ ++#define per_cpu_ptr(ptr, cpu) SHIFT_PERCPU_PTR((ptr), per_cpu_offset((cpu))) ++ ++extern void *__alloc_reserved_percpu(size_t size, size_t align); ++ ++#else /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ ++ + struct percpu_data { + void *ptrs[1]; + }; + + #define __percpu_disguise(pdata) (struct percpu_data *)~(unsigned long)(pdata) +-/* +- * Use this to get to a cpu's version of the per-cpu object dynamically +- * allocated. Non-atomic access to the current CPU's version should +- * probably be combined with get_cpu()/put_cpu(). 
+- */ +-#define percpu_ptr(ptr, cpu) \ +-({ \ +- struct percpu_data *__p = __percpu_disguise(ptr); \ +- (__typeof__(ptr))__p->ptrs[(cpu)]; \ ++ ++#define per_cpu_ptr(ptr, cpu) \ ++({ \ ++ struct percpu_data *__p = __percpu_disguise(ptr); \ ++ (__typeof__(ptr))__p->ptrs[(cpu)]; \ + }) + +-extern void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask); +-extern void percpu_free(void *__pdata); ++#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ ++ ++extern void *__alloc_percpu(size_t size, size_t align); ++extern void free_percpu(void *__pdata); + + #else /* CONFIG_SMP */ + +-#define percpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) ++#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); }) + +-static __always_inline void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) ++static inline void *__alloc_percpu(size_t size, size_t align) + { +- return kzalloc(size, gfp); ++ /* ++ * Can't easily make larger alignment work with kmalloc. WARN ++ * on it. Larger alignment should only be used for module ++ * percpu sections on SMP for which this path isn't used. ++ */ ++ WARN_ON_ONCE(align > SMP_CACHE_BYTES); ++ return kzalloc(size, GFP_KERNEL); + } + +-static inline void percpu_free(void *__pdata) ++static inline void free_percpu(void *p) + { +- kfree(__pdata); ++ kfree(p); + } + + #endif /* CONFIG_SMP */ + +-#define percpu_alloc_mask(size, gfp, mask) \ +- __percpu_alloc_mask((size), (gfp), &(mask)) +- +-#define percpu_alloc(size, gfp) percpu_alloc_mask((size), (gfp), cpu_online_map) +- +-/* (legacy) interface for use without CPU hotplug handling */ +- +-#define __alloc_percpu(size) percpu_alloc_mask((size), GFP_KERNEL, \ +- cpu_possible_map) +-#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type)) +-#define free_percpu(ptr) percpu_free((ptr)) +-#define per_cpu_ptr(ptr, cpu) percpu_ptr((ptr), (cpu)) ++#define alloc_percpu(type) (type *)__alloc_percpu(sizeof(type), \ ++ __alignof__(type)) + + #endif /* __LINUX_PERCPU_H */ +Index: linux-2.6-tip/include/linux/perf_counter.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/linux/perf_counter.h +@@ -0,0 +1,477 @@ ++/* ++ * Performance counters: ++ * ++ * Copyright(C) 2008, Thomas Gleixner ++ * Copyright(C) 2008, Red Hat, Inc., Ingo Molnar ++ * ++ * Data type definitions, declarations, prototypes. ++ * ++ * Started by: Thomas Gleixner and Ingo Molnar ++ * ++ * For licencing details see kernel-base/COPYING ++ */ ++#ifndef _LINUX_PERF_COUNTER_H ++#define _LINUX_PERF_COUNTER_H ++ ++#include ++#include ++#include ++ ++/* ++ * User-space ABI bits: ++ */ ++ ++/* ++ * hw_event.type ++ */ ++enum perf_event_types { ++ PERF_TYPE_HARDWARE = 0, ++ PERF_TYPE_SOFTWARE = 1, ++ PERF_TYPE_TRACEPOINT = 2, ++ ++ /* ++ * available TYPE space, raw is the max value. ++ */ ++ ++ PERF_TYPE_RAW = 128, ++}; ++ ++/* ++ * Generalized performance counter event types, used by the hw_event.event_id ++ * parameter of the sys_perf_counter_open() syscall: ++ */ ++enum hw_event_ids { ++ /* ++ * Common hardware events, generalized by the kernel: ++ */ ++ PERF_COUNT_CPU_CYCLES = 0, ++ PERF_COUNT_INSTRUCTIONS = 1, ++ PERF_COUNT_CACHE_REFERENCES = 2, ++ PERF_COUNT_CACHE_MISSES = 3, ++ PERF_COUNT_BRANCH_INSTRUCTIONS = 4, ++ PERF_COUNT_BRANCH_MISSES = 5, ++ PERF_COUNT_BUS_CYCLES = 6, ++ ++ PERF_HW_EVENTS_MAX = 7, ++}; ++ ++/* ++ * Special "software" counters provided by the kernel, even if the hardware ++ * does not support performance counters. 
These counters measure various ++ * physical and sw events of the kernel (and allow the profiling of them as ++ * well): ++ */ ++enum sw_event_ids { ++ PERF_COUNT_CPU_CLOCK = 0, ++ PERF_COUNT_TASK_CLOCK = 1, ++ PERF_COUNT_PAGE_FAULTS = 2, ++ PERF_COUNT_CONTEXT_SWITCHES = 3, ++ PERF_COUNT_CPU_MIGRATIONS = 4, ++ PERF_COUNT_PAGE_FAULTS_MIN = 5, ++ PERF_COUNT_PAGE_FAULTS_MAJ = 6, ++ ++ PERF_SW_EVENTS_MAX = 7, ++}; ++ ++/* ++ * IRQ-notification data record type: ++ */ ++enum perf_counter_record_type { ++ PERF_RECORD_SIMPLE = 0, ++ PERF_RECORD_IRQ = 1, ++ PERF_RECORD_GROUP = 2, ++}; ++ ++#define __PERF_COUNTER_MASK(name) \ ++ (((1ULL << PERF_COUNTER_##name##_BITS) - 1) << \ ++ PERF_COUNTER_##name##_SHIFT) ++ ++#define PERF_COUNTER_RAW_BITS 1 ++#define PERF_COUNTER_RAW_SHIFT 63 ++#define PERF_COUNTER_RAW_MASK __PERF_COUNTER_MASK(RAW) ++ ++#define PERF_COUNTER_CONFIG_BITS 63 ++#define PERF_COUNTER_CONFIG_SHIFT 0 ++#define PERF_COUNTER_CONFIG_MASK __PERF_COUNTER_MASK(CONFIG) ++ ++#define PERF_COUNTER_TYPE_BITS 7 ++#define PERF_COUNTER_TYPE_SHIFT 56 ++#define PERF_COUNTER_TYPE_MASK __PERF_COUNTER_MASK(TYPE) ++ ++#define PERF_COUNTER_EVENT_BITS 56 ++#define PERF_COUNTER_EVENT_SHIFT 0 ++#define PERF_COUNTER_EVENT_MASK __PERF_COUNTER_MASK(EVENT) ++ ++/* ++ * Bits that can be set in hw_event.read_format to request that ++ * reads on the counter should return the indicated quantities, ++ * in increasing order of bit value, after the counter value. ++ */ ++enum perf_counter_read_format { ++ PERF_FORMAT_TOTAL_TIME_ENABLED = 1, ++ PERF_FORMAT_TOTAL_TIME_RUNNING = 2, ++}; ++ ++/* ++ * Hardware event to monitor via a performance monitoring counter: ++ */ ++struct perf_counter_hw_event { ++ /* ++ * The MSB of the config word signifies if the rest contains cpu ++ * specific (raw) counter configuration data, if unset, the next ++ * 7 bits are an event type and the rest of the bits are the event ++ * identifier. 
++ */ ++ __u64 config; ++ ++ __u64 irq_period; ++ __u64 record_type; ++ __u64 read_format; ++ ++ __u64 disabled : 1, /* off by default */ ++ nmi : 1, /* NMI sampling */ ++ inherit : 1, /* children inherit it */ ++ pinned : 1, /* must always be on PMU */ ++ exclusive : 1, /* only group on PMU */ ++ exclude_user : 1, /* don't count user */ ++ exclude_kernel : 1, /* ditto kernel */ ++ exclude_hv : 1, /* ditto hypervisor */ ++ exclude_idle : 1, /* don't count when idle */ ++ include_tid : 1, /* include the tid */ ++ ++ __reserved_1 : 54; ++ ++ __u32 extra_config_len; ++ __u32 __reserved_4; ++ ++ __u64 __reserved_2; ++ __u64 __reserved_3; ++}; ++ ++/* ++ * Ioctls that can be done on a perf counter fd: ++ */ ++#define PERF_COUNTER_IOC_ENABLE _IO('$', 0) ++#define PERF_COUNTER_IOC_DISABLE _IO('$', 1) ++ ++/* ++ * Structure of the page that can be mapped via mmap ++ */ ++struct perf_counter_mmap_page { ++ __u32 version; /* version number of this structure */ ++ __u32 compat_version; /* lowest version this is compat with */ ++ __u32 lock; /* seqlock for synchronization */ ++ __u32 index; /* hardware counter identifier */ ++ __s64 offset; /* add to hardware counter value */ ++ ++ __u32 data_head; /* head in the data section */ ++}; ++ ++struct perf_event_header { ++ __u32 type; ++ __u32 size; ++}; ++ ++enum perf_event_type { ++ PERF_EVENT_IP = 0, ++ PERF_EVENT_GROUP = 1, ++ ++ __PERF_EVENT_TID = 0x100, ++}; ++ ++#ifdef __KERNEL__ ++/* ++ * Kernel-internal data types and definitions: ++ */ ++ ++#ifdef CONFIG_PERF_COUNTERS ++# include ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct task_struct; ++ ++static inline u64 perf_event_raw(struct perf_counter_hw_event *hw_event) ++{ ++ return hw_event->config & PERF_COUNTER_RAW_MASK; ++} ++ ++static inline u64 perf_event_config(struct perf_counter_hw_event *hw_event) ++{ ++ return hw_event->config & PERF_COUNTER_CONFIG_MASK; ++} ++ ++static inline u64 perf_event_type(struct perf_counter_hw_event *hw_event) ++{ ++ return (hw_event->config & PERF_COUNTER_TYPE_MASK) >> ++ PERF_COUNTER_TYPE_SHIFT; ++} ++ ++static inline u64 perf_event_id(struct perf_counter_hw_event *hw_event) ++{ ++ return hw_event->config & PERF_COUNTER_EVENT_MASK; ++} ++ ++/** ++ * struct hw_perf_counter - performance counter hardware details: ++ */ ++struct hw_perf_counter { ++#ifdef CONFIG_PERF_COUNTERS ++ union { ++ struct { /* hardware */ ++ u64 config; ++ unsigned long config_base; ++ unsigned long counter_base; ++ int nmi; ++ unsigned int idx; ++ }; ++ union { /* software */ ++ atomic64_t count; ++ struct hrtimer hrtimer; ++ }; ++ }; ++ atomic64_t prev_count; ++ u64 irq_period; ++ atomic64_t period_left; ++#endif ++}; ++ ++struct perf_counter; ++ ++/** ++ * struct hw_perf_counter_ops - performance counter hw ops ++ */ ++struct hw_perf_counter_ops { ++ int (*enable) (struct perf_counter *counter); ++ void (*disable) (struct perf_counter *counter); ++ void (*read) (struct perf_counter *counter); ++}; ++ ++/** ++ * enum perf_counter_active_state - the states of a counter ++ */ ++enum perf_counter_active_state { ++ PERF_COUNTER_STATE_ERROR = -2, ++ PERF_COUNTER_STATE_OFF = -1, ++ PERF_COUNTER_STATE_INACTIVE = 0, ++ PERF_COUNTER_STATE_ACTIVE = 1, ++}; ++ ++struct file; ++ ++struct perf_mmap_data { ++ struct rcu_head rcu_head; ++ int nr_pages; ++ atomic_t wakeup; ++ atomic_t head; ++ struct perf_counter_mmap_page *user_page; ++ void *data_pages[0]; ++}; ++ ++/** ++ * struct perf_counter - performance counter kernel representation: ++ */ 
++struct perf_counter { ++#ifdef CONFIG_PERF_COUNTERS ++ struct list_head list_entry; ++ struct list_head event_entry; ++ struct list_head sibling_list; ++ int nr_siblings; ++ struct perf_counter *group_leader; ++ const struct hw_perf_counter_ops *hw_ops; ++ ++ enum perf_counter_active_state state; ++ enum perf_counter_active_state prev_state; ++ atomic64_t count; ++ ++ /* ++ * These are the total time in nanoseconds that the counter ++ * has been enabled (i.e. eligible to run, and the task has ++ * been scheduled in, if this is a per-task counter) ++ * and running (scheduled onto the CPU), respectively. ++ * ++ * They are computed from tstamp_enabled, tstamp_running and ++ * tstamp_stopped when the counter is in INACTIVE or ACTIVE state. ++ */ ++ u64 total_time_enabled; ++ u64 total_time_running; ++ ++ /* ++ * These are timestamps used for computing total_time_enabled ++ * and total_time_running when the counter is in INACTIVE or ++ * ACTIVE state, measured in nanoseconds from an arbitrary point ++ * in time. ++ * tstamp_enabled: the notional time when the counter was enabled ++ * tstamp_running: the notional time when the counter was scheduled on ++ * tstamp_stopped: in INACTIVE state, the notional time when the ++ * counter was scheduled off. ++ */ ++ u64 tstamp_enabled; ++ u64 tstamp_running; ++ u64 tstamp_stopped; ++ ++ struct perf_counter_hw_event hw_event; ++ struct hw_perf_counter hw; ++ ++ struct perf_counter_context *ctx; ++ struct task_struct *task; ++ struct file *filp; ++ ++ struct perf_counter *parent; ++ struct list_head child_list; ++ ++ /* ++ * These accumulate total time (in nanoseconds) that children ++ * counters have been enabled and running, respectively. ++ */ ++ atomic64_t child_total_time_enabled; ++ atomic64_t child_total_time_running; ++ ++ /* ++ * Protect attach/detach and child_list: ++ */ ++ struct mutex mutex; ++ ++ int oncpu; ++ int cpu; ++ ++ /* mmap bits */ ++ struct mutex mmap_mutex; ++ atomic_t mmap_count; ++ struct perf_mmap_data *data; ++ ++ /* poll related */ ++ wait_queue_head_t waitq; ++ /* optional: for NMIs */ ++ int wakeup_pending; ++ ++ void (*destroy)(struct perf_counter *); ++ struct rcu_head rcu_head; ++#endif ++}; ++ ++/** ++ * struct perf_counter_context - counter context structure ++ * ++ * Used as a container for task counters and CPU counters as well: ++ */ ++struct perf_counter_context { ++#ifdef CONFIG_PERF_COUNTERS ++ /* ++ * Protect the states of the counters in the list, ++ * nr_active, and the list: ++ */ ++ raw_spinlock_t lock; ++ /* ++ * Protect the list of counters. Locking either mutex or lock ++ * is sufficient to ensure the list doesn't change; to change ++ * the list you need to lock both the mutex and the spinlock. ++ */ ++ struct mutex mutex; ++ ++ struct list_head counter_list; ++ struct list_head event_list; ++ int nr_counters; ++ int nr_active; ++ int is_active; ++ struct task_struct *task; ++ ++ /* ++ * time_now is the current time in nanoseconds since an arbitrary ++ * point in the past. For per-task counters, this is based on the ++ * task clock, and for per-cpu counters it is based on the cpu clock. ++ * time_lost is an offset from the task/cpu clock, used to make it ++ * appear that time only passes while the context is scheduled in. 
++ */ ++ u64 time_now; ++ u64 time_lost; ++#endif ++}; ++ ++/** ++ * struct perf_counter_cpu_context - per cpu counter context structure ++ */ ++struct perf_cpu_context { ++ struct perf_counter_context ctx; ++ struct perf_counter_context *task_ctx; ++ int active_oncpu; ++ int max_pertask; ++ int exclusive; ++ ++ /* ++ * Recursion avoidance: ++ * ++ * task, softirq, irq, nmi context ++ */ ++ int recursion[4]; ++}; ++ ++/* ++ * Set by architecture code: ++ */ ++extern int perf_max_counters; ++ ++#ifdef CONFIG_PERF_COUNTERS ++extern const struct hw_perf_counter_ops * ++hw_perf_counter_init(struct perf_counter *counter); ++ ++extern void perf_counter_task_sched_in(struct task_struct *task, int cpu); ++extern void perf_counter_task_sched_out(struct task_struct *task, int cpu); ++extern void perf_counter_task_tick(struct task_struct *task, int cpu); ++extern void perf_counter_init_task(struct task_struct *child); ++extern void perf_counter_exit_task(struct task_struct *child); ++extern void perf_counter_notify(struct pt_regs *regs); ++extern void perf_counter_print_debug(void); ++extern void perf_counter_unthrottle(void); ++extern u64 hw_perf_save_disable(void); ++extern void hw_perf_restore(u64 ctrl); ++extern int perf_counter_task_disable(void); ++extern int perf_counter_task_enable(void); ++extern int hw_perf_group_sched_in(struct perf_counter *group_leader, ++ struct perf_cpu_context *cpuctx, ++ struct perf_counter_context *ctx, int cpu); ++extern void perf_counter_update_userpage(struct perf_counter *counter); ++ ++extern void perf_counter_output(struct perf_counter *counter, ++ int nmi, struct pt_regs *regs); ++/* ++ * Return 1 for a software counter, 0 for a hardware counter ++ */ ++static inline int is_software_counter(struct perf_counter *counter) ++{ ++ return !perf_event_raw(&counter->hw_event) && ++ perf_event_type(&counter->hw_event) != PERF_TYPE_HARDWARE; ++} ++ ++extern void perf_swcounter_event(u32, u64, int, struct pt_regs *); ++ ++#else ++static inline void ++perf_counter_task_sched_in(struct task_struct *task, int cpu) { } ++static inline void ++perf_counter_task_sched_out(struct task_struct *task, int cpu) { } ++static inline void ++perf_counter_task_tick(struct task_struct *task, int cpu) { } ++static inline void perf_counter_init_task(struct task_struct *child) { } ++static inline void perf_counter_exit_task(struct task_struct *child) { } ++static inline void perf_counter_notify(struct pt_regs *regs) { } ++static inline void perf_counter_print_debug(void) { } ++static inline void perf_counter_unthrottle(void) { } ++static inline void hw_perf_restore(u64 ctrl) { } ++static inline u64 hw_perf_save_disable(void) { return 0; } ++static inline int perf_counter_task_disable(void) { return -EINVAL; } ++static inline int perf_counter_task_enable(void) { return -EINVAL; } ++ ++static inline void perf_swcounter_event(u32 event, u64 nr, ++ int nmi, struct pt_regs *regs) { } ++#endif ++ ++#endif /* __KERNEL__ */ ++#endif /* _LINUX_PERF_COUNTER_H */ +Index: linux-2.6-tip/include/linux/pfkeyv2.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/pfkeyv2.h ++++ linux-2.6-tip/include/linux/pfkeyv2.h +@@ -12,187 +12,187 @@ + #define PFKEYV2_REVISION 199806L + + struct sadb_msg { +- uint8_t sadb_msg_version; +- uint8_t sadb_msg_type; +- uint8_t sadb_msg_errno; +- uint8_t sadb_msg_satype; +- uint16_t sadb_msg_len; +- uint16_t sadb_msg_reserved; +- uint32_t sadb_msg_seq; +- uint32_t sadb_msg_pid; ++ __u8 sadb_msg_version; ++ __u8 
sadb_msg_type; ++ __u8 sadb_msg_errno; ++ __u8 sadb_msg_satype; ++ __u16 sadb_msg_len; ++ __u16 sadb_msg_reserved; ++ __u32 sadb_msg_seq; ++ __u32 sadb_msg_pid; + } __attribute__((packed)); + /* sizeof(struct sadb_msg) == 16 */ + + struct sadb_ext { +- uint16_t sadb_ext_len; +- uint16_t sadb_ext_type; ++ __u16 sadb_ext_len; ++ __u16 sadb_ext_type; + } __attribute__((packed)); + /* sizeof(struct sadb_ext) == 4 */ + + struct sadb_sa { +- uint16_t sadb_sa_len; +- uint16_t sadb_sa_exttype; ++ __u16 sadb_sa_len; ++ __u16 sadb_sa_exttype; + __be32 sadb_sa_spi; +- uint8_t sadb_sa_replay; +- uint8_t sadb_sa_state; +- uint8_t sadb_sa_auth; +- uint8_t sadb_sa_encrypt; +- uint32_t sadb_sa_flags; ++ __u8 sadb_sa_replay; ++ __u8 sadb_sa_state; ++ __u8 sadb_sa_auth; ++ __u8 sadb_sa_encrypt; ++ __u32 sadb_sa_flags; + } __attribute__((packed)); + /* sizeof(struct sadb_sa) == 16 */ + + struct sadb_lifetime { +- uint16_t sadb_lifetime_len; +- uint16_t sadb_lifetime_exttype; +- uint32_t sadb_lifetime_allocations; +- uint64_t sadb_lifetime_bytes; +- uint64_t sadb_lifetime_addtime; +- uint64_t sadb_lifetime_usetime; ++ __u16 sadb_lifetime_len; ++ __u16 sadb_lifetime_exttype; ++ __u32 sadb_lifetime_allocations; ++ __u64 sadb_lifetime_bytes; ++ __u64 sadb_lifetime_addtime; ++ __u64 sadb_lifetime_usetime; + } __attribute__((packed)); + /* sizeof(struct sadb_lifetime) == 32 */ + + struct sadb_address { +- uint16_t sadb_address_len; +- uint16_t sadb_address_exttype; +- uint8_t sadb_address_proto; +- uint8_t sadb_address_prefixlen; +- uint16_t sadb_address_reserved; ++ __u16 sadb_address_len; ++ __u16 sadb_address_exttype; ++ __u8 sadb_address_proto; ++ __u8 sadb_address_prefixlen; ++ __u16 sadb_address_reserved; + } __attribute__((packed)); + /* sizeof(struct sadb_address) == 8 */ + + struct sadb_key { +- uint16_t sadb_key_len; +- uint16_t sadb_key_exttype; +- uint16_t sadb_key_bits; +- uint16_t sadb_key_reserved; ++ __u16 sadb_key_len; ++ __u16 sadb_key_exttype; ++ __u16 sadb_key_bits; ++ __u16 sadb_key_reserved; + } __attribute__((packed)); + /* sizeof(struct sadb_key) == 8 */ + + struct sadb_ident { +- uint16_t sadb_ident_len; +- uint16_t sadb_ident_exttype; +- uint16_t sadb_ident_type; +- uint16_t sadb_ident_reserved; +- uint64_t sadb_ident_id; ++ __u16 sadb_ident_len; ++ __u16 sadb_ident_exttype; ++ __u16 sadb_ident_type; ++ __u16 sadb_ident_reserved; ++ __u64 sadb_ident_id; + } __attribute__((packed)); + /* sizeof(struct sadb_ident) == 16 */ + + struct sadb_sens { +- uint16_t sadb_sens_len; +- uint16_t sadb_sens_exttype; +- uint32_t sadb_sens_dpd; +- uint8_t sadb_sens_sens_level; +- uint8_t sadb_sens_sens_len; +- uint8_t sadb_sens_integ_level; +- uint8_t sadb_sens_integ_len; +- uint32_t sadb_sens_reserved; ++ __u16 sadb_sens_len; ++ __u16 sadb_sens_exttype; ++ __u32 sadb_sens_dpd; ++ __u8 sadb_sens_sens_level; ++ __u8 sadb_sens_sens_len; ++ __u8 sadb_sens_integ_level; ++ __u8 sadb_sens_integ_len; ++ __u32 sadb_sens_reserved; + } __attribute__((packed)); + /* sizeof(struct sadb_sens) == 16 */ + + /* followed by: +- uint64_t sadb_sens_bitmap[sens_len]; +- uint64_t sadb_integ_bitmap[integ_len]; */ ++ __u64 sadb_sens_bitmap[sens_len]; ++ __u64 sadb_integ_bitmap[integ_len]; */ + + struct sadb_prop { +- uint16_t sadb_prop_len; +- uint16_t sadb_prop_exttype; +- uint8_t sadb_prop_replay; +- uint8_t sadb_prop_reserved[3]; ++ __u16 sadb_prop_len; ++ __u16 sadb_prop_exttype; ++ __u8 sadb_prop_replay; ++ __u8 sadb_prop_reserved[3]; + } __attribute__((packed)); + /* sizeof(struct sadb_prop) == 8 */ + + /* followed by: + 
struct sadb_comb sadb_combs[(sadb_prop_len + +- sizeof(uint64_t) - sizeof(struct sadb_prop)) / ++ sizeof(__u64) - sizeof(struct sadb_prop)) / + sizeof(struct sadb_comb)]; */ + + struct sadb_comb { +- uint8_t sadb_comb_auth; +- uint8_t sadb_comb_encrypt; +- uint16_t sadb_comb_flags; +- uint16_t sadb_comb_auth_minbits; +- uint16_t sadb_comb_auth_maxbits; +- uint16_t sadb_comb_encrypt_minbits; +- uint16_t sadb_comb_encrypt_maxbits; +- uint32_t sadb_comb_reserved; +- uint32_t sadb_comb_soft_allocations; +- uint32_t sadb_comb_hard_allocations; +- uint64_t sadb_comb_soft_bytes; +- uint64_t sadb_comb_hard_bytes; +- uint64_t sadb_comb_soft_addtime; +- uint64_t sadb_comb_hard_addtime; +- uint64_t sadb_comb_soft_usetime; +- uint64_t sadb_comb_hard_usetime; ++ __u8 sadb_comb_auth; ++ __u8 sadb_comb_encrypt; ++ __u16 sadb_comb_flags; ++ __u16 sadb_comb_auth_minbits; ++ __u16 sadb_comb_auth_maxbits; ++ __u16 sadb_comb_encrypt_minbits; ++ __u16 sadb_comb_encrypt_maxbits; ++ __u32 sadb_comb_reserved; ++ __u32 sadb_comb_soft_allocations; ++ __u32 sadb_comb_hard_allocations; ++ __u64 sadb_comb_soft_bytes; ++ __u64 sadb_comb_hard_bytes; ++ __u64 sadb_comb_soft_addtime; ++ __u64 sadb_comb_hard_addtime; ++ __u64 sadb_comb_soft_usetime; ++ __u64 sadb_comb_hard_usetime; + } __attribute__((packed)); + /* sizeof(struct sadb_comb) == 72 */ + + struct sadb_supported { +- uint16_t sadb_supported_len; +- uint16_t sadb_supported_exttype; +- uint32_t sadb_supported_reserved; ++ __u16 sadb_supported_len; ++ __u16 sadb_supported_exttype; ++ __u32 sadb_supported_reserved; + } __attribute__((packed)); + /* sizeof(struct sadb_supported) == 8 */ + + /* followed by: + struct sadb_alg sadb_algs[(sadb_supported_len + +- sizeof(uint64_t) - sizeof(struct sadb_supported)) / ++ sizeof(__u64) - sizeof(struct sadb_supported)) / + sizeof(struct sadb_alg)]; */ + + struct sadb_alg { +- uint8_t sadb_alg_id; +- uint8_t sadb_alg_ivlen; +- uint16_t sadb_alg_minbits; +- uint16_t sadb_alg_maxbits; +- uint16_t sadb_alg_reserved; ++ __u8 sadb_alg_id; ++ __u8 sadb_alg_ivlen; ++ __u16 sadb_alg_minbits; ++ __u16 sadb_alg_maxbits; ++ __u16 sadb_alg_reserved; + } __attribute__((packed)); + /* sizeof(struct sadb_alg) == 8 */ + + struct sadb_spirange { +- uint16_t sadb_spirange_len; +- uint16_t sadb_spirange_exttype; +- uint32_t sadb_spirange_min; +- uint32_t sadb_spirange_max; +- uint32_t sadb_spirange_reserved; ++ __u16 sadb_spirange_len; ++ __u16 sadb_spirange_exttype; ++ __u32 sadb_spirange_min; ++ __u32 sadb_spirange_max; ++ __u32 sadb_spirange_reserved; + } __attribute__((packed)); + /* sizeof(struct sadb_spirange) == 16 */ + + struct sadb_x_kmprivate { +- uint16_t sadb_x_kmprivate_len; +- uint16_t sadb_x_kmprivate_exttype; +- uint32_t sadb_x_kmprivate_reserved; ++ __u16 sadb_x_kmprivate_len; ++ __u16 sadb_x_kmprivate_exttype; ++ __u32 sadb_x_kmprivate_reserved; + } __attribute__((packed)); + /* sizeof(struct sadb_x_kmprivate) == 8 */ + + struct sadb_x_sa2 { +- uint16_t sadb_x_sa2_len; +- uint16_t sadb_x_sa2_exttype; +- uint8_t sadb_x_sa2_mode; +- uint8_t sadb_x_sa2_reserved1; +- uint16_t sadb_x_sa2_reserved2; +- uint32_t sadb_x_sa2_sequence; +- uint32_t sadb_x_sa2_reqid; ++ __u16 sadb_x_sa2_len; ++ __u16 sadb_x_sa2_exttype; ++ __u8 sadb_x_sa2_mode; ++ __u8 sadb_x_sa2_reserved1; ++ __u16 sadb_x_sa2_reserved2; ++ __u32 sadb_x_sa2_sequence; ++ __u32 sadb_x_sa2_reqid; + } __attribute__((packed)); + /* sizeof(struct sadb_x_sa2) == 16 */ + + struct sadb_x_policy { +- uint16_t sadb_x_policy_len; +- uint16_t sadb_x_policy_exttype; +- uint16_t 
sadb_x_policy_type; +- uint8_t sadb_x_policy_dir; +- uint8_t sadb_x_policy_reserved; +- uint32_t sadb_x_policy_id; +- uint32_t sadb_x_policy_priority; ++ __u16 sadb_x_policy_len; ++ __u16 sadb_x_policy_exttype; ++ __u16 sadb_x_policy_type; ++ __u8 sadb_x_policy_dir; ++ __u8 sadb_x_policy_reserved; ++ __u32 sadb_x_policy_id; ++ __u32 sadb_x_policy_priority; + } __attribute__((packed)); + /* sizeof(struct sadb_x_policy) == 16 */ + + struct sadb_x_ipsecrequest { +- uint16_t sadb_x_ipsecrequest_len; +- uint16_t sadb_x_ipsecrequest_proto; +- uint8_t sadb_x_ipsecrequest_mode; +- uint8_t sadb_x_ipsecrequest_level; +- uint16_t sadb_x_ipsecrequest_reserved1; +- uint32_t sadb_x_ipsecrequest_reqid; +- uint32_t sadb_x_ipsecrequest_reserved2; ++ __u16 sadb_x_ipsecrequest_len; ++ __u16 sadb_x_ipsecrequest_proto; ++ __u8 sadb_x_ipsecrequest_mode; ++ __u8 sadb_x_ipsecrequest_level; ++ __u16 sadb_x_ipsecrequest_reserved1; ++ __u32 sadb_x_ipsecrequest_reqid; ++ __u32 sadb_x_ipsecrequest_reserved2; + } __attribute__((packed)); + /* sizeof(struct sadb_x_ipsecrequest) == 16 */ + +@@ -200,38 +200,38 @@ struct sadb_x_ipsecrequest { + * type of NAT-T is supported, draft-ietf-ipsec-udp-encaps-06 + */ + struct sadb_x_nat_t_type { +- uint16_t sadb_x_nat_t_type_len; +- uint16_t sadb_x_nat_t_type_exttype; +- uint8_t sadb_x_nat_t_type_type; +- uint8_t sadb_x_nat_t_type_reserved[3]; ++ __u16 sadb_x_nat_t_type_len; ++ __u16 sadb_x_nat_t_type_exttype; ++ __u8 sadb_x_nat_t_type_type; ++ __u8 sadb_x_nat_t_type_reserved[3]; + } __attribute__((packed)); + /* sizeof(struct sadb_x_nat_t_type) == 8 */ + + /* Pass a NAT Traversal port (Source or Dest port) */ + struct sadb_x_nat_t_port { +- uint16_t sadb_x_nat_t_port_len; +- uint16_t sadb_x_nat_t_port_exttype; ++ __u16 sadb_x_nat_t_port_len; ++ __u16 sadb_x_nat_t_port_exttype; + __be16 sadb_x_nat_t_port_port; +- uint16_t sadb_x_nat_t_port_reserved; ++ __u16 sadb_x_nat_t_port_reserved; + } __attribute__((packed)); + /* sizeof(struct sadb_x_nat_t_port) == 8 */ + + /* Generic LSM security context */ + struct sadb_x_sec_ctx { +- uint16_t sadb_x_sec_len; +- uint16_t sadb_x_sec_exttype; +- uint8_t sadb_x_ctx_alg; /* LSMs: e.g., selinux == 1 */ +- uint8_t sadb_x_ctx_doi; +- uint16_t sadb_x_ctx_len; ++ __u16 sadb_x_sec_len; ++ __u16 sadb_x_sec_exttype; ++ __u8 sadb_x_ctx_alg; /* LSMs: e.g., selinux == 1 */ ++ __u8 sadb_x_ctx_doi; ++ __u16 sadb_x_ctx_len; + } __attribute__((packed)); + /* sizeof(struct sadb_sec_ctx) = 8 */ + + /* Used by MIGRATE to pass addresses IKE will use to perform + * negotiation with the peer */ + struct sadb_x_kmaddress { +- uint16_t sadb_x_kmaddress_len; +- uint16_t sadb_x_kmaddress_exttype; +- uint32_t sadb_x_kmaddress_reserved; ++ __u16 sadb_x_kmaddress_len; ++ __u16 sadb_x_kmaddress_exttype; ++ __u32 sadb_x_kmaddress_reserved; + } __attribute__((packed)); + /* sizeof(struct sadb_x_kmaddress) == 8 */ + +Index: linux-2.6-tip/include/linux/pipe_fs_i.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/pipe_fs_i.h ++++ linux-2.6-tip/include/linux/pipe_fs_i.h +@@ -1,9 +1,9 @@ + #ifndef _LINUX_PIPE_FS_I_H + #define _LINUX_PIPE_FS_I_H + +-#define PIPEFS_MAGIC 0x50495045 ++#define PIPEFS_MAGIC 0x50495045 + +-#define PIPE_BUFFERS (16) ++#define PIPE_BUFFERS 64 + + #define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ + #define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */ +Index: linux-2.6-tip/include/linux/pkt_sched.h +=================================================================== +--- 
linux-2.6-tip.orig/include/linux/pkt_sched.h ++++ linux-2.6-tip/include/linux/pkt_sched.h +@@ -515,7 +515,7 @@ enum + + struct tc_drr_stats + { +- __u32 deficit; ++ u32 deficit; + }; + + #endif +Index: linux-2.6-tip/include/linux/plist.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/plist.h ++++ linux-2.6-tip/include/linux/plist.h +@@ -81,7 +81,7 @@ struct plist_head { + struct list_head prio_list; + struct list_head node_list; + #ifdef CONFIG_DEBUG_PI_LIST +- spinlock_t *lock; ++ raw_spinlock_t *lock; + #endif + }; + +@@ -96,16 +96,19 @@ struct plist_node { + # define PLIST_HEAD_LOCK_INIT(_lock) + #endif + ++#define _PLIST_HEAD_INIT(head) \ ++ .prio_list = LIST_HEAD_INIT((head).prio_list), \ ++ .node_list = LIST_HEAD_INIT((head).node_list) ++ + /** + * PLIST_HEAD_INIT - static struct plist_head initializer + * @head: struct plist_head variable name +- * @_lock: lock to initialize for this list ++ * @_lock: lock * to initialize for this list + */ + #define PLIST_HEAD_INIT(head, _lock) \ + { \ +- .prio_list = LIST_HEAD_INIT((head).prio_list), \ +- .node_list = LIST_HEAD_INIT((head).node_list), \ +- PLIST_HEAD_LOCK_INIT(&(_lock)) \ ++ _PLIST_HEAD_INIT(head), \ ++ PLIST_HEAD_LOCK_INIT(_lock) \ + } + + /** +@@ -116,7 +119,7 @@ struct plist_node { + #define PLIST_NODE_INIT(node, __prio) \ + { \ + .prio = (__prio), \ +- .plist = PLIST_HEAD_INIT((node).plist, NULL), \ ++ .plist = { _PLIST_HEAD_INIT((node).plist) }, \ + } + + /** +@@ -125,7 +128,7 @@ struct plist_node { + * @lock: list spinlock, remembered for debugging + */ + static inline void +-plist_head_init(struct plist_head *head, spinlock_t *lock) ++plist_head_init(struct plist_head *head, raw_spinlock_t *lock) + { + INIT_LIST_HEAD(&head->prio_list); + INIT_LIST_HEAD(&head->node_list); +Index: linux-2.6-tip/include/linux/poison.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/poison.h ++++ linux-2.6-tip/include/linux/poison.h +@@ -2,13 +2,25 @@ + #define _LINUX_POISON_H + + /********** include/linux/list.h **********/ ++ ++/* ++ * Architectures might want to move the poison pointer offset ++ * into some well-recognized area such as 0xdead000000000000, ++ * that is also not mappable by user-space exploits: ++ */ ++#ifdef CONFIG_ILLEGAL_POINTER_VALUE ++# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL) ++#else ++# define POISON_POINTER_DELTA 0 ++#endif ++ + /* + * These are non-NULL pointers that will result in page faults + * under normal circumstances, used to verify that nobody uses + * non-initialized list entries. + */ +-#define LIST_POISON1 ((void *) 0x00100100) +-#define LIST_POISON2 ((void *) 0x00200200) ++#define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA) ++#define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA) + + /********** include/linux/timer.h **********/ + /* +Index: linux-2.6-tip/include/linux/ppp_defs.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/ppp_defs.h ++++ linux-2.6-tip/include/linux/ppp_defs.h +@@ -177,8 +177,8 @@ struct ppp_comp_stats { + * the last NP packet was sent or received. 
+ */ + struct ppp_idle { +- time_t xmit_idle; /* time since last NP packet sent */ +- time_t recv_idle; /* time since last NP packet received */ ++ __kernel_time_t xmit_idle; /* time since last NP packet sent */ ++ __kernel_time_t recv_idle; /* time since last NP packet received */ + }; + + #endif /* _PPP_DEFS_H_ */ +Index: linux-2.6-tip/include/linux/prctl.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/prctl.h ++++ linux-2.6-tip/include/linux/prctl.h +@@ -85,4 +85,7 @@ + #define PR_SET_TIMERSLACK 29 + #define PR_GET_TIMERSLACK 30 + ++#define PR_TASK_PERF_COUNTERS_DISABLE 31 ++#define PR_TASK_PERF_COUNTERS_ENABLE 32 ++ + #endif /* _LINUX_PRCTL_H */ +Index: linux-2.6-tip/include/linux/rcuclassic.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/rcuclassic.h ++++ linux-2.6-tip/include/linux/rcuclassic.h +@@ -36,7 +36,6 @@ + #include + #include + #include +-#include + #include + #include + +@@ -108,25 +107,14 @@ struct rcu_data { + struct rcu_head barrier; + }; + +-DECLARE_PER_CPU(struct rcu_data, rcu_data); +-DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); +- + /* + * Increment the quiescent state counter. + * The counter is a bit degenerated: We do not need to know + * how many quiescent states passed, just if there was at least + * one since the start of the grace period. Thus just a flag. + */ +-static inline void rcu_qsctr_inc(int cpu) +-{ +- struct rcu_data *rdp = &per_cpu(rcu_data, cpu); +- rdp->passed_quiesc = 1; +-} +-static inline void rcu_bh_qsctr_inc(int cpu) +-{ +- struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); +- rdp->passed_quiesc = 1; +-} ++extern void rcu_qsctr_inc(int cpu); ++extern void rcu_bh_qsctr_inc(int cpu); + + extern int rcu_pending(int cpu); + extern int rcu_needs_cpu(int cpu); +Index: linux-2.6-tip/include/linux/rcupdate.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/rcupdate.h ++++ linux-2.6-tip/include/linux/rcupdate.h +@@ -36,7 +36,6 @@ + #include + #include + #include +-#include + #include + #include + #include +Index: linux-2.6-tip/include/linux/rcupreempt.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/rcupreempt.h ++++ linux-2.6-tip/include/linux/rcupreempt.h +@@ -36,34 +36,19 @@ + #include + #include + #include +-#include ++#include + #include + #include + +-struct rcu_dyntick_sched { +- int dynticks; +- int dynticks_snap; +- int sched_qs; +- int sched_qs_snap; +- int sched_dynticks_snap; +-}; +- +-DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched); +- +-static inline void rcu_qsctr_inc(int cpu) +-{ +- struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); +- +- rdssp->sched_qs++; +-} +-#define rcu_bh_qsctr_inc(cpu) ++extern void rcu_qsctr_inc(int cpu); ++static inline void rcu_bh_qsctr_inc(int cpu) { } + + /* + * Someone might want to pass call_rcu_bh as a function pointer. + * So this needs to just be a rename and not a macro function. + * (no parentheses) + */ +-#define call_rcu_bh call_rcu ++#define call_rcu_bh call_rcu + + /** + * call_rcu_sched - Queue RCU callback for invocation after sched grace period. 
+@@ -117,30 +102,12 @@ extern struct rcupreempt_trace *rcupreem + struct softirq_action; + + #ifdef CONFIG_NO_HZ +- +-static inline void rcu_enter_nohz(void) +-{ +- static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); +- +- smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ +- __get_cpu_var(rcu_dyntick_sched).dynticks++; +- WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs); +-} +- +-static inline void rcu_exit_nohz(void) +-{ +- static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); +- +- __get_cpu_var(rcu_dyntick_sched).dynticks++; +- smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ +- WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1), +- &rs); +-} +- +-#else /* CONFIG_NO_HZ */ +-#define rcu_enter_nohz() do { } while (0) +-#define rcu_exit_nohz() do { } while (0) +-#endif /* CONFIG_NO_HZ */ ++extern void rcu_enter_nohz(void); ++extern void rcu_exit_nohz(void); ++#else ++# define rcu_enter_nohz() do { } while (0) ++# define rcu_exit_nohz() do { } while (0) ++#endif + + /* + * A context switch is a grace period for rcupreempt synchronize_rcu() +Index: linux-2.6-tip/include/linux/rcutree.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/rcutree.h ++++ linux-2.6-tip/include/linux/rcutree.h +@@ -33,7 +33,6 @@ + #include + #include + #include +-#include + #include + #include + +@@ -236,30 +235,8 @@ struct rcu_state { + #endif /* #ifdef CONFIG_NO_HZ */ + }; + +-extern struct rcu_state rcu_state; +-DECLARE_PER_CPU(struct rcu_data, rcu_data); +- +-extern struct rcu_state rcu_bh_state; +-DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); +- +-/* +- * Increment the quiescent state counter. +- * The counter is a bit degenerated: We do not need to know +- * how many quiescent states passed, just if there was at least +- * one since the start of the grace period. Thus just a flag. +- */ +-static inline void rcu_qsctr_inc(int cpu) +-{ +- struct rcu_data *rdp = &per_cpu(rcu_data, cpu); +- rdp->passed_quiesc = 1; +- rdp->passed_quiesc_completed = rdp->completed; +-} +-static inline void rcu_bh_qsctr_inc(int cpu) +-{ +- struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); +- rdp->passed_quiesc = 1; +- rdp->passed_quiesc_completed = rdp->completed; +-} ++extern void rcu_qsctr_inc(int cpu); ++extern void rcu_bh_qsctr_inc(int cpu); + + extern int rcu_pending(int cpu); + extern int rcu_needs_cpu(int cpu); +Index: linux-2.6-tip/include/linux/reiserfs_fs.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/reiserfs_fs.h ++++ linux-2.6-tip/include/linux/reiserfs_fs.h +@@ -28,8 +28,6 @@ + #include + #endif + +-struct fid; +- + /* + * include/linux/reiser_fs.h + * +@@ -37,6 +35,33 @@ struct fid; + * + */ + ++/* ioctl's command */ ++#define REISERFS_IOC_UNPACK _IOW(0xCD,1,long) ++/* define following flags to be the same as in ext2, so that chattr(1), ++ lsattr(1) will work with us. 
*/ ++#define REISERFS_IOC_GETFLAGS FS_IOC_GETFLAGS ++#define REISERFS_IOC_SETFLAGS FS_IOC_SETFLAGS ++#define REISERFS_IOC_GETVERSION FS_IOC_GETVERSION ++#define REISERFS_IOC_SETVERSION FS_IOC_SETVERSION ++ ++#ifdef __KERNEL__ ++/* the 32 bit compat definitions with int argument */ ++#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int) ++#define REISERFS_IOC32_GETFLAGS FS_IOC32_GETFLAGS ++#define REISERFS_IOC32_SETFLAGS FS_IOC32_SETFLAGS ++#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION ++#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION ++ ++/* Locking primitives */ ++/* Right now we are still falling back to (un)lock_kernel, but eventually that ++ would evolve into real per-fs locks */ ++#define reiserfs_write_lock( sb ) lock_kernel() ++#define reiserfs_write_unlock( sb ) unlock_kernel() ++ ++/* xattr stuff */ ++#define REISERFS_XATTR_DIR_SEM(s) (REISERFS_SB(s)->xattr_dir_sem) ++struct fid; ++ + /* in reading the #defines, it may help to understand that they employ + the following abbreviations: + +@@ -698,6 +723,7 @@ static inline void cpu_key_k_offset_dec( + /* object identifier for root dir */ + #define REISERFS_ROOT_OBJECTID 2 + #define REISERFS_ROOT_PARENT_OBJECTID 1 ++ + extern struct reiserfs_key root_key; + + /* +@@ -1540,7 +1566,6 @@ struct reiserfs_iget_args { + /* FUNCTION DECLARATIONS */ + /***************************************************************************/ + +-/*#ifdef __KERNEL__*/ + #define get_journal_desc_magic(bh) (bh->b_data + bh->b_size - 12) + + #define journal_trans_half(blocksize) \ +@@ -2178,29 +2203,6 @@ long reiserfs_compat_ioctl(struct file * + unsigned int cmd, unsigned long arg); + int reiserfs_unpack(struct inode *inode, struct file *filp); + +-/* ioctl's command */ +-#define REISERFS_IOC_UNPACK _IOW(0xCD,1,long) +-/* define following flags to be the same as in ext2, so that chattr(1), +- lsattr(1) will work with us. */ +-#define REISERFS_IOC_GETFLAGS FS_IOC_GETFLAGS +-#define REISERFS_IOC_SETFLAGS FS_IOC_SETFLAGS +-#define REISERFS_IOC_GETVERSION FS_IOC_GETVERSION +-#define REISERFS_IOC_SETVERSION FS_IOC_SETVERSION +- +-/* the 32 bit compat definitions with int argument */ +-#define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int) +-#define REISERFS_IOC32_GETFLAGS FS_IOC32_GETFLAGS +-#define REISERFS_IOC32_SETFLAGS FS_IOC32_SETFLAGS +-#define REISERFS_IOC32_GETVERSION FS_IOC32_GETVERSION +-#define REISERFS_IOC32_SETVERSION FS_IOC32_SETVERSION +- +-/* Locking primitives */ +-/* Right now we are still falling back to (un)lock_kernel, but eventually that +- would evolve into real per-fs locks */ +-#define reiserfs_write_lock( sb ) lock_kernel() +-#define reiserfs_write_unlock( sb ) unlock_kernel() +- +-/* xattr stuff */ +-#define REISERFS_XATTR_DIR_SEM(s) (REISERFS_SB(s)->xattr_dir_sem) + ++#endif /* __KERNEL__ */ + #endif /* _LINUX_REISER_FS_H */ +Index: linux-2.6-tip/include/linux/ring_buffer.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/ring_buffer.h ++++ linux-2.6-tip/include/linux/ring_buffer.h +@@ -1,6 +1,7 @@ + #ifndef _LINUX_RING_BUFFER_H + #define _LINUX_RING_BUFFER_H + ++#include + #include + #include + +@@ -8,20 +9,26 @@ struct ring_buffer; + struct ring_buffer_iter; + + /* +- * Don't reference this struct directly, use functions below. ++ * Don't refer to this struct directly, use functions below. 
+ */ + struct ring_buffer_event { +- u32 type:2, len:3, time_delta:27; ++ kmemcheck_define_bitfield(bitfield, { ++ u32 type:2, len:3, time_delta:27; ++ }); ++ + u32 array[]; + }; + + /** + * enum ring_buffer_type - internal ring buffer types + * +- * @RINGBUF_TYPE_PADDING: Left over page padding +- * array is ignored +- * size is variable depending on how much ++ * @RINGBUF_TYPE_PADDING: Left over page padding or discarded event ++ * If time_delta is 0: ++ * array is ignored ++ * size is variable depending on how much + * padding is needed ++ * If time_delta is non zero: ++ * everything else same as RINGBUF_TYPE_DATA + * + * @RINGBUF_TYPE_TIME_EXTEND: Extend the time delta + * array[0] = time delta (28 .. 59) +@@ -65,6 +72,8 @@ ring_buffer_event_time_delta(struct ring + return event->time_delta; + } + ++void ring_buffer_event_discard(struct ring_buffer_event *event); ++ + /* + * size is in bytes for each per CPU buffer. + */ +@@ -74,13 +83,10 @@ void ring_buffer_free(struct ring_buffer + + int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size); + +-struct ring_buffer_event * +-ring_buffer_lock_reserve(struct ring_buffer *buffer, +- unsigned long length, +- unsigned long *flags); ++struct ring_buffer_event *ring_buffer_lock_reserve(struct ring_buffer *buffer, ++ unsigned long length); + int ring_buffer_unlock_commit(struct ring_buffer *buffer, +- struct ring_buffer_event *event, +- unsigned long flags); ++ struct ring_buffer_event *event); + int ring_buffer_write(struct ring_buffer *buffer, + unsigned long length, void *data); + +@@ -121,17 +127,19 @@ unsigned long ring_buffer_overruns(struc + unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu); + unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu); + +-u64 ring_buffer_time_stamp(int cpu); +-void ring_buffer_normalize_time_stamp(int cpu, u64 *ts); ++u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu); ++void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, ++ int cpu, u64 *ts); ++void ring_buffer_set_clock(struct ring_buffer *buffer, ++ u64 (*clock)(void)); ++ ++size_t ring_buffer_page_len(void *page); + +-void tracing_on(void); +-void tracing_off(void); +-void tracing_off_permanent(void); + + void *ring_buffer_alloc_read_page(struct ring_buffer *buffer); + void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); +-int ring_buffer_read_page(struct ring_buffer *buffer, +- void **data_page, int cpu, int full); ++int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, ++ size_t len, int cpu, int full); + + enum ring_buffer_flags { + RB_FL_OVERWRITE = 1 << 0, +Index: linux-2.6-tip/include/linux/sched.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/sched.h ++++ linux-2.6-tip/include/linux/sched.h +@@ -71,6 +71,7 @@ struct sched_param { + #include + #include + #include ++#include + #include + #include + #include +@@ -91,6 +92,28 @@ struct sched_param { + + #include + ++#ifdef CONFIG_PREEMPT ++extern int kernel_preemption; ++#else ++# define kernel_preemption 0 ++#endif ++#ifdef CONFIG_PREEMPT_VOLUNTARY ++extern int voluntary_preemption; ++#else ++# define voluntary_preemption 0 ++#endif ++#ifdef CONFIG_PREEMPT_SOFTIRQS ++extern int softirq_preemption; ++#else ++# define softirq_preemption 0 ++#endif ++ ++#ifdef CONFIG_PREEMPT_HARDIRQS ++extern int hardirq_preemption; ++#else ++# define hardirq_preemption 0 ++#endif ++ + struct mem_cgroup; + struct exec_domain; + struct 
futex_pi_state; +@@ -115,6 +138,7 @@ struct bts_tracer; + * 11 bit fractions. + */ + extern unsigned long avenrun[]; /* Load averages */ ++extern void get_avenrun(unsigned long *loads, unsigned long offset, int shift); + + #define FSHIFT 11 /* nr of bits of precision */ + #define FIXED_1 (1<exit_state */ +-#define EXIT_ZOMBIE 16 +-#define EXIT_DEAD 32 ++#define EXIT_ZOMBIE 32 ++#define EXIT_DEAD 64 + /* in tsk->state again */ +-#define TASK_DEAD 64 +-#define TASK_WAKEKILL 128 ++#define TASK_DEAD 128 ++#define TASK_WAKEKILL 256 + + /* Convenience macros for the sake of set_task_state */ + #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) +@@ -193,7 +222,8 @@ extern unsigned long long time_sync_thre + #define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) + + /* get_task_state() */ +-#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ ++#define TASK_REPORT (TASK_RUNNING | TASK_RUNNING_MUTEX | \ ++ TASK_INTERRUPTIBLE | \ + TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ + __TASK_TRACED) + +@@ -210,6 +240,28 @@ extern unsigned long long time_sync_thre + #define set_task_state(tsk, state_value) \ + set_mb((tsk)->state, (state_value)) + ++// #define PREEMPT_DIRECT ++ ++#ifdef CONFIG_X86_LOCAL_APIC ++extern void nmi_show_all_regs(void); ++#else ++# define nmi_show_all_regs() do { } while (0) ++#endif ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++struct exec_domain; ++ + /* + * set_current_state() includes a barrier so that the write of current->state + * is correctly serialised wrt the caller's subsequent test of whether to +@@ -290,6 +342,12 @@ extern void scheduler_tick(void); + + extern void sched_show_task(struct task_struct *p); + ++#ifdef CONFIG_GENERIC_HARDIRQS ++extern int debug_direct_keyboard; ++#else ++# define debug_direct_keyboard 0 ++#endif ++ + #ifdef CONFIG_DETECT_SOFTLOCKUP + extern void softlockup_tick(void); + extern void touch_softlockup_watchdog(void); +@@ -298,17 +356,11 @@ extern int proc_dosoftlockup_thresh(stru + struct file *filp, void __user *buffer, + size_t *lenp, loff_t *ppos); + extern unsigned int softlockup_panic; +-extern unsigned long sysctl_hung_task_check_count; +-extern unsigned long sysctl_hung_task_timeout_secs; +-extern unsigned long sysctl_hung_task_warnings; + extern int softlockup_thresh; + #else + static inline void softlockup_tick(void) + { + } +-static inline void spawn_softlockup_task(void) +-{ +-} + static inline void touch_softlockup_watchdog(void) + { + } +@@ -317,6 +369,15 @@ static inline void touch_all_softlockup_ + } + #endif + ++#ifdef CONFIG_DETECT_HUNG_TASK ++extern unsigned int sysctl_hung_task_panic; ++extern unsigned long sysctl_hung_task_check_count; ++extern unsigned long sysctl_hung_task_timeout_secs; ++extern unsigned long sysctl_hung_task_warnings; ++extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, ++ struct file *filp, void __user *buffer, ++ size_t *lenp, loff_t *ppos); ++#endif + + /* Attach to any functions which should be ignored in wchan output. 
*/ + #define __sched __attribute__((__section__(".sched.text"))) +@@ -332,7 +393,14 @@ extern signed long schedule_timeout(sign + extern signed long schedule_timeout_interruptible(signed long timeout); + extern signed long schedule_timeout_killable(signed long timeout); + extern signed long schedule_timeout_uninterruptible(signed long timeout); ++asmlinkage void __schedule(void); + asmlinkage void schedule(void); ++extern int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner); ++/* ++ * This one can be called with interrupts disabled, only ++ * to be used by lowlevel arch code! ++ */ ++asmlinkage void __sched __schedule(void); + + struct nsproxy; + struct user_namespace; +@@ -480,7 +548,7 @@ struct task_cputime { + struct thread_group_cputimer { + struct task_cputime cputime; + int running; +- spinlock_t lock; ++ raw_spinlock_t lock; + }; + + /* +@@ -999,6 +1067,7 @@ struct sched_class { + struct rq *busiest, struct sched_domain *sd, + enum cpu_idle_type idle); + void (*pre_schedule) (struct rq *this_rq, struct task_struct *task); ++ int (*needs_post_schedule) (struct rq *this_rq); + void (*post_schedule) (struct rq *this_rq); + void (*task_wake_up) (struct rq *this_rq, struct task_struct *task); + +@@ -1053,6 +1122,11 @@ struct sched_entity { + u64 last_wakeup; + u64 avg_overlap; + ++ u64 nr_migrations; ++ ++ u64 start_runtime; ++ u64 avg_wakeup; ++ + #ifdef CONFIG_SCHEDSTATS + u64 wait_start; + u64 wait_max; +@@ -1068,7 +1142,6 @@ struct sched_entity { + u64 exec_max; + u64 slice_max; + +- u64 nr_migrations; + u64 nr_migrations_cold; + u64 nr_failed_migrations_affine; + u64 nr_failed_migrations_running; +@@ -1122,10 +1195,8 @@ struct task_struct { + int lock_depth; /* BKL lock depth */ + + #ifdef CONFIG_SMP +-#ifdef __ARCH_WANT_UNLOCKED_CTXSW + int oncpu; + #endif +-#endif + + int prio, static_prio, normal_prio; + unsigned int rt_priority; +@@ -1165,6 +1236,7 @@ struct task_struct { + #endif + + struct list_head tasks; ++ struct plist_node pushable_tasks; + + struct mm_struct *mm, *active_mm; + +@@ -1179,10 +1251,9 @@ struct task_struct { + pid_t pid; + pid_t tgid; + +-#ifdef CONFIG_CC_STACKPROTECTOR + /* Canary value for the -fstack-protector gcc feature */ + unsigned long stack_canary; +-#endif ++ + /* + * pointers to (original) parent process, youngest child, younger sibling, + * older sibling, respectively. 
(p->father can be replaced with +@@ -1238,6 +1309,8 @@ struct task_struct { + struct task_cputime cputime_expires; + struct list_head cpu_timers[3]; + ++ struct task_struct* posix_timer_list; ++ + /* process credentials */ + const struct cred *real_cred; /* objective and real subjective task + * credentials (COW) */ +@@ -1255,9 +1328,8 @@ struct task_struct { + /* ipc stuff */ + struct sysv_sem sysvsem; + #endif +-#ifdef CONFIG_DETECT_SOFTLOCKUP ++#ifdef CONFIG_DETECT_HUNG_TASK + /* hung task detection */ +- unsigned long last_switch_timestamp; + unsigned long last_switch_count; + #endif + /* CPU-specific state of this task */ +@@ -1271,6 +1343,7 @@ struct task_struct { + /* signal handlers */ + struct signal_struct *signal; + struct sighand_struct *sighand; ++ struct sigqueue *sigqueue_cache; + + sigset_t blocked, real_blocked; + sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */ +@@ -1295,7 +1368,7 @@ struct task_struct { + spinlock_t alloc_lock; + + /* Protection of the PI data structures: */ +- spinlock_t pi_lock; ++ raw_spinlock_t pi_lock; + + #ifdef CONFIG_RT_MUTEXES + /* PI waiters blocked on a rt_mutex held by this task */ +@@ -1308,6 +1381,7 @@ struct task_struct { + /* mutex deadlock detection */ + struct mutex_waiter *blocked_on; + #endif ++ int pagefault_disabled; + #ifdef CONFIG_TRACE_IRQFLAGS + unsigned int irq_events; + int hardirqs_enabled; +@@ -1329,6 +1403,27 @@ struct task_struct { + int lockdep_depth; + unsigned int lockdep_recursion; + struct held_lock held_locks[MAX_LOCK_DEPTH]; ++ gfp_t lockdep_reclaim_gfp; ++#endif ++ ++/* realtime bits */ ++ ++#define MAX_PREEMPT_TRACE 25 ++#define MAX_LOCK_STACK MAX_PREEMPT_TRACE ++#ifdef CONFIG_DEBUG_PREEMPT ++ atomic_t lock_count; ++# ifdef CONFIG_PREEMPT_RT ++ struct rt_mutex *owned_lock[MAX_LOCK_STACK]; ++# endif ++#endif ++#ifdef CONFIG_DETECT_SOFTLOCKUP ++ unsigned long softlockup_count; /* Count to keep track how long the ++ * thread is in the kernel without ++ * sleeping. ++ */ ++#endif ++#ifdef CONFIG_DEBUG_RT_MUTEXES ++ void *last_kernel_lock; + #endif + + /* journalling filesystem info */ +@@ -1370,7 +1465,9 @@ struct task_struct { + #endif + struct list_head pi_state_list; + struct futex_pi_state *pi_state_cache; ++ struct task_struct *futex_wakeup; + #endif ++ struct perf_counter_context perf_counter_ctx; + #ifdef CONFIG_NUMA + struct mempolicy *mempolicy; + short il_next; +@@ -1406,6 +1503,8 @@ struct task_struct { + int curr_ret_stack; + /* Stack of return addresses for return function tracing */ + struct ftrace_ret_stack *ret_stack; ++ /* time stamp for last schedule */ ++ unsigned long long ftrace_timestamp; + /* + * Number of functions that haven't been traced + * because of depth overrun. +@@ -1418,11 +1517,24 @@ struct task_struct { + /* state flags for use by tracers */ + unsigned long trace; + #endif ++#ifdef CONFIG_PREEMPT_RT ++ /* ++ * Temporary hack, until we find a solution to ++ * handle printk in atomic operations. ++ */ ++ int in_printk; ++#endif + }; + + /* Future-safe accessor for struct task_struct's cpus_allowed. 
*/ + #define tsk_cpumask(tsk) (&(tsk)->cpus_allowed) + ++#ifdef CONFIG_PREEMPT_RT ++# define set_printk_might_sleep(x) do { current->in_printk = x; } while(0) ++#else ++# define set_printk_might_sleep(x) do { } while(0) ++#endif ++ + /* + * Priority of a process goes from 0..MAX_PRIO-1, valid RT + * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH +@@ -1587,6 +1699,15 @@ extern struct pid *cad_pid; + extern void free_task(struct task_struct *tsk); + #define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) + ++#ifdef CONFIG_PREEMPT_RT ++extern void __put_task_struct_cb(struct rcu_head *rhp); ++ ++static inline void put_task_struct(struct task_struct *t) ++{ ++ if (atomic_dec_and_test(&t->usage)) ++ call_rcu(&t->rcu, __put_task_struct_cb); ++} ++#else + extern void __put_task_struct(struct task_struct *t); + + static inline void put_task_struct(struct task_struct *t) +@@ -1594,6 +1715,7 @@ static inline void put_task_struct(struc + if (atomic_dec_and_test(&t->usage)) + __put_task_struct(t); + } ++#endif + + extern cputime_t task_utime(struct task_struct *p); + extern cputime_t task_stime(struct task_struct *p); +@@ -1608,13 +1730,16 @@ extern cputime_t task_gtime(struct task_ + #define PF_EXITING 0x00000004 /* getting shut down */ + #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ + #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ ++#define PF_NOSCHED 0x00000020 /* Userspace does not expect scheduling */ + #define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ ++#define PF_HARDIRQ 0x00000080 /* hardirq context */ + #define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ + #define PF_DUMPCORE 0x00000200 /* dumped core */ + #define PF_SIGNALED 0x00000400 /* killed by a signal */ + #define PF_MEMALLOC 0x00000800 /* Allocating memory */ + #define PF_FLUSHER 0x00001000 /* responsible for disk writeback */ + #define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ ++#define PF_KMAP 0x00004000 /* this context has a kmap */ + #define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ + #define PF_FROZEN 0x00010000 /* frozen for system suspend */ + #define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ +@@ -1627,6 +1752,7 @@ extern cputime_t task_gtime(struct task_ + #define PF_SPREAD_PAGE 0x01000000 /* Spread page cache over cpuset */ + #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ + #define PF_THREAD_BOUND 0x04000000 /* Thread bound to specific cpu */ ++#define PF_SOFTIRQ 0x08000000 /* softirq context */ + #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ + #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ + #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ +@@ -1674,6 +1800,16 @@ static inline int set_cpus_allowed(struc + return set_cpus_allowed_ptr(p, &new_mask); + } + ++/* ++ * Architectures can set this to 1 if they have specified ++ * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, ++ * but then during bootup it turns out that sched_clock() ++ * is reliable after all: ++ */ ++#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK ++extern int sched_clock_stable; ++#endif ++ + extern unsigned long long sched_clock(void); + + extern void sched_clock_init(void); +@@ -1755,9 +1891,14 @@ int sched_rt_handler(struct ctl_table *t + + extern unsigned int sysctl_sched_compat_yield; + ++extern void task_setprio(struct task_struct *p, int prio); ++ + #ifdef CONFIG_RT_MUTEXES + extern int 
rt_mutex_getprio(struct task_struct *p); +-extern void rt_mutex_setprio(struct task_struct *p, int prio); ++static inline void rt_mutex_setprio(struct task_struct *p, int prio) ++{ ++ task_setprio(p, prio); ++} + extern void rt_mutex_adjust_pi(struct task_struct *p); + #else + static inline int rt_mutex_getprio(struct task_struct *p) +@@ -1781,6 +1922,7 @@ extern struct task_struct *curr_task(int + extern void set_curr_task(int cpu, struct task_struct *p); + + void yield(void); ++void __yield(void); + + /* + * The default (Linux) execution domain. +@@ -1848,6 +1990,9 @@ extern void do_timer(unsigned long ticks + + extern int wake_up_state(struct task_struct *tsk, unsigned int state); + extern int wake_up_process(struct task_struct *tsk); ++extern int wake_up_process_mutex(struct task_struct * tsk); ++extern int wake_up_process_sync(struct task_struct * tsk); ++extern int wake_up_process_mutex_sync(struct task_struct * tsk); + extern void wake_up_new_task(struct task_struct *tsk, + unsigned long clone_flags); + #ifdef CONFIG_SMP +@@ -1935,12 +2080,20 @@ extern struct mm_struct * mm_alloc(void) + + /* mmdrop drops the mm and the page tables */ + extern void __mmdrop(struct mm_struct *); ++extern void __mmdrop_delayed(struct mm_struct *); ++ + static inline void mmdrop(struct mm_struct * mm) + { + if (unlikely(atomic_dec_and_test(&mm->mm_count))) + __mmdrop(mm); + } + ++static inline void mmdrop_delayed(struct mm_struct * mm) ++{ ++ if (atomic_dec_and_test(&mm->mm_count)) ++ __mmdrop_delayed(mm); ++} ++ + /* mmput gets rid of the mappings and all user-space */ + extern void mmput(struct mm_struct *); + /* Grab a reference to a task's mm, if it is not already going away */ +@@ -2091,6 +2244,19 @@ static inline int object_is_on_stack(voi + + extern void thread_info_cache_init(void); + ++#ifdef CONFIG_DEBUG_STACK_USAGE ++static inline unsigned long stack_not_used(struct task_struct *p) ++{ ++ unsigned long *n = end_of_stack(p); ++ ++ do { /* Skip over canary */ ++ n++; ++ } while (!*n); ++ ++ return (unsigned long)n - (unsigned long)end_of_stack(p); ++} ++#endif ++ + /* set thread flags in other task's structures + * - see asm/thread_info.h for TIF_xxxx flags available + */ +@@ -2180,19 +2346,27 @@ static inline int cond_resched(void) + return _cond_resched(); + } + #endif +-extern int cond_resched_lock(spinlock_t * lock); ++extern int __cond_resched_raw_spinlock(raw_spinlock_t *lock); ++extern int __cond_resched_spinlock(spinlock_t *spinlock); ++ ++#define cond_resched_lock(lock) \ ++ PICK_SPIN_OP_RET(__cond_resched_raw_spinlock, __cond_resched_spinlock,\ ++ lock) ++ + extern int cond_resched_softirq(void); + static inline int cond_resched_bkl(void) + { + return _cond_resched(); + } ++extern int cond_resched_softirq_context(void); ++extern int cond_resched_hardirq_context(void); + + /* + * Does a critical section need to be broken due to another + * task waiting?: (technically does not depend on CONFIG_PREEMPT, + * but a general need for low latency) + */ +-static inline int spin_needbreak(spinlock_t *lock) ++static inline int __raw_spin_needbreak(raw_spinlock_t *lock) + { + #ifdef CONFIG_PREEMPT + return spin_is_contended(lock); +@@ -2218,6 +2392,40 @@ static inline void thread_group_cputime_ + { + } + ++#ifdef CONFIG_PREEMPT_RT ++static inline int __spin_needbreak(spinlock_t *lock) ++{ ++ struct task_struct *tsk = current; ++ ++ /* break if we are priority boosted */ ++ return tsk->prio < tsk->normal_prio; ++} ++#else ++static inline int __spin_needbreak(spinlock_t *lock) ++{ ++ /* should 
never be call outside of RT */ ++ BUG(); ++ return 0; ++} ++#endif ++ ++#define spin_needbreak(lock) \ ++ PICK_SPIN_OP_RET(__raw_spin_needbreak, __spin_needbreak, lock) ++ ++static inline int softirq_need_resched(void) ++{ ++ if (softirq_preemption && (current->flags & PF_SOFTIRQ)) ++ return need_resched(); ++ return 0; ++} ++ ++static inline int hardirq_need_resched(void) ++{ ++ if (hardirq_preemption && (current->flags & PF_HARDIRQ)) ++ return need_resched(); ++ return 0; ++} ++ + /* + * Reevaluate whether the task has signals pending delivery. + * Wake the task if so. +@@ -2344,6 +2552,13 @@ static inline void inc_syscw(struct task + #define TASK_SIZE_OF(tsk) TASK_SIZE + #endif + ++/* ++ * Call the function if the target task is executing on a CPU right now: ++ */ ++extern void task_oncpu_function_call(struct task_struct *p, ++ void (*func) (void *info), void *info); ++ ++ + #ifdef CONFIG_MM_OWNER + extern void mm_update_next_owner(struct mm_struct *mm); + extern void mm_init_owner(struct mm_struct *mm, struct task_struct *p); +@@ -2357,7 +2572,14 @@ static inline void mm_init_owner(struct + } + #endif /* CONFIG_MM_OWNER */ + +-#define TASK_STATE_TO_CHAR_STR "RSDTtZX" ++#define TASK_STATE_TO_CHAR_STR "RMSDTtZX" ++ ++#ifdef CONFIG_SMP ++static inline int task_is_current(struct task_struct *task) ++{ ++ return task->oncpu; ++} ++#endif + + #endif /* __KERNEL__ */ + +Index: linux-2.6-tip/include/linux/security.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/security.h ++++ linux-2.6-tip/include/linux/security.h +@@ -32,6 +32,7 @@ + #include + #include + #include ++#include + #include + + /* Maximum number of letters for an LSM name string */ +@@ -2966,5 +2967,28 @@ static inline void securityfs_remove(str + + #endif + ++#ifdef CONFIG_SECURITY ++ ++static inline char *alloc_secdata(void) ++{ ++ return (char *)get_zeroed_page(GFP_KERNEL); ++} ++ ++static inline void free_secdata(void *secdata) ++{ ++ free_page((unsigned long)secdata); ++} ++ ++#else ++ ++static inline char *alloc_secdata(void) ++{ ++ return (char *)1; ++} ++ ++static inline void free_secdata(void *secdata) ++{ } ++#endif /* CONFIG_SECURITY */ ++ + #endif /* ! __LINUX_SECURITY_H */ + +Index: linux-2.6-tip/include/linux/selinux_netlink.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/selinux_netlink.h ++++ linux-2.6-tip/include/linux/selinux_netlink.h +@@ -12,6 +12,8 @@ + #ifndef _LINUX_SELINUX_NETLINK_H + #define _LINUX_SELINUX_NETLINK_H + ++#include ++ + /* Message types. */ + #define SELNL_MSG_BASE 0x10 + enum { +@@ -38,11 +40,11 @@ enum selinux_nlgroups { + + /* Message structures */ + struct selnl_msg_setenforce { +- int32_t val; ++ __s32 val; + }; + + struct selnl_msg_policyload { +- u_int32_t seqno; ++ __u32 seqno; + }; + + #endif /* _LINUX_SELINUX_NETLINK_H */ +Index: linux-2.6-tip/include/linux/signal.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/signal.h ++++ linux-2.6-tip/include/linux/signal.h +@@ -225,6 +225,7 @@ static inline void init_sigpending(struc + } + + extern void flush_sigqueue(struct sigpending *queue); ++extern void flush_task_sigqueue(struct task_struct *tsk); + + /* Test if 'sig' is valid signal. 
Use this instead of testing _NSIG directly */ + static inline int valid_signal(unsigned long sig) +@@ -235,6 +236,8 @@ static inline int valid_signal(unsigned + extern int next_signal(struct sigpending *pending, sigset_t *mask); + extern int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p); + extern int __group_send_sig_info(int, struct siginfo *, struct task_struct *); ++extern long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, ++ siginfo_t *info); + extern long do_sigpending(void __user *, unsigned long); + extern int sigprocmask(int, sigset_t *, sigset_t *); + extern int show_unhandled_signals; +Index: linux-2.6-tip/include/linux/skbuff.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/skbuff.h ++++ linux-2.6-tip/include/linux/skbuff.h +@@ -15,6 +15,7 @@ + #define _LINUX_SKBUFF_H + + #include ++#include + #include + #include + #include +@@ -100,6 +101,9 @@ struct pipe_inode_info; + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + struct nf_conntrack { + atomic_t use; ++#ifdef CONFIG_PREEMPT_RT ++ struct rcu_head rcu; ++#endif + }; + #endif + +@@ -295,16 +299,18 @@ struct sk_buff { + }; + }; + __u32 priority; +- __u8 local_df:1, +- cloned:1, +- ip_summed:2, +- nohdr:1, +- nfctinfo:3; +- __u8 pkt_type:3, +- fclone:2, +- ipvs_property:1, +- peeked:1, +- nf_trace:1; ++ kmemcheck_define_bitfield(flags1, { ++ __u8 local_df:1, ++ cloned:1, ++ ip_summed:2, ++ nohdr:1, ++ nfctinfo:3; ++ __u8 pkt_type:3, ++ fclone:2, ++ ipvs_property:1, ++ peeked:1, ++ nf_trace:1; ++ }); + __be16 protocol; + + void (*destructor)(struct sk_buff *skb); +@@ -324,13 +330,17 @@ struct sk_buff { + __u16 tc_verd; /* traffic control verdict */ + #endif + #endif ++ ++ kmemcheck_define_bitfield(flags2, { + #ifdef CONFIG_IPV6_NDISC_NODETYPE +- __u8 ndisc_nodetype:2; ++ __u8 ndisc_nodetype:2; + #endif + #if defined(CONFIG_MAC80211) || defined(CONFIG_MAC80211_MODULE) +- __u8 do_not_encrypt:1; +- __u8 requeue:1; ++ __u8 do_not_encrypt:1; ++ __u8 requeue:1; + #endif ++ }); ++ + /* 0/13/14 bit hole */ + + #ifdef CONFIG_NET_DMA +Index: linux-2.6-tip/include/linux/slab.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/slab.h ++++ linux-2.6-tip/include/linux/slab.h +@@ -62,6 +62,13 @@ + # define SLAB_DEBUG_OBJECTS 0x00000000UL + #endif + ++/* Don't track use of uninitialized memory */ ++#ifdef CONFIG_KMEMCHECK ++# define SLAB_NOTRACK 0x00800000UL ++#else ++# define SLAB_NOTRACK 0x00000000UL ++#endif ++ + /* The following flags affect the page allocator grouping pages by mobility */ + #define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ + #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ +Index: linux-2.6-tip/include/linux/slab_def.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/slab_def.h ++++ linux-2.6-tip/include/linux/slab_def.h +@@ -14,6 +14,88 @@ + #include /* kmalloc_sizes.h needs PAGE_SIZE */ + #include /* kmalloc_sizes.h needs L1_CACHE_BYTES */ + #include ++#include ++ ++/* ++ * struct kmem_cache ++ * ++ * manages a cache. ++ */ ++ ++struct kmem_cache { ++/* 1) per-cpu data, touched during every alloc/free */ ++ struct array_cache *array[NR_CPUS]; ++/* 2) Cache tunables. 
Protected by cache_chain_mutex */ ++ unsigned int batchcount; ++ unsigned int limit; ++ unsigned int shared; ++ ++ unsigned int buffer_size; ++ u32 reciprocal_buffer_size; ++/* 3) touched by every alloc & free from the backend */ ++ ++ unsigned int flags; /* constant flags */ ++ unsigned int num; /* # of objs per slab */ ++ ++/* 4) cache_grow/shrink */ ++ /* order of pgs per slab (2^n) */ ++ unsigned int gfporder; ++ ++ /* force GFP flags, e.g. GFP_DMA */ ++ gfp_t gfpflags; ++ ++ size_t colour; /* cache colouring range */ ++ unsigned int colour_off; /* colour offset */ ++ struct kmem_cache *slabp_cache; ++ unsigned int slab_size; ++ unsigned int dflags; /* dynamic flags */ ++ ++ /* constructor func */ ++ void (*ctor)(void *obj); ++ ++/* 5) cache creation/removal */ ++ const char *name; ++ struct list_head next; ++ ++/* 6) statistics */ ++#ifdef CONFIG_DEBUG_SLAB ++ unsigned long num_active; ++ unsigned long num_allocations; ++ unsigned long high_mark; ++ unsigned long grown; ++ unsigned long reaped; ++ unsigned long errors; ++ unsigned long max_freeable; ++ unsigned long node_allocs; ++ unsigned long node_frees; ++ unsigned long node_overflow; ++ atomic_t allochit; ++ atomic_t allocmiss; ++ atomic_t freehit; ++ atomic_t freemiss; ++ ++ /* ++ * If debugging is enabled, then the allocator can add additional ++ * fields and/or padding to every object. buffer_size contains the total ++ * object size including these internal fields, the following two ++ * variables contain the offset to the user object and its size. ++ */ ++ int obj_offset; ++ int obj_size; ++#endif /* CONFIG_DEBUG_SLAB */ ++ ++ /* ++ * We put nodelists[] at the end of kmem_cache, because we want to size ++ * this array to nr_node_ids slots instead of MAX_NUMNODES ++ * (see kmem_cache_init()) ++ * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache ++ * is statically defined, so we reserve the max number of nodes. ++ */ ++ struct kmem_list3 *nodelists[MAX_NUMNODES]; ++ /* ++ * Do not add fields after nodelists[] ++ */ ++}; + + /* Size description struct for general caches. 
*/ + struct cache_sizes { +@@ -28,8 +110,26 @@ extern struct cache_sizes malloc_sizes[] + void *kmem_cache_alloc(struct kmem_cache *, gfp_t); + void *__kmalloc(size_t size, gfp_t flags); + +-static inline void *kmalloc(size_t size, gfp_t flags) ++#ifdef CONFIG_KMEMTRACE ++extern void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags); ++extern size_t slab_buffer_size(struct kmem_cache *cachep); ++#else ++static __always_inline void * ++kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags) ++{ ++ return kmem_cache_alloc(cachep, flags); ++} ++static inline size_t slab_buffer_size(struct kmem_cache *cachep) ++{ ++ return 0; ++} ++#endif ++ ++static __always_inline void *kmalloc(size_t size, gfp_t flags) + { ++ struct kmem_cache *cachep; ++ void *ret; ++ + if (__builtin_constant_p(size)) { + int i = 0; + +@@ -47,10 +147,17 @@ static inline void *kmalloc(size_t size, + found: + #ifdef CONFIG_ZONE_DMA + if (flags & GFP_DMA) +- return kmem_cache_alloc(malloc_sizes[i].cs_dmacachep, +- flags); ++ cachep = malloc_sizes[i].cs_dmacachep; ++ else + #endif +- return kmem_cache_alloc(malloc_sizes[i].cs_cachep, flags); ++ cachep = malloc_sizes[i].cs_cachep; ++ ++ ret = kmem_cache_alloc_notrace(cachep, flags); ++ ++ trace_kmalloc(_THIS_IP_, ret, ++ size, slab_buffer_size(cachep), flags); ++ ++ return ret; + } + return __kmalloc(size, flags); + } +@@ -59,8 +166,25 @@ found: + extern void *__kmalloc_node(size_t size, gfp_t flags, int node); + extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); + +-static inline void *kmalloc_node(size_t size, gfp_t flags, int node) ++#ifdef CONFIG_KMEMTRACE ++extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep, ++ gfp_t flags, ++ int nodeid); ++#else ++static __always_inline void * ++kmem_cache_alloc_node_notrace(struct kmem_cache *cachep, ++ gfp_t flags, ++ int nodeid) ++{ ++ return kmem_cache_alloc_node(cachep, flags, nodeid); ++} ++#endif ++ ++static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) + { ++ struct kmem_cache *cachep; ++ void *ret; ++ + if (__builtin_constant_p(size)) { + int i = 0; + +@@ -78,11 +202,18 @@ static inline void *kmalloc_node(size_t + found: + #ifdef CONFIG_ZONE_DMA + if (flags & GFP_DMA) +- return kmem_cache_alloc_node(malloc_sizes[i].cs_dmacachep, +- flags, node); ++ cachep = malloc_sizes[i].cs_dmacachep; ++ else + #endif +- return kmem_cache_alloc_node(malloc_sizes[i].cs_cachep, +- flags, node); ++ cachep = malloc_sizes[i].cs_cachep; ++ ++ ret = kmem_cache_alloc_node_notrace(cachep, flags, node); ++ ++ trace_kmalloc_node(_THIS_IP_, ret, ++ size, slab_buffer_size(cachep), ++ flags, node); ++ ++ return ret; + } + return __kmalloc_node(size, flags, node); + } +Index: linux-2.6-tip/include/linux/slob_def.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/slob_def.h ++++ linux-2.6-tip/include/linux/slob_def.h +@@ -3,14 +3,15 @@ + + void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); + +-static inline void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) ++static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep, ++ gfp_t flags) + { + return kmem_cache_alloc_node(cachep, flags, -1); + } + + void *__kmalloc_node(size_t size, gfp_t flags, int node); + +-static inline void *kmalloc_node(size_t size, gfp_t flags, int node) ++static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) + { + return __kmalloc_node(size, flags, node); + } +@@ 
-23,12 +24,12 @@ static inline void *kmalloc_node(size_t + * kmalloc is the normal method of allocating memory + * in the kernel. + */ +-static inline void *kmalloc(size_t size, gfp_t flags) ++static __always_inline void *kmalloc(size_t size, gfp_t flags) + { + return __kmalloc_node(size, flags, -1); + } + +-static inline void *__kmalloc(size_t size, gfp_t flags) ++static __always_inline void *__kmalloc(size_t size, gfp_t flags) + { + return kmalloc(size, flags); + } +Index: linux-2.6-tip/include/linux/slub_def.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/slub_def.h ++++ linux-2.6-tip/include/linux/slub_def.h +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + enum stat_item { + ALLOC_FASTPATH, /* Allocation from cpu slab */ +@@ -121,10 +122,23 @@ struct kmem_cache { + #define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE) + + /* ++ * Maximum kmalloc object size handled by SLUB. Larger object allocations ++ * are passed through to the page allocator. The page allocator "fastpath" ++ * is relatively slow so we need this value sufficiently high so that ++ * performance critical objects are allocated through the SLUB fastpath. ++ * ++ * This should be dropped to PAGE_SIZE / 2 once the page allocator ++ * "fastpath" becomes competitive with the slab allocator fastpaths. ++ */ ++#define SLUB_MAX_SIZE (PAGE_SIZE) ++ ++#define SLUB_PAGE_SHIFT (PAGE_SHIFT + 1) ++ ++/* + * We keep the general caches in an array of slab caches that are used for + * 2^x bytes of allocations. + */ +-extern struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1]; ++extern struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT]; + + /* + * Sorry that the following has to be that ugly but some versions of GCC +@@ -204,15 +218,32 @@ static __always_inline struct kmem_cache + void *kmem_cache_alloc(struct kmem_cache *, gfp_t); + void *__kmalloc(size_t size, gfp_t flags); + ++#ifdef CONFIG_KMEMTRACE ++extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags); ++#else ++static __always_inline void * ++kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags) ++{ ++ return kmem_cache_alloc(s, gfpflags); ++} ++#endif ++ + static __always_inline void *kmalloc_large(size_t size, gfp_t flags) + { +- return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size)); ++ unsigned int order = get_order(size); ++ void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order); ++ ++ trace_kmalloc(_THIS_IP_, ret, size, PAGE_SIZE << order, flags); ++ ++ return ret; + } + + static __always_inline void *kmalloc(size_t size, gfp_t flags) + { ++ void *ret; ++ + if (__builtin_constant_p(size)) { +- if (size > PAGE_SIZE) ++ if (size > SLUB_MAX_SIZE) + return kmalloc_large(size, flags); + + if (!(flags & SLUB_DMA)) { +@@ -221,7 +252,11 @@ static __always_inline void *kmalloc(siz + if (!s) + return ZERO_SIZE_PTR; + +- return kmem_cache_alloc(s, flags); ++ ret = kmem_cache_alloc_notrace(s, flags); ++ ++ trace_kmalloc(_THIS_IP_, ret, size, s->size, flags); ++ ++ return ret; + } + } + return __kmalloc(size, flags); +@@ -231,16 +266,37 @@ static __always_inline void *kmalloc(siz + void *__kmalloc_node(size_t size, gfp_t flags, int node); + void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); + ++#ifdef CONFIG_KMEMTRACE ++extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s, ++ gfp_t gfpflags, ++ int node); ++#else ++static __always_inline void * ++kmem_cache_alloc_node_notrace(struct kmem_cache *s, ++ gfp_t gfpflags, ++ int node) 
++{ ++ return kmem_cache_alloc_node(s, gfpflags, node); ++} ++#endif ++ + static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) + { ++ void *ret; ++ + if (__builtin_constant_p(size) && +- size <= PAGE_SIZE && !(flags & SLUB_DMA)) { ++ size <= SLUB_MAX_SIZE && !(flags & SLUB_DMA)) { + struct kmem_cache *s = kmalloc_slab(size); + + if (!s) + return ZERO_SIZE_PTR; + +- return kmem_cache_alloc_node(s, flags, node); ++ ret = kmem_cache_alloc_node_notrace(s, flags, node); ++ ++ trace_kmalloc_node(_THIS_IP_, ret, ++ size, s->size, flags, node); ++ ++ return ret; + } + return __kmalloc_node(size, flags, node); + } +Index: linux-2.6-tip/include/linux/smp.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/smp.h ++++ linux-2.6-tip/include/linux/smp.h +@@ -38,7 +38,7 @@ int smp_call_function_single(int cpuid, + /* + * main cross-CPU interfaces, handles INIT, TLB flush, STOP, etc. + * (defined in asm header): +- */ ++ */ + + /* + * stops all CPUs but the current one: +@@ -50,6 +50,16 @@ extern void smp_send_stop(void); + */ + extern void smp_send_reschedule(int cpu); + ++/* ++ * trigger a reschedule on all other CPUs: ++ */ ++extern void smp_send_reschedule_allbutself(void); ++ ++/* ++ * trigger a reschedule on all other CPUs: ++ */ ++extern void smp_send_reschedule_allbutself(void); ++ + + /* + * Prepare machine for booting other CPUs. +@@ -82,7 +92,8 @@ smp_call_function_mask(cpumask_t mask, v + return 0; + } + +-void __smp_call_function_single(int cpuid, struct call_single_data *data); ++void __smp_call_function_single(int cpuid, struct call_single_data *data, ++ int wait); + + /* + * Generic and arch helpers +@@ -121,6 +132,8 @@ extern unsigned int setup_max_cpus; + + #else /* !SMP */ + ++static inline void smp_send_stop(void) { } ++ + /* + * These macros fold the SMP functionality into a single CPU system + */ +@@ -139,6 +152,7 @@ static inline int up_smp_call_function(v + 0; \ + }) + static inline void smp_send_reschedule(int cpu) { } ++static inline void smp_send_reschedule_allbutself(void) { } + #define num_booting_cpus() 1 + #define smp_prepare_boot_cpu() do {} while (0) + #define smp_call_function_mask(mask, func, info, wait) \ +@@ -174,7 +188,13 @@ static inline void init_call_single_data + + #define get_cpu() ({ preempt_disable(); smp_processor_id(); }) + #define put_cpu() preempt_enable() +-#define put_cpu_no_resched() preempt_enable_no_resched() ++#define put_cpu_no_resched() __preempt_enable_no_resched() ++ ++/* ++ * Callback to arch code if there's nosmp or maxcpus=0 on the ++ * boot command line: ++ */ ++extern void arch_disable_smp_support(void); + + void smp_setup_processor_id(void); + +Index: linux-2.6-tip/include/linux/socket.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/socket.h ++++ linux-2.6-tip/include/linux/socket.h +@@ -24,10 +24,12 @@ struct __kernel_sockaddr_storage { + #include /* pid_t */ + #include /* __user */ + +-#ifdef CONFIG_PROC_FS ++#ifdef __KERNEL__ ++# ifdef CONFIG_PROC_FS + struct seq_file; + extern void socket_seq_show(struct seq_file *seq); +-#endif ++# endif ++#endif /* __KERNEL__ */ + + typedef unsigned short sa_family_t; + +Index: linux-2.6-tip/include/linux/stackprotector.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/linux/stackprotector.h +@@ -0,0 +1,16 @@ ++#ifndef _LINUX_STACKPROTECTOR_H ++#define _LINUX_STACKPROTECTOR_H 1 ++ ++#include 
++#include ++#include ++ ++#ifdef CONFIG_CC_STACKPROTECTOR ++# include ++#else ++static inline void boot_init_stack_canary(void) ++{ ++} ++#endif ++ ++#endif +Index: linux-2.6-tip/include/linux/stacktrace.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/stacktrace.h ++++ linux-2.6-tip/include/linux/stacktrace.h +@@ -4,6 +4,8 @@ + struct task_struct; + + #ifdef CONFIG_STACKTRACE ++struct task_struct; ++ + struct stack_trace { + unsigned int nr_entries, max_entries; + unsigned long *entries; +@@ -11,6 +13,7 @@ struct stack_trace { + }; + + extern void save_stack_trace(struct stack_trace *trace); ++extern void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp); + extern void save_stack_trace_tsk(struct task_struct *tsk, + struct stack_trace *trace); + +Index: linux-2.6-tip/include/linux/string.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/string.h ++++ linux-2.6-tip/include/linux/string.h +@@ -10,6 +10,7 @@ + #include /* for inline */ + #include /* for size_t */ + #include /* for NULL */ ++#include + + extern char *strndup_user(const char __user *, long); + +@@ -111,6 +112,12 @@ extern void argv_free(char **argv); + + extern bool sysfs_streq(const char *s1, const char *s2); + ++#ifdef CONFIG_BINARY_PRINTF ++int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); ++int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf); ++int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) __printf(3, 4); ++#endif ++ + extern ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos, + const void *from, size_t available); + +Index: linux-2.6-tip/include/linux/suspend_ioctls.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/suspend_ioctls.h ++++ linux-2.6-tip/include/linux/suspend_ioctls.h +@@ -1,14 +1,15 @@ + #ifndef _LINUX_SUSPEND_IOCTLS_H + #define _LINUX_SUSPEND_IOCTLS_H + ++#include + /* + * This structure is used to pass the values needed for the identification + * of the resume swap area from a user space to the kernel via the + * SNAPSHOT_SET_SWAP_AREA ioctl + */ + struct resume_swap_area { +- loff_t offset; +- u_int32_t dev; ++ __kernel_loff_t offset; ++ __u32 dev; + } __attribute__((packed)); + + #define SNAPSHOT_IOC_MAGIC '3' +@@ -20,13 +21,13 @@ struct resume_swap_area { + #define SNAPSHOT_S2RAM _IO(SNAPSHOT_IOC_MAGIC, 11) + #define SNAPSHOT_SET_SWAP_AREA _IOW(SNAPSHOT_IOC_MAGIC, 13, \ + struct resume_swap_area) +-#define SNAPSHOT_GET_IMAGE_SIZE _IOR(SNAPSHOT_IOC_MAGIC, 14, loff_t) ++#define SNAPSHOT_GET_IMAGE_SIZE _IOR(SNAPSHOT_IOC_MAGIC, 14, __kernel_loff_t) + #define SNAPSHOT_PLATFORM_SUPPORT _IO(SNAPSHOT_IOC_MAGIC, 15) + #define SNAPSHOT_POWER_OFF _IO(SNAPSHOT_IOC_MAGIC, 16) + #define SNAPSHOT_CREATE_IMAGE _IOW(SNAPSHOT_IOC_MAGIC, 17, int) + #define SNAPSHOT_PREF_IMAGE_SIZE _IO(SNAPSHOT_IOC_MAGIC, 18) +-#define SNAPSHOT_AVAIL_SWAP_SIZE _IOR(SNAPSHOT_IOC_MAGIC, 19, loff_t) +-#define SNAPSHOT_ALLOC_SWAP_PAGE _IOR(SNAPSHOT_IOC_MAGIC, 20, loff_t) ++#define SNAPSHOT_AVAIL_SWAP_SIZE _IOR(SNAPSHOT_IOC_MAGIC, 19, __kernel_loff_t) ++#define SNAPSHOT_ALLOC_SWAP_PAGE _IOR(SNAPSHOT_IOC_MAGIC, 20, __kernel_loff_t) + #define SNAPSHOT_IOC_MAXNR 20 + + #endif /* _LINUX_SUSPEND_IOCTLS_H */ +Index: linux-2.6-tip/include/linux/swiotlb.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/swiotlb.h ++++ 
linux-2.6-tip/include/linux/swiotlb.h +@@ -31,7 +31,7 @@ extern dma_addr_t swiotlb_phys_to_bus(st + phys_addr_t address); + extern phys_addr_t swiotlb_bus_to_phys(dma_addr_t address); + +-extern int swiotlb_arch_range_needs_mapping(void *ptr, size_t size); ++extern int swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size); + + extern void + *swiotlb_alloc_coherent(struct device *hwdev, size_t size, +@@ -41,20 +41,13 @@ extern void + swiotlb_free_coherent(struct device *hwdev, size_t size, + void *vaddr, dma_addr_t dma_handle); + +-extern dma_addr_t +-swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir); +- +-extern void +-swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, +- size_t size, int dir); +- +-extern dma_addr_t +-swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, +- int dir, struct dma_attrs *attrs); +- +-extern void +-swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr, +- size_t size, int dir, struct dma_attrs *attrs); ++extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, ++ unsigned long offset, size_t size, ++ enum dma_data_direction dir, ++ struct dma_attrs *attrs); ++extern void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, ++ size_t size, enum dma_data_direction dir, ++ struct dma_attrs *attrs); + + extern int + swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, +@@ -66,36 +59,38 @@ swiotlb_unmap_sg(struct device *hwdev, s + + extern int + swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, +- int dir, struct dma_attrs *attrs); ++ enum dma_data_direction dir, struct dma_attrs *attrs); + + extern void + swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, +- int nelems, int dir, struct dma_attrs *attrs); ++ int nelems, enum dma_data_direction dir, ++ struct dma_attrs *attrs); + + extern void + swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, +- size_t size, int dir); ++ size_t size, enum dma_data_direction dir); + + extern void + swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, +- int nelems, int dir); ++ int nelems, enum dma_data_direction dir); + + extern void + swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, +- size_t size, int dir); ++ size_t size, enum dma_data_direction dir); + + extern void + swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, +- int nelems, int dir); ++ int nelems, enum dma_data_direction dir); + + extern void + swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, +- unsigned long offset, size_t size, int dir); ++ unsigned long offset, size_t size, ++ enum dma_data_direction dir); + + extern void + swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, + unsigned long offset, size_t size, +- int dir); ++ enum dma_data_direction dir); + + extern int + swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); +Index: linux-2.6-tip/include/linux/syscalls.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/syscalls.h ++++ linux-2.6-tip/include/linux/syscalls.h +@@ -55,6 +55,7 @@ struct compat_timeval; + struct robust_list_head; + struct getcpu_cache; + struct old_linux_dirent; ++struct perf_counter_hw_event; + + #include + #include +@@ -65,6 +66,7 @@ struct old_linux_dirent; + #include + #include + #include ++#include + + #define __SC_DECL1(t1, a1) t1 a1 + #define 
__SC_DECL2(t2, a2, ...) t2 a2, __SC_DECL1(__VA_ARGS__) +@@ -95,7 +97,46 @@ struct old_linux_dirent; + #define __SC_TEST5(t5, a5, ...) __SC_TEST(t5); __SC_TEST4(__VA_ARGS__) + #define __SC_TEST6(t6, a6, ...) __SC_TEST(t6); __SC_TEST5(__VA_ARGS__) + ++#ifdef CONFIG_FTRACE_SYSCALLS ++#define __SC_STR_ADECL1(t, a) #a ++#define __SC_STR_ADECL2(t, a, ...) #a, __SC_STR_ADECL1(__VA_ARGS__) ++#define __SC_STR_ADECL3(t, a, ...) #a, __SC_STR_ADECL2(__VA_ARGS__) ++#define __SC_STR_ADECL4(t, a, ...) #a, __SC_STR_ADECL3(__VA_ARGS__) ++#define __SC_STR_ADECL5(t, a, ...) #a, __SC_STR_ADECL4(__VA_ARGS__) ++#define __SC_STR_ADECL6(t, a, ...) #a, __SC_STR_ADECL5(__VA_ARGS__) ++ ++#define __SC_STR_TDECL1(t, a) #t ++#define __SC_STR_TDECL2(t, a, ...) #t, __SC_STR_TDECL1(__VA_ARGS__) ++#define __SC_STR_TDECL3(t, a, ...) #t, __SC_STR_TDECL2(__VA_ARGS__) ++#define __SC_STR_TDECL4(t, a, ...) #t, __SC_STR_TDECL3(__VA_ARGS__) ++#define __SC_STR_TDECL5(t, a, ...) #t, __SC_STR_TDECL4(__VA_ARGS__) ++#define __SC_STR_TDECL6(t, a, ...) #t, __SC_STR_TDECL5(__VA_ARGS__) ++ ++#define SYSCALL_METADATA(sname, nb) \ ++ static const struct syscall_metadata __used \ ++ __attribute__((__aligned__(4))) \ ++ __attribute__((section("__syscalls_metadata"))) \ ++ __syscall_meta_##sname = { \ ++ .name = "sys"#sname, \ ++ .nb_args = nb, \ ++ .types = types_##sname, \ ++ .args = args_##sname, \ ++ } ++ ++#define SYSCALL_DEFINE0(sname) \ ++ static const struct syscall_metadata __used \ ++ __attribute__((__aligned__(4))) \ ++ __attribute__((section("__syscalls_metadata"))) \ ++ __syscall_meta_##sname = { \ ++ .name = "sys_"#sname, \ ++ .nb_args = 0, \ ++ }; \ ++ asmlinkage long sys_##sname(void) ++ ++#else + #define SYSCALL_DEFINE0(name) asmlinkage long sys_##name(void) ++#endif ++ + #define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__) + #define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__) + #define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__) +@@ -117,10 +158,26 @@ struct old_linux_dirent; + #endif + #endif + ++#ifdef CONFIG_FTRACE_SYSCALLS ++#define SYSCALL_DEFINEx(x, sname, ...) \ ++ static const char *types_##sname[] = { \ ++ __SC_STR_TDECL##x(__VA_ARGS__) \ ++ }; \ ++ static const char *args_##sname[] = { \ ++ __SC_STR_ADECL##x(__VA_ARGS__) \ ++ }; \ ++ SYSCALL_METADATA(sname, x); \ ++ __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) ++#else ++#define SYSCALL_DEFINEx(x, sname, ...) \ ++ __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) ++#endif ++ + #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS + + #define SYSCALL_DEFINE(name) static inline long SYSC_##name +-#define SYSCALL_DEFINEx(x, name, ...) \ ++ ++#define __SYSCALL_DEFINEx(x, name, ...) \ + asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)); \ + static inline long SYSC##name(__SC_DECL##x(__VA_ARGS__)); \ + asmlinkage long SyS##name(__SC_LONG##x(__VA_ARGS__)) \ +@@ -134,7 +191,7 @@ struct old_linux_dirent; + #else /* CONFIG_HAVE_SYSCALL_WRAPPERS */ + + #define SYSCALL_DEFINE(name) asmlinkage long sys_##name +-#define SYSCALL_DEFINEx(x, name, ...) \ ++#define __SYSCALL_DEFINEx(x, name, ...) 
\ + asmlinkage long sys##name(__SC_DECL##x(__VA_ARGS__)) + + #endif /* CONFIG_HAVE_SYSCALL_WRAPPERS */ +@@ -694,4 +751,8 @@ asmlinkage long sys_pipe(int __user *); + + int kernel_execve(const char *filename, char *const argv[], char *const envp[]); + ++ ++asmlinkage long sys_perf_counter_open( ++ const struct perf_counter_hw_event __user *hw_event_uptr, ++ pid_t pid, int cpu, int group_fd, unsigned long flags); + #endif +Index: linux-2.6-tip/include/linux/thread_info.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/thread_info.h ++++ linux-2.6-tip/include/linux/thread_info.h +@@ -21,13 +21,14 @@ struct restart_block { + struct { + unsigned long arg0, arg1, arg2, arg3; + }; +- /* For futex_wait */ ++ /* For futex_wait and futex_wait_requeue_pi */ + struct { + u32 *uaddr; + u32 val; + u32 flags; + u32 bitset; + u64 time; ++ u32 *uaddr2; + } futex; + /* For nanosleep */ + struct { +Index: linux-2.6-tip/include/linux/time.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/time.h ++++ linux-2.6-tip/include/linux/time.h +@@ -12,14 +12,14 @@ + #ifndef _STRUCT_TIMESPEC + #define _STRUCT_TIMESPEC + struct timespec { +- time_t tv_sec; /* seconds */ +- long tv_nsec; /* nanoseconds */ ++ __kernel_time_t tv_sec; /* seconds */ ++ long tv_nsec; /* nanoseconds */ + }; + #endif + + struct timeval { +- time_t tv_sec; /* seconds */ +- suseconds_t tv_usec; /* microseconds */ ++ __kernel_time_t tv_sec; /* seconds */ ++ __kernel_suseconds_t tv_usec; /* microseconds */ + }; + + struct timezone { +@@ -99,7 +99,7 @@ static inline struct timespec timespec_s + + extern struct timespec xtime; + extern struct timespec wall_to_monotonic; +-extern seqlock_t xtime_lock; ++extern raw_seqlock_t xtime_lock; + + extern unsigned long read_persistent_clock(void); + extern int update_persistent_clock(struct timespec now); +Index: linux-2.6-tip/include/linux/timer.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/timer.h ++++ linux-2.6-tip/include/linux/timer.h +@@ -5,6 +5,7 @@ + #include + #include + #include ++#include + + struct tvec_base; + +@@ -21,52 +22,126 @@ struct timer_list { + char start_comm[16]; + int start_pid; + #endif ++#ifdef CONFIG_LOCKDEP ++ struct lockdep_map lockdep_map; ++#endif + }; + + extern struct tvec_base boot_tvec_bases; + ++#ifdef CONFIG_LOCKDEP ++/* ++ * NB: because we have to copy the lockdep_map, setting the lockdep_map key ++ * (second argument) here is required, otherwise it could be initialised to ++ * the copy of the lockdep_map later! We use the pointer to and the string ++ * ":" as the key resp. the name of the lockdep_map. 
++ */ ++#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) \ ++ .lockdep_map = STATIC_LOCKDEP_MAP_INIT(_kn, &_kn), ++#else ++#define __TIMER_LOCKDEP_MAP_INITIALIZER(_kn) ++#endif ++ + #define TIMER_INITIALIZER(_function, _expires, _data) { \ + .entry = { .prev = TIMER_ENTRY_STATIC }, \ + .function = (_function), \ + .expires = (_expires), \ + .data = (_data), \ + .base = &boot_tvec_bases, \ ++ __TIMER_LOCKDEP_MAP_INITIALIZER( \ ++ __FILE__ ":" __stringify(__LINE__)) \ + } + + #define DEFINE_TIMER(_name, _function, _expires, _data) \ + struct timer_list _name = \ + TIMER_INITIALIZER(_function, _expires, _data) + +-void init_timer(struct timer_list *timer); +-void init_timer_deferrable(struct timer_list *timer); ++void init_timer_key(struct timer_list *timer, ++ const char *name, ++ struct lock_class_key *key); ++void init_timer_deferrable_key(struct timer_list *timer, ++ const char *name, ++ struct lock_class_key *key); ++ ++#ifdef CONFIG_LOCKDEP ++#define init_timer(timer) \ ++ do { \ ++ static struct lock_class_key __key; \ ++ init_timer_key((timer), #timer, &__key); \ ++ } while (0) ++ ++#define init_timer_deferrable(timer) \ ++ do { \ ++ static struct lock_class_key __key; \ ++ init_timer_deferrable_key((timer), #timer, &__key); \ ++ } while (0) ++ ++#define init_timer_on_stack(timer) \ ++ do { \ ++ static struct lock_class_key __key; \ ++ init_timer_on_stack_key((timer), #timer, &__key); \ ++ } while (0) ++ ++#define setup_timer(timer, fn, data) \ ++ do { \ ++ static struct lock_class_key __key; \ ++ setup_timer_key((timer), #timer, &__key, (fn), (data));\ ++ } while (0) ++ ++#define setup_timer_on_stack(timer, fn, data) \ ++ do { \ ++ static struct lock_class_key __key; \ ++ setup_timer_on_stack_key((timer), #timer, &__key, \ ++ (fn), (data)); \ ++ } while (0) ++#else ++#define init_timer(timer)\ ++ init_timer_key((timer), NULL, NULL) ++#define init_timer_deferrable(timer)\ ++ init_timer_deferrable_key((timer), NULL, NULL) ++#define init_timer_on_stack(timer)\ ++ init_timer_on_stack_key((timer), NULL, NULL) ++#define setup_timer(timer, fn, data)\ ++ setup_timer_key((timer), NULL, NULL, (fn), (data)) ++#define setup_timer_on_stack(timer, fn, data)\ ++ setup_timer_on_stack_key((timer), NULL, NULL, (fn), (data)) ++#endif + + #ifdef CONFIG_DEBUG_OBJECTS_TIMERS +-extern void init_timer_on_stack(struct timer_list *timer); ++extern void init_timer_on_stack_key(struct timer_list *timer, ++ const char *name, ++ struct lock_class_key *key); + extern void destroy_timer_on_stack(struct timer_list *timer); + #else + static inline void destroy_timer_on_stack(struct timer_list *timer) { } +-static inline void init_timer_on_stack(struct timer_list *timer) ++static inline void init_timer_on_stack_key(struct timer_list *timer, ++ const char *name, ++ struct lock_class_key *key) + { +- init_timer(timer); ++ init_timer_key(timer, name, key); + } + #endif + +-static inline void setup_timer(struct timer_list * timer, ++static inline void setup_timer_key(struct timer_list * timer, ++ const char *name, ++ struct lock_class_key *key, + void (*function)(unsigned long), + unsigned long data) + { + timer->function = function; + timer->data = data; +- init_timer(timer); ++ init_timer_key(timer, name, key); + } + +-static inline void setup_timer_on_stack(struct timer_list *timer, ++static inline void setup_timer_on_stack_key(struct timer_list *timer, ++ const char *name, ++ struct lock_class_key *key, + void (*function)(unsigned long), + unsigned long data) + { + timer->function = function; + timer->data = data; +- 
init_timer_on_stack(timer); ++ init_timer_on_stack_key(timer, name, key); + } + + /** +@@ -86,8 +161,8 @@ static inline int timer_pending(const st + + extern void add_timer_on(struct timer_list *timer, int cpu); + extern int del_timer(struct timer_list * timer); +-extern int __mod_timer(struct timer_list *timer, unsigned long expires); + extern int mod_timer(struct timer_list *timer, unsigned long expires); ++extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); + + /* + * The jiffies value which is added to now, when there is no timer +@@ -146,30 +221,14 @@ static inline void timer_stats_timer_cle + } + #endif + +-/** +- * add_timer - start a timer +- * @timer: the timer to be added +- * +- * The kernel will do a ->function(->data) callback from the +- * timer interrupt at the ->expires point in the future. The +- * current time is 'jiffies'. +- * +- * The timer's ->expires, ->function (and if the handler uses it, ->data) +- * fields must be set prior calling this function. +- * +- * Timers with an ->expires field in the past will be executed in the next +- * timer tick. +- */ +-static inline void add_timer(struct timer_list *timer) +-{ +- BUG_ON(timer_pending(timer)); +- __mod_timer(timer, timer->expires); +-} ++extern void add_timer(struct timer_list *timer); + +-#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_SOFTIRQS) ++ extern int timer_pending_sync(struct timer_list *timer); + extern int try_to_del_timer_sync(struct timer_list *timer); + extern int del_timer_sync(struct timer_list *timer); + #else ++# define timer_pending_sync(t) timer_pending(t) + # define try_to_del_timer_sync(t) del_timer(t) + # define del_timer_sync(t) del_timer(t) + #endif +Index: linux-2.6-tip/include/linux/times.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/times.h ++++ linux-2.6-tip/include/linux/times.h +@@ -4,10 +4,10 @@ + #include + + struct tms { +- clock_t tms_utime; +- clock_t tms_stime; +- clock_t tms_cutime; +- clock_t tms_cstime; ++ __kernel_clock_t tms_utime; ++ __kernel_clock_t tms_stime; ++ __kernel_clock_t tms_cutime; ++ __kernel_clock_t tms_cstime; + }; + + #endif +Index: linux-2.6-tip/include/linux/timex.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/timex.h ++++ linux-2.6-tip/include/linux/timex.h +@@ -190,7 +190,7 @@ struct timex { + * offset and maximum frequency tolerance. 
+ */ + #define SHIFT_USEC 16 /* frequency offset scale (shift) */ +-#define PPM_SCALE (NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC)) ++#define PPM_SCALE ((s64)NSEC_PER_USEC << (NTP_SCALE_SHIFT - SHIFT_USEC)) + #define PPM_SCALE_INV_SHIFT 19 + #define PPM_SCALE_INV ((1ll << (PPM_SCALE_INV_SHIFT + NTP_SCALE_SHIFT)) / \ + PPM_SCALE + 1) +Index: linux-2.6-tip/include/linux/topology.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/topology.h ++++ linux-2.6-tip/include/linux/topology.h +@@ -38,11 +38,7 @@ + #endif + + #ifndef nr_cpus_node +-#define nr_cpus_node(node) \ +- ({ \ +- node_to_cpumask_ptr(__tmp__, node); \ +- cpus_weight(*__tmp__); \ +- }) ++#define nr_cpus_node(node) cpumask_weight(cpumask_of_node(node)) + #endif + + #define for_each_node_with_cpus(node) \ +@@ -193,5 +189,16 @@ int arch_update_cpu_topology(void); + #ifndef topology_core_siblings + #define topology_core_siblings(cpu) cpumask_of_cpu(cpu) + #endif ++#ifndef topology_thread_cpumask ++#define topology_thread_cpumask(cpu) cpumask_of(cpu) ++#endif ++#ifndef topology_core_cpumask ++#define topology_core_cpumask(cpu) cpumask_of(cpu) ++#endif ++ ++/* Returns the number of the current Node. */ ++#ifndef numa_node_id ++#define numa_node_id() (cpu_to_node(raw_smp_processor_id())) ++#endif + + #endif /* _LINUX_TOPOLOGY_H */ +Index: linux-2.6-tip/include/linux/trace_clock.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/linux/trace_clock.h +@@ -0,0 +1,19 @@ ++#ifndef _LINUX_TRACE_CLOCK_H ++#define _LINUX_TRACE_CLOCK_H ++ ++/* ++ * 3 trace clock variants, with differing scalability/precision ++ * tradeoffs: ++ * ++ * - local: CPU-local trace clock ++ * - medium: scalable global clock with some jitter ++ * - global: globally monotonic, serialized clock ++ */ ++#include ++#include ++ ++extern u64 notrace trace_clock_local(void); ++extern u64 notrace trace_clock(void); ++extern u64 notrace trace_clock_global(void); ++ ++#endif /* _LINUX_TRACE_CLOCK_H */ +Index: linux-2.6-tip/include/linux/tracepoint.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/tracepoint.h ++++ linux-2.6-tip/include/linux/tracepoint.h +@@ -31,8 +31,8 @@ struct tracepoint { + * Keep in sync with vmlinux.lds.h. + */ + +-#define TPPROTO(args...) args +-#define TPARGS(args...) args ++#define TP_PROTO(args...) args ++#define TP_ARGS(args...) args + + #ifdef CONFIG_TRACEPOINTS + +@@ -65,7 +65,7 @@ struct tracepoint { + { \ + if (unlikely(__tracepoint_##name.state)) \ + __DO_TRACE(&__tracepoint_##name, \ +- TPPROTO(proto), TPARGS(args)); \ ++ TP_PROTO(proto), TP_ARGS(args)); \ + } \ + static inline int register_trace_##name(void (*probe)(proto)) \ + { \ +@@ -153,4 +153,114 @@ static inline void tracepoint_synchroniz + synchronize_sched(); + } + ++#define PARAMS(args...) args ++#define TRACE_FORMAT(name, proto, args, fmt) \ ++ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) ++ ++ ++/* ++ * For use with the TRACE_EVENT macro: ++ * ++ * We define a tracepoint, its arguments, its printk format ++ * and its 'fast binay record' layout. ++ * ++ * Firstly, name your tracepoint via TRACE_EVENT(name : the ++ * 'subsystem_event' notation is fine. ++ * ++ * Think about this whole construct as the ++ * 'trace_sched_switch() function' from now on. 
++ * ++ * ++ * TRACE_EVENT(sched_switch, ++ * ++ * * ++ * * A function has a regular function arguments ++ * * prototype, declare it via TP_PROTO(): ++ * * ++ * ++ * TP_PROTO(struct rq *rq, struct task_struct *prev, ++ * struct task_struct *next), ++ * ++ * * ++ * * Define the call signature of the 'function'. ++ * * (Design sidenote: we use this instead of a ++ * * TP_PROTO1/TP_PROTO2/TP_PROTO3 ugliness.) ++ * * ++ * ++ * TP_ARGS(rq, prev, next), ++ * ++ * * ++ * * Fast binary tracing: define the trace record via ++ * * TP_STRUCT__entry(). You can think about it like a ++ * * regular C structure local variable definition. ++ * * ++ * * This is how the trace record is structured and will ++ * * be saved into the ring buffer. These are the fields ++ * * that will be exposed to user-space in ++ * * /debug/tracing/events/<*>/format. ++ * * ++ * * The declared 'local variable' is called '__entry' ++ * * ++ * * __field(pid_t, prev_prid) is equivalent to a standard declariton: ++ * * ++ * * pid_t prev_pid; ++ * * ++ * * __array(char, prev_comm, TASK_COMM_LEN) is equivalent to: ++ * * ++ * * char prev_comm[TASK_COMM_LEN]; ++ * * ++ * ++ * TP_STRUCT__entry( ++ * __array( char, prev_comm, TASK_COMM_LEN ) ++ * __field( pid_t, prev_pid ) ++ * __field( int, prev_prio ) ++ * __array( char, next_comm, TASK_COMM_LEN ) ++ * __field( pid_t, next_pid ) ++ * __field( int, next_prio ) ++ * ), ++ * ++ * * ++ * * Assign the entry into the trace record, by embedding ++ * * a full C statement block into TP_fast_assign(). You ++ * * can refer to the trace record as '__entry' - ++ * * otherwise you can put arbitrary C code in here. ++ * * ++ * * Note: this C code will execute every time a trace event ++ * * happens, on an active tracepoint. ++ * * ++ * ++ * TP_fast_assign( ++ * memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); ++ * __entry->prev_pid = prev->pid; ++ * __entry->prev_prio = prev->prio; ++ * memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); ++ * __entry->next_pid = next->pid; ++ * __entry->next_prio = next->prio; ++ * ) ++ * ++ * * ++ * * Formatted output of a trace record via TP_printk(). ++ * * This is how the tracepoint will appear under ftrace ++ * * plugins that make use of this tracepoint. ++ * * ++ * * (raw-binary tracing wont actually perform this step.) ++ * * ++ * ++ * TP_printk("task %s:%d [%d] ==> %s:%d [%d]", ++ * __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, ++ * __entry->next_comm, __entry->next_pid, __entry->next_prio), ++ * ++ * ); ++ * ++ * This macro construct is thus used for the regular printk format ++ * tracing setup, it is used to construct a function pointer based ++ * tracepoint callback (this is used by programmatic plugins and ++ * can also by used by generic instrumentation like SystemTap), and ++ * it is also used to expose a structured trace record in ++ * /debug/tracing/events/. 
++ */ ++ ++#define TRACE_EVENT(name, proto, args, struct, assign, print) \ ++ DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) ++ + #endif +Index: linux-2.6-tip/include/linux/types.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/types.h ++++ linux-2.6-tip/include/linux/types.h +@@ -1,6 +1,9 @@ + #ifndef _LINUX_TYPES_H + #define _LINUX_TYPES_H + ++#include ++ ++#ifndef __ASSEMBLY__ + #ifdef __KERNEL__ + + #define DECLARE_BITMAP(name,bits) \ +@@ -9,9 +12,8 @@ + #endif + + #include +-#include + +-#ifndef __KERNEL_STRICT_NAMES ++#ifdef __KERNEL__ + + typedef __u32 __kernel_dev_t; + +@@ -29,7 +31,6 @@ typedef __kernel_timer_t timer_t; + typedef __kernel_clockid_t clockid_t; + typedef __kernel_mqd_t mqd_t; + +-#ifdef __KERNEL__ + typedef _Bool bool; + + typedef __kernel_uid32_t uid_t; +@@ -45,14 +46,6 @@ typedef __kernel_old_uid_t old_uid_t; + typedef __kernel_old_gid_t old_gid_t; + #endif /* CONFIG_UID16 */ + +-/* libc5 includes this file to define uid_t, thus uid_t can never change +- * when it is included by non-kernel code +- */ +-#else +-typedef __kernel_uid_t uid_t; +-typedef __kernel_gid_t gid_t; +-#endif /* __KERNEL__ */ +- + #if defined(__GNUC__) + typedef __kernel_loff_t loff_t; + #endif +@@ -154,7 +147,7 @@ typedef unsigned long blkcnt_t; + #define pgoff_t unsigned long + #endif + +-#endif /* __KERNEL_STRICT_NAMES */ ++#endif /* __KERNEL__ */ + + /* + * Below are truly Linux-specific types that should never collide with +@@ -212,5 +205,5 @@ struct ustat { + }; + + #endif /* __KERNEL__ */ +- ++#endif /* __ASSEMBLY__ */ + #endif /* _LINUX_TYPES_H */ +Index: linux-2.6-tip/include/linux/ucb1400.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/ucb1400.h ++++ linux-2.6-tip/include/linux/ucb1400.h +@@ -134,8 +134,8 @@ static inline void ucb1400_adc_enable(st + ucb1400_reg_write(ac97, UCB_ADC_CR, UCB_ADC_ENA); + } + +-static unsigned int ucb1400_adc_read(struct snd_ac97 *ac97, u16 adc_channel, +- int adcsync) ++static inline unsigned int ++ucb1400_adc_read(struct snd_ac97 *ac97, u16 adc_channel, int adcsync) + { + unsigned int val; + +Index: linux-2.6-tip/include/linux/utime.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/utime.h ++++ linux-2.6-tip/include/linux/utime.h +@@ -4,8 +4,8 @@ + #include + + struct utimbuf { +- time_t actime; +- time_t modtime; ++ __kernel_time_t actime; ++ __kernel_time_t modtime; + }; + + #endif +Index: linux-2.6-tip/include/linux/vmalloc.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/vmalloc.h ++++ linux-2.6-tip/include/linux/vmalloc.h +@@ -95,6 +95,9 @@ extern struct vm_struct *remove_vm_area( + + extern int map_vm_area(struct vm_struct *area, pgprot_t prot, + struct page ***pages); ++extern int map_kernel_range_noflush(unsigned long start, unsigned long size, ++ pgprot_t prot, struct page **pages); ++extern void unmap_kernel_range_noflush(unsigned long addr, unsigned long size); + extern void unmap_kernel_range(unsigned long addr, unsigned long size); + + /* Allocate/destroy a 'vmalloc' VM area. 
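
A minimal consumer sketch, not part of the patch: DECLARE_TRACE()/TRACE_EVENT() as shown above expand into a trace_<name>() inline for the instrumentation site plus register_trace_<name>()/unregister_trace_<name>() helpers for probes. Hooking the sched_switch tracepoint that the comment uses as its running example would look roughly like this (module, probe name and message are invented):

#include <linux/module.h>
#include <linux/sched.h>
#include <trace/sched.h>

struct rq;	/* scheduler-private; only passed through to the probe */

/* The probe signature must match the tracepoint's TP_PROTO(). */
static void probe_sched_switch(struct rq *rq, struct task_struct *prev,
			       struct task_struct *next)
{
	printk(KERN_DEBUG "switch %s:%d ==> %s:%d\n",
	       prev->comm, prev->pid, next->comm, next->pid);
}

static int __init example_init(void)
{
	/* Arms the tracepoint: trace_sched_switch() in the scheduler
	 * now calls this probe on every context switch. */
	return register_trace_sched_switch(probe_sched_switch);
}

static void __exit example_exit(void)
{
	unregister_trace_sched_switch(probe_sched_switch);
	tracepoint_synchronize_unregister();
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
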
*/ +@@ -110,5 +113,6 @@ extern long vwrite(char *buf, char *addr + */ + extern rwlock_t vmlist_lock; + extern struct vm_struct *vmlist; ++extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); + + #endif /* _LINUX_VMALLOC_H */ +Index: linux-2.6-tip/include/linux/xfrm.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/xfrm.h ++++ linux-2.6-tip/include/linux/xfrm.h +@@ -58,7 +58,7 @@ struct xfrm_selector + __u8 prefixlen_s; + __u8 proto; + int ifindex; +- uid_t user; ++ __kernel_uid32_t user; + }; + + #define XFRM_INF (~(__u64)0) +Index: linux-2.6-tip/include/mtd/inftl-user.h +=================================================================== +--- linux-2.6-tip.orig/include/mtd/inftl-user.h ++++ linux-2.6-tip/include/mtd/inftl-user.h +@@ -16,33 +16,33 @@ + /* Block Control Information */ + + struct inftl_bci { +- uint8_t ECCsig[6]; +- uint8_t Status; +- uint8_t Status1; ++ __u8 ECCsig[6]; ++ __u8 Status; ++ __u8 Status1; + } __attribute__((packed)); + + struct inftl_unithead1 { +- uint16_t virtualUnitNo; +- uint16_t prevUnitNo; +- uint8_t ANAC; +- uint8_t NACs; +- uint8_t parityPerField; +- uint8_t discarded; ++ __u16 virtualUnitNo; ++ __u16 prevUnitNo; ++ __u8 ANAC; ++ __u8 NACs; ++ __u8 parityPerField; ++ __u8 discarded; + } __attribute__((packed)); + + struct inftl_unithead2 { +- uint8_t parityPerField; +- uint8_t ANAC; +- uint16_t prevUnitNo; +- uint16_t virtualUnitNo; +- uint8_t NACs; +- uint8_t discarded; ++ __u8 parityPerField; ++ __u8 ANAC; ++ __u16 prevUnitNo; ++ __u16 virtualUnitNo; ++ __u8 NACs; ++ __u8 discarded; + } __attribute__((packed)); + + struct inftl_unittail { +- uint8_t Reserved[4]; +- uint16_t EraseMark; +- uint16_t EraseMark1; ++ __u8 Reserved[4]; ++ __u16 EraseMark; ++ __u16 EraseMark1; + } __attribute__((packed)); + + union inftl_uci { +Index: linux-2.6-tip/include/mtd/jffs2-user.h +=================================================================== +--- linux-2.6-tip.orig/include/mtd/jffs2-user.h ++++ linux-2.6-tip/include/mtd/jffs2-user.h +@@ -7,6 +7,7 @@ + + /* This file is blessed for inclusion by userspace */ + #include ++#include + #include + #include + +@@ -19,8 +20,8 @@ + + extern int target_endian; + +-#define t16(x) ({ uint16_t __b = (x); (target_endian==__BYTE_ORDER)?__b:bswap_16(__b); }) +-#define t32(x) ({ uint32_t __b = (x); (target_endian==__BYTE_ORDER)?__b:bswap_32(__b); }) ++#define t16(x) ({ __u16 __b = (x); (target_endian==__BYTE_ORDER)?__b:bswap_16(__b); }) ++#define t32(x) ({ __u32 __b = (x); (target_endian==__BYTE_ORDER)?__b:bswap_32(__b); }) + + #define cpu_to_je16(x) ((jint16_t){t16(x)}) + #define cpu_to_je32(x) ((jint32_t){t32(x)}) +Index: linux-2.6-tip/include/mtd/mtd-abi.h +=================================================================== +--- linux-2.6-tip.orig/include/mtd/mtd-abi.h ++++ linux-2.6-tip/include/mtd/mtd-abi.h +@@ -5,14 +5,16 @@ + #ifndef __MTD_ABI_H__ + #define __MTD_ABI_H__ + ++#include ++ + struct erase_info_user { +- uint32_t start; +- uint32_t length; ++ __u32 start; ++ __u32 length; + }; + + struct mtd_oob_buf { +- uint32_t start; +- uint32_t length; ++ __u32 start; ++ __u32 length; + unsigned char __user *ptr; + }; + +@@ -48,30 +50,30 @@ struct mtd_oob_buf { + #define MTD_OTP_USER 2 + + struct mtd_info_user { +- uint8_t type; +- uint32_t flags; +- uint32_t size; // Total size of the MTD +- uint32_t erasesize; +- uint32_t writesize; +- uint32_t oobsize; // Amount of OOB data per block (e.g. 
16) ++ __u8 type; ++ __u32 flags; ++ __u32 size; // Total size of the MTD ++ __u32 erasesize; ++ __u32 writesize; ++ __u32 oobsize; // Amount of OOB data per block (e.g. 16) + /* The below two fields are obsolete and broken, do not use them + * (TODO: remove at some point) */ +- uint32_t ecctype; +- uint32_t eccsize; ++ __u32 ecctype; ++ __u32 eccsize; + }; + + struct region_info_user { +- uint32_t offset; /* At which this region starts, ++ __u32 offset; /* At which this region starts, + * from the beginning of the MTD */ +- uint32_t erasesize; /* For this region */ +- uint32_t numblocks; /* Number of blocks in this region */ +- uint32_t regionindex; ++ __u32 erasesize; /* For this region */ ++ __u32 numblocks; /* Number of blocks in this region */ ++ __u32 regionindex; + }; + + struct otp_info { +- uint32_t start; +- uint32_t length; +- uint32_t locked; ++ __u32 start; ++ __u32 length; ++ __u32 locked; + }; + + #define MEMGETINFO _IOR('M', 1, struct mtd_info_user) +@@ -84,8 +86,8 @@ struct otp_info { + #define MEMGETREGIONINFO _IOWR('M', 8, struct region_info_user) + #define MEMSETOOBSEL _IOW('M', 9, struct nand_oobinfo) + #define MEMGETOOBSEL _IOR('M', 10, struct nand_oobinfo) +-#define MEMGETBADBLOCK _IOW('M', 11, loff_t) +-#define MEMSETBADBLOCK _IOW('M', 12, loff_t) ++#define MEMGETBADBLOCK _IOW('M', 11, __kernel_loff_t) ++#define MEMSETBADBLOCK _IOW('M', 12, __kernel_loff_t) + #define OTPSELECT _IOR('M', 13, int) + #define OTPGETREGIONCOUNT _IOW('M', 14, int) + #define OTPGETREGIONINFO _IOW('M', 15, struct otp_info) +@@ -99,15 +101,15 @@ struct otp_info { + * interfaces + */ + struct nand_oobinfo { +- uint32_t useecc; +- uint32_t eccbytes; +- uint32_t oobfree[8][2]; +- uint32_t eccpos[32]; ++ __u32 useecc; ++ __u32 eccbytes; ++ __u32 oobfree[8][2]; ++ __u32 eccpos[32]; + }; + + struct nand_oobfree { +- uint32_t offset; +- uint32_t length; ++ __u32 offset; ++ __u32 length; + }; + + #define MTD_MAX_OOBFREE_ENTRIES 8 +@@ -116,9 +118,9 @@ struct nand_oobfree { + * diagnosis and to allow creation of raw images + */ + struct nand_ecclayout { +- uint32_t eccbytes; +- uint32_t eccpos[64]; +- uint32_t oobavail; ++ __u32 eccbytes; ++ __u32 eccpos[64]; ++ __u32 oobavail; + struct nand_oobfree oobfree[MTD_MAX_OOBFREE_ENTRIES]; + }; + +@@ -131,10 +133,10 @@ struct nand_ecclayout { + * @bbtblocks: number of blocks reserved for bad block tables + */ + struct mtd_ecc_stats { +- uint32_t corrected; +- uint32_t failed; +- uint32_t badblocks; +- uint32_t bbtblocks; ++ __u32 corrected; ++ __u32 failed; ++ __u32 badblocks; ++ __u32 bbtblocks; + }; + + /* +Index: linux-2.6-tip/include/mtd/nftl-user.h +=================================================================== +--- linux-2.6-tip.orig/include/mtd/nftl-user.h ++++ linux-2.6-tip/include/mtd/nftl-user.h +@@ -6,33 +6,35 @@ + #ifndef __MTD_NFTL_USER_H__ + #define __MTD_NFTL_USER_H__ + ++#include ++ + /* Block Control Information */ + + struct nftl_bci { + unsigned char ECCSig[6]; +- uint8_t Status; +- uint8_t Status1; ++ __u8 Status; ++ __u8 Status1; + }__attribute__((packed)); + + /* Unit Control Information */ + + struct nftl_uci0 { +- uint16_t VirtUnitNum; +- uint16_t ReplUnitNum; +- uint16_t SpareVirtUnitNum; +- uint16_t SpareReplUnitNum; ++ __u16 VirtUnitNum; ++ __u16 ReplUnitNum; ++ __u16 SpareVirtUnitNum; ++ __u16 SpareReplUnitNum; + } __attribute__((packed)); + + struct nftl_uci1 { +- uint32_t WearInfo; +- uint16_t EraseMark; +- uint16_t EraseMark1; ++ __u32 WearInfo; ++ __u16 EraseMark; ++ __u16 EraseMark1; + } __attribute__((packed)); + + 
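
The uint32_t to __u32 conversions above exist so these headers can be consumed from user space with nothing beyond <linux/types.h>. A user-space sketch of the MTD ABI defined here, not part of the patch (the device node is just an example):

#include <stdio.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <mtd/mtd-abi.h>	/* struct mtd_info_user, MEMGETINFO */

int main(void)
{
	struct mtd_info_user info;
	int fd = open("/dev/mtd0", O_RDONLY);

	if (fd < 0 || ioctl(fd, MEMGETINFO, &info) < 0) {
		perror("MEMGETINFO");
		return 1;
	}
	/* All fields are fixed-width __u32/__u8 as defined above. */
	printf("size=%u erasesize=%u oobsize=%u\n",
	       info.size, info.erasesize, info.oobsize);
	return 0;
}
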
struct nftl_uci2 { +- uint16_t FoldMark; +- uint16_t FoldMark1; +- uint32_t unused; ++ __u16 FoldMark; ++ __u16 FoldMark1; ++ __u32 unused; + } __attribute__((packed)); + + union nftl_uci { +@@ -50,9 +52,9 @@ struct nftl_oob { + + struct NFTLMediaHeader { + char DataOrgID[6]; +- uint16_t NumEraseUnits; +- uint16_t FirstPhysicalEUN; +- uint32_t FormattedSize; ++ __u16 NumEraseUnits; ++ __u16 FirstPhysicalEUN; ++ __u32 FormattedSize; + unsigned char UnitSizeFactor; + } __attribute__((packed)); + +Index: linux-2.6-tip/include/mtd/ubi-user.h +=================================================================== +--- linux-2.6-tip.orig/include/mtd/ubi-user.h ++++ linux-2.6-tip/include/mtd/ubi-user.h +@@ -21,6 +21,8 @@ + #ifndef __UBI_USER_H__ + #define __UBI_USER_H__ + ++#include ++ + /* + * UBI device creation (the same as MTD device attachment) + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +@@ -152,7 +154,7 @@ + /* Create an UBI volume */ + #define UBI_IOCMKVOL _IOW(UBI_IOC_MAGIC, 0, struct ubi_mkvol_req) + /* Remove an UBI volume */ +-#define UBI_IOCRMVOL _IOW(UBI_IOC_MAGIC, 1, int32_t) ++#define UBI_IOCRMVOL _IOW(UBI_IOC_MAGIC, 1, __s32) + /* Re-size an UBI volume */ + #define UBI_IOCRSVOL _IOW(UBI_IOC_MAGIC, 2, struct ubi_rsvol_req) + /* Re-name volumes */ +@@ -165,24 +167,24 @@ + /* Attach an MTD device */ + #define UBI_IOCATT _IOW(UBI_CTRL_IOC_MAGIC, 64, struct ubi_attach_req) + /* Detach an MTD device */ +-#define UBI_IOCDET _IOW(UBI_CTRL_IOC_MAGIC, 65, int32_t) ++#define UBI_IOCDET _IOW(UBI_CTRL_IOC_MAGIC, 65, __s32) + + /* ioctl commands of UBI volume character devices */ + + #define UBI_VOL_IOC_MAGIC 'O' + + /* Start UBI volume update */ +-#define UBI_IOCVOLUP _IOW(UBI_VOL_IOC_MAGIC, 0, int64_t) ++#define UBI_IOCVOLUP _IOW(UBI_VOL_IOC_MAGIC, 0, __s64) + /* LEB erasure command, used for debugging, disabled by default */ +-#define UBI_IOCEBER _IOW(UBI_VOL_IOC_MAGIC, 1, int32_t) ++#define UBI_IOCEBER _IOW(UBI_VOL_IOC_MAGIC, 1, __s32) + /* Atomic LEB change command */ +-#define UBI_IOCEBCH _IOW(UBI_VOL_IOC_MAGIC, 2, int32_t) ++#define UBI_IOCEBCH _IOW(UBI_VOL_IOC_MAGIC, 2, __s32) + /* Map LEB command */ + #define UBI_IOCEBMAP _IOW(UBI_VOL_IOC_MAGIC, 3, struct ubi_map_req) + /* Unmap LEB command */ +-#define UBI_IOCEBUNMAP _IOW(UBI_VOL_IOC_MAGIC, 4, int32_t) ++#define UBI_IOCEBUNMAP _IOW(UBI_VOL_IOC_MAGIC, 4, __s32) + /* Check if LEB is mapped command */ +-#define UBI_IOCEBISMAP _IOR(UBI_VOL_IOC_MAGIC, 5, int32_t) ++#define UBI_IOCEBISMAP _IOR(UBI_VOL_IOC_MAGIC, 5, __s32) + /* Set an UBI volume property */ + #define UBI_IOCSETPROP _IOW(UBI_VOL_IOC_MAGIC, 6, struct ubi_set_prop_req) + +@@ -260,10 +262,10 @@ enum { + * sub-page of the first page and add needed padding. + */ + struct ubi_attach_req { +- int32_t ubi_num; +- int32_t mtd_num; +- int32_t vid_hdr_offset; +- int8_t padding[12]; ++ __s32 ubi_num; ++ __s32 mtd_num; ++ __s32 vid_hdr_offset; ++ __s8 padding[12]; + }; + + /** +@@ -298,13 +300,13 @@ struct ubi_attach_req { + * BLOBs, without caring about how to properly align them. + */ + struct ubi_mkvol_req { +- int32_t vol_id; +- int32_t alignment; +- int64_t bytes; +- int8_t vol_type; +- int8_t padding1; +- int16_t name_len; +- int8_t padding2[4]; ++ __s32 vol_id; ++ __s32 alignment; ++ __s64 bytes; ++ __s8 vol_type; ++ __s8 padding1; ++ __s16 name_len; ++ __s8 padding2[4]; + char name[UBI_MAX_VOLUME_NAME + 1]; + } __attribute__ ((packed)); + +@@ -320,8 +322,8 @@ struct ubi_mkvol_req { + * zero number of bytes). 
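
Likewise for the UBI ABI just converted to __s32/__s64: a user-space sketch, not part of the patch, of creating a volume through UBI_IOCMKVOL. The UBI_DYNAMIC_VOLUME constant comes from the same header; device node, size and name are invented.

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <mtd/ubi-user.h>	/* struct ubi_mkvol_req, UBI_IOCMKVOL */

int main(void)
{
	struct ubi_mkvol_req req;
	int fd = open("/dev/ubi0", O_RDWR);

	memset(&req, 0, sizeof(req));		/* padding fields must be zeroed */
	req.vol_id = 0;				/* __s32 */
	req.alignment = 1;
	req.bytes = 1024 * 1024;		/* __s64: 1 MiB volume */
	req.vol_type = UBI_DYNAMIC_VOLUME;	/* constant from the same header */
	strcpy(req.name, "example");
	req.name_len = strlen(req.name);	/* __s16 */

	if (fd < 0 || ioctl(fd, UBI_IOCMKVOL, &req) < 0) {
		perror("UBI_IOCMKVOL");
		return 1;
	}
	return 0;
}
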
+ */ + struct ubi_rsvol_req { +- int64_t bytes; +- int32_t vol_id; ++ __s64 bytes; ++ __s32 vol_id; + } __attribute__ ((packed)); + + /** +@@ -356,12 +358,12 @@ struct ubi_rsvol_req { + * re-name request. + */ + struct ubi_rnvol_req { +- int32_t count; +- int8_t padding1[12]; ++ __s32 count; ++ __s8 padding1[12]; + struct { +- int32_t vol_id; +- int16_t name_len; +- int8_t padding2[2]; ++ __s32 vol_id; ++ __s16 name_len; ++ __s8 padding2[2]; + char name[UBI_MAX_VOLUME_NAME + 1]; + } ents[UBI_MAX_RNVOL]; + } __attribute__ ((packed)); +@@ -375,10 +377,10 @@ struct ubi_rnvol_req { + * @padding: reserved for future, not used, has to be zeroed + */ + struct ubi_leb_change_req { +- int32_t lnum; +- int32_t bytes; +- int8_t dtype; +- int8_t padding[7]; ++ __s32 lnum; ++ __s32 bytes; ++ __s8 dtype; ++ __s8 padding[7]; + } __attribute__ ((packed)); + + /** +@@ -388,9 +390,9 @@ struct ubi_leb_change_req { + * @padding: reserved for future, not used, has to be zeroed + */ + struct ubi_map_req { +- int32_t lnum; +- int8_t dtype; +- int8_t padding[3]; ++ __s32 lnum; ++ __s8 dtype; ++ __s8 padding[3]; + } __attribute__ ((packed)); + + +@@ -402,9 +404,9 @@ struct ubi_map_req { + * @value: value to set + */ + struct ubi_set_prop_req { +- uint8_t property; +- uint8_t padding[7]; +- uint64_t value; ++ __u8 property; ++ __u8 padding[7]; ++ __u64 value; + } __attribute__ ((packed)); + + #endif /* __UBI_USER_H__ */ +Index: linux-2.6-tip/include/net/inet_sock.h +=================================================================== +--- linux-2.6-tip.orig/include/net/inet_sock.h ++++ linux-2.6-tip/include/net/inet_sock.h +@@ -17,6 +17,7 @@ + #define _INET_SOCK_H + + ++#include + #include + #include + #include +@@ -66,14 +67,16 @@ struct inet_request_sock { + __be32 loc_addr; + __be32 rmt_addr; + __be16 rmt_port; +- u16 snd_wscale : 4, +- rcv_wscale : 4, +- tstamp_ok : 1, +- sack_ok : 1, +- wscale_ok : 1, +- ecn_ok : 1, +- acked : 1, +- no_srccheck: 1; ++ kmemcheck_define_bitfield(flags, { ++ u16 snd_wscale : 4, ++ rcv_wscale : 4, ++ tstamp_ok : 1, ++ sack_ok : 1, ++ wscale_ok : 1, ++ ecn_ok : 1, ++ acked : 1, ++ no_srccheck: 1; ++ }); + struct ip_options *opt; + }; + +@@ -198,9 +201,12 @@ static inline int inet_sk_ehashfn(const + static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops) + { + struct request_sock *req = reqsk_alloc(ops); ++ struct inet_request_sock *ireq = inet_rsk(req); + +- if (req != NULL) +- inet_rsk(req)->opt = NULL; ++ if (req != NULL) { ++ kmemcheck_annotate_bitfield(ireq->flags); ++ ireq->opt = NULL; ++ } + + return req; + } +Index: linux-2.6-tip/include/net/inet_timewait_sock.h +=================================================================== +--- linux-2.6-tip.orig/include/net/inet_timewait_sock.h ++++ linux-2.6-tip/include/net/inet_timewait_sock.h +@@ -16,6 +16,7 @@ + #define _INET_TIMEWAIT_SOCK_ + + ++#include + #include + #include + #include +@@ -127,10 +128,12 @@ struct inet_timewait_sock { + __be32 tw_rcv_saddr; + __be16 tw_dport; + __u16 tw_num; +- /* And these are ours. */ +- __u8 tw_ipv6only:1, +- tw_transparent:1; +- /* 15 bits hole, try to pack */ ++ kmemcheck_define_bitfield(flags, { ++ /* And these are ours. 
*/ ++ __u8 tw_ipv6only:1, ++ tw_transparent:1; ++ /* 14 bits hole, try to pack */ ++ }); + __u16 tw_ipv6_offset; + unsigned long tw_ttd; + struct inet_bind_bucket *tw_tb; +Index: linux-2.6-tip/include/net/sock.h +=================================================================== +--- linux-2.6-tip.orig/include/net/sock.h ++++ linux-2.6-tip/include/net/sock.h +@@ -218,9 +218,11 @@ struct sock { + #define sk_hash __sk_common.skc_hash + #define sk_prot __sk_common.skc_prot + #define sk_net __sk_common.skc_net +- unsigned char sk_shutdown : 2, +- sk_no_check : 2, +- sk_userlocks : 4; ++ kmemcheck_define_bitfield(flags, { ++ unsigned char sk_shutdown : 2, ++ sk_no_check : 2, ++ sk_userlocks : 4; ++ }); + unsigned char sk_protocol; + unsigned short sk_type; + int sk_rcvbuf; +Index: linux-2.6-tip/include/sound/asound.h +=================================================================== +--- linux-2.6-tip.orig/include/sound/asound.h ++++ linux-2.6-tip/include/sound/asound.h +@@ -23,9 +23,10 @@ + #ifndef __SOUND_ASOUND_H + #define __SOUND_ASOUND_H + ++#include ++ + #ifdef __KERNEL__ + #include +-#include + #include + #include + +@@ -342,7 +343,7 @@ struct snd_interval { + #define SNDRV_MASK_MAX 256 + + struct snd_mask { +- u_int32_t bits[(SNDRV_MASK_MAX+31)/32]; ++ __u32 bits[(SNDRV_MASK_MAX+31)/32]; + }; + + struct snd_pcm_hw_params { +@@ -385,7 +386,7 @@ struct snd_pcm_sw_params { + + struct snd_pcm_channel_info { + unsigned int channel; +- off_t offset; /* mmap offset */ ++ __kernel_off_t offset; /* mmap offset */ + unsigned int first; /* offset to first sample in bits */ + unsigned int step; /* samples distance in bits */ + }; +@@ -789,7 +790,7 @@ struct snd_ctl_elem_info { + snd_ctl_elem_type_t type; /* R: value type - SNDRV_CTL_ELEM_TYPE_* */ + unsigned int access; /* R: value access (bitmask) - SNDRV_CTL_ELEM_ACCESS_* */ + unsigned int count; /* count of values */ +- pid_t owner; /* owner's PID of this control */ ++ __kernel_pid_t owner; /* owner's PID of this control */ + union { + struct { + long min; /* R: minimum value */ +Index: linux-2.6-tip/include/sound/emu10k1.h +=================================================================== +--- linux-2.6-tip.orig/include/sound/emu10k1.h ++++ linux-2.6-tip/include/sound/emu10k1.h +@@ -1,6 +1,8 @@ + #ifndef __SOUND_EMU10K1_H + #define __SOUND_EMU10K1_H + ++#include ++ + /* + * Copyright (c) by Jaroslav Kysela , + * Creative Labs, Inc. 
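
The kmemcheck_define_bitfield()/kmemcheck_annotate_bitfield() pairs above give a C bitfield group a name so that the allocation path can mark the whole word as initialized, and kmemcheck does not warn about the never-written hole bits. A sketch of the same pattern on an invented structure, not part of the patch:

#include <linux/kmemcheck.h>
#include <linux/slab.h>

struct example_flags {
	kmemcheck_define_bitfield(flags, {
		unsigned char	ex_shutdown  : 2,
				ex_no_check  : 2,
				ex_userlocks : 4;
	});
	int			ex_rcvbuf;
};

static struct example_flags *example_alloc(gfp_t gfp)
{
	struct example_flags *ef = kmalloc(sizeof(*ef), gfp);

	if (ef)
		/* Mark the bitfield word as defined, mirroring the
		 * inet_reqsk_alloc() change above. */
		kmemcheck_annotate_bitfield(ef->flags);
	return ef;
}
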
+@@ -34,6 +36,7 @@ + #include + #include + #include ++ + #include + + /* ------------------- DEFINES -------------------- */ +@@ -2171,7 +2174,7 @@ struct snd_emu10k1_fx8010_code { + char name[128]; + + DECLARE_BITMAP(gpr_valid, 0x200); /* bitmask of valid initializers */ +- u_int32_t __user *gpr_map; /* initializers */ ++ __u32 __user *gpr_map; /* initializers */ + + unsigned int gpr_add_control_count; /* count of GPR controls to add/replace */ + struct snd_emu10k1_fx8010_control_gpr __user *gpr_add_controls; /* GPR controls to add/replace */ +@@ -2184,11 +2187,11 @@ struct snd_emu10k1_fx8010_code { + struct snd_emu10k1_fx8010_control_gpr __user *gpr_list_controls; /* listed GPR controls */ + + DECLARE_BITMAP(tram_valid, 0x100); /* bitmask of valid initializers */ +- u_int32_t __user *tram_data_map; /* data initializers */ +- u_int32_t __user *tram_addr_map; /* map initializers */ ++ __u32 __user *tram_data_map; /* data initializers */ ++ __u32 __user *tram_addr_map; /* map initializers */ + + DECLARE_BITMAP(code_valid, 1024); /* bitmask of valid instructions */ +- u_int32_t __user *code; /* one instruction - 64 bits */ ++ __u32 __user *code; /* one instruction - 64 bits */ + }; + + struct snd_emu10k1_fx8010_tram { +Index: linux-2.6-tip/include/trace/block.h +=================================================================== +--- linux-2.6-tip.orig/include/trace/block.h ++++ linux-2.6-tip/include/trace/block.h +@@ -5,72 +5,72 @@ + #include + + DECLARE_TRACE(block_rq_abort, +- TPPROTO(struct request_queue *q, struct request *rq), +- TPARGS(q, rq)); ++ TP_PROTO(struct request_queue *q, struct request *rq), ++ TP_ARGS(q, rq)); + + DECLARE_TRACE(block_rq_insert, +- TPPROTO(struct request_queue *q, struct request *rq), +- TPARGS(q, rq)); ++ TP_PROTO(struct request_queue *q, struct request *rq), ++ TP_ARGS(q, rq)); + + DECLARE_TRACE(block_rq_issue, +- TPPROTO(struct request_queue *q, struct request *rq), +- TPARGS(q, rq)); ++ TP_PROTO(struct request_queue *q, struct request *rq), ++ TP_ARGS(q, rq)); + + DECLARE_TRACE(block_rq_requeue, +- TPPROTO(struct request_queue *q, struct request *rq), +- TPARGS(q, rq)); ++ TP_PROTO(struct request_queue *q, struct request *rq), ++ TP_ARGS(q, rq)); + + DECLARE_TRACE(block_rq_complete, +- TPPROTO(struct request_queue *q, struct request *rq), +- TPARGS(q, rq)); ++ TP_PROTO(struct request_queue *q, struct request *rq), ++ TP_ARGS(q, rq)); + + DECLARE_TRACE(block_bio_bounce, +- TPPROTO(struct request_queue *q, struct bio *bio), +- TPARGS(q, bio)); ++ TP_PROTO(struct request_queue *q, struct bio *bio), ++ TP_ARGS(q, bio)); + + DECLARE_TRACE(block_bio_complete, +- TPPROTO(struct request_queue *q, struct bio *bio), +- TPARGS(q, bio)); ++ TP_PROTO(struct request_queue *q, struct bio *bio), ++ TP_ARGS(q, bio)); + + DECLARE_TRACE(block_bio_backmerge, +- TPPROTO(struct request_queue *q, struct bio *bio), +- TPARGS(q, bio)); ++ TP_PROTO(struct request_queue *q, struct bio *bio), ++ TP_ARGS(q, bio)); + + DECLARE_TRACE(block_bio_frontmerge, +- TPPROTO(struct request_queue *q, struct bio *bio), +- TPARGS(q, bio)); ++ TP_PROTO(struct request_queue *q, struct bio *bio), ++ TP_ARGS(q, bio)); + + DECLARE_TRACE(block_bio_queue, +- TPPROTO(struct request_queue *q, struct bio *bio), +- TPARGS(q, bio)); ++ TP_PROTO(struct request_queue *q, struct bio *bio), ++ TP_ARGS(q, bio)); + + DECLARE_TRACE(block_getrq, +- TPPROTO(struct request_queue *q, struct bio *bio, int rw), +- TPARGS(q, bio, rw)); ++ TP_PROTO(struct request_queue *q, struct bio *bio, int rw), ++ TP_ARGS(q, bio, rw)); 
+ + DECLARE_TRACE(block_sleeprq, +- TPPROTO(struct request_queue *q, struct bio *bio, int rw), +- TPARGS(q, bio, rw)); ++ TP_PROTO(struct request_queue *q, struct bio *bio, int rw), ++ TP_ARGS(q, bio, rw)); + + DECLARE_TRACE(block_plug, +- TPPROTO(struct request_queue *q), +- TPARGS(q)); ++ TP_PROTO(struct request_queue *q), ++ TP_ARGS(q)); + + DECLARE_TRACE(block_unplug_timer, +- TPPROTO(struct request_queue *q), +- TPARGS(q)); ++ TP_PROTO(struct request_queue *q), ++ TP_ARGS(q)); + + DECLARE_TRACE(block_unplug_io, +- TPPROTO(struct request_queue *q), +- TPARGS(q)); ++ TP_PROTO(struct request_queue *q), ++ TP_ARGS(q)); + + DECLARE_TRACE(block_split, +- TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), +- TPARGS(q, bio, pdu)); ++ TP_PROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), ++ TP_ARGS(q, bio, pdu)); + + DECLARE_TRACE(block_remap, +- TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev, +- sector_t from, sector_t to), +- TPARGS(q, bio, dev, from, to)); ++ TP_PROTO(struct request_queue *q, struct bio *bio, dev_t dev, ++ sector_t from, sector_t to), ++ TP_ARGS(q, bio, dev, from, to)); + + #endif +Index: linux-2.6-tip/include/trace/irq.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/trace/irq.h +@@ -0,0 +1,9 @@ ++#ifndef _TRACE_IRQ_H ++#define _TRACE_IRQ_H ++ ++#include ++#include ++ ++#include ++ ++#endif +Index: linux-2.6-tip/include/trace/irq_event_types.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/trace/irq_event_types.h +@@ -0,0 +1,55 @@ ++ ++/* use instead */ ++#ifndef TRACE_FORMAT ++# error Do not include this file directly. ++# error Unless you know what you are doing. ++#endif ++ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM irq ++ ++/* ++ * Tracepoint for entry of interrupt handler: ++ */ ++TRACE_FORMAT(irq_handler_entry, ++ TP_PROTO(int irq, struct irqaction *action), ++ TP_ARGS(irq, action), ++ TP_FMT("irq=%d handler=%s", irq, action->name) ++ ); ++ ++/* ++ * Tracepoint for return of an interrupt handler: ++ */ ++TRACE_EVENT(irq_handler_exit, ++ ++ TP_PROTO(int irq, struct irqaction *action, int ret), ++ ++ TP_ARGS(irq, action, ret), ++ ++ TP_STRUCT__entry( ++ __field( int, irq ) ++ __field( int, ret ) ++ ), ++ ++ TP_fast_assign( ++ __entry->irq = irq; ++ __entry->ret = ret; ++ ), ++ ++ TP_printk("irq=%d return=%s", ++ __entry->irq, __entry->ret ? "handled" : "unhandled") ++); ++ ++TRACE_FORMAT(softirq_entry, ++ TP_PROTO(struct softirq_action *h, struct softirq_action *vec), ++ TP_ARGS(h, vec), ++ TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) ++ ); ++ ++TRACE_FORMAT(softirq_exit, ++ TP_PROTO(struct softirq_action *h, struct softirq_action *vec), ++ TP_ARGS(h, vec), ++ TP_FMT("softirq=%d action=%s", (int)(h - vec), softirq_to_name[h-vec]) ++ ); ++ ++#undef TRACE_SYSTEM +Index: linux-2.6-tip/include/trace/kmemtrace.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/trace/kmemtrace.h +@@ -0,0 +1,63 @@ ++/* ++ * Copyright (C) 2008 Eduard - Gabriel Munteanu ++ * ++ * This file is released under GPL version 2. 
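
The irq and softirq events above are declared with TRACE_FORMAT()/TRACE_EVENT(), which currently reduce to DECLARE_TRACE(); the interrupt path is expected to fire them around the handler invocation, roughly as sketched below. This is not part of the patch: the real call sites live in kernel/irq/ and kernel/softirq.c, and the function here is heavily simplified.

#include <linux/interrupt.h>
#include <trace/irq.h>

static irqreturn_t example_handle_irq_event(int irq, struct irqaction *action)
{
	irqreturn_t ret;

	trace_irq_handler_entry(irq, action);
	ret = action->handler(irq, action->dev_id);
	/* third argument is the handled/unhandled flag used by TP_printk() */
	trace_irq_handler_exit(irq, action, ret != IRQ_NONE);
	return ret;
}
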
++ */ ++ ++#ifndef _LINUX_KMEMTRACE_H ++#define _LINUX_KMEMTRACE_H ++ ++#ifdef __KERNEL__ ++ ++#include ++#include ++ ++#ifdef CONFIG_KMEMTRACE ++extern void kmemtrace_init(void); ++#else ++static inline void kmemtrace_init(void) ++{ ++} ++#endif ++ ++DECLARE_TRACE(kmalloc, ++ TP_PROTO(unsigned long call_site, ++ const void *ptr, ++ size_t bytes_req, ++ size_t bytes_alloc, ++ gfp_t gfp_flags), ++ TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); ++DECLARE_TRACE(kmem_cache_alloc, ++ TP_PROTO(unsigned long call_site, ++ const void *ptr, ++ size_t bytes_req, ++ size_t bytes_alloc, ++ gfp_t gfp_flags), ++ TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags)); ++DECLARE_TRACE(kmalloc_node, ++ TP_PROTO(unsigned long call_site, ++ const void *ptr, ++ size_t bytes_req, ++ size_t bytes_alloc, ++ gfp_t gfp_flags, ++ int node), ++ TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); ++DECLARE_TRACE(kmem_cache_alloc_node, ++ TP_PROTO(unsigned long call_site, ++ const void *ptr, ++ size_t bytes_req, ++ size_t bytes_alloc, ++ gfp_t gfp_flags, ++ int node), ++ TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)); ++DECLARE_TRACE(kfree, ++ TP_PROTO(unsigned long call_site, const void *ptr), ++ TP_ARGS(call_site, ptr)); ++DECLARE_TRACE(kmem_cache_free, ++ TP_PROTO(unsigned long call_site, const void *ptr), ++ TP_ARGS(call_site, ptr)); ++ ++#endif /* __KERNEL__ */ ++ ++#endif /* _LINUX_KMEMTRACE_H */ ++ +Index: linux-2.6-tip/include/trace/lockdep.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/trace/lockdep.h +@@ -0,0 +1,9 @@ ++#ifndef _TRACE_LOCKDEP_H ++#define _TRACE_LOCKDEP_H ++ ++#include ++#include ++ ++#include ++ ++#endif +Index: linux-2.6-tip/include/trace/lockdep_event_types.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/trace/lockdep_event_types.h +@@ -0,0 +1,44 @@ ++ ++#ifndef TRACE_FORMAT ++# error Do not include this file directly. ++# error Unless you know what you are doing. ++#endif ++ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM lock ++ ++#ifdef CONFIG_LOCKDEP ++ ++TRACE_FORMAT(lock_acquire, ++ TP_PROTO(struct lockdep_map *lock, unsigned int subclass, ++ int trylock, int read, int check, ++ struct lockdep_map *next_lock, unsigned long ip), ++ TP_ARGS(lock, subclass, trylock, read, check, next_lock, ip), ++ TP_FMT("%s%s%s", trylock ? "try " : "", ++ read ? 
"read " : "", lock->name) ++ ); ++ ++TRACE_FORMAT(lock_release, ++ TP_PROTO(struct lockdep_map *lock, int nested, unsigned long ip), ++ TP_ARGS(lock, nested, ip), ++ TP_FMT("%s", lock->name) ++ ); ++ ++#ifdef CONFIG_LOCK_STAT ++ ++TRACE_FORMAT(lock_contended, ++ TP_PROTO(struct lockdep_map *lock, unsigned long ip), ++ TP_ARGS(lock, ip), ++ TP_FMT("%s", lock->name) ++ ); ++ ++TRACE_FORMAT(lock_acquired, ++ TP_PROTO(struct lockdep_map *lock, unsigned long ip), ++ TP_ARGS(lock, ip), ++ TP_FMT("%s", lock->name) ++ ); ++ ++#endif ++#endif ++ ++#undef TRACE_SYSTEM +Index: linux-2.6-tip/include/trace/power.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/trace/power.h +@@ -0,0 +1,32 @@ ++#ifndef _TRACE_POWER_H ++#define _TRACE_POWER_H ++ ++#include ++#include ++ ++enum { ++ POWER_NONE = 0, ++ POWER_CSTATE = 1, ++ POWER_PSTATE = 2, ++}; ++ ++struct power_trace { ++ ktime_t stamp; ++ ktime_t end; ++ int type; ++ int state; ++}; ++ ++DECLARE_TRACE(power_start, ++ TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), ++ TP_ARGS(it, type, state)); ++ ++DECLARE_TRACE(power_mark, ++ TP_PROTO(struct power_trace *it, unsigned int type, unsigned int state), ++ TP_ARGS(it, type, state)); ++ ++DECLARE_TRACE(power_end, ++ TP_PROTO(struct power_trace *it), ++ TP_ARGS(it)); ++ ++#endif /* _TRACE_POWER_H */ +Index: linux-2.6-tip/include/trace/sched.h +=================================================================== +--- linux-2.6-tip.orig/include/trace/sched.h ++++ linux-2.6-tip/include/trace/sched.h +@@ -4,53 +4,6 @@ + #include + #include + +-DECLARE_TRACE(sched_kthread_stop, +- TPPROTO(struct task_struct *t), +- TPARGS(t)); +- +-DECLARE_TRACE(sched_kthread_stop_ret, +- TPPROTO(int ret), +- TPARGS(ret)); +- +-DECLARE_TRACE(sched_wait_task, +- TPPROTO(struct rq *rq, struct task_struct *p), +- TPARGS(rq, p)); +- +-DECLARE_TRACE(sched_wakeup, +- TPPROTO(struct rq *rq, struct task_struct *p, int success), +- TPARGS(rq, p, success)); +- +-DECLARE_TRACE(sched_wakeup_new, +- TPPROTO(struct rq *rq, struct task_struct *p, int success), +- TPARGS(rq, p, success)); +- +-DECLARE_TRACE(sched_switch, +- TPPROTO(struct rq *rq, struct task_struct *prev, +- struct task_struct *next), +- TPARGS(rq, prev, next)); +- +-DECLARE_TRACE(sched_migrate_task, +- TPPROTO(struct task_struct *p, int orig_cpu, int dest_cpu), +- TPARGS(p, orig_cpu, dest_cpu)); +- +-DECLARE_TRACE(sched_process_free, +- TPPROTO(struct task_struct *p), +- TPARGS(p)); +- +-DECLARE_TRACE(sched_process_exit, +- TPPROTO(struct task_struct *p), +- TPARGS(p)); +- +-DECLARE_TRACE(sched_process_wait, +- TPPROTO(struct pid *pid), +- TPARGS(pid)); +- +-DECLARE_TRACE(sched_process_fork, +- TPPROTO(struct task_struct *parent, struct task_struct *child), +- TPARGS(parent, child)); +- +-DECLARE_TRACE(sched_signal_send, +- TPPROTO(int sig, struct task_struct *p), +- TPARGS(sig, p)); ++#include + + #endif +Index: linux-2.6-tip/include/trace/sched_event_types.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/trace/sched_event_types.h +@@ -0,0 +1,368 @@ ++ ++/* use instead */ ++#ifndef TRACE_EVENT ++# error Do not include this file directly. ++# error Unless you know what you are doing. 
++#endif ++ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM sched ++ ++/* ++ * Tracepoint for calling kthread_stop, performed to end a kthread: ++ */ ++TRACE_EVENT(sched_kthread_stop, ++ ++ TP_PROTO(struct task_struct *t), ++ ++ TP_ARGS(t), ++ ++ TP_STRUCT__entry( ++ __array( char, comm, TASK_COMM_LEN ) ++ __field( pid_t, pid ) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->comm, t->comm, TASK_COMM_LEN); ++ __entry->pid = t->pid; ++ ), ++ ++ TP_printk("task %s:%d", __entry->comm, __entry->pid) ++); ++ ++/* ++ * Tracepoint for the return value of the kthread stopping: ++ */ ++TRACE_EVENT(sched_kthread_stop_ret, ++ ++ TP_PROTO(int ret), ++ ++ TP_ARGS(ret), ++ ++ TP_STRUCT__entry( ++ __field( int, ret ) ++ ), ++ ++ TP_fast_assign( ++ __entry->ret = ret; ++ ), ++ ++ TP_printk("ret %d", __entry->ret) ++); ++ ++/* ++ * Tracepoint for waiting on task to unschedule: ++ * ++ * (NOTE: the 'rq' argument is not used by generic trace events, ++ * but used by the latency tracer plugin. ) ++ */ ++TRACE_EVENT(sched_wait_task, ++ ++ TP_PROTO(struct rq *rq, struct task_struct *p), ++ ++ TP_ARGS(rq, p), ++ ++ TP_STRUCT__entry( ++ __array( char, comm, TASK_COMM_LEN ) ++ __field( pid_t, pid ) ++ __field( int, prio ) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->comm, p->comm, TASK_COMM_LEN); ++ __entry->pid = p->pid; ++ __entry->prio = p->prio; ++ ), ++ ++ TP_printk("task %s:%d [%d]", ++ __entry->comm, __entry->pid, __entry->prio) ++); ++ ++/* ++ * Tracepoint for waking up a task: ++ * ++ * (NOTE: the 'rq' argument is not used by generic trace events, ++ * but used by the latency tracer plugin. ) ++ */ ++TRACE_EVENT(sched_wakeup, ++ ++ TP_PROTO(struct rq *rq, struct task_struct *p, int success), ++ ++ TP_ARGS(rq, p, success), ++ ++ TP_STRUCT__entry( ++ __array( char, comm, TASK_COMM_LEN ) ++ __field( pid_t, pid ) ++ __field( int, prio ) ++ __field( int, success ) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->comm, p->comm, TASK_COMM_LEN); ++ __entry->pid = p->pid; ++ __entry->prio = p->prio; ++ __entry->success = success; ++ ), ++ ++ TP_printk("task %s:%d [%d] success=%d", ++ __entry->comm, __entry->pid, __entry->prio, ++ __entry->success) ++); ++ ++/* ++ * Tracepoint for waking up a new task: ++ * ++ * (NOTE: the 'rq' argument is not used by generic trace events, ++ * but used by the latency tracer plugin. ) ++ */ ++TRACE_EVENT(sched_wakeup_new, ++ ++ TP_PROTO(struct rq *rq, struct task_struct *p, int success), ++ ++ TP_ARGS(rq, p, success), ++ ++ TP_STRUCT__entry( ++ __array( char, comm, TASK_COMM_LEN ) ++ __field( pid_t, pid ) ++ __field( int, prio ) ++ __field( int, success ) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->comm, p->comm, TASK_COMM_LEN); ++ __entry->pid = p->pid; ++ __entry->prio = p->prio; ++ __entry->success = success; ++ ), ++ ++ TP_printk("task %s:%d [%d] success=%d", ++ __entry->comm, __entry->pid, __entry->prio, ++ __entry->success) ++); ++ ++/* ++ * Tracepoint for task switches, performed by the scheduler: ++ * ++ * (NOTE: the 'rq' argument is not used by generic trace events, ++ * but used by the latency tracer plugin. 
) ++ */ ++TRACE_EVENT(sched_switch, ++ ++ TP_PROTO(struct rq *rq, struct task_struct *prev, ++ struct task_struct *next), ++ ++ TP_ARGS(rq, prev, next), ++ ++ TP_STRUCT__entry( ++ __array( char, prev_comm, TASK_COMM_LEN ) ++ __field( pid_t, prev_pid ) ++ __field( int, prev_prio ) ++ __array( char, next_comm, TASK_COMM_LEN ) ++ __field( pid_t, next_pid ) ++ __field( int, next_prio ) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN); ++ __entry->prev_pid = prev->pid; ++ __entry->prev_prio = prev->prio; ++ memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN); ++ __entry->next_pid = next->pid; ++ __entry->next_prio = next->prio; ++ ), ++ ++ TP_printk("task %s:%d [%d] ==> %s:%d [%d]", ++ __entry->prev_comm, __entry->prev_pid, __entry->prev_prio, ++ __entry->next_comm, __entry->next_pid, __entry->next_prio) ++); ++ ++/* ++ * Tracepoint for a task being migrated: ++ */ ++TRACE_EVENT(sched_migrate_task, ++ ++ TP_PROTO(struct task_struct *p, int orig_cpu, int dest_cpu), ++ ++ TP_ARGS(p, orig_cpu, dest_cpu), ++ ++ TP_STRUCT__entry( ++ __array( char, comm, TASK_COMM_LEN ) ++ __field( pid_t, pid ) ++ __field( int, prio ) ++ __field( int, orig_cpu ) ++ __field( int, dest_cpu ) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->comm, p->comm, TASK_COMM_LEN); ++ __entry->pid = p->pid; ++ __entry->prio = p->prio; ++ __entry->orig_cpu = orig_cpu; ++ __entry->dest_cpu = dest_cpu; ++ ), ++ ++ TP_printk("task %s:%d [%d] from: %d to: %d", ++ __entry->comm, __entry->pid, __entry->prio, ++ __entry->orig_cpu, __entry->dest_cpu) ++); ++ ++/* ++ * Tracepoint for freeing a task: ++ */ ++TRACE_EVENT(sched_process_free, ++ ++ TP_PROTO(struct task_struct *p), ++ ++ TP_ARGS(p), ++ ++ TP_STRUCT__entry( ++ __array( char, comm, TASK_COMM_LEN ) ++ __field( pid_t, pid ) ++ __field( int, prio ) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->comm, p->comm, TASK_COMM_LEN); ++ __entry->pid = p->pid; ++ __entry->prio = p->prio; ++ ), ++ ++ TP_printk("task %s:%d [%d]", ++ __entry->comm, __entry->pid, __entry->prio) ++); ++ ++/* ++ * Tracepoint for a task exiting: ++ */ ++TRACE_EVENT(sched_process_exit, ++ ++ TP_PROTO(struct task_struct *p), ++ ++ TP_ARGS(p), ++ ++ TP_STRUCT__entry( ++ __array( char, comm, TASK_COMM_LEN ) ++ __field( pid_t, pid ) ++ __field( int, prio ) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->comm, p->comm, TASK_COMM_LEN); ++ __entry->pid = p->pid; ++ __entry->prio = p->prio; ++ ), ++ ++ TP_printk("task %s:%d [%d]", ++ __entry->comm, __entry->pid, __entry->prio) ++); ++ ++/* ++ * Tracepoint for priority boosting/deboosting of a task: ++ * ++ * (NOTE: the 'rq' argument is not used by generic trace events, ++ * but used by the latency tracer plugin. 
) ++ */ ++TRACE_EVENT(sched_task_setprio, ++ ++ TP_PROTO(struct rq *rq, struct task_struct *p, int oldprio), ++ ++ TP_ARGS(rq, p, oldprio), ++ ++ TP_STRUCT__entry( ++ __array( char, comm, TASK_COMM_LEN ) ++ __field( pid_t, pid ) ++ __field( int, prio ) ++ __field( int, oldprio ) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->comm, p->comm, TASK_COMM_LEN); ++ __entry->pid = p->pid; ++ __entry->prio = p->prio; ++ __entry->oldprio = oldprio; ++ ), ++ ++ TP_printk("task %s:%d [%d] oldprio=%d", ++ __entry->comm, __entry->pid, __entry->prio, ++ __entry->oldprio) ++); ++ ++/* ++ * Tracepoint for a waiting task: ++ */ ++TRACE_EVENT(sched_process_wait, ++ ++ TP_PROTO(struct pid *pid), ++ ++ TP_ARGS(pid), ++ ++ TP_STRUCT__entry( ++ __array( char, comm, TASK_COMM_LEN ) ++ __field( pid_t, pid ) ++ __field( int, prio ) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->comm, current->comm, TASK_COMM_LEN); ++ __entry->pid = pid_nr(pid); ++ __entry->prio = current->prio; ++ ), ++ ++ TP_printk("task %s:%d [%d]", ++ __entry->comm, __entry->pid, __entry->prio) ++); ++ ++/* ++ * Tracepoint for do_fork: ++ */ ++TRACE_EVENT(sched_process_fork, ++ ++ TP_PROTO(struct task_struct *parent, struct task_struct *child), ++ ++ TP_ARGS(parent, child), ++ ++ TP_STRUCT__entry( ++ __array( char, parent_comm, TASK_COMM_LEN ) ++ __field( pid_t, parent_pid ) ++ __array( char, child_comm, TASK_COMM_LEN ) ++ __field( pid_t, child_pid ) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->parent_comm, parent->comm, TASK_COMM_LEN); ++ __entry->parent_pid = parent->pid; ++ memcpy(__entry->child_comm, child->comm, TASK_COMM_LEN); ++ __entry->child_pid = child->pid; ++ ), ++ ++ TP_printk("parent %s:%d child %s:%d", ++ __entry->parent_comm, __entry->parent_pid, ++ __entry->child_comm, __entry->child_pid) ++); ++ ++/* ++ * Tracepoint for sending a signal: ++ */ ++TRACE_EVENT(sched_signal_send, ++ ++ TP_PROTO(int sig, struct task_struct *p), ++ ++ TP_ARGS(sig, p), ++ ++ TP_STRUCT__entry( ++ __field( int, sig ) ++ __array( char, comm, TASK_COMM_LEN ) ++ __field( pid_t, pid ) ++ ), ++ ++ TP_fast_assign( ++ memcpy(__entry->comm, p->comm, TASK_COMM_LEN); ++ __entry->pid = p->pid; ++ __entry->sig = sig; ++ ), ++ ++ TP_printk("sig: %d task %s:%d", ++ __entry->sig, __entry->comm, __entry->pid) ++); ++ ++#undef TRACE_SYSTEM +Index: linux-2.6-tip/include/trace/trace_event_types.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/trace/trace_event_types.h +@@ -0,0 +1,5 @@ ++/* trace/_event_types.h here */ ++ ++#include ++#include ++#include +Index: linux-2.6-tip/include/trace/trace_events.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/trace/trace_events.h +@@ -0,0 +1,5 @@ ++/* trace/.h here */ ++ ++#include ++#include ++#include +Index: linux-2.6-tip/include/trace/workqueue.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/trace/workqueue.h +@@ -0,0 +1,25 @@ ++#ifndef __TRACE_WORKQUEUE_H ++#define __TRACE_WORKQUEUE_H ++ ++#include ++#include ++#include ++ ++DECLARE_TRACE(workqueue_insertion, ++ TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), ++ TP_ARGS(wq_thread, work)); ++ ++DECLARE_TRACE(workqueue_execution, ++ TP_PROTO(struct task_struct *wq_thread, struct work_struct *work), ++ TP_ARGS(wq_thread, work)); ++ ++/* Trace the creation of one workqueue thread on a cpu */ ++DECLARE_TRACE(workqueue_creation, ++ TP_PROTO(struct task_struct *wq_thread, 
int cpu), ++ TP_ARGS(wq_thread, cpu)); ++ ++DECLARE_TRACE(workqueue_destruction, ++ TP_PROTO(struct task_struct *wq_thread), ++ TP_ARGS(wq_thread)); ++ ++#endif /* __TRACE_WORKQUEUE_H */ +Index: linux-2.6-tip/init/Kconfig +=================================================================== +--- linux-2.6-tip.orig/init/Kconfig ++++ linux-2.6-tip/init/Kconfig +@@ -101,6 +101,66 @@ config LOCALVERSION_AUTO + + which is done within the script "scripts/setlocalversion".) + ++config HAVE_KERNEL_GZIP ++ bool ++ ++config HAVE_KERNEL_BZIP2 ++ bool ++ ++config HAVE_KERNEL_LZMA ++ bool ++ ++choice ++ prompt "Kernel compression mode" ++ default KERNEL_GZIP ++ depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA ++ help ++ The linux kernel is a kind of self-extracting executable. ++ Several compression algorithms are available, which differ ++ in efficiency, compression and decompression speed. ++ Compression speed is only relevant when building a kernel. ++ Decompression speed is relevant at each boot. ++ ++ If you have any problems with bzip2 or lzma compressed ++ kernels, mail me (Alain Knaff) . (An older ++ version of this functionality (bzip2 only), for 2.4, was ++ supplied by Christian Ludwig) ++ ++ High compression options are mostly useful for users, who ++ are low on disk space (embedded systems), but for whom ram ++ size matters less. ++ ++ If in doubt, select 'gzip' ++ ++config KERNEL_GZIP ++ bool "Gzip" ++ depends on HAVE_KERNEL_GZIP ++ help ++ The old and tried gzip compression. Its compression ratio is ++ the poorest among the 3 choices; however its speed (both ++ compression and decompression) is the fastest. ++ ++config KERNEL_BZIP2 ++ bool "Bzip2" ++ depends on HAVE_KERNEL_BZIP2 ++ help ++ Its compression ratio and speed is intermediate. ++ Decompression speed is slowest among the three. The kernel ++ size is about 10% smaller with bzip2, in comparison to gzip. ++ Bzip2 uses a large amount of memory. For modern kernels you ++ will need at least 8MB RAM or more for booting. ++ ++config KERNEL_LZMA ++ bool "LZMA" ++ depends on HAVE_KERNEL_LZMA ++ help ++ The most recent compression algorithm. ++ Its ratio is best, decompression speed is between the other ++ two. Compression is slowest. The kernel size is about 33% ++ smaller with LZMA in comparison to gzip. ++ ++endchoice ++ + config SWAP + bool "Support for paging of anonymous memory (swap)" + depends on MMU && BLOCK +@@ -246,6 +306,7 @@ choice + + config CLASSIC_RCU + bool "Classic RCU" ++ depends on !PREEMPT_RT + help + This option selects the classic RCU implementation that is + designed for best read-side performance on non-realtime +@@ -255,6 +316,7 @@ config CLASSIC_RCU + + config TREE_RCU + bool "Tree-based hierarchical RCU" ++ depends on !PREEMPT_RT + help + This option selects the RCU implementation that is + designed for very large SMP system with hundreds or +@@ -857,6 +919,41 @@ config AIO + by some high performance threaded applications. Disabling + this option saves about 7k. + ++config HAVE_PERF_COUNTERS ++ bool ++ ++menu "Performance Counters" ++ ++config PERF_COUNTERS ++ bool "Kernel Performance Counters" ++ depends on HAVE_PERF_COUNTERS ++ default y ++ select ANON_INODES ++ help ++ Enable kernel support for performance counter hardware. ++ ++ Performance counters are special hardware registers available ++ on most modern CPUs. 
These registers count the number of certain ++ types of hw events: such as instructions executed, cachemisses ++ suffered, or branches mis-predicted - without slowing down the ++ kernel or applications. These registers can also trigger interrupts ++ when a threshold number of events have passed - and can thus be ++ used to profile the code that runs on that CPU. ++ ++ The Linux Performance Counter subsystem provides an abstraction of ++ these hardware capabilities, available via a system call. It ++ provides per task and per CPU counters, and it provides event ++ capabilities on top of those. ++ ++ Say Y if unsure. ++ ++config EVENT_PROFILE ++ bool "Tracepoint profile sources" ++ depends on PERF_COUNTERS && EVENT_TRACER ++ default y ++ ++endmenu ++ + config VM_EVENT_COUNTERS + default y + bool "Enable VM event counters for /proc/vmstat" if EMBEDDED +@@ -912,6 +1009,7 @@ config SLAB + + config SLUB + bool "SLUB (Unqueued Allocator)" ++ depends on !PREEMPT_RT + help + SLUB is a slab allocator that minimizes cache line usage + instead of managing queues of cached objects (SLAB approach). +@@ -922,6 +1020,8 @@ config SLUB + + config SLOB + depends on EMBEDDED ++ # lockups observed: ++ depends on 0 + bool "SLOB (Simple Allocator)" + help + SLOB replaces the stock allocator with a drastically simpler +@@ -945,7 +1045,7 @@ config TRACEPOINTS + + config MARKERS + bool "Activate markers" +- depends on TRACEPOINTS ++ select TRACEPOINTS + help + Place an empty function call at each marker site. Can be + dynamically changed for a probe function. +@@ -966,7 +1066,6 @@ config SLABINFO + + config RT_MUTEXES + boolean +- select PLIST + + config BASE_SMALL + int +Index: linux-2.6-tip/init/do_mounts.c +=================================================================== +--- linux-2.6-tip.orig/init/do_mounts.c ++++ linux-2.6-tip/init/do_mounts.c +@@ -228,9 +228,13 @@ static int __init do_mount_root(char *na + return 0; + } + ++#if PAGE_SIZE < PATH_MAX ++# error increase the fs_names allocation size here ++#endif ++ + void __init mount_block_root(char *name, int flags) + { +- char *fs_names = __getname(); ++ char *fs_names = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1); + char *p; + #ifdef CONFIG_BLOCK + char b[BDEVNAME_SIZE]; +@@ -282,7 +286,7 @@ retry: + #endif + panic("VFS: Unable to mount root fs on %s", b); + out: +- putname(fs_names); ++ free_pages((unsigned long)fs_names, 1); + } + + #ifdef CONFIG_ROOT_NFS +Index: linux-2.6-tip/init/do_mounts_rd.c +=================================================================== +--- linux-2.6-tip.orig/init/do_mounts_rd.c ++++ linux-2.6-tip/init/do_mounts_rd.c +@@ -11,6 +11,9 @@ + #include "do_mounts.h" + #include "../fs/squashfs/squashfs_fs.h" + ++#include ++ ++ + int __initdata rd_prompt = 1;/* 1 = prompt for RAM disk, 0 = don't prompt */ + + static int __init prompt_ramdisk(char *str) +@@ -29,7 +32,7 @@ static int __init ramdisk_start_setup(ch + } + __setup("ramdisk_start=", ramdisk_start_setup); + +-static int __init crd_load(int in_fd, int out_fd); ++static int __init crd_load(int in_fd, int out_fd, decompress_fn deco); + + /* + * This routine tries to find a RAM disk image to load, and returns the +@@ -38,15 +41,15 @@ static int __init crd_load(int in_fd, in + * numbers could not be found. 
+ * + * We currently check for the following magic numbers: +- * minix +- * ext2 ++ * minix ++ * ext2 + * romfs + * cramfs + * squashfs +- * gzip ++ * gzip + */ +-static int __init +-identify_ramdisk_image(int fd, int start_block) ++static int __init ++identify_ramdisk_image(int fd, int start_block, decompress_fn *decompressor) + { + const int size = 512; + struct minix_super_block *minixsb; +@@ -56,6 +59,7 @@ identify_ramdisk_image(int fd, int start + struct squashfs_super_block *squashfsb; + int nblocks = -1; + unsigned char *buf; ++ const char *compress_name; + + buf = kmalloc(size, GFP_KERNEL); + if (!buf) +@@ -69,18 +73,19 @@ identify_ramdisk_image(int fd, int start + memset(buf, 0xe5, size); + + /* +- * Read block 0 to test for gzipped kernel ++ * Read block 0 to test for compressed kernel + */ + sys_lseek(fd, start_block * BLOCK_SIZE, 0); + sys_read(fd, buf, size); + +- /* +- * If it matches the gzip magic numbers, return 0 +- */ +- if (buf[0] == 037 && ((buf[1] == 0213) || (buf[1] == 0236))) { +- printk(KERN_NOTICE +- "RAMDISK: Compressed image found at block %d\n", +- start_block); ++ *decompressor = decompress_method(buf, size, &compress_name); ++ if (compress_name) { ++ printk(KERN_NOTICE "RAMDISK: %s image found at block %d\n", ++ compress_name, start_block); ++ if (!*decompressor) ++ printk(KERN_EMERG ++ "RAMDISK: %s decompressor not configured!\n", ++ compress_name); + nblocks = 0; + goto done; + } +@@ -142,7 +147,7 @@ identify_ramdisk_image(int fd, int start + printk(KERN_NOTICE + "RAMDISK: Couldn't find valid RAM disk image starting at %d.\n", + start_block); +- ++ + done: + sys_lseek(fd, start_block * BLOCK_SIZE, 0); + kfree(buf); +@@ -157,6 +162,7 @@ int __init rd_load_image(char *from) + int nblocks, i, disk; + char *buf = NULL; + unsigned short rotate = 0; ++ decompress_fn decompressor = NULL; + #if !defined(CONFIG_S390) && !defined(CONFIG_PPC_ISERIES) + char rotator[4] = { '|' , '/' , '-' , '\\' }; + #endif +@@ -169,12 +175,12 @@ int __init rd_load_image(char *from) + if (in_fd < 0) + goto noclose_input; + +- nblocks = identify_ramdisk_image(in_fd, rd_image_start); ++ nblocks = identify_ramdisk_image(in_fd, rd_image_start, &decompressor); + if (nblocks < 0) + goto done; + + if (nblocks == 0) { +- if (crd_load(in_fd, out_fd) == 0) ++ if (crd_load(in_fd, out_fd, decompressor) == 0) + goto successful_load; + goto done; + } +@@ -200,7 +206,7 @@ int __init rd_load_image(char *from) + nblocks, rd_blocks); + goto done; + } +- ++ + /* + * OK, time to copy in the data + */ +@@ -273,138 +279,48 @@ int __init rd_load_disk(int n) + return rd_load_image("/dev/root"); + } + +-/* +- * gzip declarations +- */ +- +-#define OF(args) args +- +-#ifndef memzero +-#define memzero(s, n) memset ((s), 0, (n)) +-#endif +- +-typedef unsigned char uch; +-typedef unsigned short ush; +-typedef unsigned long ulg; +- +-#define INBUFSIZ 4096 +-#define WSIZE 0x8000 /* window size--must be a power of two, and */ +- /* at least 32K for zip's deflate method */ +- +-static uch *inbuf; +-static uch *window; +- +-static unsigned insize; /* valid bytes in inbuf */ +-static unsigned inptr; /* index of next byte to be processed in inbuf */ +-static unsigned outcnt; /* bytes in output buffer */ + static int exit_code; +-static int unzip_error; +-static long bytes_out; ++static int decompress_error; + static int crd_infd, crd_outfd; + +-#define get_byte() (inptr < insize ? 
inbuf[inptr++] : fill_inbuf()) +- +-/* Diagnostic functions (stubbed out) */ +-#define Assert(cond,msg) +-#define Trace(x) +-#define Tracev(x) +-#define Tracevv(x) +-#define Tracec(c,x) +-#define Tracecv(c,x) +- +-#define STATIC static +-#define INIT __init +- +-static int __init fill_inbuf(void); +-static void __init flush_window(void); +-static void __init error(char *m); +- +-#define NO_INFLATE_MALLOC +- +-#include "../lib/inflate.c" +- +-/* =========================================================================== +- * Fill the input buffer. This is called only when the buffer is empty +- * and at least one byte is really needed. +- * Returning -1 does not guarantee that gunzip() will ever return. +- */ +-static int __init fill_inbuf(void) ++static int __init compr_fill(void *buf, unsigned int len) + { +- if (exit_code) return -1; +- +- insize = sys_read(crd_infd, inbuf, INBUFSIZ); +- if (insize == 0) { +- error("RAMDISK: ran out of compressed data"); +- return -1; +- } +- +- inptr = 1; +- +- return inbuf[0]; ++ int r = sys_read(crd_infd, buf, len); ++ if (r < 0) ++ printk(KERN_ERR "RAMDISK: error while reading compressed data"); ++ else if (r == 0) ++ printk(KERN_ERR "RAMDISK: EOF while reading compressed data"); ++ return r; + } + +-/* =========================================================================== +- * Write the output window window[0..outcnt-1] and update crc and bytes_out. +- * (Used for the decompressed data only.) +- */ +-static void __init flush_window(void) ++static int __init compr_flush(void *window, unsigned int outcnt) + { +- ulg c = crc; /* temporary variable */ +- unsigned n, written; +- uch *in, ch; +- +- written = sys_write(crd_outfd, window, outcnt); +- if (written != outcnt && unzip_error == 0) { +- printk(KERN_ERR "RAMDISK: incomplete write (%d != %d) %ld\n", +- written, outcnt, bytes_out); +- unzip_error = 1; +- } +- in = window; +- for (n = 0; n < outcnt; n++) { +- ch = *in++; +- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); +- } +- crc = c; +- bytes_out += (ulg)outcnt; +- outcnt = 0; ++ int written = sys_write(crd_outfd, window, outcnt); ++ if (written != outcnt) { ++ if (decompress_error == 0) ++ printk(KERN_ERR ++ "RAMDISK: incomplete write (%d != %d)\n", ++ written, outcnt); ++ decompress_error = 1; ++ return -1; ++ } ++ return outcnt; + } + + static void __init error(char *x) + { + printk(KERN_ERR "%s\n", x); + exit_code = 1; +- unzip_error = 1; ++ decompress_error = 1; + } + +-static int __init crd_load(int in_fd, int out_fd) ++static int __init crd_load(int in_fd, int out_fd, decompress_fn deco) + { + int result; +- +- insize = 0; /* valid bytes in inbuf */ +- inptr = 0; /* index of next byte to be processed in inbuf */ +- outcnt = 0; /* bytes in output buffer */ +- exit_code = 0; +- bytes_out = 0; +- crc = (ulg)0xffffffffL; /* shift register contents */ +- + crd_infd = in_fd; + crd_outfd = out_fd; +- inbuf = kmalloc(INBUFSIZ, GFP_KERNEL); +- if (!inbuf) { +- printk(KERN_ERR "RAMDISK: Couldn't allocate gzip buffer\n"); +- return -1; +- } +- window = kmalloc(WSIZE, GFP_KERNEL); +- if (!window) { +- printk(KERN_ERR "RAMDISK: Couldn't allocate gzip window\n"); +- kfree(inbuf); +- return -1; +- } +- makecrc(); +- result = gunzip(); +- if (unzip_error) ++ result = deco(NULL, 0, compr_fill, compr_flush, NULL, NULL, error); ++ if (decompress_error) + result = 1; +- kfree(inbuf); +- kfree(window); + return result; + } +Index: linux-2.6-tip/init/initramfs.c +=================================================================== +--- 
linux-2.6-tip.orig/init/initramfs.c ++++ linux-2.6-tip/init/initramfs.c +@@ -390,11 +390,13 @@ static int __init write_buffer(char *buf + return len - count; + } + +-static void __init flush_buffer(char *buf, unsigned len) ++static int __init flush_buffer(void *bufv, unsigned len) + { ++ char *buf = (char *) bufv; + int written; ++ int origLen = len; + if (message) +- return; ++ return -1; + while ((written = write_buffer(buf, len)) < len && !message) { + char c = buf[written]; + if (c == '0') { +@@ -408,84 +410,28 @@ static void __init flush_buffer(char *bu + } else + error("junk in compressed archive"); + } ++ return origLen; + } + +-/* +- * gzip declarations +- */ ++static unsigned my_inptr; /* index of next byte to be processed in inbuf */ + +-#define OF(args) args +- +-#ifndef memzero +-#define memzero(s, n) memset ((s), 0, (n)) +-#endif +- +-typedef unsigned char uch; +-typedef unsigned short ush; +-typedef unsigned long ulg; +- +-#define WSIZE 0x8000 /* window size--must be a power of two, and */ +- /* at least 32K for zip's deflate method */ +- +-static uch *inbuf; +-static uch *window; +- +-static unsigned insize; /* valid bytes in inbuf */ +-static unsigned inptr; /* index of next byte to be processed in inbuf */ +-static unsigned outcnt; /* bytes in output buffer */ +-static long bytes_out; +- +-#define get_byte() (inptr < insize ? inbuf[inptr++] : -1) +- +-/* Diagnostic functions (stubbed out) */ +-#define Assert(cond,msg) +-#define Trace(x) +-#define Tracev(x) +-#define Tracevv(x) +-#define Tracec(c,x) +-#define Tracecv(c,x) +- +-#define STATIC static +-#define INIT __init +- +-static void __init flush_window(void); +-static void __init error(char *m); +- +-#define NO_INFLATE_MALLOC +- +-#include "../lib/inflate.c" +- +-/* =========================================================================== +- * Write the output window window[0..outcnt-1] and update crc and bytes_out. +- * (Used for the decompressed data only.) 
+- */ +-static void __init flush_window(void) +-{ +- ulg c = crc; /* temporary variable */ +- unsigned n; +- uch *in, ch; +- +- flush_buffer(window, outcnt); +- in = window; +- for (n = 0; n < outcnt; n++) { +- ch = *in++; +- c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); +- } +- crc = c; +- bytes_out += (ulg)outcnt; +- outcnt = 0; +-} ++#include + + static char * __init unpack_to_rootfs(char *buf, unsigned len, int check_only) + { + int written; ++ decompress_fn decompress; ++ const char *compress_name; ++ static __initdata char msg_buf[64]; ++ + dry_run = check_only; + header_buf = kmalloc(110, GFP_KERNEL); + symlink_buf = kmalloc(PATH_MAX + N_ALIGN(PATH_MAX) + 1, GFP_KERNEL); + name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL); +- window = kmalloc(WSIZE, GFP_KERNEL); +- if (!window || !header_buf || !symlink_buf || !name_buf) ++ ++ if (!header_buf || !symlink_buf || !name_buf) + panic("can't allocate buffers"); ++ + state = Start; + this_header = 0; + message = NULL; +@@ -505,22 +451,25 @@ static char * __init unpack_to_rootfs(ch + continue; + } + this_header = 0; +- insize = len; +- inbuf = buf; +- inptr = 0; +- outcnt = 0; /* bytes in output buffer */ +- bytes_out = 0; +- crc = (ulg)0xffffffffL; /* shift register contents */ +- makecrc(); +- gunzip(); ++ decompress = decompress_method(buf, len, &compress_name); ++ if (decompress) ++ decompress(buf, len, NULL, flush_buffer, NULL, ++ &my_inptr, error); ++ else if (compress_name) { ++ if (!message) { ++ snprintf(msg_buf, sizeof msg_buf, ++ "compression method %s not configured", ++ compress_name); ++ message = msg_buf; ++ } ++ } + if (state != Reset) +- error("junk in gzipped archive"); +- this_header = saved_offset + inptr; +- buf += inptr; +- len -= inptr; ++ error("junk in compressed archive"); ++ this_header = saved_offset + my_inptr; ++ buf += my_inptr; ++ len -= my_inptr; + } + dir_utime(); +- kfree(window); + kfree(name_buf); + kfree(symlink_buf); + kfree(header_buf); +@@ -579,7 +528,7 @@ static int __init populate_rootfs(void) + char *err = unpack_to_rootfs(__initramfs_start, + __initramfs_end - __initramfs_start, 0); + if (err) +- panic(err); ++ panic(err); /* Failed to decompress INTERNAL initramfs */ + if (initrd_start) { + #ifdef CONFIG_BLK_DEV_RAM + int fd; +@@ -605,9 +554,12 @@ static int __init populate_rootfs(void) + printk(KERN_INFO "Unpacking initramfs..."); + err = unpack_to_rootfs((char *)initrd_start, + initrd_end - initrd_start, 0); +- if (err) +- panic(err); +- printk(" done\n"); ++ if (err) { ++ printk(" failed!\n"); ++ printk(KERN_EMERG "%s\n", err); ++ } else { ++ printk(" done\n"); ++ } + free_initrd(); + #endif + } +Index: linux-2.6-tip/init/main.c +=================================================================== +--- linux-2.6-tip.orig/init/main.c ++++ linux-2.6-tip/init/main.c +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -35,6 +36,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -48,6 +50,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -61,6 +64,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -70,6 +74,7 @@ + #include + #include + #include ++#include + + #ifdef CONFIG_X86_LOCAL_APIC + #include +@@ -135,14 +140,14 @@ unsigned int __initdata setup_max_cpus = + * greater than 0, limits the maximum number of CPUs activated in + * SMP mode to . 
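The rewritten unpack_to_rootfs() above no longer assumes gzip: decompress_method() sniffs the leading bytes of the archive and returns the matching decompress_fn, or just a name so the "compression method %s not configured" message can be printed. As a rough, self-contained illustration of that kind of magic-byte sniffing only (function name invented here; the real table lives in the kernel's generic decompress code and also hands back the decompressor itself):

#include <stddef.h>
#include <string.h>

/* Illustrative only: map well-known header magic to a format name. */
const char *sniff_compression(const unsigned char *buf, size_t len)
{
	if (len >= 2 && buf[0] == 0x1f && buf[1] == 0x8b)
		return "gzip";
	if (len >= 3 && memcmp(buf, "BZh", 3) == 0)
		return "bzip2";
	if (len >= 2 && buf[0] == 0x5d && buf[1] == 0x00)
		return "lzma";
	return NULL;	/* unknown: caller treats the data as uncompressed cpio */
}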
+ */ +-#ifndef CONFIG_X86_IO_APIC +-static inline void disable_ioapic_setup(void) {}; +-#endif ++ ++void __weak arch_disable_smp_support(void) { } + + static int __init nosmp(char *str) + { + setup_max_cpus = 0; +- disable_ioapic_setup(); ++ arch_disable_smp_support(); ++ + return 0; + } + +@@ -152,14 +157,14 @@ static int __init maxcpus(char *str) + { + get_option(&str, &setup_max_cpus); + if (setup_max_cpus == 0) +- disable_ioapic_setup(); ++ arch_disable_smp_support(); + + return 0; + } + + early_param("maxcpus", maxcpus); + #else +-#define setup_max_cpus NR_CPUS ++const unsigned int setup_max_cpus = NR_CPUS; + #endif + + /* +@@ -452,6 +457,8 @@ static noinline void __init_refok rest_i + { + int pid; + ++ system_state = SYSTEM_BOOTING_SCHEDULER_OK; ++ + kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND); + numa_default_policy(); + pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); +@@ -464,7 +471,7 @@ static noinline void __init_refok rest_i + */ + init_idle_bootup_task(current); + rcu_scheduler_starting(); +- preempt_enable_no_resched(); ++ __preempt_enable_no_resched(); + schedule(); + preempt_disable(); + +@@ -540,6 +547,12 @@ asmlinkage void __init start_kernel(void + */ + lockdep_init(); + debug_objects_early_init(); ++ ++ /* ++ * Set up the the initial canary ASAP: ++ */ ++ boot_init_stack_canary(); ++ + cgroup_init_early(); + + local_irq_disable(); +@@ -574,8 +587,10 @@ asmlinkage void __init start_kernel(void + * fragile until we cpu_idle() for the first time. + */ + preempt_disable(); ++ + build_all_zonelists(); + page_alloc_init(); ++ early_init_hardirqs(); + printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); + parse_early_param(); + parse_args("Booting kernel", static_command_line, __start___param, +@@ -642,6 +657,7 @@ asmlinkage void __init start_kernel(void + enable_debug_pagealloc(); + cpu_hotplug_init(); + kmem_cache_init(); ++ kmemtrace_init(); + debug_objects_mem_init(); + idr_init_cache(); + setup_per_cpu_pageset(); +@@ -683,6 +699,9 @@ asmlinkage void __init start_kernel(void + + ftrace_init(); + ++#ifdef CONFIG_PREEMPT_RT ++ WARN_ON(irqs_disabled()); ++#endif + /* Do the rest non-__init'ed, we're now alive */ + rest_init(); + } +@@ -763,6 +782,7 @@ static void __init do_basic_setup(void) + { + rcu_init_sched(); /* needed by module_init stage. */ + init_workqueues(); ++ cpuset_init_smp(); + usermodehelper_init(); + driver_init(); + init_irq_proc(); +@@ -772,9 +792,14 @@ static void __init do_basic_setup(void) + static void __init do_pre_smp_initcalls(void) + { + initcall_t *call; ++ extern int spawn_desched_task(void); ++ ++ /* kmemcheck must initialize before all early initcalls: */ ++ kmemcheck_init(); + + for (call = __initcall_start; call < __early_initcall_end; call++) + do_one_initcall(*call); ++ spawn_desched_task(); + } + + static void run_init_process(char *init_filename) +@@ -809,6 +834,9 @@ static noinline int init_post(void) + printk(KERN_WARNING "Failed to execute %s\n", + ramdisk_execute_command); + } ++#ifdef CONFIG_PREEMPT_RT ++ WARN_ON(irqs_disabled()); ++#endif + + /* + * We try each of these until one succeeds. 
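The nosmp/maxcpus hunks above drop the io_apic-specific stub in favour of void __weak arch_disable_smp_support(void), which an architecture can override with a strong definition of its own. The underlying GCC mechanism, sketched outside the kernel (file names invented for the sketch):

/* generic.c -- weak default: does nothing if no arch override is linked in */
void __attribute__((weak)) arch_disable_smp_support(void)
{
}

/* arch.c -- a strong definition here silently replaces the weak stub at
 * link time; leave this file out and the no-op default above is used. */
#include <stdio.h>

void arch_disable_smp_support(void)
{
	puts("arch-specific SMP shutdown runs instead of the stub");
}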
+@@ -850,14 +878,14 @@ static int __init kernel_init(void * unu + + smp_prepare_cpus(setup_max_cpus); + ++ init_hardirqs(); ++ + do_pre_smp_initcalls(); + start_boot_trace(); + + smp_init(); + sched_init_smp(); + +- cpuset_init_smp(); +- + do_basic_setup(); + + /* +@@ -872,7 +900,57 @@ static int __init kernel_init(void * unu + ramdisk_execute_command = NULL; + prepare_namespace(); + } ++#ifdef CONFIG_PREEMPT_RT ++ WARN_ON(irqs_disabled()); ++#endif + ++#define DEBUG_COUNT (defined(CONFIG_DEBUG_RT_MUTEXES) + defined(CONFIG_IRQSOFF_TRACER) + defined(CONFIG_PREEMPT_TRACER) + defined(CONFIG_STACK_TRACER) + defined(CONFIG_INTERRUPT_OFF_HIST) + defined(CONFIG_PREEMPT_OFF_HIST) + defined(CONFIG_DEBUG_SLAB) + defined(CONFIG_DEBUG_PAGEALLOC) + defined(CONFIG_LOCKDEP) + (defined(CONFIG_FTRACE) - defined(CONFIG_FTRACE_MCOUNT_RECORD))) ++ ++#if DEBUG_COUNT > 0 ++ printk(KERN_ERR "*****************************************************************************\n"); ++ printk(KERN_ERR "* *\n"); ++#if DEBUG_COUNT == 1 ++ printk(KERN_ERR "* REMINDER, the following debugging option is turned on in your .config: *\n"); ++#else ++ printk(KERN_ERR "* REMINDER, the following debugging options are turned on in your .config: *\n"); ++#endif ++ printk(KERN_ERR "* *\n"); ++#ifdef CONFIG_FTRACE ++ printk(KERN_ERR "* CONFIG_FTRACE *\n"); ++#endif ++#ifdef CONFIG_DEBUG_RT_MUTEXES ++ printk(KERN_ERR "* CONFIG_DEBUG_RT_MUTEXES *\n"); ++#endif ++#ifdef CONFIG_IRQSOFF_TRACER ++ printk(KERN_ERR "* CONFIG_IRQSOFF_TRACER *\n"); ++#endif ++#ifdef CONFIG_PREEMPT_TRACER ++ printk(KERN_ERR "* CONFIG_PREEMPT_TRACER *\n"); ++#endif ++#ifdef CONFIG_INTERRUPT_OFF_HIST ++ printk(KERN_ERR "* CONFIG_INTERRUPT_OFF_HIST *\n"); ++#endif ++#ifdef CONFIG_PREEMPT_OFF_HIST ++ printk(KERN_ERR "* CONFIG_PREEMPT_OFF_HIST *\n"); ++#endif ++#ifdef CONFIG_DEBUG_SLAB ++ printk(KERN_ERR "* CONFIG_DEBUG_SLAB *\n"); ++#endif ++#ifdef CONFIG_DEBUG_PAGEALLOC ++ printk(KERN_ERR "* CONFIG_DEBUG_PAGEALLOC *\n"); ++#endif ++#ifdef CONFIG_LOCKDEP ++ printk(KERN_ERR "* CONFIG_LOCKDEP *\n"); ++#endif ++ printk(KERN_ERR "* *\n"); ++#if DEBUG_COUNT == 1 ++ printk(KERN_ERR "* it may increase runtime overhead and latencies. *\n"); ++#else ++ printk(KERN_ERR "* they may increase runtime overhead and latencies. *\n"); ++#endif ++ printk(KERN_ERR "* *\n"); ++ printk(KERN_ERR "*****************************************************************************\n"); ++#endif + /* + * Ok, we have completed the initial bootup, and + * we're essentially up and running. 
Get rid of the +@@ -880,5 +958,7 @@ static int __init kernel_init(void * unu + */ + + init_post(); ++ WARN_ON(debug_direct_keyboard); ++ + return 0; + } +Index: linux-2.6-tip/kernel/Makefile +=================================================================== +--- linux-2.6-tip.orig/kernel/Makefile ++++ linux-2.6-tip/kernel/Makefile +@@ -7,7 +7,7 @@ obj-y = sched.o fork.o exec_domain.o + sysctl.o capability.o ptrace.o timer.o user.o \ + signal.o sys.o kmod.o workqueue.o pid.o \ + rcupdate.o extable.o params.o posix-timers.o \ +- kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ ++ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o \ + hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ + notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ + async.o +@@ -27,7 +27,10 @@ obj-$(CONFIG_PROFILING) += profile.o + obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o + obj-$(CONFIG_STACKTRACE) += stacktrace.o + obj-y += time/ ++ifneq ($(CONFIG_PREEMPT_RT),y) ++obj-y += mutex.o + obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o ++endif + obj-$(CONFIG_LOCKDEP) += lockdep.o + ifeq ($(CONFIG_PROC_FS),y) + obj-$(CONFIG_LOCKDEP) += lockdep_proc.o +@@ -39,6 +42,7 @@ endif + obj-$(CONFIG_RT_MUTEXES) += rtmutex.o + obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o + obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o ++obj-$(CONFIG_PREEMPT_RT) += rt.o + obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o + obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o + ifneq ($(CONFIG_SMP),y) +@@ -74,6 +78,7 @@ obj-$(CONFIG_AUDIT_TREE) += audit_tree.o + obj-$(CONFIG_KPROBES) += kprobes.o + obj-$(CONFIG_KGDB) += kgdb.o + obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o ++obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o + obj-$(CONFIG_GENERIC_HARDIRQS) += irq/ + obj-$(CONFIG_SECCOMP) += seccomp.o + obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o +@@ -93,6 +98,7 @@ obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) + obj-$(CONFIG_FUNCTION_TRACER) += trace/ + obj-$(CONFIG_TRACING) += trace/ + obj-$(CONFIG_SMP) += sched_cpupri.o ++obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o + + ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) + # According to Alan Modra , the -fno-omit-frame-pointer is +Index: linux-2.6-tip/kernel/auditsc.c +=================================================================== +--- linux-2.6-tip.orig/kernel/auditsc.c ++++ linux-2.6-tip/kernel/auditsc.c +@@ -741,6 +741,7 @@ void audit_filter_inodes(struct task_str + rcu_read_unlock(); + } + ++#ifdef CONFIG_AUDIT_TREE + static void audit_set_auditable(struct audit_context *ctx) + { + if (!ctx->prio) { +@@ -748,6 +749,7 @@ static void audit_set_auditable(struct a + ctx->current_state = AUDIT_RECORD_CONTEXT; + } + } ++#endif + + static inline struct audit_context *audit_get_context(struct task_struct *tsk, + int return_valid, +Index: linux-2.6-tip/kernel/compat.c +=================================================================== +--- linux-2.6-tip.orig/kernel/compat.c ++++ linux-2.6-tip/kernel/compat.c +@@ -882,6 +882,17 @@ compat_sys_rt_sigtimedwait (compat_sigse + + } + ++asmlinkage long ++compat_sys_rt_tgsigqueueinfo(compat_pid_t tgid, compat_pid_t pid, int sig, ++ struct compat_siginfo __user *uinfo) ++{ ++ siginfo_t info; ++ ++ if (copy_siginfo_from_user32(&info, uinfo)) ++ return -EFAULT; ++ return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); ++} ++ + #ifdef __ARCH_WANT_COMPAT_SYS_TIME + + /* compat_time_t is a 32 bit "long" and needs to get converted. 
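The auditsc.c hunk above wraps audit_set_auditable() in #ifdef CONFIG_AUDIT_TREE, presumably because its only remaining callers live in the audit-tree code, so an unconditional static definition would warn as unused in !CONFIG_AUDIT_TREE builds. The pattern in miniature (helper name invented):

#include <stdio.h>

#ifdef CONFIG_AUDIT_TREE
/* Defined only when its sole caller is compiled in, so builds without
 * CONFIG_AUDIT_TREE do not trip -Wunused-function. */
static void audit_tree_only_helper(void)
{
	puts("used by the tree-audit paths only");
}
#endif

int main(void)
{
#ifdef CONFIG_AUDIT_TREE
	audit_tree_only_helper();
#endif
	return 0;
}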
*/ +Index: linux-2.6-tip/kernel/exit.c +=================================================================== +--- linux-2.6-tip.orig/kernel/exit.c ++++ linux-2.6-tip/kernel/exit.c +@@ -75,7 +75,9 @@ static void __unhash_process(struct task + detach_pid(p, PIDTYPE_SID); + + list_del_rcu(&p->tasks); ++ preempt_disable(); + __get_cpu_var(process_counts)--; ++ preempt_enable(); + } + list_del_rcu(&p->thread_group); + list_del_init(&p->sibling); +@@ -138,7 +140,7 @@ static void __exit_signal(struct task_st + * Do this under ->siglock, we can race with another thread + * doing sigqueue_free() if we have SIGQUEUE_PREALLOC signals. + */ +- flush_sigqueue(&tsk->pending); ++ flush_task_sigqueue(tsk); + + tsk->signal = NULL; + tsk->sighand = NULL; +@@ -162,6 +164,9 @@ static void delayed_put_task_struct(stru + { + struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); + ++#ifdef CONFIG_PERF_COUNTERS ++ WARN_ON_ONCE(!list_empty(&tsk->perf_counter_ctx.counter_list)); ++#endif + trace_sched_process_free(tsk); + put_task_struct(tsk); + } +@@ -694,9 +699,11 @@ static void exit_mm(struct task_struct * + task_lock(tsk); + tsk->mm = NULL; + up_read(&mm->mmap_sem); ++ preempt_disable(); // FIXME + enter_lazy_tlb(mm, current); + /* We don't want this task to be frozen prematurely */ + clear_freeze_flag(tsk); ++ preempt_enable(); + task_unlock(tsk); + mm_update_next_owner(mm); + mmput(mm); +@@ -945,12 +952,9 @@ static void check_stack_usage(void) + { + static DEFINE_SPINLOCK(low_water_lock); + static int lowest_to_date = THREAD_SIZE; +- unsigned long *n = end_of_stack(current); + unsigned long free; + +- while (*n == 0) +- n++; +- free = (unsigned long)n - (unsigned long)end_of_stack(current); ++ free = stack_not_used(current); + + if (free >= lowest_to_date) + return; +@@ -1061,10 +1065,6 @@ NORET_TYPE void do_exit(long code) + tsk->mempolicy = NULL; + #endif + #ifdef CONFIG_FUTEX +- /* +- * This must happen late, after the PID is not +- * hashed anymore: +- */ + if (unlikely(!list_empty(&tsk->pi_state_list))) + exit_pi_state_list(tsk); + if (unlikely(current->pi_state_cache)) +@@ -1087,14 +1087,17 @@ NORET_TYPE void do_exit(long code) + if (tsk->splice_pipe) + __free_pipe_info(tsk->splice_pipe); + +- preempt_disable(); ++again: ++ local_irq_disable(); + /* causes final put_task_struct in finish_task_switch(). */ + tsk->state = TASK_DEAD; +- schedule(); +- BUG(); +- /* Avoid "noreturn function does return". */ +- for (;;) +- cpu_relax(); /* For when BUG is null */ ++ __schedule(); ++ printk(KERN_ERR "BUG: dead task %s:%d back from the grave!\n", ++ current->comm, current->pid); ++ printk(KERN_ERR ".... flags: %08x, count: %d, state: %08lx\n", ++ current->flags, atomic_read(¤t->usage), current->state); ++ printk(KERN_ERR ".... trying again ...\n"); ++ goto again; + } + + EXPORT_SYMBOL_GPL(do_exit); +@@ -1331,6 +1334,12 @@ static int wait_task_zombie(struct task_ + */ + read_unlock(&tasklist_lock); + ++ /* ++ * Flush inherited counters to the parent - before the parent ++ * gets woken up by child-exit notifications. ++ */ ++ perf_counter_exit_task(p); ++ + retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0; + status = (p->signal->flags & SIGNAL_GROUP_EXIT) + ? 
p->signal->group_exit_code : p->exit_code; +@@ -1537,6 +1546,7 @@ static int wait_consider_task(struct tas + int __user *stat_addr, struct rusage __user *ru) + { + int ret = eligible_child(type, pid, options, p); ++ BUG_ON(!atomic_read(&p->usage)); + if (!ret) + return ret; + +Index: linux-2.6-tip/kernel/extable.c +=================================================================== +--- linux-2.6-tip.orig/kernel/extable.c ++++ linux-2.6-tip/kernel/extable.c +@@ -15,11 +15,22 @@ + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ ++#include ++#include + #include ++#include + #include +-#include +-#include ++ + #include ++#include ++ ++/* ++ * mutex protecting text section modification (dynamic code patching). ++ * some users need to sleep (allocating memory...) while they hold this lock. ++ * ++ * NOT exported to modules - patching kernel text is a really delicate matter. ++ */ ++DEFINE_MUTEX(text_mutex); + + extern struct exception_table_entry __start___ex_table[]; + extern struct exception_table_entry __stop___ex_table[]; +@@ -41,24 +52,43 @@ const struct exception_table_entry *sear + return e; + } + +-__notrace_funcgraph int core_kernel_text(unsigned long addr) ++static inline int init_kernel_text(unsigned long addr) ++{ ++ if (addr >= (unsigned long)_sinittext && ++ addr <= (unsigned long)_einittext) ++ return 1; ++ return 0; ++} ++ ++int core_kernel_text(unsigned long addr) + { + if (addr >= (unsigned long)_stext && + addr <= (unsigned long)_etext) + return 1; + + if (system_state == SYSTEM_BOOTING && +- addr >= (unsigned long)_sinittext && +- addr <= (unsigned long)_einittext) ++ init_kernel_text(addr)) + return 1; + return 0; + } + +-__notrace_funcgraph int __kernel_text_address(unsigned long addr) ++int __kernel_text_address(unsigned long addr) + { + if (core_kernel_text(addr)) + return 1; +- return __module_text_address(addr) != NULL; ++ if (__module_text_address(addr)) ++ return 1; ++ /* ++ * There might be init symbols in saved stacktraces. ++ * Give those symbols a chance to be printed in ++ * backtraces (such as lockdep traces). ++ * ++ * Since we are after the module-symbols check, there's ++ * no danger of address overlap: ++ */ ++ if (init_kernel_text(addr)) ++ return 1; ++ return 0; + } + + int kernel_text_address(unsigned long addr) +Index: linux-2.6-tip/kernel/fork.c +=================================================================== +--- linux-2.6-tip.orig/kernel/fork.c ++++ linux-2.6-tip/kernel/fork.c +@@ -39,6 +39,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -49,6 +50,8 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include +@@ -61,6 +64,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -79,10 +83,23 @@ int max_threads; /* tunable limit on nr + + DEFINE_PER_CPU(unsigned long, process_counts) = 0; + ++#ifdef CONFIG_PREEMPT_RT ++DEFINE_RWLOCK(tasklist_lock); /* outer */ ++#else + __cacheline_aligned DEFINE_RWLOCK(tasklist_lock); /* outer */ ++#endif + + DEFINE_TRACE(sched_process_fork); + ++/* ++ * Delayed mmdrop. In the PREEMPT_RT case we ++ * dont want to do this from the scheduling ++ * context. 
++ */ ++static DEFINE_PER_CPU(struct task_struct *, desched_task); ++ ++static DEFINE_PER_CPU(struct list_head, delayed_drop_list); ++ + int nr_processes(void) + { + int cpu; +@@ -159,6 +176,16 @@ void __put_task_struct(struct task_struc + free_task(tsk); + } + ++#ifdef CONFIG_PREEMPT_RT ++void __put_task_struct_cb(struct rcu_head *rhp) ++{ ++ struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); ++ ++ __put_task_struct(tsk); ++ ++} ++#endif ++ + /* + * macro override instead of weak attribute alias, to workaround + * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions. +@@ -169,6 +196,8 @@ void __put_task_struct(struct task_struc + + void __init fork_init(unsigned long mempages) + { ++ int i; ++ + #ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR + #ifndef ARCH_MIN_TASKALIGN + #define ARCH_MIN_TASKALIGN L1_CACHE_BYTES +@@ -176,7 +205,7 @@ void __init fork_init(unsigned long memp + /* create a slab on which task_structs can be allocated */ + task_struct_cachep = + kmem_cache_create("task_struct", sizeof(struct task_struct), +- ARCH_MIN_TASKALIGN, SLAB_PANIC, NULL); ++ ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL); + #endif + + /* do the arch specific task caches init */ +@@ -199,6 +228,9 @@ void __init fork_init(unsigned long memp + init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; + init_task.signal->rlim[RLIMIT_SIGPENDING] = + init_task.signal->rlim[RLIMIT_NPROC]; ++ ++ for (i = 0; i < NR_CPUS; i++) ++ INIT_LIST_HEAD(&per_cpu(delayed_drop_list, i)); + } + + int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst, +@@ -212,6 +244,8 @@ static struct task_struct *dup_task_stru + { + struct task_struct *tsk; + struct thread_info *ti; ++ unsigned long *stackend; ++ + int err; + + prepare_to_copy(orig); +@@ -237,6 +271,8 @@ static struct task_struct *dup_task_stru + goto out; + + setup_thread_stack(tsk, orig); ++ stackend = end_of_stack(tsk); ++ *stackend = STACK_END_MAGIC; /* for overflow detection */ + + #ifdef CONFIG_CC_STACKPROTECTOR + tsk->stack_canary = get_random_int(); +@@ -276,6 +312,7 @@ static int dup_mmap(struct mm_struct *mm + mm->locked_vm = 0; + mm->mmap = NULL; + mm->mmap_cache = NULL; ++ INIT_LIST_HEAD(&mm->delayed_drop); + mm->free_area_cache = oldmm->mmap_base; + mm->cached_hole_size = ~0UL; + mm->map_count = 0; +@@ -639,6 +676,9 @@ static int copy_mm(unsigned long clone_f + + tsk->min_flt = tsk->maj_flt = 0; + tsk->nvcsw = tsk->nivcsw = 0; ++#ifdef CONFIG_DETECT_HUNG_TASK ++ tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; ++#endif + + tsk->mm = NULL; + tsk->active_mm = NULL; +@@ -901,6 +941,9 @@ static void rt_mutex_init_task(struct ta + #ifdef CONFIG_RT_MUTEXES + plist_head_init(&p->pi_waiters, &p->pi_lock); + p->pi_blocked_on = NULL; ++# ifdef CONFIG_DEBUG_RT_MUTEXES ++ p->last_kernel_lock = NULL; ++# endif + #endif + } + +@@ -972,6 +1015,7 @@ static struct task_struct *copy_process( + goto fork_out; + + rt_mutex_init_task(p); ++ perf_counter_init_task(p); + + #ifdef CONFIG_PROVE_LOCKING + DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); +@@ -1018,6 +1062,7 @@ static struct task_struct *copy_process( + + clear_tsk_thread_flag(p, TIF_SIGPENDING); + init_sigpending(&p->pending); ++ p->sigqueue_cache = NULL; + + p->utime = cputime_zero; + p->stime = cputime_zero; +@@ -1029,16 +1074,11 @@ static struct task_struct *copy_process( + + p->default_timer_slack_ns = current->timer_slack_ns; + +-#ifdef CONFIG_DETECT_SOFTLOCKUP +- p->last_switch_count = 0; +- p->last_switch_timestamp = 0; +-#endif +- + 
task_io_accounting_init(&p->ioac); + acct_clear_integrals(p); + + posix_cpu_timers_init(p); +- ++ p->posix_timer_list = NULL; + p->lock_depth = -1; /* -1 = no lock */ + do_posix_clock_monotonic_gettime(&p->start_time); + p->real_start_time = p->start_time; +@@ -1074,6 +1114,7 @@ static struct task_struct *copy_process( + p->hardirq_context = 0; + p->softirq_context = 0; + #endif ++ p->pagefault_disabled = 0; + #ifdef CONFIG_LOCKDEP + p->lockdep_depth = 0; /* no locks held yet */ + p->curr_chain_key = 0; +@@ -1111,6 +1152,9 @@ static struct task_struct *copy_process( + retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); + if (retval) + goto bad_fork_cleanup_io; ++#ifdef CONFIG_DEBUG_PREEMPT ++ atomic_set(&p->lock_count, 0); ++#endif + + if (pid != &init_struct_pid) { + retval = -ENOMEM; +@@ -1150,6 +1194,7 @@ static struct task_struct *copy_process( + #endif + INIT_LIST_HEAD(&p->pi_state_list); + p->pi_state_cache = NULL; ++ p->futex_wakeup = NULL; + #endif + /* + * sigaltstack should be cleared when sharing the same VM +@@ -1197,11 +1242,13 @@ static struct task_struct *copy_process( + * to ensure it is on a valid CPU (and if not, just force it back to + * parent's CPU). This avoids alot of nasty races. + */ ++ preempt_disable(); + p->cpus_allowed = current->cpus_allowed; + p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed; + if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) || + !cpu_online(task_cpu(p)))) + set_task_cpu(p, smp_processor_id()); ++ preempt_enable(); + + /* CLONE_PARENT re-uses the old parent */ + if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) { +@@ -1251,7 +1298,9 @@ static struct task_struct *copy_process( + attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); + attach_pid(p, PIDTYPE_SID, task_session(current)); + list_add_tail_rcu(&p->tasks, &init_task.tasks); ++ preempt_disable(); + __get_cpu_var(process_counts)++; ++ preempt_enable(); + } + attach_pid(p, PIDTYPE_PID, pid); + nr_threads++; +@@ -1457,20 +1506,20 @@ void __init proc_caches_init(void) + { + sighand_cachep = kmem_cache_create("sighand_cache", + sizeof(struct sighand_struct), 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU, +- sighand_ctor); ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU| ++ SLAB_NOTRACK, sighand_ctor); + signal_cachep = kmem_cache_create("signal_cache", + sizeof(struct signal_struct), 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); + files_cachep = kmem_cache_create("files_cache", + sizeof(struct files_struct), 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); + fs_cachep = kmem_cache_create("fs_cache", + sizeof(struct fs_struct), 0, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); + mm_cachep = kmem_cache_create("mm_struct", + sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, +- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); ++ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); + mmap_init(); + } + +@@ -1726,3 +1775,138 @@ int unshare_files(struct files_struct ** + task_unlock(task); + return 0; + } ++ ++static int mmdrop_complete(void) ++{ ++ struct list_head *head; ++ int ret = 0; ++ ++ head = &get_cpu_var(delayed_drop_list); ++ while (!list_empty(head)) { ++ struct mm_struct *mm = list_entry(head->next, ++ struct mm_struct, delayed_drop); ++ list_del(&mm->delayed_drop); ++ put_cpu_var(delayed_drop_list); ++ ++ __mmdrop(mm); ++ ret = 1; ++ ++ head = &get_cpu_var(delayed_drop_list); ++ } ++ 
put_cpu_var(delayed_drop_list); ++ ++ return ret; ++} ++ ++/* ++ * We dont want to do complex work from the scheduler, thus ++ * we delay the work to a per-CPU worker thread: ++ */ ++void __mmdrop_delayed(struct mm_struct *mm) ++{ ++ struct task_struct *desched_task; ++ struct list_head *head; ++ ++ head = &get_cpu_var(delayed_drop_list); ++ list_add_tail(&mm->delayed_drop, head); ++ desched_task = __get_cpu_var(desched_task); ++ if (desched_task) ++ wake_up_process(desched_task); ++ put_cpu_var(delayed_drop_list); ++} ++ ++static void takeover_delayed_drop(int hotcpu) ++{ ++ struct list_head *head = &per_cpu(delayed_drop_list, hotcpu); ++ ++ while (!list_empty(head)) { ++ struct mm_struct *mm = list_entry(head->next, ++ struct mm_struct, delayed_drop); ++ ++ list_del(&mm->delayed_drop); ++ __mmdrop_delayed(mm); ++ } ++} ++ ++static int desched_thread(void * __bind_cpu) ++{ ++ set_user_nice(current, -10); ++ current->flags |= PF_NOFREEZE | PF_SOFTIRQ; ++ ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ while (!kthread_should_stop()) { ++ ++ if (mmdrop_complete()) ++ continue; ++ schedule(); ++ ++ /* ++ * This must be called from time to time on ia64, and is a ++ * no-op on other archs. Used to be in cpu_idle(), but with ++ * the new -rt semantics it can't stay there. ++ */ ++ check_pgt_cache(); ++ ++ set_current_state(TASK_INTERRUPTIBLE); ++ } ++ __set_current_state(TASK_RUNNING); ++ return 0; ++} ++ ++static int __devinit cpu_callback(struct notifier_block *nfb, ++ unsigned long action, ++ void *hcpu) ++{ ++ int hotcpu = (unsigned long)hcpu; ++ struct task_struct *p; ++ ++ switch (action) { ++ case CPU_UP_PREPARE: ++ ++ BUG_ON(per_cpu(desched_task, hotcpu)); ++ INIT_LIST_HEAD(&per_cpu(delayed_drop_list, hotcpu)); ++ p = kthread_create(desched_thread, hcpu, "desched/%d", hotcpu); ++ if (IS_ERR(p)) { ++ printk("desched_thread for %i failed\n", hotcpu); ++ return NOTIFY_BAD; ++ } ++ per_cpu(desched_task, hotcpu) = p; ++ kthread_bind(p, hotcpu); ++ break; ++ case CPU_ONLINE: ++ ++ wake_up_process(per_cpu(desched_task, hotcpu)); ++ break; ++#ifdef CONFIG_HOTPLUG_CPU ++ case CPU_UP_CANCELED: ++ ++ /* Unbind so it can run. Fall thru. */ ++ kthread_bind(per_cpu(desched_task, hotcpu), smp_processor_id()); ++ case CPU_DEAD: ++ ++ p = per_cpu(desched_task, hotcpu); ++ per_cpu(desched_task, hotcpu) = NULL; ++ kthread_stop(p); ++ takeover_delayed_drop(hotcpu); ++ takeover_tasklets(hotcpu); ++ break; ++#endif /* CONFIG_HOTPLUG_CPU */ ++ } ++ return NOTIFY_OK; ++} ++ ++static struct notifier_block __devinitdata cpu_nfb = { ++ .notifier_call = cpu_callback ++}; ++ ++__init int spawn_desched_task(void) ++{ ++ void *cpu = (void *)(long)smp_processor_id(); ++ ++ cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu); ++ cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); ++ register_cpu_notifier(&cpu_nfb); ++ return 0; ++} ++ +Index: linux-2.6-tip/kernel/futex.c +=================================================================== +--- linux-2.6-tip.orig/kernel/futex.c ++++ linux-2.6-tip/kernel/futex.c +@@ -19,6 +19,10 @@ + * PRIVATE futexes by Eric Dumazet + * Copyright (C) 2007 Eric Dumazet + * ++ * Requeue-PI support by Darren Hart ++ * Copyright (C) IBM Corporation, 2009 ++ * Thanks to Thomas Gleixner for conceptual design and careful reviews. ++ * + * Thanks to Ben LaHaise for yelling "hashed waitqueues" loudly + * enough at me, Linus for the original (flawed) idea, Matthew + * Kirkwood for proof-of-concept implementation. 
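__mmdrop_delayed() above never frees the mm from scheduling context; it links the mm onto a per-CPU list and wakes the desched/N kthread, which performs the real __mmdrop() later. A minimal userspace analogy of that hand-off, with a pthread standing in for the kthread and a plain mutex for the per-CPU list (all names invented for the sketch):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct work { struct work *next; int payload; };

static struct work *delayed_list;	/* items queued for later freeing */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  kick = PTHREAD_COND_INITIALIZER;
static int done;

/* producer side: cheaply queue the item and wake the worker */
static void drop_delayed(struct work *w)
{
	pthread_mutex_lock(&lock);
	w->next = delayed_list;
	delayed_list = w;
	pthread_cond_signal(&kick);
	pthread_mutex_unlock(&lock);
}

/* the desched-thread analogue: drain the list, do the expensive free */
static void *desched_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&lock);
	while (!done || delayed_list) {
		while (delayed_list) {
			struct work *w = delayed_list;
			delayed_list = w->next;
			pthread_mutex_unlock(&lock);
			printf("freeing payload %d\n", w->payload);
			free(w);	/* the __mmdrop() stand-in */
			pthread_mutex_lock(&lock);
		}
		if (!done)
			pthread_cond_wait(&kick, &lock);
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t tid;
	pthread_create(&tid, NULL, desched_thread, NULL);
	for (int i = 0; i < 3; i++) {
		struct work *w = malloc(sizeof(*w));
		w->payload = i;
		drop_delayed(w);
	}
	pthread_mutex_lock(&lock);
	done = 1;
	pthread_cond_signal(&kick);
	pthread_mutex_unlock(&lock);
	pthread_join(tid, NULL);
	return 0;
}

The shape matches the patch: the producer side only queues and signals, while everything expensive happens in the dedicated thread after the wakeup.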
+@@ -96,8 +100,8 @@ struct futex_pi_state { + */ + struct futex_q { + struct plist_node list; +- /* There can only be a single waiter */ +- wait_queue_head_t waiter; ++ /* Waiter reference */ ++ struct task_struct *task; + + /* Which hash list lock to use: */ + spinlock_t *lock_ptr; +@@ -107,14 +111,18 @@ struct futex_q { + + /* Optional priority inheritance state: */ + struct futex_pi_state *pi_state; +- struct task_struct *task; ++ ++ /* rt_waiter storage for requeue_pi: */ ++ struct rt_mutex_waiter *rt_waiter; + + /* Bitset for the optional bitmasked wakeup */ + u32 bitset; + }; + + /* +- * Split the global futex_lock into every hash list lock. ++ * Hash buckets are shared by all the futex_keys that hash to the same ++ * location. Each key may have multiple futex_q structures, one for each task ++ * waiting on a futex. + */ + struct futex_hash_bucket { + spinlock_t lock; +@@ -189,8 +197,7 @@ static void drop_futex_key_refs(union fu + /** + * get_futex_key - Get parameters which are the keys for a futex. + * @uaddr: virtual address of the futex +- * @shared: NULL for a PROCESS_PRIVATE futex, +- * ¤t->mm->mmap_sem for a PROCESS_SHARED futex ++ * @fshared: 0 for a PROCESS_PRIVATE futex, 1 for PROCESS_SHARED + * @key: address where result is stored. + * @rw: mapping needs to be read/write (values: VERIFY_READ, VERIFY_WRITE) + * +@@ -201,9 +208,7 @@ static void drop_futex_key_refs(union fu + * offset_within_page). For private mappings, it's (uaddr, current->mm). + * We can usually work out the index without swapping in the page. + * +- * fshared is NULL for PROCESS_PRIVATE futexes +- * For other futexes, it points to ¤t->mm->mmap_sem and +- * caller must have taken the reader lock. but NOT any spinlocks. ++ * lock_page() might sleep, the caller should not hold a spinlock. + */ + static int + get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) +@@ -279,6 +284,50 @@ void put_futex_key(int fshared, union fu + drop_futex_key_refs(key); + } + ++/* ++ * get_user_writeable - get user page and verify RW access ++ * @uaddr: pointer to faulting user space address ++ * ++ * We cannot write to the user space address and get_user just faults ++ * the page in, but does not tell us whether the mapping is writeable. ++ * ++ * We can not rely on access_ok() for private futexes as it is just a ++ * range check and we can neither rely on get_user_pages() as there ++ * might be a mprotect(PROT_READ) for that mapping after ++ * get_user_pages() and before the fault in the atomic write access. ++ */ ++static int get_user_writeable(u32 __user *uaddr) ++{ ++ unsigned long addr = (unsigned long)uaddr; ++ struct page *page; ++ int ret; ++ ++ ret = get_user_pages_fast(addr, 1, 1, &page); ++ if (ret > 0) ++ put_page(page); ++ ++ return ret; ++} ++ ++/** ++ * futex_top_waiter() - Return the highest priority waiter on a futex ++ * @hb: the hash bucket the futex_q's reside in ++ * @key: the futex key (to distinguish it from other futex futex_q's) ++ * ++ * Must be called with the hb lock held. ++ */ ++static struct futex_q *futex_top_waiter(struct futex_hash_bucket *hb, ++ union futex_key *key) ++{ ++ struct futex_q *this; ++ ++ plist_for_each_entry(this, &hb->chain, list) { ++ if (match_futex(&this->key, key)) ++ return this; ++ } ++ return NULL; ++} ++ + static u32 cmpxchg_futex_value_locked(u32 __user *uaddr, u32 uval, u32 newval) + { + u32 curval; +@@ -301,41 +350,6 @@ static int get_futex_value_locked(u32 *d + return ret ? -EFAULT : 0; + } + +-/* +- * Fault handling. 
+- */ +-static int futex_handle_fault(unsigned long address, int attempt) +-{ +- struct vm_area_struct * vma; +- struct mm_struct *mm = current->mm; +- int ret = -EFAULT; +- +- if (attempt > 2) +- return ret; +- +- down_read(&mm->mmap_sem); +- vma = find_vma(mm, address); +- if (vma && address >= vma->vm_start && +- (vma->vm_flags & VM_WRITE)) { +- int fault; +- fault = handle_mm_fault(mm, vma, address, 1); +- if (unlikely((fault & VM_FAULT_ERROR))) { +-#if 0 +- /* XXX: let's do this when we verify it is OK */ +- if (ret & VM_FAULT_OOM) +- ret = -ENOMEM; +-#endif +- } else { +- ret = 0; +- if (fault & VM_FAULT_MAJOR) +- current->maj_flt++; +- else +- current->min_flt++; +- } +- } +- up_read(&mm->mmap_sem); +- return ret; +-} + + /* + * PI code: +@@ -575,29 +589,203 @@ lookup_pi_state(u32 uval, struct futex_h + return 0; + } + ++/** ++ * futex_lock_pi_atomic() - atomic work required to acquire a pi aware futex ++ * @uaddr: the pi futex user address ++ * @hb: the pi futex hash bucket ++ * @key: the futex key associated with uaddr and hb ++ * @ps: the pi_state pointer where we store the result of the ++ * lookup ++ * @task: the task to perform the atomic lock work for. This will ++ * be "current" except in the case of requeue pi. ++ * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) ++ * ++ * Returns: ++ * 0 - ready to wait ++ * 1 - acquired the lock ++ * <0 - error ++ * ++ * The hb->lock and futex_key refs shall be held by the caller. ++ */ ++static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, ++ union futex_key *key, ++ struct futex_pi_state **ps, ++ struct task_struct *task, int set_waiters) ++{ ++ int lock_taken, ret, ownerdied = 0; ++ u32 uval, newval, curval; ++ ++retry: ++ ret = lock_taken = 0; ++ ++ /* ++ * To avoid races, we attempt to take the lock here again ++ * (by doing a 0 -> TID atomic cmpxchg), while holding all ++ * the locks. It will most likely not succeed. ++ */ ++ newval = task_pid_vnr(task); ++ if (set_waiters) ++ newval |= FUTEX_WAITERS; ++ ++ curval = cmpxchg_futex_value_locked(uaddr, 0, newval); ++ ++ if (unlikely(curval == -EFAULT)) ++ return -EFAULT; ++ ++ /* ++ * Detect deadlocks. ++ */ ++ if ((unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(task)))) ++ return -EDEADLK; ++ ++ /* ++ * Surprise - we got the lock. Just return to userspace: ++ */ ++ if (unlikely(!curval)) ++ return 1; ++ ++ uval = curval; ++ ++ /* ++ * Set the FUTEX_WAITERS flag, so the owner will know it has someone ++ * to wake at the next unlock. ++ */ ++ newval = curval | FUTEX_WAITERS; ++ ++ /* ++ * There are two cases, where a futex might have no owner (the ++ * owner TID is 0): OWNER_DIED. We take over the futex in this ++ * case. We also do an unconditional take over, when the owner ++ * of the futex died. ++ * ++ * This is safe as we are protected by the hash bucket lock ! ++ */ ++ if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { ++ /* Keep the OWNER_DIED bit */ ++ newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(task); ++ ownerdied = 0; ++ lock_taken = 1; ++ } ++ ++ curval = cmpxchg_futex_value_locked(uaddr, uval, newval); ++ ++ if (unlikely(curval == -EFAULT)) ++ return -EFAULT; ++ if (unlikely(curval != uval)) ++ goto retry; ++ ++ /* ++ * We took the lock due to owner died take over. ++ */ ++ if (unlikely(lock_taken)) ++ return 1; ++ ++ /* ++ * We dont have the lock. 
Look up the PI state (or create it if ++ * we are the first waiter): ++ */ ++ ret = lookup_pi_state(uval, hb, key, ps); ++ ++ if (unlikely(ret)) { ++ switch (ret) { ++ case -ESRCH: ++ /* ++ * No owner found for this futex. Check if the ++ * OWNER_DIED bit is set to figure out whether ++ * this is a robust futex or not. ++ */ ++ if (get_futex_value_locked(&curval, uaddr)) ++ return -EFAULT; ++ ++ /* ++ * We simply start over in case of a robust ++ * futex. The code above will take the futex ++ * and return happy. ++ */ ++ if (curval & FUTEX_OWNER_DIED) { ++ ownerdied = 1; ++ goto retry; ++ } ++ default: ++ break; ++ } ++ } ++ ++ return ret; ++} ++ + /* + * The hash bucket lock must be held when this is called. + * Afterwards, the futex_q must not be accessed. + */ +-static void wake_futex(struct futex_q *q) ++static void wake_futex(struct task_struct **wake_list, struct futex_q *q) + { +- plist_del(&q->list, &q->list.plist); ++ struct task_struct *p = q->task; ++ + /* +- * The lock in wake_up_all() is a crucial memory barrier after the +- * plist_del() and also before assigning to q->lock_ptr. ++ * We set q->lock_ptr = NULL _before_ we wake up the task. If ++ * a non futex wake up happens on another CPU then the task ++ * might exit and p would dereference a non existing task ++ * struct. Prevent this by holding a reference on p across the ++ * wake up. + */ +- wake_up(&q->waiter); ++ get_task_struct(p); ++ ++ plist_del(&q->list, &q->list.plist); + /* +- * The waiting task can free the futex_q as soon as this is written, +- * without taking any locks. This must come last. +- * +- * A memory barrier is required here to prevent the following store +- * to lock_ptr from getting ahead of the wakeup. Clearing the lock +- * at the end of wake_up_all() does not prevent this store from +- * moving. ++ * The waiting task can free the futex_q as soon as ++ * q->lock_ptr = NULL is written, without taking any locks. A ++ * memory barrier is required here to prevent the following ++ * store to lock_ptr from getting ahead of the plist_del. + */ + smp_wmb(); + q->lock_ptr = NULL; ++ ++ /* ++ * Atomically grab the task, if ->futex_wakeup is !0 already it means ++ * its already queued (either by us or someone else) and will get the ++ * wakeup due to that. ++ * ++ * This cmpxchg() implies a full barrier, which pairs with the write ++ * barrier implied by the wakeup in wake_futex_list(). ++ */ ++ if (cmpxchg(&p->futex_wakeup, 0, p) != 0) { ++ /* ++ * It was already queued, drop the extra ref and we're done. ++ */ ++ put_task_struct(p); ++ return; ++ } ++ ++ /* ++ * Put the task on our wakeup list by atomically switching it with ++ * the list head. (XXX its a local list, no possible concurrency, ++ * this could be written without cmpxchg). ++ */ ++ do { ++ p->futex_wakeup = *wake_list; ++ } while (cmpxchg(wake_list, p->futex_wakeup, p) != p->futex_wakeup); ++} ++ ++/* ++ * For each task on the list, deliver the pending wakeup and release the ++ * task reference obtained in wake_futex(). ++ */ ++static void wake_futex_list(struct task_struct *head) ++{ ++ while (head != &init_task) { ++ struct task_struct *next = head->futex_wakeup; ++ ++ head->futex_wakeup = NULL; ++ /* ++ * wake_up_state() implies a wmb() to pair with the queueing ++ * in wake_futex() so as to not miss wakeups. 
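wake_futex() above no longer wakes the task directly; it pushes the task onto a local wake_list by cmpxchg()ing it against the list head, and wake_futex_list() (just below) delivers the wake-ups once the hash-bucket lock has been dropped. The push is the usual lock-free swing-the-head loop; in C11 atomics it reads roughly as follows (standalone sketch, names invented; the kernel version terminates the list at &init_task and uses the per-task slot to filter out double queueing):

#include <stdatomic.h>
#include <stdio.h>

struct waiter {
	struct waiter *next;	/* plays the role of task->futex_wakeup */
	const char *name;
};

static _Atomic(struct waiter *) wake_list;	/* local list head */

/* Push w onto the list; safe against concurrent pushers. */
static void push_waiter(struct waiter *w)
{
	struct waiter *head = atomic_load(&wake_list);
	do {
		w->next = head;
	} while (!atomic_compare_exchange_weak(&wake_list, &head, w));
}

/* Detach the whole list at once, then walk it without further atomics,
 * which is what the deferred wake-up pass does after unlocking. */
static void drain_waiters(void)
{
	struct waiter *w = atomic_exchange(&wake_list, NULL);
	while (w) {
		struct waiter *next = w->next;
		printf("waking %s\n", w->name);
		w = next;
	}
}

int main(void)
{
	struct waiter a = { .name = "task-a" }, b = { .name = "task-b" };
	push_waiter(&a);
	push_waiter(&b);
	drain_waiters();
	return 0;
}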
++ */ ++ wake_up_state(head, TASK_NORMAL); ++ put_task_struct(head); ++ ++ head = next; ++ } + } + + static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) +@@ -694,9 +882,16 @@ double_lock_hb(struct futex_hash_bucket + } + } + ++static inline void ++double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2) ++{ ++ spin_unlock(&hb1->lock); ++ if (hb1 != hb2) ++ spin_unlock(&hb2->lock); ++} ++ + /* +- * Wake up all waiters hashed on the physical page that is mapped +- * to this virtual address: ++ * Wake up waiters matching bitset queued on this futex (uaddr). + */ + static int futex_wake(u32 __user *uaddr, int fshared, int nr_wake, u32 bitset) + { +@@ -704,6 +899,7 @@ static int futex_wake(u32 __user *uaddr, + struct futex_q *this, *next; + struct plist_head *head; + union futex_key key = FUTEX_KEY_INIT; ++ struct task_struct *wake_list = &init_task; + int ret; + + if (!bitset) +@@ -719,7 +915,7 @@ static int futex_wake(u32 __user *uaddr, + + plist_for_each_entry_safe(this, next, head, list) { + if (match_futex (&this->key, &key)) { +- if (this->pi_state) { ++ if (this->pi_state || this->rt_waiter) { + ret = -EINVAL; + break; + } +@@ -728,7 +924,7 @@ static int futex_wake(u32 __user *uaddr, + if (!(this->bitset & bitset)) + continue; + +- wake_futex(this); ++ wake_futex(&wake_list, this); + if (++ret >= nr_wake) + break; + } +@@ -736,6 +932,8 @@ static int futex_wake(u32 __user *uaddr, + + spin_unlock(&hb->lock); + put_futex_key(fshared, &key); ++ ++ wake_futex_list(wake_list); + out: + return ret; + } +@@ -752,9 +950,10 @@ futex_wake_op(u32 __user *uaddr1, int fs + struct futex_hash_bucket *hb1, *hb2; + struct plist_head *head; + struct futex_q *this, *next; +- int ret, op_ret, attempt = 0; ++ struct task_struct *wake_list = &init_task; ++ int ret, op_ret; + +-retryfull: ++retry: + ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); + if (unlikely(ret != 0)) + goto out; +@@ -765,16 +964,12 @@ retryfull: + hb1 = hash_futex(&key1); + hb2 = hash_futex(&key2); + +-retry: + double_lock_hb(hb1, hb2); +- ++retry_private: + op_ret = futex_atomic_op_inuser(op, uaddr2); + if (unlikely(op_ret < 0)) { +- u32 dummy; + +- spin_unlock(&hb1->lock); +- if (hb1 != hb2) +- spin_unlock(&hb2->lock); ++ double_unlock_hb(hb1, hb2); + + #ifndef CONFIG_MMU + /* +@@ -790,33 +985,23 @@ retry: + goto out_put_keys; + } + +- /* +- * futex_atomic_op_inuser needs to both read and write +- * *(int __user *)uaddr2, but we can't modify it +- * non-atomically. Therefore, if get_user below is not +- * enough, we need to handle the fault ourselves, while +- * still holding the mmap_sem. 
+- */ +- if (attempt++) { +- ret = futex_handle_fault((unsigned long)uaddr2, +- attempt); +- if (ret) +- goto out_put_keys; +- goto retry; +- } +- +- ret = get_user(dummy, uaddr2); ++ ret = get_user_writeable(uaddr2); + if (ret) +- return ret; ++ goto out_put_keys; ++ ++ if (!fshared) ++ goto retry_private; + +- goto retryfull; ++ put_futex_key(fshared, &key2); ++ put_futex_key(fshared, &key1); ++ goto retry; + } + + head = &hb1->chain; + + plist_for_each_entry_safe(this, next, head, list) { + if (match_futex (&this->key, &key1)) { +- wake_futex(this); ++ wake_futex(&wake_list, this); + if (++ret >= nr_wake) + break; + } +@@ -828,7 +1013,7 @@ retry: + op_ret = 0; + plist_for_each_entry_safe(this, next, head, list) { + if (match_futex (&this->key, &key2)) { +- wake_futex(this); ++ wake_futex(&wake_list, this); + if (++op_ret >= nr_wake2) + break; + } +@@ -836,41 +1021,208 @@ retry: + ret += op_ret; + } + +- spin_unlock(&hb1->lock); +- if (hb1 != hb2) +- spin_unlock(&hb2->lock); ++ double_unlock_hb(hb1, hb2); + out_put_keys: + put_futex_key(fshared, &key2); + out_put_key1: + put_futex_key(fshared, &key1); ++ ++ wake_futex_list(wake_list); + out: + return ret; + } + +-/* +- * Requeue all waiters hashed on one physical page to another +- * physical page. ++/** ++ * requeue_futex() - Requeue a futex_q from one hb to another ++ * @q: the futex_q to requeue ++ * @hb1: the source hash_bucket ++ * @hb2: the target hash_bucket ++ * @key2: the new key for the requeued futex_q ++ */ ++static inline ++void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, ++ struct futex_hash_bucket *hb2, union futex_key *key2) ++{ ++ ++ /* ++ * If key1 and key2 hash to the same bucket, no need to ++ * requeue. ++ */ ++ if (likely(&hb1->chain != &hb2->chain)) { ++ plist_del(&q->list, &hb1->chain); ++ plist_add(&q->list, &hb2->chain); ++ q->lock_ptr = &hb2->lock; ++#ifdef CONFIG_DEBUG_PI_LIST ++# ifdef CONFIG_PREEMPT_RT ++ q->list.plist.lock = NULL; ++# else ++ q->list.plist.lock = &hb2->lock; ++# endif ++#endif ++ } ++ get_futex_key_refs(key2); ++ q->key = *key2; ++} ++ ++/** ++ * requeue_pi_wake_futex() - Wake a task that acquired the lock during requeue ++ * q: the futex_q ++ * key: the key of the requeue target futex ++ * ++ * During futex_requeue, with requeue_pi=1, it is possible to acquire the ++ * target futex if it is uncontended or via a lock steal. Set the futex_q key ++ * to the requeue target futex so the waiter can detect the wakeup on the right ++ * futex, but remove it from the hb and NULL the rt_waiter so it can detect ++ * atomic lock acquisition. Must be called with the q->lock_ptr held. ++ */ ++static inline ++void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key) ++{ ++ drop_futex_key_refs(&q->key); ++ get_futex_key_refs(key); ++ q->key = *key; ++ ++ WARN_ON(plist_node_empty(&q->list)); ++ plist_del(&q->list, &q->list.plist); ++ ++ WARN_ON(!q->rt_waiter); ++ q->rt_waiter = NULL; ++ ++ wake_up_state(q->task, TASK_NORMAL); ++} ++ ++/** ++ * futex_proxy_trylock_atomic() - Attempt an atomic lock for the top waiter ++ * @pifutex: the user address of the to futex ++ * @hb1: the from futex hash bucket, must be locked by the caller ++ * @hb2: the to futex hash bucket, must be locked by the caller ++ * @key1: the from futex key ++ * @key2: the to futex key ++ * @ps: address to store the pi_state pointer ++ * @set_waiters: force setting the FUTEX_WAITERS bit (1) or not (0) ++ * ++ * Try and get the lock on behalf of the top waiter if we can do it atomically. 
++ * Wake the top waiter if we succeed. If the caller specified set_waiters, ++ * then direct futex_lock_pi_atomic() to force setting the FUTEX_WAITERS bit. ++ * hb1 and hb2 must be held by the caller. ++ * ++ * Returns: ++ * 0 - failed to acquire the lock atomicly ++ * 1 - acquired the lock ++ * <0 - error ++ */ ++static int futex_proxy_trylock_atomic(u32 __user *pifutex, ++ struct futex_hash_bucket *hb1, ++ struct futex_hash_bucket *hb2, ++ union futex_key *key1, union futex_key *key2, ++ struct futex_pi_state **ps, int set_waiters) ++{ ++ struct futex_q *top_waiter = NULL; ++ u32 curval; ++ int ret; ++ ++ if (get_futex_value_locked(&curval, pifutex)) ++ return -EFAULT; ++ ++ /* ++ * Find the top_waiter and determine if there are additional waiters. ++ * If the caller intends to requeue more than 1 waiter to pifutex, ++ * force futex_lock_pi_atomic() to set the FUTEX_WAITERS bit now, ++ * as we have means to handle the possible fault. If not, don't set ++ * the bit unecessarily as it will force the subsequent unlock to enter ++ * the kernel. ++ */ ++ top_waiter = futex_top_waiter(hb1, key1); ++ ++ /* There are no waiters, nothing for us to do. */ ++ if (!top_waiter) ++ return 0; ++ ++ /* ++ * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in ++ * the contended case or if set_waiters is 1. The pi_state is returned ++ * in ps in contended cases. ++ */ ++ ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, ++ set_waiters); ++ if (ret == 1) ++ requeue_pi_wake_futex(top_waiter, key2); ++ ++ return ret; ++} ++ ++/** ++ * futex_requeue() - Requeue waiters from uaddr1 to uaddr2 ++ * uaddr1: source futex user address ++ * uaddr2: target futex user address ++ * nr_wake: number of waiters to wake (must be 1 for requeue_pi) ++ * nr_requeue: number of waiters to requeue (0-INT_MAX) ++ * requeue_pi: if we are attempting to requeue from a non-pi futex to a ++ * pi futex (pi to pi requeue is not supported) ++ * ++ * Requeue waiters on uaddr1 to uaddr2. In the requeue_pi case, try to acquire ++ * uaddr2 atomically on behalf of the top waiter. ++ * ++ * Returns: ++ * >=0 - on success, the number of tasks requeued or woken ++ * <0 - on error + */ + static int futex_requeue(u32 __user *uaddr1, int fshared, u32 __user *uaddr2, +- int nr_wake, int nr_requeue, u32 *cmpval) ++ int nr_wake, int nr_requeue, u32 *cmpval, ++ int requeue_pi) + { + union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT; ++ int drop_count = 0, task_count = 0, ret; ++ struct futex_pi_state *pi_state = NULL; + struct futex_hash_bucket *hb1, *hb2; + struct plist_head *head1; + struct futex_q *this, *next; +- int ret, drop_count = 0; ++ struct task_struct *wake_list = &init_task; ++ u32 curval2; ++ ++ if (requeue_pi) { ++ /* ++ * requeue_pi requires a pi_state, try to allocate it now ++ * without any locks in case it fails. ++ */ ++ if (refill_pi_state_cache()) ++ return -ENOMEM; ++ /* ++ * requeue_pi must wake as many tasks as it can, up to nr_wake ++ * + nr_requeue, since it acquires the rt_mutex prior to ++ * returning to userspace, so as to not leave the rt_mutex with ++ * waiters and no owner. However, second and third wake-ups ++ * cannot be predicted as they involve race conditions with the ++ * first wake and a fault while looking up the pi_state. Both ++ * pthread_cond_signal() and pthread_cond_broadcast() should ++ * use nr_wake=1. 
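All of this requeue_pi plumbing exists so that a condition-variable broadcast wakes a single waiter and requeues the rest straight onto the mutex futex instead of causing a thundering herd. Ignoring the PI variant, the plain FUTEX_CMP_REQUEUE call a glibc-style condvar implementation would issue looks roughly like this (wrapper name invented, error handling omitted; a sketch, not glibc source):

#include <linux/futex.h>
#include <limits.h>
#include <stdint.h>
#include <sys/syscall.h>
#include <unistd.h>

/* "Broadcast": wake one waiter on cond_val and move the rest onto
 * mutex_val, provided *cond_val still equals expected. */
static long cond_broadcast_requeue(uint32_t *cond_val, uint32_t *mutex_val,
				   uint32_t expected)
{
	return syscall(SYS_futex, cond_val, FUTEX_CMP_REQUEUE,
		       1 /* nr_wake */, INT_MAX /* nr_requeue */,
		       mutex_val, expected);
}

If *cond_val no longer equals expected the kernel refuses with EAGAIN, which is exactly the cmpval check futex_requeue() performs above.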
++ */ ++ if (nr_wake != 1) ++ return -EINVAL; ++ } + + retry: ++ if (pi_state != NULL) { ++ /* ++ * We will have to lookup the pi_state again, so free this one ++ * to keep the accounting correct. ++ */ ++ free_pi_state(pi_state); ++ pi_state = NULL; ++ } ++ + ret = get_futex_key(uaddr1, fshared, &key1, VERIFY_READ); + if (unlikely(ret != 0)) + goto out; +- ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); ++ ret = get_futex_key(uaddr2, fshared, &key2, ++ requeue_pi ? VERIFY_WRITE : VERIFY_READ); + if (unlikely(ret != 0)) + goto out_put_key1; + + hb1 = hash_futex(&key1); + hb2 = hash_futex(&key2); + ++retry_private: + double_lock_hb(hb1, hb2); + + if (likely(cmpval != NULL)) { +@@ -879,16 +1231,18 @@ retry: + ret = get_futex_value_locked(&curval, uaddr1); + + if (unlikely(ret)) { +- spin_unlock(&hb1->lock); +- if (hb1 != hb2) +- spin_unlock(&hb2->lock); ++ double_unlock_hb(hb1, hb2); + + ret = get_user(curval, uaddr1); ++ if (ret) ++ goto out_put_keys; + +- if (!ret) +- goto retry; ++ if (!fshared) ++ goto retry_private; + +- goto out_put_keys; ++ put_futex_key(fshared, &key2); ++ put_futex_key(fshared, &key1); ++ goto retry; + } + if (curval != *cmpval) { + ret = -EAGAIN; +@@ -896,40 +1250,110 @@ retry: + } + } + +- head1 = &hb1->chain; +- plist_for_each_entry_safe(this, next, head1, list) { +- if (!match_futex (&this->key, &key1)) +- continue; +- if (++ret <= nr_wake) { +- wake_futex(this); +- } else { +- /* +- * If key1 and key2 hash to the same bucket, no need to +- * requeue. +- */ +- if (likely(head1 != &hb2->chain)) { +- plist_del(&this->list, &hb1->chain); +- plist_add(&this->list, &hb2->chain); +- this->lock_ptr = &hb2->lock; +-#ifdef CONFIG_DEBUG_PI_LIST +- this->list.plist.lock = &hb2->lock; +-#endif +- } +- this->key = key2; +- get_futex_key_refs(&key2); +- drop_count++; ++ if (requeue_pi && (task_count - nr_wake < nr_requeue)) { ++ /* ++ * Attempt to acquire uaddr2 and wake the top waiter. If we ++ * intend to requeue waiters, force setting the FUTEX_WAITERS ++ * bit. We force this here where we are able to easily handle ++ * faults rather in the requeue loop below. ++ */ ++ ret = futex_proxy_trylock_atomic(uaddr2, hb1, hb2, &key1, ++ &key2, &pi_state, nr_requeue); + +- if (ret - nr_wake >= nr_requeue) +- break; ++ /* ++ * At this point the top_waiter has either taken uaddr2 or is ++ * waiting on it. If the former, then the pi_state will not ++ * exist yet, look it up one more time to ensure we have a ++ * reference to it. ++ */ ++ if (ret == 1) { ++ WARN_ON(pi_state); ++ task_count++; ++ ret = get_futex_value_locked(&curval2, uaddr2); ++ if (!ret) ++ ret = lookup_pi_state(curval2, hb2, &key2, ++ &pi_state); ++ } ++ ++ switch (ret) { ++ case 0: ++ break; ++ case -EFAULT: ++ double_unlock_hb(hb1, hb2); ++ put_futex_key(fshared, &key2); ++ put_futex_key(fshared, &key1); ++ ret = get_user_writeable(uaddr2); ++ if (!ret) ++ goto retry; ++ goto out; ++ case -EAGAIN: ++ /* The owner was exiting, try again. */ ++ double_unlock_hb(hb1, hb2); ++ put_futex_key(fshared, &key2); ++ put_futex_key(fshared, &key1); ++ cond_resched(); ++ goto retry; ++ default: ++ goto out_unlock; + } + } + ++ head1 = &hb1->chain; ++ plist_for_each_entry_safe(this, next, head1, list) { ++ if (task_count - nr_wake >= nr_requeue) ++ break; ++ ++ if (!match_futex(&this->key, &key1)) ++ continue; ++ ++ WARN_ON(!requeue_pi && this->rt_waiter); ++ WARN_ON(requeue_pi && !this->rt_waiter); ++ ++ /* ++ * Wake nr_wake waiters. For requeue_pi, if we acquired the ++ * lock, we already woke the top_waiter. 
If not, it will be ++ * woken by futex_unlock_pi(). ++ */ ++ if (++task_count <= nr_wake && !requeue_pi) { ++ wake_futex(&wake_list, this); ++ continue; ++ } ++ ++ /* ++ * Requeue nr_requeue waiters and possibly one more in the case ++ * of requeue_pi if we couldn't acquire the lock atomically. ++ */ ++ if (requeue_pi) { ++ /* Prepare the waiter to take the rt_mutex. */ ++ atomic_inc(&pi_state->refcount); ++ this->pi_state = pi_state; ++ ret = rt_mutex_start_proxy_lock(&pi_state->pi_mutex, ++ this->rt_waiter, ++ this->task, 1); ++ if (ret == 1) { ++ /* We got the lock. */ ++ requeue_pi_wake_futex(this, &key2); ++ continue; ++ } else if (ret) { ++ /* -EDEADLK */ ++ this->pi_state = NULL; ++ free_pi_state(pi_state); ++ goto out_unlock; ++ } ++ } ++ requeue_futex(this, hb1, hb2, &key2); ++ drop_count++; ++ } ++ + out_unlock: +- spin_unlock(&hb1->lock); +- if (hb1 != hb2) +- spin_unlock(&hb2->lock); ++ double_unlock_hb(hb1, hb2); + +- /* drop_futex_key_refs() must be called outside the spinlocks. */ ++ /* ++ * drop_futex_key_refs() must be called outside the spinlocks. During ++ * the requeue we moved futex_q's from the hash bucket at key1 to the ++ * one at key2 and updated their key pointer. We no longer need to ++ * hold the references to key1. ++ */ + while (--drop_count >= 0) + drop_futex_key_refs(&key1); + +@@ -937,8 +1361,12 @@ out_put_keys: + put_futex_key(fshared, &key2); + out_put_key1: + put_futex_key(fshared, &key1); ++ ++ wake_futex_list(wake_list); + out: +- return ret; ++ if (pi_state != NULL) ++ free_pi_state(pi_state); ++ return ret ? ret : task_count; + } + + /* The key must be already stored in q->key. */ +@@ -946,8 +1374,6 @@ static inline struct futex_hash_bucket * + { + struct futex_hash_bucket *hb; + +- init_waitqueue_head(&q->waiter); +- + get_futex_key_refs(&q->key); + hb = hash_futex(&q->key); + q->lock_ptr = &hb->lock; +@@ -972,8 +1398,12 @@ static inline void queue_me(struct futex + + plist_node_init(&q->list, prio); + #ifdef CONFIG_DEBUG_PI_LIST ++#ifdef CONFIG_PREEMPT_RT ++ q->list.plist.lock = NULL; ++#else + q->list.plist.lock = &hb->lock; + #endif ++#endif + plist_add(&q->list, &hb->chain); + q->task = current; + spin_unlock(&hb->lock); +@@ -1065,7 +1495,7 @@ static int fixup_pi_state_owner(u32 __us + struct futex_pi_state *pi_state = q->pi_state; + struct task_struct *oldowner = pi_state->owner; + u32 uval, curval, newval; +- int ret, attempt = 0; ++ int ret; + + /* Owner died? */ + if (!pi_state->owner) +@@ -1078,11 +1508,9 @@ static int fixup_pi_state_owner(u32 __us + * in the user space variable. This must be atomic as we have + * to preserve the owner died bit here. + * +- * Note: We write the user space value _before_ changing the +- * pi_state because we can fault here. Imagine swapped out +- * pages or a fork, which was running right before we acquired +- * mmap_sem, that marked all the anonymous memory readonly for +- * cow. ++ * Note: We write the user space value _before_ changing the pi_state ++ * because we can fault here. Imagine swapped out pages or a fork ++ * that marked all the anonymous memory readonly for cow. 
+ * + * Modifying pi_state _before_ the user space value would + * leave the pi_state in an inconsistent state when we fault +@@ -1138,7 +1566,7 @@ retry: + handle_fault: + spin_unlock(q->lock_ptr); + +- ret = futex_handle_fault((unsigned long)uaddr, attempt++); ++ ret = get_user_writeable(uaddr); + + spin_lock(q->lock_ptr); + +@@ -1160,37 +1588,158 @@ handle_fault: + */ + #define FLAGS_SHARED 0x01 + #define FLAGS_CLOCKRT 0x02 ++#define FLAGS_HAS_TIMEOUT 0x04 + + static long futex_wait_restart(struct restart_block *restart); + +-static int futex_wait(u32 __user *uaddr, int fshared, +- u32 val, ktime_t *abs_time, u32 bitset, int clockrt) ++/** ++ * fixup_owner() - Post lock pi_state and corner case management ++ * @uaddr: user address of the futex ++ * @fshared: whether the futex is shared (1) or not (0) ++ * @q: futex_q (contains pi_state and access to the rt_mutex) ++ * @locked: if the attempt to take the rt_mutex succeeded (1) or not (0) ++ * ++ * After attempting to lock an rt_mutex, this function is called to cleanup ++ * the pi_state owner as well as handle race conditions that may allow us to ++ * acquire the lock. Must be called with the hb lock held. ++ * ++ * Returns: ++ * 1 - success, lock taken ++ * 0 - success, lock not taken ++ * <0 - on error (-EFAULT) ++ */ ++static int fixup_owner(u32 __user *uaddr, int fshared, struct futex_q *q, ++ int locked) + { +- struct task_struct *curr = current; +- struct restart_block *restart; +- DECLARE_WAITQUEUE(wait, curr); +- struct futex_hash_bucket *hb; +- struct futex_q q; +- u32 uval; +- int ret; +- struct hrtimer_sleeper t; +- int rem = 0; ++ struct task_struct *owner; ++ int ret = 0; + +- if (!bitset) +- return -EINVAL; ++ if (locked) { ++ /* ++ * Got the lock. We might not be the anticipated owner if we ++ * did a lock-steal - fix up the PI-state in that case: ++ */ ++ if (q->pi_state->owner != current) ++ ret = fixup_pi_state_owner(uaddr, q, current, fshared); ++ goto out; ++ } + +- q.pi_state = NULL; +- q.bitset = bitset; +-retry: +- q.key = FUTEX_KEY_INIT; +- ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_READ); +- if (unlikely(ret != 0)) ++ /* ++ * Catch the rare case, where the lock was released when we were on the ++ * way back before we locked the hash bucket. ++ */ ++ if (q->pi_state->owner == current) { ++ /* ++ * Try to get the rt_mutex now. This might fail as some other ++ * task acquired the rt_mutex after we removed ourself from the ++ * rt_mutex waiters list. ++ */ ++ if (rt_mutex_trylock(&q->pi_state->pi_mutex)) { ++ locked = 1; ++ goto out; ++ } ++ ++ /* ++ * pi_state is incorrect, some other task did a lock steal and ++ * we returned due to timeout or signal without taking the ++ * rt_mutex. Too late. We can access the rt_mutex_owner without ++ * locking, as the other task is now blocked on the hash bucket ++ * lock. Fix the state up. ++ */ ++ owner = rt_mutex_owner(&q->pi_state->pi_mutex); ++ ret = fixup_pi_state_owner(uaddr, q, owner, fshared); + goto out; ++ } + +- hb = queue_lock(&q); ++ /* ++ * Paranoia check. If we did not take the lock, then we should not be ++ * the owner, nor the pending owner, of the rt_mutex. ++ */ ++ if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) ++ printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p " ++ "pi-state %p\n", ret, ++ q->pi_state->pi_mutex.owner, ++ q->pi_state->owner); ++ ++out: ++ return ret ? 
ret : locked; ++} ++ ++/** ++ * futex_wait_queue_me() - queue_me() and wait for wakeup, timeout, or signal ++ * @hb: the futex hash bucket, must be locked by the caller ++ * @q: the futex_q to queue up on ++ * @timeout: the prepared hrtimer_sleeper, or null for no timeout ++ */ ++static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q, ++ struct hrtimer_sleeper *timeout) ++{ ++ queue_me(q, hb); + + /* +- * Access the page AFTER the futex is queued. ++ * There might have been scheduling since the queue_me(), as we ++ * cannot hold a spinlock across the get_user() in case it ++ * faults, and we cannot just set TASK_INTERRUPTIBLE state when ++ * queueing ourselves into the futex hash. This code thus has to ++ * rely on the futex_wake() code removing us from hash when it ++ * wakes us up. ++ */ ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ /* Arm the timer */ ++ if (timeout) { ++ hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); ++ if (!hrtimer_active(&timeout->timer)) ++ timeout->task = NULL; ++ } ++ ++ /* ++ * !plist_node_empty() is safe here without any lock. ++ * q.lock_ptr != 0 is not safe, because of ordering against wakeup. ++ */ ++ if (likely(!plist_node_empty(&q->list))) { ++ unsigned long nosched_flag = current->flags & PF_NOSCHED; ++ ++ current->flags &= ~PF_NOSCHED; ++ ++ /* ++ * If the timer has already expired, current will already be ++ * flagged for rescheduling. Only call schedule if there ++ * is no timeout, or if it has yet to expire. ++ */ ++ if (!timeout || timeout->task) ++ schedule(); ++ ++ current->flags |= nosched_flag; ++ } ++ __set_current_state(TASK_RUNNING); ++} ++ ++/** ++ * futex_wait_setup() - Prepare to wait on a futex ++ * @uaddr: the futex userspace address ++ * @val: the expected value ++ * @fshared: whether the futex is shared (1) or not (0) ++ * @q: the associated futex_q ++ * @hb: storage for hash_bucket pointer to be returned to caller ++ * ++ * Setup the futex_q and locate the hash_bucket. Get the futex value and ++ * compare it with the expected value. Handle atomic faults internally. ++ * Return with the hb lock held and a q.key reference on success, and unlocked ++ * with no q.key reference on failure. ++ * ++ * Returns: ++ * 0 - uaddr contains val and hb has been locked ++ * <1 - -EFAULT or -EWOULDBLOCK (uaddr does not contain val) and hb is unlcoked ++ */ ++static int futex_wait_setup(u32 __user *uaddr, u32 val, int fshared, ++ struct futex_q *q, struct futex_hash_bucket **hb) ++{ ++ u32 uval; ++ int ret; ++ ++ /* ++ * Access the page AFTER the hash-bucket is locked. + * Order is important: + * + * Userspace waiter: val = var; if (cond(val)) futex_wait(&var, val); +@@ -1205,95 +1754,83 @@ retry: + * A consequence is that futex_wait() can return zero and absorb + * a wakeup when *uaddr != val on entry to the syscall. This is + * rare, but normal. +- * +- * for shared futexes, we hold the mmap semaphore, so the mapping +- * cannot have changed since we looked it up in get_futex_key. 
+ */ ++retry: ++ q->key = FUTEX_KEY_INIT; ++ ret = get_futex_key(uaddr, fshared, &q->key, VERIFY_READ); ++ if (unlikely(ret != 0)) ++ return ret; ++ ++retry_private: ++ *hb = queue_lock(q); ++ + ret = get_futex_value_locked(&uval, uaddr); + +- if (unlikely(ret)) { +- queue_unlock(&q, hb); +- put_futex_key(fshared, &q.key); ++ if (ret) { ++ queue_unlock(q, *hb); + + ret = get_user(uval, uaddr); ++ if (ret) ++ goto out; + +- if (!ret) +- goto retry; +- goto out; +- } +- ret = -EWOULDBLOCK; +- if (unlikely(uval != val)) { +- queue_unlock(&q, hb); +- goto out_put_key; ++ if (!fshared) ++ goto retry_private; ++ ++ put_futex_key(fshared, &q->key); ++ goto retry; + } + +- /* Only actually queue if *uaddr contained val. */ +- queue_me(&q, hb); ++ if (uval != val) { ++ queue_unlock(q, *hb); ++ ret = -EWOULDBLOCK; ++ } + +- /* +- * There might have been scheduling since the queue_me(), as we +- * cannot hold a spinlock across the get_user() in case it +- * faults, and we cannot just set TASK_INTERRUPTIBLE state when +- * queueing ourselves into the futex hash. This code thus has to +- * rely on the futex_wake() code removing us from hash when it +- * wakes us up. +- */ ++out: ++ if (ret) ++ put_futex_key(fshared, &q->key); ++ return ret; ++} + +- /* add_wait_queue is the barrier after __set_current_state. */ +- __set_current_state(TASK_INTERRUPTIBLE); +- add_wait_queue(&q.waiter, &wait); +- /* +- * !plist_node_empty() is safe here without any lock. +- * q.lock_ptr != 0 is not safe, because of ordering against wakeup. +- */ +- if (likely(!plist_node_empty(&q.list))) { +- if (!abs_time) +- schedule(); +- else { +- unsigned long slack; +- slack = current->timer_slack_ns; +- if (rt_task(current)) +- slack = 0; +- hrtimer_init_on_stack(&t.timer, +- clockrt ? CLOCK_REALTIME : +- CLOCK_MONOTONIC, +- HRTIMER_MODE_ABS); +- hrtimer_init_sleeper(&t, current); +- hrtimer_set_expires_range_ns(&t.timer, *abs_time, slack); +- +- hrtimer_start_expires(&t.timer, HRTIMER_MODE_ABS); +- if (!hrtimer_active(&t.timer)) +- t.task = NULL; ++static int futex_wait(u32 __user *uaddr, int fshared, ++ u32 val, ktime_t *abs_time, u32 bitset, int clockrt) ++{ ++ struct hrtimer_sleeper timeout, *to = NULL; ++ struct restart_block *restart; ++ struct futex_hash_bucket *hb; ++ struct futex_q q; ++ int ret; + +- /* +- * the timer could have already expired, in which +- * case current would be flagged for rescheduling. +- * Don't bother calling schedule. +- */ +- if (likely(t.task)) +- schedule(); ++ if (!bitset) ++ return -EINVAL; + +- hrtimer_cancel(&t.timer); ++ q.pi_state = NULL; ++ q.bitset = bitset; ++ q.rt_waiter = NULL; + +- /* Flag if a timeout occured */ +- rem = (t.task == NULL); ++ if (abs_time) { ++ to = &timeout; + +- destroy_hrtimer_on_stack(&t.timer); +- } ++ hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : ++ CLOCK_MONOTONIC, HRTIMER_MODE_ABS); ++ hrtimer_init_sleeper(to, current); ++ hrtimer_set_expires_range_ns(&to->timer, *abs_time, ++ current->timer_slack_ns); + } +- __set_current_state(TASK_RUNNING); + +- /* +- * NOTE: we don't remove ourselves from the waitqueue because +- * we are the only user of it. +- */ ++ /* Prepare to wait on uaddr. */ ++ ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); ++ if (ret) ++ goto out; ++ ++ /* queue_me and wait for wakeup, timeout, or a signal. */ ++ futex_wait_queue_me(hb, &q, to); + + /* If we were woken (and unqueued), we succeeded, whatever. 
*/ + ret = 0; + if (!unqueue_me(&q)) + goto out_put_key; + ret = -ETIMEDOUT; +- if (rem) ++ if (to && !to->task) + goto out_put_key; + + /* +@@ -1310,7 +1847,7 @@ retry: + restart->futex.val = val; + restart->futex.time = abs_time->tv64; + restart->futex.bitset = bitset; +- restart->futex.flags = 0; ++ restart->futex.flags = FLAGS_HAS_TIMEOUT; + + if (fshared) + restart->futex.flags |= FLAGS_SHARED; +@@ -1322,6 +1859,10 @@ retry: + out_put_key: + put_futex_key(fshared, &q.key); + out: ++ if (to) { ++ hrtimer_cancel(&to->timer); ++ destroy_hrtimer_on_stack(&to->timer); ++ } + return ret; + } + +@@ -1330,13 +1871,16 @@ static long futex_wait_restart(struct re + { + u32 __user *uaddr = (u32 __user *)restart->futex.uaddr; + int fshared = 0; +- ktime_t t; ++ ktime_t t, *tp = NULL; + +- t.tv64 = restart->futex.time; ++ if (restart->futex.flags & FLAGS_HAS_TIMEOUT) { ++ t.tv64 = restart->futex.time; ++ tp = &t; ++ } + restart->fn = do_no_restart_syscall; + if (restart->futex.flags & FLAGS_SHARED) + fshared = 1; +- return (long)futex_wait(uaddr, fshared, restart->futex.val, &t, ++ return (long)futex_wait(uaddr, fshared, restart->futex.val, tp, + restart->futex.bitset, + restart->futex.flags & FLAGS_CLOCKRT); + } +@@ -1352,11 +1896,9 @@ static int futex_lock_pi(u32 __user *uad + int detect, ktime_t *time, int trylock) + { + struct hrtimer_sleeper timeout, *to = NULL; +- struct task_struct *curr = current; + struct futex_hash_bucket *hb; +- u32 uval, newval, curval; + struct futex_q q; +- int ret, lock_taken, ownerdied = 0, attempt = 0; ++ int res, ret; + + if (refill_pi_state_cache()) + return -ENOMEM; +@@ -1370,117 +1912,34 @@ static int futex_lock_pi(u32 __user *uad + } + + q.pi_state = NULL; ++ q.rt_waiter = NULL; + retry: + q.key = FUTEX_KEY_INIT; + ret = get_futex_key(uaddr, fshared, &q.key, VERIFY_WRITE); + if (unlikely(ret != 0)) + goto out; + +-retry_unlocked: ++retry_private: + hb = queue_lock(&q); + +-retry_locked: +- ret = lock_taken = 0; +- +- /* +- * To avoid races, we attempt to take the lock here again +- * (by doing a 0 -> TID atomic cmpxchg), while holding all +- * the locks. It will most likely not succeed. +- */ +- newval = task_pid_vnr(current); +- +- curval = cmpxchg_futex_value_locked(uaddr, 0, newval); +- +- if (unlikely(curval == -EFAULT)) +- goto uaddr_faulted; +- +- /* +- * Detect deadlocks. In case of REQUEUE_PI this is a valid +- * situation and we return success to user space. +- */ +- if (unlikely((curval & FUTEX_TID_MASK) == task_pid_vnr(current))) { +- ret = -EDEADLK; +- goto out_unlock_put_key; +- } +- +- /* +- * Surprise - we got the lock. Just return to userspace: +- */ +- if (unlikely(!curval)) +- goto out_unlock_put_key; +- +- uval = curval; +- +- /* +- * Set the WAITERS flag, so the owner will know it has someone +- * to wake at next unlock +- */ +- newval = curval | FUTEX_WAITERS; +- +- /* +- * There are two cases, where a futex might have no owner (the +- * owner TID is 0): OWNER_DIED. We take over the futex in this +- * case. We also do an unconditional take over, when the owner +- * of the futex died. +- * +- * This is safe as we are protected by the hash bucket lock ! 
+- */ +- if (unlikely(ownerdied || !(curval & FUTEX_TID_MASK))) { +- /* Keep the OWNER_DIED bit */ +- newval = (curval & ~FUTEX_TID_MASK) | task_pid_vnr(current); +- ownerdied = 0; +- lock_taken = 1; +- } +- +- curval = cmpxchg_futex_value_locked(uaddr, uval, newval); +- +- if (unlikely(curval == -EFAULT)) +- goto uaddr_faulted; +- if (unlikely(curval != uval)) +- goto retry_locked; +- +- /* +- * We took the lock due to owner died take over. +- */ +- if (unlikely(lock_taken)) +- goto out_unlock_put_key; +- +- /* +- * We dont have the lock. Look up the PI state (or create it if +- * we are the first waiter): +- */ +- ret = lookup_pi_state(uval, hb, &q.key, &q.pi_state); +- ++ ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0); + if (unlikely(ret)) { + switch (ret) { +- ++ case 1: ++ /* We got the lock. */ ++ ret = 0; ++ goto out_unlock_put_key; ++ case -EFAULT: ++ goto uaddr_faulted; + case -EAGAIN: + /* + * Task is exiting and we just wait for the + * exit to complete. + */ + queue_unlock(&q, hb); ++ put_futex_key(fshared, &q.key); + cond_resched(); + goto retry; +- +- case -ESRCH: +- /* +- * No owner found for this futex. Check if the +- * OWNER_DIED bit is set to figure out whether +- * this is a robust futex or not. +- */ +- if (get_futex_value_locked(&curval, uaddr)) +- goto uaddr_faulted; +- +- /* +- * We simply start over in case of a robust +- * futex. The code above will take the futex +- * and return happy. +- */ +- if (curval & FUTEX_OWNER_DIED) { +- ownerdied = 1; +- goto retry_locked; +- } + default: + goto out_unlock_put_key; + } +@@ -1504,74 +1963,29 @@ retry_locked: + } + + spin_lock(q.lock_ptr); ++ /* ++ * Fixup the pi_state owner and possibly acquire the lock if we ++ * haven't already. ++ */ ++ res = fixup_owner(uaddr, fshared, &q, !ret); ++ /* ++ * If fixup_owner() returned an error, proprogate that. If it acquired ++ * the lock, clear our -ETIMEDOUT or -EINTR. ++ */ ++ if (res) ++ ret = (res < 0) ? res : 0; + +- if (!ret) { +- /* +- * Got the lock. We might not be the anticipated owner +- * if we did a lock-steal - fix up the PI-state in +- * that case: +- */ +- if (q.pi_state->owner != curr) +- ret = fixup_pi_state_owner(uaddr, &q, curr, fshared); +- } else { +- /* +- * Catch the rare case, where the lock was released +- * when we were on the way back before we locked the +- * hash bucket. +- */ +- if (q.pi_state->owner == curr) { +- /* +- * Try to get the rt_mutex now. This might +- * fail as some other task acquired the +- * rt_mutex after we removed ourself from the +- * rt_mutex waiters list. +- */ +- if (rt_mutex_trylock(&q.pi_state->pi_mutex)) +- ret = 0; +- else { +- /* +- * pi_state is incorrect, some other +- * task did a lock steal and we +- * returned due to timeout or signal +- * without taking the rt_mutex. Too +- * late. We can access the +- * rt_mutex_owner without locking, as +- * the other task is now blocked on +- * the hash bucket lock. Fix the state +- * up. +- */ +- struct task_struct *owner; +- int res; +- +- owner = rt_mutex_owner(&q.pi_state->pi_mutex); +- res = fixup_pi_state_owner(uaddr, &q, owner, +- fshared); +- +- /* propagate -EFAULT, if the fixup failed */ +- if (res) +- ret = res; +- } +- } else { +- /* +- * Paranoia check. 
If we did not take the lock +- * in the trylock above, then we should not be +- * the owner of the rtmutex, neither the real +- * nor the pending one: +- */ +- if (rt_mutex_owner(&q.pi_state->pi_mutex) == curr) +- printk(KERN_ERR "futex_lock_pi: ret = %d " +- "pi-mutex: %p pi-state %p\n", ret, +- q.pi_state->pi_mutex.owner, +- q.pi_state->owner); +- } +- } ++ /* ++ * If fixup_owner() faulted and was unable to handle the fault, unlock ++ * it and return the fault to userspace. ++ */ ++ if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) ++ rt_mutex_unlock(&q.pi_state->pi_mutex); + + /* Unqueue and drop the lock */ + unqueue_me_pi(&q); + +- if (to) +- destroy_hrtimer_on_stack(&to->timer); +- return ret != -EINTR ? ret : -ERESTARTNOINTR; ++ goto out; + + out_unlock_put_key: + queue_unlock(&q, hb); +@@ -1581,32 +1995,20 @@ out_put_key: + out: + if (to) + destroy_hrtimer_on_stack(&to->timer); +- return ret; ++ return ret != -EINTR ? ret : -ERESTARTNOINTR; + + uaddr_faulted: +- /* +- * We have to r/w *(int __user *)uaddr, and we have to modify it +- * atomically. Therefore, if we continue to fault after get_user() +- * below, we need to handle the fault ourselves, while still holding +- * the mmap_sem. This can occur if the uaddr is under contention as +- * we have to drop the mmap_sem in order to call get_user(). +- */ + queue_unlock(&q, hb); + +- if (attempt++) { +- ret = futex_handle_fault((unsigned long)uaddr, attempt); +- if (ret) +- goto out_put_key; +- goto retry_unlocked; +- } ++ ret = get_user_writeable(uaddr); ++ if (ret) ++ goto out_put_key; + +- ret = get_user(uval, uaddr); +- if (!ret) +- goto retry; ++ if (!fshared) ++ goto retry_private; + +- if (to) +- destroy_hrtimer_on_stack(&to->timer); +- return ret; ++ put_futex_key(fshared, &q.key); ++ goto retry; + } + + /* +@@ -1621,7 +2023,7 @@ static int futex_unlock_pi(u32 __user *u + u32 uval; + struct plist_head *head; + union futex_key key = FUTEX_KEY_INIT; +- int ret, attempt = 0; ++ int ret; + + retry: + if (get_user(uval, uaddr)) +@@ -1637,7 +2039,6 @@ retry: + goto out; + + hb = hash_futex(&key); +-retry_unlocked: + spin_lock(&hb->lock); + + /* +@@ -1694,27 +2095,236 @@ out: + return ret; + + pi_faulted: ++ spin_unlock(&hb->lock); ++ put_futex_key(fshared, &key); ++ ++ ret = get_user_writeable(uaddr); ++ if (!ret) ++ goto retry; ++ ++ return ret; ++} ++ ++/** ++ * handle_early_requeue_pi_wakeup() - Detect early wakeup on the initial futex ++ * @hb: the hash_bucket futex_q was original enqueued on ++ * @q: the futex_q woken while waiting to be requeued ++ * @key2: the futex_key of the requeue target futex ++ * @timeout: the timeout associated with the wait (NULL if none) ++ * ++ * Detect if the task was woken on the initial futex as opposed to the requeue ++ * target futex. If so, determine if it was a timeout or a signal that caused ++ * the wakeup and return the appropriate error code to the caller. Must be ++ * called with the hb lock held. ++ * ++ * Returns ++ * 0 - no early wakeup detected ++ * <0 - -ETIMEDOUT or -ERESTARTNOINTR ++ */ ++static inline ++int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, ++ struct futex_q *q, union futex_key *key2, ++ struct hrtimer_sleeper *timeout) ++{ ++ int ret = 0; ++ ++ /* ++ * With the hb lock held, we avoid races while we process the wakeup. ++ * We only need to hold hb (and not hb2) to ensure atomicity as the ++ * wakeup code can't change q.key from uaddr to uaddr2 if we hold hb. 
++ * It can't be requeued from uaddr2 to something else since we don't ++ * support a PI aware source futex for requeue. ++ */ ++ if (!match_futex(&q->key, key2)) { ++ WARN_ON(q->lock_ptr && (&hb->lock != q->lock_ptr)); ++ /* ++ * We were woken prior to requeue by a timeout or a signal. ++ * Unqueue the futex_q and determine which it was. ++ */ ++ plist_del(&q->list, &q->list.plist); ++ drop_futex_key_refs(&q->key); ++ ++ if (timeout && !timeout->task) ++ ret = -ETIMEDOUT; ++ else ++ ret = -ERESTARTNOINTR; ++ } ++ return ret; ++} ++ ++/** ++ * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 ++ * @uaddr: the futex we initialyl wait on (non-pi) ++ * @fshared: whether the futexes are shared (1) or not (0). They must be ++ * the same type, no requeueing from private to shared, etc. ++ * @val: the expected value of uaddr ++ * @abs_time: absolute timeout ++ * @bitset: 32 bit wakeup bitset set by userspace, defaults to all. ++ * @clockrt: whether to use CLOCK_REALTIME (1) or CLOCK_MONOTONIC (0) ++ * @uaddr2: the pi futex we will take prior to returning to user-space ++ * ++ * The caller will wait on uaddr and will be requeued by futex_requeue() to ++ * uaddr2 which must be PI aware. Normal wakeup will wake on uaddr2 and ++ * complete the acquisition of the rt_mutex prior to returning to userspace. ++ * This ensures the rt_mutex maintains an owner when it has waiters; without ++ * one, the pi logic wouldn't know which task to boost/deboost, if there was a ++ * need to. ++ * ++ * We call schedule in futex_wait_queue_me() when we enqueue and return there ++ * via the following: ++ * 1) wakeup on uaddr2 after an atomic lock acquisition by futex_requeue() ++ * 2) wakeup on uaddr2 after a requeue and subsequent unlock ++ * 3) signal (before or after requeue) ++ * 4) timeout (before or after requeue) ++ * ++ * If 3, we setup a restart_block with futex_wait_requeue_pi() as the function. ++ * ++ * If 2, we may then block on trying to take the rt_mutex and return via: ++ * 5) successful lock ++ * 6) signal ++ * 7) timeout ++ * 8) other lock acquisition failure ++ * ++ * If 6, we setup a restart_block with futex_lock_pi() as the function. ++ * ++ * If 4 or 7, we cleanup and return with -ETIMEDOUT. ++ * ++ * Returns: ++ * 0 - On success ++ * <0 - On error ++ */ ++static int futex_wait_requeue_pi(u32 __user *uaddr, int fshared, ++ u32 val, ktime_t *abs_time, u32 bitset, ++ int clockrt, u32 __user *uaddr2) ++{ ++ struct hrtimer_sleeper timeout, *to = NULL; ++ struct rt_mutex_waiter rt_waiter; ++ struct rt_mutex *pi_mutex = NULL; ++ struct futex_hash_bucket *hb; ++ union futex_key key2; ++ struct futex_q q; ++ int res, ret; ++ ++ if (!bitset) ++ return -EINVAL; ++ ++ if (abs_time) { ++ to = &timeout; ++ hrtimer_init_on_stack(&to->timer, clockrt ? CLOCK_REALTIME : ++ CLOCK_MONOTONIC, HRTIMER_MODE_ABS); ++ hrtimer_init_sleeper(to, current); ++ hrtimer_set_expires_range_ns(&to->timer, *abs_time, ++ current->timer_slack_ns); ++ } ++ + /* +- * We have to r/w *(int __user *)uaddr, and we have to modify it +- * atomically. Therefore, if we continue to fault after get_user() +- * below, we need to handle the fault ourselves, while still holding +- * the mmap_sem. This can occur if the uaddr is under contention as +- * we have to drop the mmap_sem in order to call get_user(). ++ * The waiter is allocated on our stack, manipulated by the requeue ++ * code while we sleep on uaddr. 
+ */ ++ debug_rt_mutex_init_waiter(&rt_waiter); ++ rt_waiter.task = NULL; ++ ++ q.pi_state = NULL; ++ q.bitset = bitset; ++ q.rt_waiter = &rt_waiter; ++ ++ key2 = FUTEX_KEY_INIT; ++ ret = get_futex_key(uaddr2, fshared, &key2, VERIFY_WRITE); ++ if (unlikely(ret != 0)) ++ goto out; ++ ++ /* Prepare to wait on uaddr. */ ++ ret = futex_wait_setup(uaddr, val, fshared, &q, &hb); ++ if (ret) ++ goto out_key2; ++ ++ /* Queue the futex_q, drop the hb lock, wait for wakeup. */ ++ futex_wait_queue_me(hb, &q, to); ++ ++ spin_lock(&hb->lock); ++ ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to); + spin_unlock(&hb->lock); ++ if (ret) ++ goto out_put_keys; + +- if (attempt++) { +- ret = futex_handle_fault((unsigned long)uaddr, attempt); +- if (ret) +- goto out; +- uval = 0; +- goto retry_unlocked; ++ /* ++ * In order for us to be here, we know our q.key == key2, and since ++ * we took the hb->lock above, we also know that futex_requeue() has ++ * completed and we no longer have to concern ourselves with a wakeup ++ * race with the atomic proxy lock acquition by the requeue code. ++ */ ++ ++ /* Check if the requeue code acquired the second futex for us. */ ++ if (!q.rt_waiter) { ++ /* ++ * Got the lock. We might not be the anticipated owner if we ++ * did a lock-steal - fix up the PI-state in that case. ++ */ ++ if (q.pi_state && (q.pi_state->owner != current)) { ++ spin_lock(q.lock_ptr); ++ ret = fixup_pi_state_owner(uaddr2, &q, current, ++ fshared); ++ spin_unlock(q.lock_ptr); ++ } ++ } else { ++ /* ++ * We have been woken up by futex_unlock_pi(), a timeout, or a ++ * signal. futex_unlock_pi() will not destroy the lock_ptr nor ++ * the pi_state. ++ */ ++ WARN_ON(!&q.pi_state); ++ pi_mutex = &q.pi_state->pi_mutex; ++ ret = rt_mutex_finish_proxy_lock(pi_mutex, to, &rt_waiter, 1); ++ debug_rt_mutex_free_waiter(&rt_waiter); ++ ++ spin_lock(q.lock_ptr); ++ /* ++ * Fixup the pi_state owner and possibly acquire the lock if we ++ * haven't already. ++ */ ++ res = fixup_owner(uaddr2, fshared, &q, !ret); ++ /* ++ * If fixup_owner() returned an error, proprogate that. If it ++ * acquired the lock, clear our -ETIMEDOUT or -EINTR. ++ */ ++ if (res) ++ ret = (res < 0) ? res : 0; ++ ++ /* Unqueue and drop the lock. */ ++ unqueue_me_pi(&q); + } + +- ret = get_user(uval, uaddr); +- if (!ret) +- goto retry; ++ /* ++ * If fixup_pi_state_owner() faulted and was unable to handle the ++ * fault, unlock the rt_mutex and return the fault to userspace. ++ */ ++ if (ret == -EFAULT) { ++ if (rt_mutex_owner(pi_mutex) == current) ++ rt_mutex_unlock(pi_mutex); ++ } else if (ret == -EINTR) { ++ /* ++ * We've already been requeued, but we have no way to ++ * restart by calling futex_lock_pi() directly. We ++ * could restart the syscall, but that will look at ++ * the user space value and return right away. So we ++ * drop back with EWOULDBLOCK to tell user space that ++ * "val" has been changed. That's the same what the ++ * restart of the syscall would do in ++ * futex_wait_setup(). 
++ */ ++ ret = -EWOULDBLOCK; ++ } ++ ++out_put_keys: ++ put_futex_key(fshared, &q.key); ++out_key2: ++ put_futex_key(fshared, &key2); + ++out: ++ if (to) { ++ hrtimer_cancel(&to->timer); ++ destroy_hrtimer_on_stack(&to->timer); ++ } + return ret; + } + +@@ -1940,7 +2550,7 @@ long do_futex(u32 __user *uaddr, int op, + fshared = 1; + + clockrt = op & FUTEX_CLOCK_REALTIME; +- if (clockrt && cmd != FUTEX_WAIT_BITSET) ++ if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI) + return -ENOSYS; + + switch (cmd) { +@@ -1955,10 +2565,11 @@ long do_futex(u32 __user *uaddr, int op, + ret = futex_wake(uaddr, fshared, val, val3); + break; + case FUTEX_REQUEUE: +- ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL); ++ ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0); + break; + case FUTEX_CMP_REQUEUE: +- ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3); ++ ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, ++ 0); + break; + case FUTEX_WAKE_OP: + ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3); +@@ -1975,6 +2586,15 @@ long do_futex(u32 __user *uaddr, int op, + if (futex_cmpxchg_enabled) + ret = futex_lock_pi(uaddr, fshared, 0, timeout, 1); + break; ++ case FUTEX_WAIT_REQUEUE_PI: ++ val3 = FUTEX_BITSET_MATCH_ANY; ++ ret = futex_wait_requeue_pi(uaddr, fshared, val, timeout, val3, ++ clockrt, uaddr2); ++ break; ++ case FUTEX_CMP_REQUEUE_PI: ++ ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, ++ 1); ++ break; + default: + ret = -ENOSYS; + } +@@ -1992,7 +2612,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uad + int cmd = op & FUTEX_CMD_MASK; + + if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || +- cmd == FUTEX_WAIT_BITSET)) { ++ cmd == FUTEX_WAIT_BITSET || ++ cmd == FUTEX_WAIT_REQUEUE_PI)) { + if (copy_from_user(&ts, utime, sizeof(ts)) != 0) + return -EFAULT; + if (!timespec_valid(&ts)) +@@ -2004,11 +2625,11 @@ SYSCALL_DEFINE6(futex, u32 __user *, uad + tp = &t; + } + /* +- * requeue parameter in 'utime' if cmd == FUTEX_REQUEUE. ++ * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*. + * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP. + */ + if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE || +- cmd == FUTEX_WAKE_OP) ++ cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP) + val2 = (u32) (unsigned long) utime; + + return do_futex(uaddr, op, val, tp, uaddr2, val2, val3); +@@ -2034,7 +2655,11 @@ static int __init futex_init(void) + futex_cmpxchg_enabled = 1; + + for (i = 0; i < ARRAY_SIZE(futex_queues); i++) { ++#ifdef CONFIG_PREEMPT_RT ++ plist_head_init(&futex_queues[i].chain, NULL); ++#else + plist_head_init(&futex_queues[i].chain, &futex_queues[i].lock); ++#endif + spin_lock_init(&futex_queues[i].lock); + } + +Index: linux-2.6-tip/kernel/hung_task.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/hung_task.c +@@ -0,0 +1,217 @@ ++/* ++ * Detect Hung Task ++ * ++ * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * The number of tasks checked: ++ */ ++unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; ++ ++/* ++ * Limit number of tasks checked in a batch. ++ * ++ * This value controls the preemptibility of khungtaskd since preemption ++ * is disabled during the critical section. It also controls the size of ++ * the RCU grace period. 
So it needs to be upper-bound. ++ */ ++#define HUNG_TASK_BATCHING 1024 ++ ++/* ++ * Zero means infinite timeout - no checking done: ++ */ ++unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; ++ ++unsigned long __read_mostly sysctl_hung_task_warnings = 10; ++ ++static int __read_mostly did_panic; ++ ++static struct task_struct *watchdog_task; ++ ++/* ++ * Should we panic (and reboot, if panic_timeout= is set) when a ++ * hung task is detected: ++ */ ++unsigned int __read_mostly sysctl_hung_task_panic = ++ CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE; ++ ++static int __init hung_task_panic_setup(char *str) ++{ ++ sysctl_hung_task_panic = simple_strtoul(str, NULL, 0); ++ ++ return 1; ++} ++__setup("hung_task_panic=", hung_task_panic_setup); ++ ++static int ++hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr) ++{ ++ did_panic = 1; ++ ++ return NOTIFY_DONE; ++} ++ ++static struct notifier_block panic_block = { ++ .notifier_call = hung_task_panic, ++}; ++ ++static void check_hung_task(struct task_struct *t, unsigned long timeout) ++{ ++ unsigned long switch_count = t->nvcsw + t->nivcsw; ++ ++ /* ++ * Ensure the task is not frozen. ++ * Also, when a freshly created task is scheduled once, changes ++ * its state to TASK_UNINTERRUPTIBLE without having ever been ++ * switched out once, it musn't be checked. ++ */ ++ if (unlikely(t->flags & PF_FROZEN || !switch_count)) ++ return; ++ ++ if (switch_count != t->last_switch_count) { ++ t->last_switch_count = switch_count; ++ return; ++ } ++ if (!sysctl_hung_task_warnings) ++ return; ++ sysctl_hung_task_warnings--; ++ ++ /* ++ * Ok, the task did not get scheduled for more than 2 minutes, ++ * complain: ++ */ ++ printk(KERN_ERR "INFO: task %s:%d blocked for more than " ++ "%ld seconds.\n", t->comm, t->pid, timeout); ++ printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" ++ " disables this message.\n"); ++ sched_show_task(t); ++ __debug_show_held_locks(t); ++ ++ touch_nmi_watchdog(); ++ ++ if (sysctl_hung_task_panic) ++ panic("hung_task: blocked tasks"); ++} ++ ++/* ++ * To avoid extending the RCU grace period for an unbounded amount of time, ++ * periodically exit the critical section and enter a new one. ++ * ++ * For preemptible RCU it is sufficient to call rcu_read_unlock in order ++ * exit the grace period. For classic RCU, a reschedule is required. ++ */ ++static void rcu_lock_break(struct task_struct *g, struct task_struct *t) ++{ ++ get_task_struct(g); ++ get_task_struct(t); ++ rcu_read_unlock(); ++ cond_resched(); ++ rcu_read_lock(); ++ put_task_struct(t); ++ put_task_struct(g); ++} ++ ++/* ++ * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for ++ * a really long time (120 seconds). If that happens, print out ++ * a warning. ++ */ ++static void check_hung_uninterruptible_tasks(unsigned long timeout) ++{ ++ int max_count = sysctl_hung_task_check_count; ++ int batch_count = HUNG_TASK_BATCHING; ++ struct task_struct *g, *t; ++ ++ /* ++ * If the system crashed already then all bets are off, ++ * do not report extra hung tasks: ++ */ ++ if (test_taint(TAINT_DIE) || did_panic) ++ return; ++ ++ rcu_read_lock(); ++ do_each_thread(g, t) { ++ if (!--max_count) ++ goto unlock; ++ if (!--batch_count) { ++ batch_count = HUNG_TASK_BATCHING; ++ rcu_lock_break(g, t); ++ /* Exit if t or g was unhashed during refresh. 
*/ ++ if (t->state == TASK_DEAD || g->state == TASK_DEAD) ++ goto unlock; ++ } ++ /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ ++ if (t->state == TASK_UNINTERRUPTIBLE) ++ check_hung_task(t, timeout); ++ } while_each_thread(g, t); ++ unlock: ++ rcu_read_unlock(); ++} ++ ++static unsigned long timeout_jiffies(unsigned long timeout) ++{ ++ /* timeout of 0 will disable the watchdog */ ++ return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT; ++} ++ ++/* ++ * Process updating of timeout sysctl ++ */ ++int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, ++ struct file *filp, void __user *buffer, ++ size_t *lenp, loff_t *ppos) ++{ ++ int ret; ++ ++ ret = proc_doulongvec_minmax(table, write, filp, buffer, lenp, ppos); ++ ++ if (ret || !write) ++ goto out; ++ ++ wake_up_process(watchdog_task); ++ ++ out: ++ return ret; ++} ++ ++/* ++ * kthread which checks for tasks stuck in D state ++ */ ++static int watchdog(void *dummy) ++{ ++ set_user_nice(current, 0); ++ ++ for ( ; ; ) { ++ unsigned long timeout = sysctl_hung_task_timeout_secs; ++ ++ while (schedule_timeout_interruptible(timeout_jiffies(timeout))) ++ timeout = sysctl_hung_task_timeout_secs; ++ ++ check_hung_uninterruptible_tasks(timeout); ++ } ++ ++ return 0; ++} ++ ++static int __init hung_task_init(void) ++{ ++ atomic_notifier_chain_register(&panic_notifier_list, &panic_block); ++ watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); ++ ++ return 0; ++} ++ ++module_init(hung_task_init); +Index: linux-2.6-tip/kernel/irq/chip.c +=================================================================== +--- linux-2.6-tip.orig/kernel/irq/chip.c ++++ linux-2.6-tip/kernel/irq/chip.c +@@ -46,7 +46,10 @@ void dynamic_irq_init(unsigned int irq) + desc->irq_count = 0; + desc->irqs_unhandled = 0; + #ifdef CONFIG_SMP +- cpumask_setall(&desc->affinity); ++ cpumask_setall(desc->affinity); ++#ifdef CONFIG_GENERIC_PENDING_IRQ ++ cpumask_clear(desc->pending_mask); ++#endif + #endif + spin_unlock_irqrestore(&desc->lock, flags); + } +@@ -78,6 +81,7 @@ void dynamic_irq_cleanup(unsigned int ir + desc->handle_irq = handle_bad_irq; + desc->chip = &no_irq_chip; + desc->name = NULL; ++ clear_kstat_irqs(desc); + spin_unlock_irqrestore(&desc->lock, flags); + } + +@@ -289,8 +293,10 @@ static inline void mask_ack_irq(struct i + if (desc->chip->mask_ack) + desc->chip->mask_ack(irq); + else { +- desc->chip->mask(irq); +- desc->chip->ack(irq); ++ if (desc->chip->mask) ++ desc->chip->mask(irq); ++ if (desc->chip->ack) ++ desc->chip->ack(irq); + } + } + +@@ -314,8 +320,10 @@ handle_simple_irq(unsigned int irq, stru + + spin_lock(&desc->lock); + +- if (unlikely(desc->status & IRQ_INPROGRESS)) ++ if (unlikely(desc->status & IRQ_INPROGRESS)) { ++ desc->status |= IRQ_PENDING; + goto out_unlock; ++ } + desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); + kstat_incr_irqs_this_cpu(irq, desc); + +@@ -324,6 +332,11 @@ handle_simple_irq(unsigned int irq, stru + goto out_unlock; + + desc->status |= IRQ_INPROGRESS; ++ /* ++ * hardirq redirection to the irqd process context: ++ */ ++ if (redirect_hardirq(desc)) ++ goto out_unlock; + spin_unlock(&desc->lock); + + action_ret = handle_IRQ_event(irq, action); +@@ -332,6 +345,8 @@ handle_simple_irq(unsigned int irq, stru + + spin_lock(&desc->lock); + desc->status &= ~IRQ_INPROGRESS; ++ if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) ++ desc->chip->unmask(irq); + out_unlock: + spin_unlock(&desc->lock); + } +@@ -370,6 +385,13 @@ handle_level_irq(unsigned int irq, struc + goto out_unlock; + + desc->status |= 
IRQ_INPROGRESS; ++ ++ /* ++ * hardirq redirection to the irqd process context: ++ */ ++ if (redirect_hardirq(desc)) ++ goto out_unlock; ++ + spin_unlock(&desc->lock); + + action_ret = handle_IRQ_event(irq, action); +@@ -403,18 +425,16 @@ handle_fasteoi_irq(unsigned int irq, str + + spin_lock(&desc->lock); + +- if (unlikely(desc->status & IRQ_INPROGRESS)) +- goto out; +- + desc->status &= ~(IRQ_REPLAY | IRQ_WAITING); + kstat_incr_irqs_this_cpu(irq, desc); + + /* +- * If its disabled or no action available ++ * If it's running, disabled or no action available + * then mask it and get out of here: + */ + action = desc->action; +- if (unlikely(!action || (desc->status & IRQ_DISABLED))) { ++ if (unlikely(!action || (desc->status & (IRQ_INPROGRESS | ++ IRQ_DISABLED)))) { + desc->status |= IRQ_PENDING; + if (desc->chip->mask) + desc->chip->mask(irq); +@@ -422,6 +442,15 @@ handle_fasteoi_irq(unsigned int irq, str + } + + desc->status |= IRQ_INPROGRESS; ++ /* ++ * In the threaded case we fall back to a mask+eoi sequence: ++ */ ++ if (redirect_hardirq(desc)) { ++ if (desc->chip->mask) ++ desc->chip->mask(irq); ++ goto out; ++ } ++ + desc->status &= ~IRQ_PENDING; + spin_unlock(&desc->lock); + +@@ -431,10 +460,11 @@ handle_fasteoi_irq(unsigned int irq, str + + spin_lock(&desc->lock); + desc->status &= ~IRQ_INPROGRESS; ++ if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) ++ desc->chip->unmask(irq); + out: + desc->chip->eoi(irq); + desc = irq_remap_to_desc(irq, desc); +- + spin_unlock(&desc->lock); + } + +@@ -476,12 +506,19 @@ handle_edge_irq(unsigned int irq, struct + kstat_incr_irqs_this_cpu(irq, desc); + + /* Start handling the irq */ +- desc->chip->ack(irq); ++ if (desc->chip->ack) ++ desc->chip->ack(irq); + desc = irq_remap_to_desc(irq, desc); + + /* Mark the IRQ currently in progress.*/ + desc->status |= IRQ_INPROGRESS; + ++ /* ++ * hardirq redirection to the irqd process context: ++ */ ++ if (redirect_hardirq(desc)) ++ goto out_unlock; ++ + do { + struct irqaction *action = desc->action; + irqreturn_t action_ret; +Index: linux-2.6-tip/kernel/irq/handle.c +=================================================================== +--- linux-2.6-tip.orig/kernel/irq/handle.c ++++ linux-2.6-tip/kernel/irq/handle.c +@@ -13,10 +13,13 @@ + #include + #include + #include ++#include + #include + #include + #include + #include ++#include ++#include + + #include "internals.h" + +@@ -69,33 +72,33 @@ int nr_irqs = NR_IRQS; + EXPORT_SYMBOL_GPL(nr_irqs); + + #ifdef CONFIG_SPARSE_IRQ ++ + static struct irq_desc irq_desc_init = { + .irq = -1, + .status = IRQ_DISABLED, + .chip = &no_irq_chip, + .handle_irq = handle_bad_irq, + .depth = 1, +- .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), +-#ifdef CONFIG_SMP +- .affinity = CPU_MASK_ALL +-#endif ++ .lock = RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock), + }; + + void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr) + { +- unsigned long bytes; +- char *ptr; + int node; +- +- /* Compute how many bytes we need per irq and allocate them */ +- bytes = nr * sizeof(unsigned int); ++ void *ptr; + + node = cpu_to_node(cpu); +- ptr = kzalloc_node(bytes, GFP_ATOMIC, node); +- printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n", cpu, node); ++ ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), GFP_ATOMIC, node); + +- if (ptr) +- desc->kstat_irqs = (unsigned int *)ptr; ++ /* ++ * don't overwite if can not get new one ++ * init_copy_kstat_irqs() could still use old one ++ */ ++ if (ptr) { ++ printk(KERN_DEBUG " alloc kstat_irqs on cpu %d node %d\n", ++ cpu, node); ++ 
desc->kstat_irqs = ptr; ++ } + } + + static void init_one_irq_desc(int irq, struct irq_desc *desc, int cpu) +@@ -103,6 +106,7 @@ static void init_one_irq_desc(int irq, s + memcpy(desc, &irq_desc_init, sizeof(struct irq_desc)); + + spin_lock_init(&desc->lock); ++ init_waitqueue_head(&desc->wait_for_handler); + desc->irq = irq; + #ifdef CONFIG_SMP + desc->cpu = cpu; +@@ -113,6 +117,10 @@ static void init_one_irq_desc(int irq, s + printk(KERN_ERR "can not alloc kstat_irqs\n"); + BUG_ON(1); + } ++ if (!init_alloc_desc_masks(desc, cpu, false)) { ++ printk(KERN_ERR "can not alloc irq_desc cpumasks\n"); ++ BUG_ON(1); ++ } + arch_init_chip_data(desc, cpu); + } + +@@ -121,7 +129,7 @@ static void init_one_irq_desc(int irq, s + */ + DEFINE_SPINLOCK(sparse_irq_lock); + +-struct irq_desc *irq_desc_ptrs[NR_IRQS] __read_mostly; ++struct irq_desc **irq_desc_ptrs __read_mostly; + + static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = { + [0 ... NR_IRQS_LEGACY-1] = { +@@ -130,15 +138,11 @@ static struct irq_desc irq_desc_legacy[N + .chip = &no_irq_chip, + .handle_irq = handle_bad_irq, + .depth = 1, +- .lock = __SPIN_LOCK_UNLOCKED(irq_desc_init.lock), +-#ifdef CONFIG_SMP +- .affinity = CPU_MASK_ALL +-#endif ++ .lock = RAW_SPIN_LOCK_UNLOCKED(irq_desc_init.lock), + } + }; + +-/* FIXME: use bootmem alloc ...*/ +-static unsigned int kstat_irqs_legacy[NR_IRQS_LEGACY][NR_CPUS]; ++static unsigned int *kstat_irqs_legacy; + + int __init early_irq_init(void) + { +@@ -148,18 +152,30 @@ int __init early_irq_init(void) + + init_irq_default_affinity(); + ++ /* initialize nr_irqs based on nr_cpu_ids */ ++ arch_probe_nr_irqs(); ++ printk(KERN_INFO "NR_IRQS:%d nr_irqs:%d\n", NR_IRQS, nr_irqs); ++ + desc = irq_desc_legacy; + legacy_count = ARRAY_SIZE(irq_desc_legacy); + ++ /* allocate irq_desc_ptrs array based on nr_irqs */ ++ irq_desc_ptrs = alloc_bootmem(nr_irqs * sizeof(void *)); ++ ++ /* allocate based on nr_cpu_ids */ ++ /* FIXME: invert kstat_irgs, and it'd be a per_cpu_alloc'd thing */ ++ kstat_irqs_legacy = alloc_bootmem(NR_IRQS_LEGACY * nr_cpu_ids * ++ sizeof(int)); ++ + for (i = 0; i < legacy_count; i++) { + desc[i].irq = i; +- desc[i].kstat_irqs = kstat_irqs_legacy[i]; ++ desc[i].kstat_irqs = kstat_irqs_legacy + i * nr_cpu_ids; + lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); +- ++ init_alloc_desc_masks(&desc[i], 0, true); + irq_desc_ptrs[i] = desc + i; + } + +- for (i = legacy_count; i < NR_IRQS; i++) ++ for (i = legacy_count; i < nr_irqs; i++) + irq_desc_ptrs[i] = NULL; + + return arch_early_irq_init(); +@@ -167,7 +183,10 @@ int __init early_irq_init(void) + + struct irq_desc *irq_to_desc(unsigned int irq) + { +- return (irq < NR_IRQS) ? 
irq_desc_ptrs[irq] : NULL; ++ if (irq_desc_ptrs && irq < nr_irqs) ++ return irq_desc_ptrs[irq]; ++ ++ return NULL; + } + + struct irq_desc *irq_to_desc_alloc_cpu(unsigned int irq, int cpu) +@@ -176,10 +195,9 @@ struct irq_desc *irq_to_desc_alloc_cpu(u + unsigned long flags; + int node; + +- if (irq >= NR_IRQS) { +- printk(KERN_WARNING "irq >= NR_IRQS in irq_to_desc_alloc: %d %d\n", +- irq, NR_IRQS); +- WARN_ON(1); ++ if (irq >= nr_irqs) { ++ WARN(1, "irq (%d) >= nr_irqs (%d) in irq_to_desc_alloc\n", ++ irq, nr_irqs); + return NULL; + } + +@@ -220,13 +238,11 @@ struct irq_desc irq_desc[NR_IRQS] __cach + .chip = &no_irq_chip, + .handle_irq = handle_bad_irq, + .depth = 1, +- .lock = __SPIN_LOCK_UNLOCKED(irq_desc->lock), +-#ifdef CONFIG_SMP +- .affinity = CPU_MASK_ALL +-#endif ++ .lock = RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock), + } + }; + ++static unsigned int kstat_irqs_all[NR_IRQS][NR_CPUS]; + int __init early_irq_init(void) + { + struct irq_desc *desc; +@@ -235,12 +251,16 @@ int __init early_irq_init(void) + + init_irq_default_affinity(); + ++ printk(KERN_INFO "NR_IRQS:%d\n", NR_IRQS); ++ + desc = irq_desc; + count = ARRAY_SIZE(irq_desc); + +- for (i = 0; i < count; i++) ++ for (i = 0; i < count; i++) { + desc[i].irq = i; +- ++ init_alloc_desc_masks(&desc[i], 0, true); ++ desc[i].kstat_irqs = kstat_irqs_all[i]; ++ } + return arch_early_irq_init(); + } + +@@ -255,6 +275,11 @@ struct irq_desc *irq_to_desc_alloc_cpu(u + } + #endif /* !CONFIG_SPARSE_IRQ */ + ++void clear_kstat_irqs(struct irq_desc *desc) ++{ ++ memset(desc->kstat_irqs, 0, nr_cpu_ids * sizeof(*(desc->kstat_irqs))); ++} ++ + /* + * What should we do if we get a hw irq event on an illegal vector? + * Each architecture has to answer this themself. +@@ -316,6 +341,9 @@ irqreturn_t no_action(int cpl, void *dev + return IRQ_NONE; + } + ++DEFINE_TRACE(irq_handler_entry); ++DEFINE_TRACE(irq_handler_exit); ++ + /** + * handle_IRQ_event - irq action chain handler + * @irq: the interrupt number +@@ -328,25 +356,98 @@ irqreturn_t handle_IRQ_event(unsigned in + irqreturn_t ret, retval = IRQ_NONE; + unsigned int status = 0; + +- if (!(action->flags & IRQF_DISABLED)) +- local_irq_enable_in_hardirq(); ++#ifdef __i386__ ++ if (debug_direct_keyboard && irq == 1) ++ lockdep_off(); ++#endif ++ ++ /* ++ * Unconditionally enable interrupts for threaded ++ * IRQ handlers: ++ */ ++ if (!hardirq_count() || !(action->flags & IRQF_DISABLED)) ++ local_irq_enable(); + + do { ++ unsigned int preempt_count = preempt_count(); ++ ++ trace_irq_handler_entry(irq, action); + ret = action->handler(irq, action->dev_id); ++ trace_irq_handler_exit(irq, action, ret); ++ ++ if (preempt_count() != preempt_count) { ++ print_symbol("BUG: unbalanced irq-handler preempt count" ++ " in %s!\n", ++ (unsigned long) action->handler); ++ printk("entered with %08x, exited with %08x.\n", ++ preempt_count, preempt_count()); ++ dump_stack(); ++ preempt_count() = preempt_count; ++ } ++ + if (ret == IRQ_HANDLED) + status |= action->flags; + retval |= ret; + action = action->next; + } while (action); + +- if (status & IRQF_SAMPLE_RANDOM) ++ if (status & IRQF_SAMPLE_RANDOM) { ++ local_irq_enable(); + add_interrupt_randomness(irq); ++ } + local_irq_disable(); + ++#ifdef __i386__ ++ if (debug_direct_keyboard && irq == 1) ++ lockdep_on(); ++#endif + return retval; + } + ++/* ++ * Hack - used for development only. 
++ */ ++int __read_mostly debug_direct_keyboard = 0; ++ ++int __init debug_direct_keyboard_setup(char *str) ++{ ++ debug_direct_keyboard = 1; ++ printk(KERN_INFO "Switching IRQ 1 (keyboard) to to direct!\n"); ++#ifdef CONFIG_PREEMPT_RT ++ printk(KERN_INFO "WARNING: kernel may easily crash this way!\n"); ++#endif ++ return 1; ++} ++ ++__setup("debug_direct_keyboard", debug_direct_keyboard_setup); ++ ++int redirect_hardirq(struct irq_desc *desc) ++{ ++ /* ++ * Direct execution: ++ */ ++ if (!hardirq_preemption || (desc->status & IRQ_NODELAY) || ++ !desc->thread) ++ return 0; ++ ++#ifdef __i386__ ++ if (debug_direct_keyboard && desc->irq == 1) ++ return 0; ++#endif ++ ++ BUG_ON(!irqs_disabled()); ++ if (desc->thread && desc->thread->state != TASK_RUNNING) ++ wake_up_process(desc->thread); ++ ++ return 1; ++} ++ + #ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ ++ ++#ifdef CONFIG_ENABLE_WARN_DEPRECATED ++# warning __do_IRQ is deprecated. Please convert to proper flow handlers ++#endif ++ + /** + * __do_IRQ - original all in one highlevel IRQ handler + * @irq: the interrupt number +@@ -364,6 +465,11 @@ unsigned int __do_IRQ(unsigned int irq) + struct irqaction *action; + unsigned int status; + ++#ifdef CONFIG_PREEMPT_RT ++ printk(KERN_WARNING "__do_IRQ called for irq %d. " ++ "PREEMPT_RT will crash your system soon\n", irq); ++ printk(KERN_WARNING "I hope you have a fire-extinguisher handy!\n"); ++#endif + kstat_incr_irqs_this_cpu(irq, desc); + + if (CHECK_IRQ_PER_CPU(desc->status)) { +@@ -385,6 +491,13 @@ unsigned int __do_IRQ(unsigned int irq) + desc->chip->end(irq); + return 1; + } ++ /* ++ * If the task is currently running in user mode, don't ++ * detect soft lockups. If CONFIG_DETECT_SOFTLOCKUP is not ++ * configured, this should be optimized out. ++ */ ++ if (user_mode(get_irq_regs())) ++ touch_softlockup_watchdog(); + + spin_lock(&desc->lock); + if (desc->chip->ack) { +@@ -467,12 +580,10 @@ void early_init_irq_lock_class(void) + } + } + +-#ifdef CONFIG_SPARSE_IRQ + unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) + { + struct irq_desc *desc = irq_to_desc(irq); + return desc ? desc->kstat_irqs[cpu] : 0; + } +-#endif + EXPORT_SYMBOL(kstat_irqs_cpu); + +Index: linux-2.6-tip/kernel/irq/internals.h +=================================================================== +--- linux-2.6-tip.orig/kernel/irq/internals.h ++++ linux-2.6-tip/kernel/irq/internals.h +@@ -15,8 +15,20 @@ extern int __irq_set_trigger(struct irq_ + + extern struct lock_class_key irq_desc_lock_class; + extern void init_kstat_irqs(struct irq_desc *desc, int cpu, int nr); ++extern void clear_kstat_irqs(struct irq_desc *desc); + extern spinlock_t sparse_irq_lock; ++ ++#ifdef CONFIG_SPARSE_IRQ ++/* irq_desc_ptrs allocated at boot time */ ++extern struct irq_desc **irq_desc_ptrs; ++#else ++/* irq_desc_ptrs is a fixed size array */ + extern struct irq_desc *irq_desc_ptrs[NR_IRQS]; ++#endif ++ ++extern int redirect_hardirq(struct irq_desc *desc); ++ ++void recalculate_desc_flags(struct irq_desc *desc); + + #ifdef CONFIG_PROC_FS + extern void register_irq_proc(unsigned int irq, struct irq_desc *desc); +Index: linux-2.6-tip/kernel/irq/manage.c +=================================================================== +--- linux-2.6-tip.orig/kernel/irq/manage.c ++++ linux-2.6-tip/kernel/irq/manage.c +@@ -8,8 +8,10 @@ + */ + + #include +-#include + #include ++#include ++#include ++#include + #include + #include + +@@ -43,8 +45,12 @@ void synchronize_irq(unsigned int irq) + * Wait until we're out of the critical section. 
This might + * give the wrong answer due to the lack of memory barriers. + */ +- while (desc->status & IRQ_INPROGRESS) +- cpu_relax(); ++ if (hardirq_preemption && !(desc->status & IRQ_NODELAY)) ++ wait_event(desc->wait_for_handler, ++ !(desc->status & IRQ_INPROGRESS)); ++ else ++ while (desc->status & IRQ_INPROGRESS) ++ cpu_relax(); + + /* Ok, that indicated we're done: double-check carefully. */ + spin_lock_irqsave(&desc->lock, flags); +@@ -90,14 +96,14 @@ int irq_set_affinity(unsigned int irq, c + + #ifdef CONFIG_GENERIC_PENDING_IRQ + if (desc->status & IRQ_MOVE_PCNTXT || desc->status & IRQ_DISABLED) { +- cpumask_copy(&desc->affinity, cpumask); ++ cpumask_copy(desc->affinity, cpumask); + desc->chip->set_affinity(irq, cpumask); + } else { + desc->status |= IRQ_MOVE_PENDING; +- cpumask_copy(&desc->pending_mask, cpumask); ++ cpumask_copy(desc->pending_mask, cpumask); + } + #else +- cpumask_copy(&desc->affinity, cpumask); ++ cpumask_copy(desc->affinity, cpumask); + desc->chip->set_affinity(irq, cpumask); + #endif + desc->status |= IRQ_AFFINITY_SET; +@@ -109,7 +115,7 @@ int irq_set_affinity(unsigned int irq, c + /* + * Generic version of the affinity autoselector. + */ +-int do_irq_select_affinity(unsigned int irq, struct irq_desc *desc) ++static int setup_affinity(unsigned int irq, struct irq_desc *desc) + { + if (!irq_can_set_affinity(irq)) + return 0; +@@ -119,21 +125,21 @@ int do_irq_select_affinity(unsigned int + * one of the targets is online. + */ + if (desc->status & (IRQ_AFFINITY_SET | IRQ_NO_BALANCING)) { +- if (cpumask_any_and(&desc->affinity, cpu_online_mask) ++ if (cpumask_any_and(desc->affinity, cpu_online_mask) + < nr_cpu_ids) + goto set_affinity; + else + desc->status &= ~IRQ_AFFINITY_SET; + } + +- cpumask_and(&desc->affinity, cpu_online_mask, irq_default_affinity); ++ cpumask_and(desc->affinity, cpu_online_mask, irq_default_affinity); + set_affinity: +- desc->chip->set_affinity(irq, &desc->affinity); ++ desc->chip->set_affinity(irq, desc->affinity); + + return 0; + } + #else +-static inline int do_irq_select_affinity(unsigned int irq, struct irq_desc *d) ++static inline int setup_affinity(unsigned int irq, struct irq_desc *d) + { + return irq_select_affinity(irq); + } +@@ -149,14 +155,14 @@ int irq_select_affinity_usr(unsigned int + int ret; + + spin_lock_irqsave(&desc->lock, flags); +- ret = do_irq_select_affinity(irq, desc); ++ ret = setup_affinity(irq, desc); + spin_unlock_irqrestore(&desc->lock, flags); + + return ret; + } + + #else +-static inline int do_irq_select_affinity(int irq, struct irq_desc *desc) ++static inline int setup_affinity(unsigned int irq, struct irq_desc *desc) + { + return 0; + } +@@ -255,6 +261,14 @@ void enable_irq(unsigned int irq) + spin_lock_irqsave(&desc->lock, flags); + __enable_irq(desc, irq); + spin_unlock_irqrestore(&desc->lock, flags); ++#ifdef CONFIG_HARDIRQS_SW_RESEND ++ /* ++ * Do a bh disable/enable pair to trigger any pending ++ * irq resend logic: ++ */ ++ local_bh_disable(); ++ local_bh_enable(); ++#endif + } + EXPORT_SYMBOL(enable_irq); + +@@ -317,6 +331,21 @@ int set_irq_wake(unsigned int irq, unsig + EXPORT_SYMBOL(set_irq_wake); + + /* ++ * If any action has IRQF_NODELAY then turn IRQ_NODELAY on: ++ */ ++void recalculate_desc_flags(struct irq_desc *desc) ++{ ++ struct irqaction *action; ++ ++ desc->status &= ~IRQ_NODELAY; ++ for (action = desc->action ; action; action = action->next) ++ if (action->flags & IRQF_NODELAY) ++ desc->status |= IRQ_NODELAY; ++} ++ ++static int start_irq_thread(int irq, struct irq_desc *desc); ++ ++/* + 
* Internal function that tells the architecture code whether a + * particular irq has been exclusively allocated or is available + * for driver use. +@@ -389,9 +418,9 @@ int __irq_set_trigger(struct irq_desc *d + * allocate special interrupts that are part of the architecture. + */ + static int +-__setup_irq(unsigned int irq, struct irq_desc * desc, struct irqaction *new) ++__setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) + { +- struct irqaction *old, **p; ++ struct irqaction *old, **old_ptr; + const char *old_name = NULL; + unsigned long flags; + int shared = 0; +@@ -419,12 +448,15 @@ __setup_irq(unsigned int irq, struct irq + rand_initialize_irq(irq); + } + ++ if (!(new->flags & IRQF_NODELAY)) ++ if (start_irq_thread(irq, desc)) ++ return -ENOMEM; + /* + * The following block of code has to be executed atomically + */ + spin_lock_irqsave(&desc->lock, flags); +- p = &desc->action; +- old = *p; ++ old_ptr = &desc->action; ++ old = *old_ptr; + if (old) { + /* + * Can't share interrupts unless both agree to and are +@@ -447,8 +479,8 @@ __setup_irq(unsigned int irq, struct irq + + /* add new interrupt at end of irq queue */ + do { +- p = &old->next; +- old = *p; ++ old_ptr = &old->next; ++ old = *old_ptr; + } while (old); + shared = 1; + } +@@ -488,7 +520,7 @@ __setup_irq(unsigned int irq, struct irq + desc->status |= IRQ_NO_BALANCING; + + /* Set default affinity mask once everything is setup */ +- do_irq_select_affinity(irq, desc); ++ setup_affinity(irq, desc); + + } else if ((new->flags & IRQF_TRIGGER_MASK) + && (new->flags & IRQF_TRIGGER_MASK) +@@ -499,11 +531,17 @@ __setup_irq(unsigned int irq, struct irq + (int)(new->flags & IRQF_TRIGGER_MASK)); + } + +- *p = new; ++ *old_ptr = new; ++ ++ /* ++ * Propagate any possible IRQF_NODELAY flag into IRQ_NODELAY: ++ */ ++ recalculate_desc_flags(desc); + + /* Reset broken irq detection when installing new handler */ + desc->irq_count = 0; + desc->irqs_unhandled = 0; ++ init_waitqueue_head(&desc->wait_for_handler); + + /* + * Check whether we disabled the irq via the spurious handler +@@ -518,7 +556,7 @@ __setup_irq(unsigned int irq, struct irq + + new->irq = irq; + register_irq_proc(irq, desc); +- new->dir = NULL; ++ new->dir = new->threaded = NULL; + register_handler_proc(irq, new); + + return 0; +@@ -549,90 +587,118 @@ int setup_irq(unsigned int irq, struct i + + return __setup_irq(irq, desc, act); + } ++EXPORT_SYMBOL_GPL(setup_irq); + +-/** +- * free_irq - free an interrupt +- * @irq: Interrupt line to free +- * @dev_id: Device identity to free +- * +- * Remove an interrupt handler. The handler is removed and if the +- * interrupt line is no longer in use by any driver it is disabled. +- * On a shared IRQ the caller must ensure the interrupt is disabled +- * on the card it drives before calling this function. The function +- * does not return until any executing interrupts for this IRQ +- * have completed. +- * +- * This function must not be called from interrupt context. ++ /* ++ * Internal function to unregister an irqaction - used to free ++ * regular and special interrupts that are part of the architecture. 
+ */ +-void free_irq(unsigned int irq, void *dev_id) ++static struct irqaction *__free_irq(unsigned int irq, void *dev_id) + { + struct irq_desc *desc = irq_to_desc(irq); +- struct irqaction **p; ++ struct irqaction *action, **action_ptr; + unsigned long flags; + +- WARN_ON(in_interrupt()); ++ WARN(in_interrupt(), "Trying to free IRQ %d from IRQ context!\n", irq); + + if (!desc) +- return; ++ return NULL; + + spin_lock_irqsave(&desc->lock, flags); +- p = &desc->action; ++ ++ /* ++ * There can be multiple actions per IRQ descriptor, find the right ++ * one based on the dev_id: ++ */ ++ action_ptr = &desc->action; + for (;;) { +- struct irqaction *action = *p; ++ action = *action_ptr; + +- if (action) { +- struct irqaction **pp = p; ++ if (!action) { ++ WARN(1, "Trying to free already-free IRQ %d\n", irq); ++ spin_unlock_irqrestore(&desc->lock, flags); ++ ++ return NULL; ++ } + +- p = &action->next; +- if (action->dev_id != dev_id) +- continue; ++ if (action->dev_id == dev_id) ++ break; ++ action_ptr = &action->next; ++ } + +- /* Found it - now remove it from the list of entries */ +- *pp = action->next; ++ /* Found it - now remove it from the list of entries: */ ++ *action_ptr = action->next; + +- /* Currently used only by UML, might disappear one day.*/ ++ /* Currently used only by UML, might disappear one day: */ + #ifdef CONFIG_IRQ_RELEASE_METHOD +- if (desc->chip->release) +- desc->chip->release(irq, dev_id); ++ if (desc->chip->release) ++ desc->chip->release(irq, dev_id); + #endif + +- if (!desc->action) { +- desc->status |= IRQ_DISABLED; +- if (desc->chip->shutdown) +- desc->chip->shutdown(irq); +- else +- desc->chip->disable(irq); +- } +- spin_unlock_irqrestore(&desc->lock, flags); +- unregister_handler_proc(irq, action); ++ /* If this was the last handler, shut down the IRQ line: */ ++ if (!desc->action) { ++ desc->status |= IRQ_DISABLED; ++ if (desc->chip->shutdown) ++ desc->chip->shutdown(irq); ++ else ++ desc->chip->disable(irq); ++ } ++ recalculate_desc_flags(desc); ++ spin_unlock_irqrestore(&desc->lock, flags); ++ ++ unregister_handler_proc(irq, action); ++ ++ /* Make sure it's not being used on another CPU: */ ++ synchronize_irq(irq); + +- /* Make sure it's not being used on another CPU */ +- synchronize_irq(irq); +-#ifdef CONFIG_DEBUG_SHIRQ +- /* +- * It's a shared IRQ -- the driver ought to be +- * prepared for it to happen even now it's +- * being freed, so let's make sure.... We do +- * this after actually deregistering it, to +- * make sure that a 'real' IRQ doesn't run in +- * parallel with our fake +- */ +- if (action->flags & IRQF_SHARED) { +- local_irq_save(flags); +- action->handler(irq, dev_id); +- local_irq_restore(flags); +- } +-#endif +- kfree(action); +- return; +- } +- printk(KERN_ERR "Trying to free already-free IRQ %d\n", irq); + #ifdef CONFIG_DEBUG_SHIRQ +- dump_stack(); +-#endif +- spin_unlock_irqrestore(&desc->lock, flags); +- return; ++ /* ++ * It's a shared IRQ -- the driver ought to be prepared for an IRQ ++ * event to happen even now it's being freed, so let's make sure that ++ * is so by doing an extra call to the handler .... ++ * ++ * ( We do this after actually deregistering it, to make sure that a ++ * 'real' IRQ doesn't run in * parallel with our fake. 
) ++ */ ++ if (action->flags & IRQF_SHARED) { ++ local_irq_save_nort(flags); ++ action->handler(irq, dev_id); ++ local_irq_restore_nort(flags); + } ++#endif ++ return action; ++} ++ ++/** ++ * remove_irq - free an interrupt ++ * @irq: Interrupt line to free ++ * @act: irqaction for the interrupt ++ * ++ * Used to remove interrupts statically setup by the early boot process. ++ */ ++void remove_irq(unsigned int irq, struct irqaction *act) ++{ ++ __free_irq(irq, act->dev_id); ++} ++EXPORT_SYMBOL_GPL(remove_irq); ++ ++/** ++ * free_irq - free an interrupt allocated with request_irq ++ * @irq: Interrupt line to free ++ * @dev_id: Device identity to free ++ * ++ * Remove an interrupt handler. The handler is removed and if the ++ * interrupt line is no longer in use by any driver it is disabled. ++ * On a shared IRQ the caller must ensure the interrupt is disabled ++ * on the card it drives before calling this function. The function ++ * does not return until any executing interrupts for this IRQ ++ * have completed. ++ * ++ * This function must not be called from interrupt context. ++ */ ++void free_irq(unsigned int irq, void *dev_id) ++{ ++ kfree(__free_irq(irq, dev_id)); + } + EXPORT_SYMBOL(free_irq); + +@@ -679,11 +745,12 @@ int request_irq(unsigned int irq, irq_ha + * the behavior is classified as "will not fix" so we need to + * start nudging drivers away from using that idiom. + */ +- if ((irqflags & (IRQF_SHARED|IRQF_DISABLED)) +- == (IRQF_SHARED|IRQF_DISABLED)) +- pr_warning("IRQ %d/%s: IRQF_DISABLED is not " +- "guaranteed on shared IRQs\n", +- irq, devname); ++ if ((irqflags & (IRQF_SHARED|IRQF_DISABLED)) == ++ (IRQF_SHARED|IRQF_DISABLED)) { ++ pr_warning( ++ "IRQ %d/%s: IRQF_DISABLED is not guaranteed on shared IRQs\n", ++ irq, devname); ++ } + + #ifdef CONFIG_LOCKDEP + /* +@@ -709,15 +776,13 @@ int request_irq(unsigned int irq, irq_ha + if (!handler) + return -EINVAL; + +- action = kmalloc(sizeof(struct irqaction), GFP_ATOMIC); ++ action = kzalloc(sizeof(struct irqaction), GFP_KERNEL); + if (!action) + return -ENOMEM; + + action->handler = handler; + action->flags = irqflags; +- cpus_clear(action->mask); + action->name = devname; +- action->next = NULL; + action->dev_id = dev_id; + + retval = __setup_irq(irq, desc, action); +@@ -735,14 +800,289 @@ int request_irq(unsigned int irq, irq_ha + unsigned long flags; + + disable_irq(irq); +- local_irq_save(flags); ++ local_irq_save_nort(flags); + + handler(irq, dev_id); + +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + enable_irq(irq); + } + #endif + return retval; + } + EXPORT_SYMBOL(request_irq); ++ ++#ifdef CONFIG_PREEMPT_HARDIRQS ++ ++int hardirq_preemption = 1; ++ ++EXPORT_SYMBOL(hardirq_preemption); ++ ++/* ++ * Real-Time Preemption depends on hardirq threading: ++ */ ++#ifndef CONFIG_PREEMPT_RT ++ ++static int __init hardirq_preempt_setup (char *str) ++{ ++ if (!strncmp(str, "off", 3)) ++ hardirq_preemption = 0; ++ else ++ get_option(&str, &hardirq_preemption); ++ if (!hardirq_preemption) ++ printk("turning off hardirq preemption!\n"); ++ ++ return 1; ++} ++ ++__setup("hardirq-preempt=", hardirq_preempt_setup); ++ ++#endif ++ ++/* ++ * threaded simple handler ++ */ ++static void thread_simple_irq(irq_desc_t *desc) ++{ ++ struct irqaction *action = desc->action; ++ unsigned int irq = desc->irq; ++ irqreturn_t action_ret; ++ ++ do { ++ if (!action || desc->depth) ++ break; ++ desc->status &= ~IRQ_PENDING; ++ spin_unlock(&desc->lock); ++ action_ret = handle_IRQ_event(irq, action); ++ cond_resched_hardirq_context(); ++ 
spin_lock_irq(&desc->lock); ++ if (!noirqdebug) ++ note_interrupt(irq, desc, action_ret); ++ } while (desc->status & IRQ_PENDING); ++ desc->status &= ~IRQ_INPROGRESS; ++} ++ ++/* ++ * threaded level type irq handler ++ */ ++static void thread_level_irq(irq_desc_t *desc) ++{ ++ unsigned int irq = desc->irq; ++ ++ thread_simple_irq(desc); ++ if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) ++ desc->chip->unmask(irq); ++} ++ ++/* ++ * threaded fasteoi type irq handler ++ */ ++static void thread_fasteoi_irq(irq_desc_t *desc) ++{ ++ unsigned int irq = desc->irq; ++ ++ thread_simple_irq(desc); ++ if (!(desc->status & IRQ_DISABLED) && desc->chip->unmask) ++ desc->chip->unmask(irq); ++} ++ ++/* ++ * threaded edge type IRQ handler ++ */ ++static void thread_edge_irq(irq_desc_t *desc) ++{ ++ unsigned int irq = desc->irq; ++ ++ do { ++ struct irqaction *action = desc->action; ++ irqreturn_t action_ret; ++ ++ if (unlikely(!action)) { ++ desc->status &= ~IRQ_INPROGRESS; ++ desc->chip->mask(irq); ++ return; ++ } ++ ++ /* ++ * When another irq arrived while we were handling ++ * one, we could have masked the irq. ++ * Renable it, if it was not disabled in meantime. ++ */ ++ if (unlikely(((desc->status & (IRQ_PENDING | IRQ_MASKED)) == ++ (IRQ_PENDING | IRQ_MASKED)) && !desc->depth)) ++ desc->chip->unmask(irq); ++ ++ desc->status &= ~IRQ_PENDING; ++ spin_unlock(&desc->lock); ++ action_ret = handle_IRQ_event(irq, action); ++ cond_resched_hardirq_context(); ++ spin_lock_irq(&desc->lock); ++ if (!noirqdebug) ++ note_interrupt(irq, desc, action_ret); ++ } while ((desc->status & IRQ_PENDING) && !desc->depth); ++ ++ desc->status &= ~IRQ_INPROGRESS; ++} ++ ++/* ++ * threaded edge type IRQ handler ++ */ ++static void thread_do_irq(irq_desc_t *desc) ++{ ++ unsigned int irq = desc->irq; ++ ++ do { ++ struct irqaction *action = desc->action; ++ irqreturn_t action_ret; ++ ++ if (unlikely(!action)) { ++ desc->status &= ~IRQ_INPROGRESS; ++ desc->chip->disable(irq); ++ return; ++ } ++ ++ desc->status &= ~IRQ_PENDING; ++ spin_unlock(&desc->lock); ++ action_ret = handle_IRQ_event(irq, action); ++ cond_resched_hardirq_context(); ++ spin_lock_irq(&desc->lock); ++ if (!noirqdebug) ++ note_interrupt(irq, desc, action_ret); ++ } while ((desc->status & IRQ_PENDING) && !desc->depth); ++ ++ desc->status &= ~IRQ_INPROGRESS; ++ desc->chip->end(irq); ++} ++ ++static void do_hardirq(struct irq_desc *desc) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&desc->lock, flags); ++ ++ if (!(desc->status & IRQ_INPROGRESS)) ++ goto out; ++ ++ if (desc->handle_irq == handle_simple_irq) ++ thread_simple_irq(desc); ++ else if (desc->handle_irq == handle_level_irq) ++ thread_level_irq(desc); ++ else if (desc->handle_irq == handle_fasteoi_irq) ++ thread_fasteoi_irq(desc); ++ else if (desc->handle_irq == handle_edge_irq) ++ thread_edge_irq(desc); ++ else ++ thread_do_irq(desc); ++ out: ++ spin_unlock_irqrestore(&desc->lock, flags); ++ ++ if (waitqueue_active(&desc->wait_for_handler)) ++ wake_up(&desc->wait_for_handler); ++} ++ ++extern asmlinkage void __do_softirq(void); ++ ++static int do_irqd(void * __desc) ++{ ++ struct sched_param param = { 0, }; ++ struct irq_desc *desc = __desc; ++ ++#ifdef CONFIG_SMP ++ set_cpus_allowed_ptr(current, desc->affinity); ++#endif ++ current->flags |= PF_NOFREEZE | PF_HARDIRQ; ++ ++ /* ++ * Set irq thread priority to SCHED_FIFO/50: ++ */ ++ param.sched_priority = MAX_USER_RT_PRIO/2; ++ ++ sys_sched_setscheduler(current->pid, SCHED_FIFO, &param); ++ ++ while (!kthread_should_stop()) { ++ 
local_irq_disable_nort(); ++ set_current_state(TASK_INTERRUPTIBLE); ++#ifndef CONFIG_PREEMPT_RT ++ irq_enter(); ++#endif ++ do_hardirq(desc); ++#ifndef CONFIG_PREEMPT_RT ++ irq_exit(); ++#endif ++ local_irq_enable_nort(); ++ cond_resched(); ++#ifdef CONFIG_SMP ++ /* ++ * Did IRQ affinities change? ++ */ ++ if (!cpumask_equal(&current->cpus_allowed, desc->affinity)) ++ set_cpus_allowed_ptr(current, desc->affinity); ++#endif ++ schedule(); ++ } ++ __set_current_state(TASK_RUNNING); ++ ++ return 0; ++} ++ ++static int ok_to_create_irq_threads; ++ ++static int start_irq_thread(int irq, struct irq_desc *desc) ++{ ++ if (desc->thread || !ok_to_create_irq_threads) ++ return 0; ++ ++ init_waitqueue_head(&desc->wait_for_handler); ++ ++ desc->thread = kthread_create(do_irqd, desc, "IRQ-%d", irq); ++ if (!desc->thread) { ++ printk(KERN_ERR "irqd: could not create IRQ thread %d!\n", irq); ++ return -ENOMEM; ++ } ++ ++ /* ++ * An interrupt may have come in before the thread pointer was ++ * stored in desc->thread; make sure the thread gets woken up in ++ * such a case: ++ */ ++ smp_mb(); ++ wake_up_process(desc->thread); ++ ++ return 0; ++} ++ ++/* ++ * Start hardirq threads for all IRQs that are registered already. ++ * ++ * New ones will be started at the time of IRQ setup from now on. ++ */ ++void __init init_hardirqs(void) ++{ ++ struct irq_desc *desc; ++ int irq; ++ ++ ok_to_create_irq_threads = 1; ++ ++ for_each_irq_desc(irq, desc) { ++ if (desc->action && !(desc->status & IRQ_NODELAY)) ++ start_irq_thread(irq, desc); ++ } ++} ++ ++#else ++ ++static int start_irq_thread(int irq, struct irq_desc *desc) ++{ ++ return 0; ++} ++ ++#endif ++ ++void __init early_init_hardirqs(void) ++{ ++ struct irq_desc *desc; ++ int i; ++ ++ for_each_irq_desc(i, desc) ++ init_waitqueue_head(&desc->wait_for_handler); ++} +Index: linux-2.6-tip/kernel/irq/migration.c +=================================================================== +--- linux-2.6-tip.orig/kernel/irq/migration.c ++++ linux-2.6-tip/kernel/irq/migration.c +@@ -18,7 +18,7 @@ void move_masked_irq(int irq) + + desc->status &= ~IRQ_MOVE_PENDING; + +- if (unlikely(cpumask_empty(&desc->pending_mask))) ++ if (unlikely(cpumask_empty(desc->pending_mask))) + return; + + if (!desc->chip->set_affinity) +@@ -38,18 +38,19 @@ void move_masked_irq(int irq) + * For correct operation this depends on the caller + * masking the irqs. + */ +- if (likely(cpumask_any_and(&desc->pending_mask, cpu_online_mask) ++ if (likely(cpumask_any_and(desc->pending_mask, cpu_online_mask) + < nr_cpu_ids)) { +- cpumask_and(&desc->affinity, +- &desc->pending_mask, cpu_online_mask); +- desc->chip->set_affinity(irq, &desc->affinity); ++ cpumask_and(desc->affinity, ++ desc->pending_mask, cpu_online_mask); ++ desc->chip->set_affinity(irq, desc->affinity); + } +- cpumask_clear(&desc->pending_mask); ++ cpumask_clear(desc->pending_mask); + } + + void move_native_irq(int irq) + { + struct irq_desc *desc = irq_to_desc(irq); ++ int mask = 1; + + if (likely(!(desc->status & IRQ_MOVE_PENDING))) + return; +@@ -57,8 +58,17 @@ void move_native_irq(int irq) + if (unlikely(desc->status & IRQ_DISABLED)) + return; + +- desc->chip->mask(irq); ++ /* ++ * If the irq is already in progress, it should be masked. ++ * If we unmask it, we might cause an interrupt storm on RT. 
++ */ ++ if (unlikely(desc->status & IRQ_INPROGRESS)) ++ mask = 0; ++ ++ if (mask) ++ desc->chip->mask(irq); + move_masked_irq(irq); +- desc->chip->unmask(irq); ++ if (mask) ++ desc->chip->unmask(irq); + } + +Index: linux-2.6-tip/kernel/irq/numa_migrate.c +=================================================================== +--- linux-2.6-tip.orig/kernel/irq/numa_migrate.c ++++ linux-2.6-tip/kernel/irq/numa_migrate.c +@@ -17,16 +17,11 @@ static void init_copy_kstat_irqs(struct + struct irq_desc *desc, + int cpu, int nr) + { +- unsigned long bytes; +- + init_kstat_irqs(desc, cpu, nr); + +- if (desc->kstat_irqs != old_desc->kstat_irqs) { +- /* Compute how many bytes we need per irq and allocate them */ +- bytes = nr * sizeof(unsigned int); +- +- memcpy(desc->kstat_irqs, old_desc->kstat_irqs, bytes); +- } ++ if (desc->kstat_irqs != old_desc->kstat_irqs) ++ memcpy(desc->kstat_irqs, old_desc->kstat_irqs, ++ nr * sizeof(*desc->kstat_irqs)); + } + + static void free_kstat_irqs(struct irq_desc *old_desc, struct irq_desc *desc) +@@ -38,15 +33,23 @@ static void free_kstat_irqs(struct irq_d + old_desc->kstat_irqs = NULL; + } + +-static void init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, ++static bool init_copy_one_irq_desc(int irq, struct irq_desc *old_desc, + struct irq_desc *desc, int cpu) + { + memcpy(desc, old_desc, sizeof(struct irq_desc)); ++ if (!init_alloc_desc_masks(desc, cpu, false)) { ++ printk(KERN_ERR "irq %d: can not get new irq_desc cpumask " ++ "for migration.\n", irq); ++ return false; ++ } + spin_lock_init(&desc->lock); ++ init_waitqueue_head(&desc->wait_for_handler); + desc->cpu = cpu; + lockdep_set_class(&desc->lock, &irq_desc_lock_class); + init_copy_kstat_irqs(old_desc, desc, cpu, nr_cpu_ids); ++ init_copy_desc_masks(old_desc, desc); + arch_init_copy_chip_data(old_desc, desc, cpu); ++ return true; + } + + static void free_one_irq_desc(struct irq_desc *old_desc, struct irq_desc *desc) +@@ -76,12 +79,18 @@ static struct irq_desc *__real_move_irq_ + node = cpu_to_node(cpu); + desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); + if (!desc) { +- printk(KERN_ERR "irq %d: can not get new irq_desc for migration.\n", irq); ++ printk(KERN_ERR "irq %d: can not get new irq_desc " ++ "for migration.\n", irq); ++ /* still use old one */ ++ desc = old_desc; ++ goto out_unlock; ++ } ++ if (!init_copy_one_irq_desc(irq, old_desc, desc, cpu)) { + /* still use old one */ ++ kfree(desc); + desc = old_desc; + goto out_unlock; + } +- init_copy_one_irq_desc(irq, old_desc, desc, cpu); + + irq_desc_ptrs[irq] = desc; + spin_unlock_irqrestore(&sparse_irq_lock, flags); +Index: linux-2.6-tip/kernel/irq/proc.c +=================================================================== +--- linux-2.6-tip.orig/kernel/irq/proc.c ++++ linux-2.6-tip/kernel/irq/proc.c +@@ -7,6 +7,8 @@ + */ + + #include ++#include ++#include + #include + #include + #include +@@ -20,11 +22,11 @@ static struct proc_dir_entry *root_irq_d + static int irq_affinity_proc_show(struct seq_file *m, void *v) + { + struct irq_desc *desc = irq_to_desc((long)m->private); +- const struct cpumask *mask = &desc->affinity; ++ const struct cpumask *mask = desc->affinity; + + #ifdef CONFIG_GENERIC_PENDING_IRQ + if (desc->status & IRQ_MOVE_PENDING) +- mask = &desc->pending_mask; ++ mask = desc->pending_mask; + #endif + seq_cpumask(m, mask); + seq_putc(m, '\n'); +@@ -116,6 +118,9 @@ static ssize_t default_affinity_write(st + goto out; + } + ++ /* create /proc/irq/prof_cpu_mask */ ++ create_prof_cpu_mask(root_irq_dir); ++ + /* + * Do not allow 
disabling IRQs completely - it's a too easy + * way to make the system unusable accidentally :-) At least +@@ -160,45 +165,6 @@ static int irq_spurious_read(char *page, + jiffies_to_msecs(desc->last_unhandled)); + } + +-#define MAX_NAMELEN 128 +- +-static int name_unique(unsigned int irq, struct irqaction *new_action) +-{ +- struct irq_desc *desc = irq_to_desc(irq); +- struct irqaction *action; +- unsigned long flags; +- int ret = 1; +- +- spin_lock_irqsave(&desc->lock, flags); +- for (action = desc->action ; action; action = action->next) { +- if ((action != new_action) && action->name && +- !strcmp(new_action->name, action->name)) { +- ret = 0; +- break; +- } +- } +- spin_unlock_irqrestore(&desc->lock, flags); +- return ret; +-} +- +-void register_handler_proc(unsigned int irq, struct irqaction *action) +-{ +- char name [MAX_NAMELEN]; +- struct irq_desc *desc = irq_to_desc(irq); +- +- if (!desc->dir || action->dir || !action->name || +- !name_unique(irq, action)) +- return; +- +- memset(name, 0, MAX_NAMELEN); +- snprintf(name, MAX_NAMELEN, "%s", action->name); +- +- /* create /proc/irq/1234/handler/ */ +- action->dir = proc_mkdir(name, desc->dir); +-} +- +-#undef MAX_NAMELEN +- + #define MAX_NAMELEN 10 + + void register_irq_proc(unsigned int irq, struct irq_desc *desc) +@@ -232,6 +198,8 @@ void register_irq_proc(unsigned int irq, + + void unregister_handler_proc(unsigned int irq, struct irqaction *action) + { ++ if (action->threaded) ++ remove_proc_entry(action->threaded->name, action->dir); + if (action->dir) { + struct irq_desc *desc = irq_to_desc(irq); + +@@ -247,6 +215,91 @@ static void register_default_affinity_pr + #endif + } + ++#ifndef CONFIG_PREEMPT_RT ++ ++static int threaded_read_proc(char *page, char **start, off_t off, ++ int count, int *eof, void *data) ++{ ++ return sprintf(page, "%c\n", ++ ((struct irqaction *)data)->flags & IRQF_NODELAY ? 
'0' : '1'); ++} ++ ++static int threaded_write_proc(struct file *file, const char __user *buffer, ++ unsigned long count, void *data) ++{ ++ int c; ++ struct irqaction *action = data; ++ irq_desc_t *desc = irq_to_desc(action->irq); ++ ++ if (get_user(c, buffer)) ++ return -EFAULT; ++ if (c != '0' && c != '1') ++ return -EINVAL; ++ ++ spin_lock_irq(&desc->lock); ++ ++ if (c == '0') ++ action->flags |= IRQF_NODELAY; ++ if (c == '1') ++ action->flags &= ~IRQF_NODELAY; ++ recalculate_desc_flags(desc); ++ ++ spin_unlock_irq(&desc->lock); ++ ++ return 1; ++} ++ ++#endif ++ ++#define MAX_NAMELEN 128 ++ ++static int name_unique(unsigned int irq, struct irqaction *new_action) ++{ ++ struct irq_desc *desc = irq_to_desc(irq); ++ struct irqaction *action; ++ ++ for (action = desc->action ; action; action = action->next) ++ if ((action != new_action) && action->name && ++ !strcmp(new_action->name, action->name)) ++ return 0; ++ return 1; ++} ++ ++void register_handler_proc(unsigned int irq, struct irqaction *action) ++{ ++ char name [MAX_NAMELEN]; ++ struct irq_desc *desc = irq_to_desc(irq); ++ ++ if (!desc->dir || action->dir || !action->name || ++ !name_unique(irq, action)) ++ return; ++ ++ memset(name, 0, MAX_NAMELEN); ++ snprintf(name, MAX_NAMELEN, "%s", action->name); ++ ++ /* create /proc/irq/1234/handler/ */ ++ action->dir = proc_mkdir(name, desc->dir); ++ ++ if (!action->dir) ++ return; ++#ifndef CONFIG_PREEMPT_RT ++ { ++ struct proc_dir_entry *entry; ++ /* create /proc/irq/1234/handler/threaded */ ++ entry = create_proc_entry("threaded", 0600, action->dir); ++ if (!entry) ++ return; ++ entry->nlink = 1; ++ entry->data = (void *)action; ++ entry->read_proc = threaded_read_proc; ++ entry->write_proc = threaded_write_proc; ++ action->threaded = entry; ++ } ++#endif ++} ++ ++#undef MAX_NAMELEN ++ + void init_irq_proc(void) + { + unsigned int irq; +Index: linux-2.6-tip/kernel/irq/spurious.c +=================================================================== +--- linux-2.6-tip.orig/kernel/irq/spurious.c ++++ linux-2.6-tip/kernel/irq/spurious.c +@@ -14,6 +14,11 @@ + #include + #include + ++#ifdef CONFIG_X86_IO_APIC ++# include ++# include ++#endif ++ + static int irqfixup __read_mostly; + + #define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10) +@@ -54,9 +59,8 @@ static int try_one_irq(int irq, struct i + } + action = action->next; + } +- local_irq_disable(); + /* Now clean up the flags */ +- spin_lock(&desc->lock); ++ spin_lock_irq(&desc->lock); + action = desc->action; + + /* +@@ -104,7 +108,7 @@ static int misrouted_irq(int irq) + return ok; + } + +-static void poll_spurious_irqs(unsigned long dummy) ++static void poll_all_shared_irqs(void) + { + struct irq_desc *desc; + int i; +@@ -123,11 +127,23 @@ static void poll_spurious_irqs(unsigned + + try_one_irq(i, desc); + } ++} ++ ++static void poll_spurious_irqs(unsigned long dummy) ++{ ++ poll_all_shared_irqs(); + + mod_timer(&poll_spurious_irq_timer, + jiffies + POLL_SPURIOUS_IRQ_INTERVAL); + } + ++#ifdef CONFIG_DEBUG_SHIRQ ++void debug_poll_all_shared_irqs(void) ++{ ++ poll_all_shared_irqs(); ++} ++#endif ++ + /* + * If 99,900 of the previous 100,000 interrupts have not been handled + * then assume that the IRQ is stuck in some manner. 
Drop a diagnostic +@@ -246,6 +262,12 @@ void note_interrupt(unsigned int irq, st + * The interrupt is stuck + */ + __report_bad_irq(irq, desc, action_ret); ++#ifdef CONFIG_X86_IO_APIC ++ if (!sis_apic_bug) { ++ sis_apic_bug = 1; ++ printk(KERN_ERR "turning off IO-APIC fast mode.\n"); ++ } ++#else + /* + * Now kill the IRQ + */ +@@ -256,6 +278,7 @@ void note_interrupt(unsigned int irq, st + + mod_timer(&poll_spurious_irq_timer, + jiffies + POLL_SPURIOUS_IRQ_INTERVAL); ++#endif + } + desc->irqs_unhandled = 0; + } +@@ -276,6 +299,11 @@ MODULE_PARM_DESC(noirqdebug, "Disable ir + + static int __init irqfixup_setup(char *str) + { ++#ifdef CONFIG_PREEMPT_RT ++ printk(KERN_WARNING "irqfixup boot option not supported " ++ "w/ CONFIG_PREEMPT_RT\n"); ++ return 1; ++#endif + irqfixup = 1; + printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); + printk(KERN_WARNING "This may impact system performance.\n"); +@@ -289,6 +317,11 @@ MODULE_PARM_DESC("irqfixup", "0: No fixu + + static int __init irqpoll_setup(char *str) + { ++#ifdef CONFIG_PREEMPT_RT ++ printk(KERN_WARNING "irqpoll boot option not supported " ++ "w/ CONFIG_PREEMPT_RT\n"); ++ return 1; ++#endif + irqfixup = 2; + printk(KERN_WARNING "Misrouted IRQ fixup and polling support " + "enabled\n"); +Index: linux-2.6-tip/kernel/kexec.c +=================================================================== +--- linux-2.6-tip.orig/kernel/kexec.c ++++ linux-2.6-tip/kernel/kexec.c +@@ -1130,7 +1130,7 @@ void crash_save_cpu(struct pt_regs *regs + return; + memset(&prstatus, 0, sizeof(prstatus)); + prstatus.pr_pid = current->pid; +- elf_core_copy_regs(&prstatus.pr_reg, regs); ++ elf_core_copy_kernel_regs(&prstatus.pr_reg, regs); + buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, + &prstatus, sizeof(prstatus)); + final_note(buf); +Index: linux-2.6-tip/kernel/kprobes.c +=================================================================== +--- linux-2.6-tip.orig/kernel/kprobes.c ++++ linux-2.6-tip/kernel/kprobes.c +@@ -43,6 +43,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -72,10 +73,10 @@ static bool kprobe_enabled; + static DEFINE_MUTEX(kprobe_mutex); /* Protects kprobe_table */ + static DEFINE_PER_CPU(struct kprobe *, kprobe_instance) = NULL; + static struct { +- spinlock_t lock ____cacheline_aligned_in_smp; ++ raw_spinlock_t lock ____cacheline_aligned_in_smp; + } kretprobe_table_locks[KPROBE_TABLE_SIZE]; + +-static spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) ++static raw_spinlock_t *kretprobe_table_lock_ptr(unsigned long hash) + { + return &(kretprobe_table_locks[hash].lock); + } +@@ -414,7 +415,7 @@ void __kprobes kretprobe_hash_lock(struc + struct hlist_head **head, unsigned long *flags) + { + unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); +- spinlock_t *hlist_lock; ++ raw_spinlock_t *hlist_lock; + + *head = &kretprobe_inst_table[hash]; + hlist_lock = kretprobe_table_lock_ptr(hash); +@@ -424,7 +425,7 @@ void __kprobes kretprobe_hash_lock(struc + static void __kprobes kretprobe_table_lock(unsigned long hash, + unsigned long *flags) + { +- spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); ++ raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); + spin_lock_irqsave(hlist_lock, *flags); + } + +@@ -432,7 +433,7 @@ void __kprobes kretprobe_hash_unlock(str + unsigned long *flags) + { + unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); +- spinlock_t *hlist_lock; ++ raw_spinlock_t *hlist_lock; + + hlist_lock = kretprobe_table_lock_ptr(hash); + 
spin_unlock_irqrestore(hlist_lock, *flags); +@@ -440,7 +441,7 @@ void __kprobes kretprobe_hash_unlock(str + + void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags) + { +- spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); ++ raw_spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); + spin_unlock_irqrestore(hlist_lock, *flags); + } + +@@ -699,9 +700,10 @@ int __kprobes register_kprobe(struct kpr + goto out; + } + ++ mutex_lock(&text_mutex); + ret = arch_prepare_kprobe(p); + if (ret) +- goto out; ++ goto out_unlock_text; + + INIT_HLIST_NODE(&p->hlist); + hlist_add_head_rcu(&p->hlist, +@@ -710,6 +712,8 @@ int __kprobes register_kprobe(struct kpr + if (kprobe_enabled) + arch_arm_kprobe(p); + ++out_unlock_text: ++ mutex_unlock(&text_mutex); + out: + mutex_unlock(&kprobe_mutex); + +@@ -746,8 +750,11 @@ valid_p: + * enabled and not gone - otherwise, the breakpoint would + * already have been removed. We save on flushing icache. + */ +- if (kprobe_enabled && !kprobe_gone(old_p)) ++ if (kprobe_enabled && !kprobe_gone(old_p)) { ++ mutex_lock(&text_mutex); + arch_disarm_kprobe(p); ++ mutex_unlock(&text_mutex); ++ } + hlist_del_rcu(&old_p->hlist); + } else { + if (p->break_handler && !kprobe_gone(p)) +@@ -1278,12 +1285,14 @@ static void __kprobes enable_all_kprobes + if (kprobe_enabled) + goto already_enabled; + ++ mutex_lock(&text_mutex); + for (i = 0; i < KPROBE_TABLE_SIZE; i++) { + head = &kprobe_table[i]; + hlist_for_each_entry_rcu(p, node, head, hlist) + if (!kprobe_gone(p)) + arch_arm_kprobe(p); + } ++ mutex_unlock(&text_mutex); + + kprobe_enabled = true; + printk(KERN_INFO "Kprobes globally enabled\n"); +@@ -1308,6 +1317,7 @@ static void __kprobes disable_all_kprobe + + kprobe_enabled = false; + printk(KERN_INFO "Kprobes globally disabled\n"); ++ mutex_lock(&text_mutex); + for (i = 0; i < KPROBE_TABLE_SIZE; i++) { + head = &kprobe_table[i]; + hlist_for_each_entry_rcu(p, node, head, hlist) { +@@ -1316,6 +1326,7 @@ static void __kprobes disable_all_kprobe + } + } + ++ mutex_unlock(&text_mutex); + mutex_unlock(&kprobe_mutex); + /* Allow all currently running kprobes to complete */ + synchronize_sched(); +Index: linux-2.6-tip/kernel/latencytop.c +=================================================================== +--- linux-2.6-tip.orig/kernel/latencytop.c ++++ linux-2.6-tip/kernel/latencytop.c +@@ -9,6 +9,44 @@ + * as published by the Free Software Foundation; version 2 + * of the License. + */ ++ ++/* ++ * CONFIG_LATENCYTOP enables a kernel latency tracking infrastructure that is ++ * used by the "latencytop" userspace tool. The latency that is tracked is not ++ * the 'traditional' interrupt latency (which is primarily caused by something ++ * else consuming CPU), but instead, it is the latency an application encounters ++ * because the kernel sleeps on its behalf for various reasons. ++ * ++ * This code tracks 2 levels of statistics: ++ * 1) System level latency ++ * 2) Per process latency ++ * ++ * The latency is stored in fixed sized data structures in an accumulated form; ++ * if the "same" latency cause is hit twice, this will be tracked as one entry ++ * in the data structure. Both the count, total accumulated latency and maximum ++ * latency are tracked in this data structure. When the fixed size structure is ++ * full, no new causes are tracked until the buffer is flushed by writing to ++ * the /proc file; the userspace tool does this on a regular basis. 
++ * ++ * A latency cause is identified by a stringified backtrace at the point that ++ * the scheduler gets invoked. The userland tool will use this string to ++ * identify the cause of the latency in human readable form. ++ * ++ * The information is exported via /proc/latency_stats and /proc//latency. ++ * These files look like this: ++ * ++ * Latency Top version : v0.1 ++ * 70 59433 4897 i915_irq_wait drm_ioctl vfs_ioctl do_vfs_ioctl sys_ioctl ++ * | | | | ++ * | | | +----> the stringified backtrace ++ * | | +---------> The maximum latency for this entry in microseconds ++ * | +--------------> The accumulated latency for this entry (microseconds) ++ * +-------------------> The number of times this entry is hit ++ * ++ * (note: the average latency is the accumulated latency divided by the number ++ * of times) ++ */ ++ + #include + #include + #include +@@ -72,7 +110,7 @@ account_global_scheduler_latency(struct + firstnonnull = i; + continue; + } +- for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) { ++ for (q = 0; q < LT_BACKTRACEDEPTH; q++) { + unsigned long record = lat->backtrace[q]; + + if (latency_record[i].backtrace[q] != record) { +@@ -101,31 +139,52 @@ account_global_scheduler_latency(struct + memcpy(&latency_record[i], lat, sizeof(struct latency_record)); + } + +-static inline void store_stacktrace(struct task_struct *tsk, struct latency_record *lat) ++/* ++ * Iterator to store a backtrace into a latency record entry ++ */ ++static inline void store_stacktrace(struct task_struct *tsk, ++ struct latency_record *lat) + { + struct stack_trace trace; + + memset(&trace, 0, sizeof(trace)); + trace.max_entries = LT_BACKTRACEDEPTH; + trace.entries = &lat->backtrace[0]; +- trace.skip = 0; + save_stack_trace_tsk(tsk, &trace); + } + ++/** ++ * __account_scheduler_latency - record an occured latency ++ * @tsk - the task struct of the task hitting the latency ++ * @usecs - the duration of the latency in microseconds ++ * @inter - 1 if the sleep was interruptible, 0 if uninterruptible ++ * ++ * This function is the main entry point for recording latency entries ++ * as called by the scheduler. ++ * ++ * This function has a few special cases to deal with normal 'non-latency' ++ * sleeps: specifically, interruptible sleep longer than 5 msec is skipped ++ * since this usually is caused by waiting for events via select() and co. ++ * ++ * Negative latencies (caused by time going backwards) are also explicitly ++ * skipped. ++ */ + void __sched +-account_scheduler_latency(struct task_struct *tsk, int usecs, int inter) ++__account_scheduler_latency(struct task_struct *tsk, int usecs, int inter) + { + unsigned long flags; + int i, q; + struct latency_record lat; + +- if (!latencytop_enabled) +- return; +- + /* Long interruptible waits are generally user requested... 
*/ + if (inter && usecs > 5000) + return; + ++ /* Negative sleeps are time going backwards */ ++ /* Zero-time sleeps are non-interesting */ ++ if (usecs <= 0) ++ return; ++ + memset(&lat, 0, sizeof(lat)); + lat.count = 1; + lat.time = usecs; +@@ -143,12 +202,12 @@ account_scheduler_latency(struct task_st + if (tsk->latency_record_count >= LT_SAVECOUNT) + goto out_unlock; + +- for (i = 0; i < LT_SAVECOUNT ; i++) { ++ for (i = 0; i < LT_SAVECOUNT; i++) { + struct latency_record *mylat; + int same = 1; + + mylat = &tsk->latency_record[i]; +- for (q = 0 ; q < LT_BACKTRACEDEPTH ; q++) { ++ for (q = 0; q < LT_BACKTRACEDEPTH; q++) { + unsigned long record = lat.backtrace[q]; + + if (mylat->backtrace[q] != record) { +@@ -186,7 +245,7 @@ static int lstats_show(struct seq_file * + for (i = 0; i < MAXLR; i++) { + if (latency_record[i].backtrace[0]) { + int q; +- seq_printf(m, "%i %li %li ", ++ seq_printf(m, "%i %lu %lu ", + latency_record[i].count, + latency_record[i].time, + latency_record[i].max); +@@ -223,7 +282,7 @@ static int lstats_open(struct inode *ino + return single_open(filp, lstats_show, NULL); + } + +-static struct file_operations lstats_fops = { ++static const struct file_operations lstats_fops = { + .open = lstats_open, + .read = seq_read, + .write = lstats_write, +@@ -236,4 +295,4 @@ static int __init init_lstats_procfs(voi + proc_create("latency_stats", 0644, NULL, &lstats_fops); + return 0; + } +-__initcall(init_lstats_procfs); ++device_initcall(init_lstats_procfs); +Index: linux-2.6-tip/kernel/lockdep.c +=================================================================== +--- linux-2.6-tip.orig/kernel/lockdep.c ++++ linux-2.6-tip/kernel/lockdep.c +@@ -41,6 +41,8 @@ + #include + #include + #include ++#include ++#include + + #include + +@@ -68,7 +70,7 @@ module_param(lock_stat, int, 0644); + * to use a raw spinlock - we really dont want the spinlock + * code to recurse back into the lockdep code... 
+ */ +-static raw_spinlock_t lockdep_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; ++static __raw_spinlock_t lockdep_lock = (__raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + + static int graph_lock(void) + { +@@ -310,12 +312,14 @@ EXPORT_SYMBOL(lockdep_on); + #if VERBOSE + # define HARDIRQ_VERBOSE 1 + # define SOFTIRQ_VERBOSE 1 ++# define RECLAIM_VERBOSE 1 + #else + # define HARDIRQ_VERBOSE 0 + # define SOFTIRQ_VERBOSE 0 ++# define RECLAIM_VERBOSE 0 + #endif + +-#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE ++#if VERBOSE || HARDIRQ_VERBOSE || SOFTIRQ_VERBOSE || RECLAIM_VERBOSE + /* + * Quick filtering for interesting events: + */ +@@ -430,30 +434,24 @@ atomic_t nr_find_usage_forwards_checks; + atomic_t nr_find_usage_forwards_recursions; + atomic_t nr_find_usage_backwards_checks; + atomic_t nr_find_usage_backwards_recursions; +-# define debug_atomic_inc(ptr) atomic_inc(ptr) +-# define debug_atomic_dec(ptr) atomic_dec(ptr) +-# define debug_atomic_read(ptr) atomic_read(ptr) +-#else +-# define debug_atomic_inc(ptr) do { } while (0) +-# define debug_atomic_dec(ptr) do { } while (0) +-# define debug_atomic_read(ptr) 0 + #endif + + /* + * Locking printouts: + */ + ++#define __USAGE(__STATE) \ ++ [LOCK_USED_IN_##__STATE] = "IN-"__stringify(__STATE)"-W", \ ++ [LOCK_ENABLED_##__STATE] = __stringify(__STATE)"-ON-W", \ ++ [LOCK_USED_IN_##__STATE##_READ] = "IN-"__stringify(__STATE)"-R",\ ++ [LOCK_ENABLED_##__STATE##_READ] = __stringify(__STATE)"-ON-R", ++ + static const char *usage_str[] = + { +- [LOCK_USED] = "initial-use ", +- [LOCK_USED_IN_HARDIRQ] = "in-hardirq-W", +- [LOCK_USED_IN_SOFTIRQ] = "in-softirq-W", +- [LOCK_ENABLED_SOFTIRQS] = "softirq-on-W", +- [LOCK_ENABLED_HARDIRQS] = "hardirq-on-W", +- [LOCK_USED_IN_HARDIRQ_READ] = "in-hardirq-R", +- [LOCK_USED_IN_SOFTIRQ_READ] = "in-softirq-R", +- [LOCK_ENABLED_SOFTIRQS_READ] = "softirq-on-R", +- [LOCK_ENABLED_HARDIRQS_READ] = "hardirq-on-R", ++#define LOCKDEP_STATE(__STATE) __USAGE(__STATE) ++#include "lockdep_states.h" ++#undef LOCKDEP_STATE ++ [LOCK_USED] = "INITIAL USE", + }; + + const char * __get_key_name(struct lockdep_subclass_key *key, char *str) +@@ -461,46 +459,45 @@ const char * __get_key_name(struct lockd + return kallsyms_lookup((unsigned long)key, NULL, NULL, NULL, str); + } + +-void +-get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4) ++static inline unsigned long lock_flag(enum lock_usage_bit bit) + { +- *c1 = '.', *c2 = '.', *c3 = '.', *c4 = '.'; +- +- if (class->usage_mask & LOCKF_USED_IN_HARDIRQ) +- *c1 = '+'; +- else +- if (class->usage_mask & LOCKF_ENABLED_HARDIRQS) +- *c1 = '-'; ++ return 1UL << bit; ++} + +- if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) +- *c2 = '+'; +- else +- if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS) +- *c2 = '-'; ++static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit) ++{ ++ char c = '.'; + +- if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) +- *c3 = '-'; +- if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) { +- *c3 = '+'; +- if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) +- *c3 = '?'; +- } +- +- if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) +- *c4 = '-'; +- if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) { +- *c4 = '+'; +- if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) +- *c4 = '?'; ++ if (class->usage_mask & lock_flag(bit + 2)) ++ c = '+'; ++ if (class->usage_mask & lock_flag(bit)) { ++ c = '-'; ++ if (class->usage_mask & lock_flag(bit + 2)) ++ c = '?'; + } ++ ++ return c; ++} ++ ++void get_usage_chars(struct 
lock_class *class, char usage[LOCK_USAGE_CHARS]) ++{ ++ int i = 0; ++ ++#define LOCKDEP_STATE(__STATE) \ ++ usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE); \ ++ usage[i++] = get_usage_char(class, LOCK_USED_IN_##__STATE##_READ); ++#include "lockdep_states.h" ++#undef LOCKDEP_STATE ++ ++ usage[i] = '\0'; + } + + static void print_lock_name(struct lock_class *class) + { +- char str[KSYM_NAME_LEN], c1, c2, c3, c4; ++ char str[KSYM_NAME_LEN], usage[LOCK_USAGE_CHARS]; + const char *name; + +- get_usage_chars(class, &c1, &c2, &c3, &c4); ++ get_usage_chars(class, usage); + + name = class->name; + if (!name) { +@@ -513,7 +510,7 @@ static void print_lock_name(struct lock_ + if (class->subclass) + printk("/%d", class->subclass); + } +- printk("){%c%c%c%c}", c1, c2, c3, c4); ++ printk("){%s}", usage); + } + + static void print_lockdep_cache(struct lockdep_map *lock) +@@ -760,7 +757,7 @@ register_lock_class(struct lockdep_map * + */ + if (!static_obj(lock->key)) { + debug_locks_off(); +- printk("INFO: trying to register non-static key.\n"); ++ printk("INFO: trying to register non-static key %p.\n", lock->key); + printk("the code is fine but needs lockdep annotation.\n"); + printk("turning off the locking correctness validator.\n"); + dump_stack(); +@@ -796,6 +793,7 @@ register_lock_class(struct lockdep_map * + + printk("BUG: MAX_LOCKDEP_KEYS too low!\n"); + printk("turning off the locking correctness validator.\n"); ++ dump_stack(); + return NULL; + } + class = lock_classes + nr_lock_classes++; +@@ -846,6 +844,21 @@ out_unlock_set: + return class; + } + ++#if defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_TRACE_IRQFLAGS) ++ ++#define RECURSION_LIMIT 40 ++ ++static int noinline print_infinite_recursion_bug(void) ++{ ++ if (!debug_locks_off_graph_unlock()) ++ return 0; ++ ++ WARN_ON(1); ++ ++ return 0; ++} ++#endif /* CONFIG_PROVE_LOCKING || CONFIG_TRACE_IRQFLAGS */ ++ + #ifdef CONFIG_PROVE_LOCKING + /* + * Allocate a lockdep entry. 
(assumes the graph_lock held, returns +@@ -859,6 +872,7 @@ static struct lock_list *alloc_list_entr + + printk("BUG: MAX_LOCKDEP_ENTRIES too low!\n"); + printk("turning off the locking correctness validator.\n"); ++ dump_stack(); + return NULL; + } + return list_entries + nr_list_entries++; +@@ -976,18 +990,6 @@ static noinline int print_circular_bug_t + return 0; + } + +-#define RECURSION_LIMIT 40 +- +-static int noinline print_infinite_recursion_bug(void) +-{ +- if (!debug_locks_off_graph_unlock()) +- return 0; +- +- WARN_ON(1); +- +- return 0; +-} +- + unsigned long __lockdep_count_forward_deps(struct lock_class *class, + unsigned int depth) + { +@@ -1180,6 +1182,7 @@ find_usage_backwards(struct lock_class * + return 1; + } + ++#ifdef CONFIG_PROVE_LOCKING + static int + print_bad_irq_dependency(struct task_struct *curr, + struct held_lock *prev, +@@ -1240,6 +1243,7 @@ print_bad_irq_dependency(struct task_str + + return 0; + } ++#endif /* CONFIG_PROVE_LOCKING */ + + static int + check_usage(struct task_struct *curr, struct held_lock *prev, +@@ -1263,9 +1267,49 @@ check_usage(struct task_struct *curr, st + bit_backwards, bit_forwards, irqclass); + } + +-static int +-check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, +- struct held_lock *next) ++static const char *state_names[] = { ++#define LOCKDEP_STATE(__STATE) \ ++ __stringify(__STATE), ++#include "lockdep_states.h" ++#undef LOCKDEP_STATE ++}; ++ ++static const char *state_rnames[] = { ++#define LOCKDEP_STATE(__STATE) \ ++ __stringify(__STATE)"-READ", ++#include "lockdep_states.h" ++#undef LOCKDEP_STATE ++}; ++ ++static inline const char *state_name(enum lock_usage_bit bit) ++{ ++ return (bit & 1) ? state_rnames[bit >> 2] : state_names[bit >> 2]; ++} ++ ++static int exclusive_bit(int new_bit) ++{ ++ /* ++ * USED_IN ++ * USED_IN_READ ++ * ENABLED ++ * ENABLED_READ ++ * ++ * bit 0 - write/read ++ * bit 1 - used_in/enabled ++ * bit 2+ state ++ */ ++ ++ int state = new_bit & ~3; ++ int dir = new_bit & 2; ++ ++ /* ++ * keep state, bit flip the direction and strip read. 
++ */ ++ return state | (dir ^ 2); ++} ++ ++static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, ++ struct held_lock *next, enum lock_usage_bit bit) + { + /* + * Prove that the new dependency does not connect a hardirq-safe +@@ -1273,38 +1317,34 @@ check_prev_add_irq(struct task_struct *c + * the backwards-subgraph starting at , and the + * forwards-subgraph starting at : + */ +- if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ, +- LOCK_ENABLED_HARDIRQS, "hard")) ++ if (!check_usage(curr, prev, next, bit, ++ exclusive_bit(bit), state_name(bit))) + return 0; + ++ bit++; /* _READ */ ++ + /* + * Prove that the new dependency does not connect a hardirq-safe-read + * lock with a hardirq-unsafe lock - to achieve this we search + * the backwards-subgraph starting at , and the + * forwards-subgraph starting at : + */ +- if (!check_usage(curr, prev, next, LOCK_USED_IN_HARDIRQ_READ, +- LOCK_ENABLED_HARDIRQS, "hard-read")) ++ if (!check_usage(curr, prev, next, bit, ++ exclusive_bit(bit), state_name(bit))) + return 0; + +- /* +- * Prove that the new dependency does not connect a softirq-safe +- * lock with a softirq-unsafe lock - to achieve this we search +- * the backwards-subgraph starting at , and the +- * forwards-subgraph starting at : +- */ +- if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ, +- LOCK_ENABLED_SOFTIRQS, "soft")) +- return 0; +- /* +- * Prove that the new dependency does not connect a softirq-safe-read +- * lock with a softirq-unsafe lock - to achieve this we search +- * the backwards-subgraph starting at , and the +- * forwards-subgraph starting at : +- */ +- if (!check_usage(curr, prev, next, LOCK_USED_IN_SOFTIRQ_READ, +- LOCK_ENABLED_SOFTIRQS, "soft")) ++ return 1; ++} ++ ++static int ++check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, ++ struct held_lock *next) ++{ ++#define LOCKDEP_STATE(__STATE) \ ++ if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \ + return 0; ++#include "lockdep_states.h" ++#undef LOCKDEP_STATE + + return 1; + } +@@ -1649,6 +1689,7 @@ cache_hit: + + printk("BUG: MAX_LOCKDEP_CHAINS too low!\n"); + printk("turning off the locking correctness validator.\n"); ++ dump_stack(); + return 0; + } + chain = lock_chains + nr_lock_chains++; +@@ -1861,9 +1902,9 @@ print_irq_inversion_bug(struct task_stru + curr->comm, task_pid_nr(curr)); + print_lock(this); + if (forwards) +- printk("but this lock took another, %s-irq-unsafe lock in the past:\n", irqclass); ++ printk("but this lock took another, %s-unsafe lock in the past:\n", irqclass); + else +- printk("but this lock was taken by another, %s-irq-safe lock in the past:\n", irqclass); ++ printk("but this lock was taken by another, %s-safe lock in the past:\n", irqclass); + print_lock_name(other); + printk("\n\nand interrupts could create inverse lock ordering between them.\n\n"); + +@@ -1933,7 +1974,7 @@ void print_irqtrace_events(struct task_s + print_ip_sym(curr->softirq_disable_ip); + } + +-static int hardirq_verbose(struct lock_class *class) ++static int HARDIRQ_verbose(struct lock_class *class) + { + #if HARDIRQ_VERBOSE + return class_filter(class); +@@ -1941,7 +1982,7 @@ static int hardirq_verbose(struct lock_c + return 0; + } + +-static int softirq_verbose(struct lock_class *class) ++static int SOFTIRQ_verbose(struct lock_class *class) + { + #if SOFTIRQ_VERBOSE + return class_filter(class); +@@ -1949,185 +1990,95 @@ static int softirq_verbose(struct lock_c + return 0; + } + ++static int RECLAIM_FS_verbose(struct lock_class *class) ++{ ++#if 
RECLAIM_VERBOSE ++ return class_filter(class); ++#endif ++ return 0; ++} ++ + #define STRICT_READ_CHECKS 1 + +-static int mark_lock_irq(struct task_struct *curr, struct held_lock *this, ++static int (*state_verbose_f[])(struct lock_class *class) = { ++#define LOCKDEP_STATE(__STATE) \ ++ __STATE##_verbose, ++#include "lockdep_states.h" ++#undef LOCKDEP_STATE ++}; ++ ++static inline int state_verbose(enum lock_usage_bit bit, ++ struct lock_class *class) ++{ ++ return state_verbose_f[bit >> 2](class); ++} ++ ++typedef int (*check_usage_f)(struct task_struct *, struct held_lock *, ++ enum lock_usage_bit bit, const char *name); ++ ++static int ++mark_lock_irq(struct task_struct *curr, struct held_lock *this, + enum lock_usage_bit new_bit) + { +- int ret = 1; ++ int excl_bit = exclusive_bit(new_bit); ++ int read = new_bit & 1; ++ int dir = new_bit & 2; + +- switch(new_bit) { +- case LOCK_USED_IN_HARDIRQ: +- if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) +- return 0; +- if (!valid_state(curr, this, new_bit, +- LOCK_ENABLED_HARDIRQS_READ)) +- return 0; +- /* +- * just marked it hardirq-safe, check that this lock +- * took no hardirq-unsafe lock in the past: +- */ +- if (!check_usage_forwards(curr, this, +- LOCK_ENABLED_HARDIRQS, "hard")) +- return 0; +-#if STRICT_READ_CHECKS +- /* +- * just marked it hardirq-safe, check that this lock +- * took no hardirq-unsafe-read lock in the past: +- */ +- if (!check_usage_forwards(curr, this, +- LOCK_ENABLED_HARDIRQS_READ, "hard-read")) +- return 0; +-#endif +- if (hardirq_verbose(hlock_class(this))) +- ret = 2; +- break; +- case LOCK_USED_IN_SOFTIRQ: +- if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS)) +- return 0; +- if (!valid_state(curr, this, new_bit, +- LOCK_ENABLED_SOFTIRQS_READ)) +- return 0; +- /* +- * just marked it softirq-safe, check that this lock +- * took no softirq-unsafe lock in the past: +- */ +- if (!check_usage_forwards(curr, this, +- LOCK_ENABLED_SOFTIRQS, "soft")) +- return 0; +-#if STRICT_READ_CHECKS +- /* +- * just marked it softirq-safe, check that this lock +- * took no softirq-unsafe-read lock in the past: +- */ +- if (!check_usage_forwards(curr, this, +- LOCK_ENABLED_SOFTIRQS_READ, "soft-read")) +- return 0; +-#endif +- if (softirq_verbose(hlock_class(this))) +- ret = 2; +- break; +- case LOCK_USED_IN_HARDIRQ_READ: +- if (!valid_state(curr, this, new_bit, LOCK_ENABLED_HARDIRQS)) +- return 0; +- /* +- * just marked it hardirq-read-safe, check that this lock +- * took no hardirq-unsafe lock in the past: +- */ +- if (!check_usage_forwards(curr, this, +- LOCK_ENABLED_HARDIRQS, "hard")) +- return 0; +- if (hardirq_verbose(hlock_class(this))) +- ret = 2; +- break; +- case LOCK_USED_IN_SOFTIRQ_READ: +- if (!valid_state(curr, this, new_bit, LOCK_ENABLED_SOFTIRQS)) +- return 0; +- /* +- * just marked it softirq-read-safe, check that this lock +- * took no softirq-unsafe lock in the past: +- */ +- if (!check_usage_forwards(curr, this, +- LOCK_ENABLED_SOFTIRQS, "soft")) +- return 0; +- if (softirq_verbose(hlock_class(this))) +- ret = 2; +- break; +- case LOCK_ENABLED_HARDIRQS: +- if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ)) +- return 0; +- if (!valid_state(curr, this, new_bit, +- LOCK_USED_IN_HARDIRQ_READ)) +- return 0; +- /* +- * just marked it hardirq-unsafe, check that no hardirq-safe +- * lock in the system ever took it in the past: +- */ +- if (!check_usage_backwards(curr, this, +- LOCK_USED_IN_HARDIRQ, "hard")) +- return 0; +-#if STRICT_READ_CHECKS +- /* +- * just marked it hardirq-unsafe, 
check that no +- * hardirq-safe-read lock in the system ever took +- * it in the past: +- */ +- if (!check_usage_backwards(curr, this, +- LOCK_USED_IN_HARDIRQ_READ, "hard-read")) +- return 0; +-#endif +- if (hardirq_verbose(hlock_class(this))) +- ret = 2; +- break; +- case LOCK_ENABLED_SOFTIRQS: +- if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ)) +- return 0; +- if (!valid_state(curr, this, new_bit, +- LOCK_USED_IN_SOFTIRQ_READ)) +- return 0; +- /* +- * just marked it softirq-unsafe, check that no softirq-safe +- * lock in the system ever took it in the past: +- */ +- if (!check_usage_backwards(curr, this, +- LOCK_USED_IN_SOFTIRQ, "soft")) +- return 0; +-#if STRICT_READ_CHECKS +- /* +- * just marked it softirq-unsafe, check that no +- * softirq-safe-read lock in the system ever took +- * it in the past: +- */ +- if (!check_usage_backwards(curr, this, +- LOCK_USED_IN_SOFTIRQ_READ, "soft-read")) +- return 0; +-#endif +- if (softirq_verbose(hlock_class(this))) +- ret = 2; +- break; +- case LOCK_ENABLED_HARDIRQS_READ: +- if (!valid_state(curr, this, new_bit, LOCK_USED_IN_HARDIRQ)) +- return 0; +-#if STRICT_READ_CHECKS +- /* +- * just marked it hardirq-read-unsafe, check that no +- * hardirq-safe lock in the system ever took it in the past: +- */ +- if (!check_usage_backwards(curr, this, +- LOCK_USED_IN_HARDIRQ, "hard")) +- return 0; +-#endif +- if (hardirq_verbose(hlock_class(this))) +- ret = 2; +- break; +- case LOCK_ENABLED_SOFTIRQS_READ: +- if (!valid_state(curr, this, new_bit, LOCK_USED_IN_SOFTIRQ)) ++ /* ++ * mark USED_IN has to look forwards -- to ensure no dependency ++ * has ENABLED state, which would allow recursion deadlocks. ++ * ++ * mark ENABLED has to look backwards -- to ensure no dependee ++ * has USED_IN state, which, again, would allow recursion deadlocks. ++ */ ++ check_usage_f usage = dir ? ++ check_usage_backwards : check_usage_forwards; ++ ++ /* ++ * Validate that this particular lock does not have conflicting ++ * usage states. ++ */ ++ if (!valid_state(curr, this, new_bit, excl_bit)) ++ return 0; ++ ++ /* ++ * Validate that the lock dependencies don't have conflicting usage ++ * states. 
++ */ ++ if ((!read || !dir || STRICT_READ_CHECKS) && ++ !usage(curr, this, excl_bit, state_name(new_bit & ~1))) ++ return 0; ++ ++ /* ++ * Check for read in write conflicts ++ */ ++ if (!read) { ++ if (!valid_state(curr, this, new_bit, excl_bit + 1)) + return 0; +-#if STRICT_READ_CHECKS +- /* +- * just marked it softirq-read-unsafe, check that no +- * softirq-safe lock in the system ever took it in the past: +- */ +- if (!check_usage_backwards(curr, this, +- LOCK_USED_IN_SOFTIRQ, "soft")) ++ ++ if (STRICT_READ_CHECKS && ++ !usage(curr, this, excl_bit + 1, ++ state_name(new_bit + 1))) + return 0; +-#endif +- if (softirq_verbose(hlock_class(this))) +- ret = 2; +- break; +- default: +- WARN_ON(1); +- break; + } + +- return ret; ++ if (state_verbose(new_bit, hlock_class(this))) ++ return 2; ++ ++ return 1; + } + ++enum mark_type { ++#define LOCKDEP_STATE(__STATE) __STATE, ++#include "lockdep_states.h" ++#undef LOCKDEP_STATE ++}; ++ + /* + * Mark all held locks with a usage bit: + */ + static int +-mark_held_locks(struct task_struct *curr, int hardirq) ++mark_held_locks(struct task_struct *curr, enum mark_type mark) + { + enum lock_usage_bit usage_bit; + struct held_lock *hlock; +@@ -2136,17 +2087,12 @@ mark_held_locks(struct task_struct *curr + for (i = 0; i < curr->lockdep_depth; i++) { + hlock = curr->held_locks + i; + +- if (hardirq) { +- if (hlock->read) +- usage_bit = LOCK_ENABLED_HARDIRQS_READ; +- else +- usage_bit = LOCK_ENABLED_HARDIRQS; +- } else { +- if (hlock->read) +- usage_bit = LOCK_ENABLED_SOFTIRQS_READ; +- else +- usage_bit = LOCK_ENABLED_SOFTIRQS; +- } ++ usage_bit = 2 + (mark << 2); /* ENABLED */ ++ if (hlock->read) ++ usage_bit += 1; /* READ */ ++ ++ BUG_ON(usage_bit >= LOCK_USAGE_STATES); ++ + if (!mark_lock(curr, hlock, usage_bit)) + return 0; + } +@@ -2200,7 +2146,7 @@ void trace_hardirqs_on_caller(unsigned l + * We are going to turn hardirqs on, so set the + * usage bit for all held locks: + */ +- if (!mark_held_locks(curr, 1)) ++ if (!mark_held_locks(curr, HARDIRQ)) + return; + /* + * If we have softirqs enabled, then set the usage +@@ -2208,7 +2154,7 @@ void trace_hardirqs_on_caller(unsigned l + * this bit from being set before) + */ + if (curr->softirqs_enabled) +- if (!mark_held_locks(curr, 0)) ++ if (!mark_held_locks(curr, SOFTIRQ)) + return; + + curr->hardirq_enable_ip = ip; +@@ -2288,7 +2234,7 @@ void trace_softirqs_on(unsigned long ip) + * enabled too: + */ + if (curr->hardirqs_enabled) +- mark_held_locks(curr, 0); ++ mark_held_locks(curr, SOFTIRQ); + } + + /* +@@ -2317,6 +2263,48 @@ void trace_softirqs_off(unsigned long ip + debug_atomic_inc(&redundant_softirqs_off); + } + ++static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags) ++{ ++ struct task_struct *curr = current; ++ ++ if (unlikely(!debug_locks)) ++ return; ++ ++ /* no reclaim without waiting on it */ ++ if (!(gfp_mask & __GFP_WAIT)) ++ return; ++ ++ /* this guy won't enter reclaim */ ++ if ((curr->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC)) ++ return; ++ ++ /* We're only interested __GFP_FS allocations for now */ ++ if (!(gfp_mask & __GFP_FS)) ++ return; ++ ++ if (DEBUG_LOCKS_WARN_ON(irqs_disabled_flags(flags))) ++ return; ++ ++ mark_held_locks(curr, RECLAIM_FS); ++} ++ ++static void check_flags(unsigned long flags); ++ ++void lockdep_trace_alloc(gfp_t gfp_mask) ++{ ++ unsigned long flags; ++ ++ if (unlikely(current->lockdep_recursion)) ++ return; ++ ++ raw_local_irq_save(flags); ++ check_flags(flags); ++ current->lockdep_recursion = 1; ++ __lockdep_trace_alloc(gfp_mask, 
flags); ++ current->lockdep_recursion = 0; ++ raw_local_irq_restore(flags); ++} ++ + static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) + { + /* +@@ -2345,19 +2333,35 @@ static int mark_irqflags(struct task_str + if (!hlock->hardirqs_off) { + if (hlock->read) { + if (!mark_lock(curr, hlock, +- LOCK_ENABLED_HARDIRQS_READ)) ++ LOCK_ENABLED_HARDIRQ_READ)) + return 0; + if (curr->softirqs_enabled) + if (!mark_lock(curr, hlock, +- LOCK_ENABLED_SOFTIRQS_READ)) ++ LOCK_ENABLED_SOFTIRQ_READ)) + return 0; + } else { + if (!mark_lock(curr, hlock, +- LOCK_ENABLED_HARDIRQS)) ++ LOCK_ENABLED_HARDIRQ)) + return 0; + if (curr->softirqs_enabled) + if (!mark_lock(curr, hlock, +- LOCK_ENABLED_SOFTIRQS)) ++ LOCK_ENABLED_SOFTIRQ)) ++ return 0; ++ } ++ } ++ ++ /* ++ * We reuse the irq context infrastructure more broadly as a general ++ * context checking code. This tests GFP_FS recursion (a lock taken ++ * during reclaim for a GFP_FS allocation is held over a GFP_FS ++ * allocation). ++ */ ++ if (!hlock->trylock && (curr->lockdep_reclaim_gfp & __GFP_FS)) { ++ if (hlock->read) { ++ if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS_READ)) ++ return 0; ++ } else { ++ if (!mark_lock(curr, hlock, LOCK_USED_IN_RECLAIM_FS)) + return 0; + } + } +@@ -2412,6 +2416,10 @@ static inline int separate_irq_context(s + return 0; + } + ++void lockdep_trace_alloc(gfp_t gfp_mask) ++{ ++} ++ + #endif + + /* +@@ -2445,14 +2453,13 @@ static int mark_lock(struct task_struct + return 0; + + switch (new_bit) { +- case LOCK_USED_IN_HARDIRQ: +- case LOCK_USED_IN_SOFTIRQ: +- case LOCK_USED_IN_HARDIRQ_READ: +- case LOCK_USED_IN_SOFTIRQ_READ: +- case LOCK_ENABLED_HARDIRQS: +- case LOCK_ENABLED_SOFTIRQS: +- case LOCK_ENABLED_HARDIRQS_READ: +- case LOCK_ENABLED_SOFTIRQS_READ: ++#define LOCKDEP_STATE(__STATE) \ ++ case LOCK_USED_IN_##__STATE: \ ++ case LOCK_USED_IN_##__STATE##_READ: \ ++ case LOCK_ENABLED_##__STATE: \ ++ case LOCK_ENABLED_##__STATE##_READ: ++#include "lockdep_states.h" ++#undef LOCKDEP_STATE + ret = mark_lock_irq(curr, this, new_bit); + if (!ret) + return 0; +@@ -2542,6 +2549,7 @@ static int __lock_acquire(struct lockdep + debug_locks_off(); + printk("BUG: MAX_LOCKDEP_SUBCLASSES too low!\n"); + printk("turning off the locking correctness validator.\n"); ++ dump_stack(); + return 0; + } + +@@ -2638,6 +2646,7 @@ static int __lock_acquire(struct lockdep + debug_locks_off(); + printk("BUG: MAX_LOCK_DEPTH too low!\n"); + printk("turning off the locking correctness validator.\n"); ++ dump_stack(); + return 0; + } + +@@ -2925,6 +2934,8 @@ void lock_set_class(struct lockdep_map * + } + EXPORT_SYMBOL_GPL(lock_set_class); + ++DEFINE_TRACE(lock_acquire); ++ + /* + * We are not always called with irqs disabled - do that here, + * and also avoid lockdep recursion: +@@ -2935,6 +2946,8 @@ void lock_acquire(struct lockdep_map *lo + { + unsigned long flags; + ++ trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); ++ + if (unlikely(current->lockdep_recursion)) + return; + +@@ -2949,11 +2962,15 @@ void lock_acquire(struct lockdep_map *lo + } + EXPORT_SYMBOL_GPL(lock_acquire); + ++DEFINE_TRACE(lock_release); ++ + void lock_release(struct lockdep_map *lock, int nested, + unsigned long ip) + { + unsigned long flags; + ++ trace_lock_release(lock, nested, ip); ++ + if (unlikely(current->lockdep_recursion)) + return; + +@@ -2966,6 +2983,16 @@ void lock_release(struct lockdep_map *lo + } + EXPORT_SYMBOL_GPL(lock_release); + ++void lockdep_set_current_reclaim_state(gfp_t gfp_mask) ++{ ++ 
current->lockdep_reclaim_gfp = gfp_mask; ++} ++ ++void lockdep_clear_current_reclaim_state(void) ++{ ++ current->lockdep_reclaim_gfp = 0; ++} ++ + #ifdef CONFIG_LOCK_STAT + static int + print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, +@@ -3092,10 +3119,14 @@ found_it: + lock->ip = ip; + } + ++DEFINE_TRACE(lock_contended); ++ + void lock_contended(struct lockdep_map *lock, unsigned long ip) + { + unsigned long flags; + ++ trace_lock_contended(lock, ip); ++ + if (unlikely(!lock_stat)) + return; + +@@ -3111,10 +3142,14 @@ void lock_contended(struct lockdep_map * + } + EXPORT_SYMBOL_GPL(lock_contended); + ++DEFINE_TRACE(lock_acquired); ++ + void lock_acquired(struct lockdep_map *lock, unsigned long ip) + { + unsigned long flags; + ++ trace_lock_acquired(lock, ip); ++ + if (unlikely(!lock_stat)) + return; + +Index: linux-2.6-tip/kernel/lockdep_internals.h +=================================================================== +--- linux-2.6-tip.orig/kernel/lockdep_internals.h ++++ linux-2.6-tip/kernel/lockdep_internals.h +@@ -7,6 +7,45 @@ + */ + + /* ++ * Lock-class usage-state bits: ++ */ ++enum lock_usage_bit { ++#define LOCKDEP_STATE(__STATE) \ ++ LOCK_USED_IN_##__STATE, \ ++ LOCK_USED_IN_##__STATE##_READ, \ ++ LOCK_ENABLED_##__STATE, \ ++ LOCK_ENABLED_##__STATE##_READ, ++#include "lockdep_states.h" ++#undef LOCKDEP_STATE ++ LOCK_USED, ++ LOCK_USAGE_STATES ++}; ++ ++/* ++ * Usage-state bitmasks: ++ */ ++#define __LOCKF(__STATE) LOCKF_##__STATE = (1 << LOCK_##__STATE), ++ ++enum { ++#define LOCKDEP_STATE(__STATE) \ ++ __LOCKF(USED_IN_##__STATE) \ ++ __LOCKF(USED_IN_##__STATE##_READ) \ ++ __LOCKF(ENABLED_##__STATE) \ ++ __LOCKF(ENABLED_##__STATE##_READ) ++#include "lockdep_states.h" ++#undef LOCKDEP_STATE ++ __LOCKF(USED) ++}; ++ ++#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ) ++#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) ++ ++#define LOCKF_ENABLED_IRQ_READ \ ++ (LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ) ++#define LOCKF_USED_IN_IRQ_READ \ ++ (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) ++ ++/* + * MAX_LOCKDEP_ENTRIES is the maximum number of lock dependencies + * we track. 
+ * +@@ -31,8 +70,10 @@ + extern struct list_head all_lock_classes; + extern struct lock_chain lock_chains[]; + +-extern void +-get_usage_chars(struct lock_class *class, char *c1, char *c2, char *c3, char *c4); ++#define LOCK_USAGE_CHARS (1+LOCK_USAGE_STATES/2) ++ ++extern void get_usage_chars(struct lock_class *class, ++ char usage[LOCK_USAGE_CHARS]); + + extern const char * __get_key_name(struct lockdep_subclass_key *key, char *str); + +Index: linux-2.6-tip/kernel/lockdep_proc.c +=================================================================== +--- linux-2.6-tip.orig/kernel/lockdep_proc.c ++++ linux-2.6-tip/kernel/lockdep_proc.c +@@ -84,7 +84,7 @@ static int l_show(struct seq_file *m, vo + { + struct lock_class *class = v; + struct lock_list *entry; +- char c1, c2, c3, c4; ++ char usage[LOCK_USAGE_CHARS]; + + if (v == SEQ_START_TOKEN) { + seq_printf(m, "all lock classes:\n"); +@@ -100,8 +100,8 @@ static int l_show(struct seq_file *m, vo + seq_printf(m, " BD:%5ld", lockdep_count_backward_deps(class)); + #endif + +- get_usage_chars(class, &c1, &c2, &c3, &c4); +- seq_printf(m, " %c%c%c%c", c1, c2, c3, c4); ++ get_usage_chars(class, usage); ++ seq_printf(m, " %s", usage); + + seq_printf(m, ": "); + print_name(m, class); +@@ -300,27 +300,27 @@ static int lockdep_stats_show(struct seq + nr_uncategorized++; + if (class->usage_mask & LOCKF_USED_IN_IRQ) + nr_irq_safe++; +- if (class->usage_mask & LOCKF_ENABLED_IRQS) ++ if (class->usage_mask & LOCKF_ENABLED_IRQ) + nr_irq_unsafe++; + if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ) + nr_softirq_safe++; +- if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS) ++ if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ) + nr_softirq_unsafe++; + if (class->usage_mask & LOCKF_USED_IN_HARDIRQ) + nr_hardirq_safe++; +- if (class->usage_mask & LOCKF_ENABLED_HARDIRQS) ++ if (class->usage_mask & LOCKF_ENABLED_HARDIRQ) + nr_hardirq_unsafe++; + if (class->usage_mask & LOCKF_USED_IN_IRQ_READ) + nr_irq_read_safe++; +- if (class->usage_mask & LOCKF_ENABLED_IRQS_READ) ++ if (class->usage_mask & LOCKF_ENABLED_IRQ_READ) + nr_irq_read_unsafe++; + if (class->usage_mask & LOCKF_USED_IN_SOFTIRQ_READ) + nr_softirq_read_safe++; +- if (class->usage_mask & LOCKF_ENABLED_SOFTIRQS_READ) ++ if (class->usage_mask & LOCKF_ENABLED_SOFTIRQ_READ) + nr_softirq_read_unsafe++; + if (class->usage_mask & LOCKF_USED_IN_HARDIRQ_READ) + nr_hardirq_read_safe++; +- if (class->usage_mask & LOCKF_ENABLED_HARDIRQS_READ) ++ if (class->usage_mask & LOCKF_ENABLED_HARDIRQ_READ) + nr_hardirq_read_unsafe++; + + #ifdef CONFIG_PROVE_LOCKING +@@ -601,6 +601,10 @@ static void seq_stats(struct seq_file *m + static void seq_header(struct seq_file *m) + { + seq_printf(m, "lock_stat version 0.3\n"); ++ ++ if (unlikely(!debug_locks)) ++ seq_printf(m, "*WARNING* lock debugging disabled!! - possibly due to a lockdep warning\n"); ++ + seq_line(m, '-', 0, 40 + 1 + 10 * (14 + 1)); + seq_printf(m, "%40s %14s %14s %14s %14s %14s %14s %14s %14s " + "%14s %14s\n", +Index: linux-2.6-tip/kernel/lockdep_states.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/lockdep_states.h +@@ -0,0 +1,9 @@ ++/* ++ * Lockdep states, ++ * ++ * please update XXX_LOCK_USAGE_STATES in include/linux/lockdep.h whenever ++ * you add one, or come up with a nice dynamic solution. 
++ */ ++LOCKDEP_STATE(HARDIRQ) ++LOCKDEP_STATE(SOFTIRQ) ++LOCKDEP_STATE(RECLAIM_FS) +Index: linux-2.6-tip/kernel/marker.c +=================================================================== +--- linux-2.6-tip.orig/kernel/marker.c ++++ linux-2.6-tip/kernel/marker.c +@@ -432,7 +432,7 @@ static int remove_marker(const char *nam + { + struct hlist_head *head; + struct hlist_node *node; +- struct marker_entry *e; ++ struct marker_entry *uninitialized_var(e); + int found = 0; + size_t len = strlen(name) + 1; + u32 hash = jhash(name, len-1, 0); +Index: linux-2.6-tip/kernel/module.c +=================================================================== +--- linux-2.6-tip.orig/kernel/module.c ++++ linux-2.6-tip/kernel/module.c +@@ -51,6 +51,7 @@ + #include + #include + #include ++#include + + #if 0 + #define DEBUGP printk +@@ -366,6 +367,34 @@ static struct module *find_module(const + } + + #ifdef CONFIG_SMP ++ ++#ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA ++ ++static void *percpu_modalloc(unsigned long size, unsigned long align, ++ const char *name) ++{ ++ void *ptr; ++ ++ if (align > PAGE_SIZE) { ++ printk(KERN_WARNING "%s: per-cpu alignment %li > %li\n", ++ name, align, PAGE_SIZE); ++ align = PAGE_SIZE; ++ } ++ ++ ptr = __alloc_reserved_percpu(size, align); ++ if (!ptr) ++ printk(KERN_WARNING ++ "Could not allocate %lu bytes percpu data\n", size); ++ return ptr; ++} ++ ++static void percpu_modfree(void *freeme) ++{ ++ free_percpu(freeme); ++} ++ ++#else /* ... !CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ ++ + /* Number of blocks used and allocated. */ + static unsigned int pcpu_num_used, pcpu_num_allocated; + /* Size of each block. -ve means used. */ +@@ -480,21 +509,6 @@ static void percpu_modfree(void *freeme) + } + } + +-static unsigned int find_pcpusec(Elf_Ehdr *hdr, +- Elf_Shdr *sechdrs, +- const char *secstrings) +-{ +- return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); +-} +- +-static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) +-{ +- int cpu; +- +- for_each_possible_cpu(cpu) +- memcpy(pcpudest + per_cpu_offset(cpu), from, size); +-} +- + static int percpu_modinit(void) + { + pcpu_num_used = 2; +@@ -513,7 +527,26 @@ static int percpu_modinit(void) + return 0; + } + __initcall(percpu_modinit); ++ ++#endif /* CONFIG_HAVE_DYNAMIC_PER_CPU_AREA */ ++ ++static unsigned int find_pcpusec(Elf_Ehdr *hdr, ++ Elf_Shdr *sechdrs, ++ const char *secstrings) ++{ ++ return find_sec(hdr, sechdrs, secstrings, ".data.percpu"); ++} ++ ++static void percpu_modcopy(void *pcpudest, const void *from, unsigned long size) ++{ ++ int cpu; ++ ++ for_each_possible_cpu(cpu) ++ memcpy(pcpudest + per_cpu_offset(cpu), from, size); ++} ++ + #else /* ... !CONFIG_SMP */ ++ + static inline void *percpu_modalloc(unsigned long size, unsigned long align, + const char *name) + { +@@ -535,6 +568,7 @@ static inline void percpu_modcopy(void * + /* pcpusec should be 0, and size of that section should be 0. */ + BUG_ON(size != 0); + } ++ + #endif /* CONFIG_SMP */ + + #define MODINFO_ATTR(field) \ +@@ -2288,8 +2322,8 @@ static noinline struct module *load_modu + ftrace_release(mod->module_core, mod->core_size); + free_unload: + module_unload_free(mod); +- free_init: + #if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP) ++ free_init: + percpu_modfree(mod->refptr); + #endif + module_free(mod, mod->module_init); +@@ -2737,7 +2771,7 @@ int is_module_address(unsigned long addr + + + /* Is this a valid kernel address? 
*/ +-__notrace_funcgraph struct module *__module_text_address(unsigned long addr) ++struct module *__module_text_address(unsigned long addr) + { + struct module *mod; + +Index: linux-2.6-tip/kernel/mutex-debug.c +=================================================================== +--- linux-2.6-tip.orig/kernel/mutex-debug.c ++++ linux-2.6-tip/kernel/mutex-debug.c +@@ -26,11 +26,6 @@ + /* + * Must be called with lock->wait_lock held. + */ +-void debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner) +-{ +- lock->owner = new_owner; +-} +- + void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) + { + memset(waiter, MUTEX_DEBUG_INIT, sizeof(*waiter)); +@@ -59,7 +54,6 @@ void debug_mutex_add_waiter(struct mutex + + /* Mark the current thread as blocked on the lock: */ + ti->task->blocked_on = waiter; +- waiter->lock = lock; + } + + void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, +@@ -82,7 +76,7 @@ void debug_mutex_unlock(struct mutex *lo + DEBUG_LOCKS_WARN_ON(lock->magic != lock); + DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); + DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); +- DEBUG_LOCKS_WARN_ON(lock->owner != current_thread_info()); ++ mutex_clear_owner(lock); + } + + void debug_mutex_init(struct mutex *lock, const char *name, +@@ -95,7 +89,6 @@ void debug_mutex_init(struct mutex *lock + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lockdep_init_map(&lock->dep_map, name, key, 0); + #endif +- lock->owner = NULL; + lock->magic = lock; + } + +Index: linux-2.6-tip/kernel/mutex-debug.h +=================================================================== +--- linux-2.6-tip.orig/kernel/mutex-debug.h ++++ linux-2.6-tip/kernel/mutex-debug.h +@@ -13,14 +13,6 @@ + /* + * This must be called with lock->wait_lock held. + */ +-extern void +-debug_mutex_set_owner(struct mutex *lock, struct thread_info *new_owner); +- +-static inline void debug_mutex_clear_owner(struct mutex *lock) +-{ +- lock->owner = NULL; +-} +- + extern void debug_mutex_lock_common(struct mutex *lock, + struct mutex_waiter *waiter); + extern void debug_mutex_wake_waiter(struct mutex *lock, +@@ -35,6 +27,16 @@ extern void debug_mutex_unlock(struct mu + extern void debug_mutex_init(struct mutex *lock, const char *name, + struct lock_class_key *key); + ++static inline void mutex_set_owner(struct mutex *lock) ++{ ++ lock->owner = current_thread_info(); ++} ++ ++static inline void mutex_clear_owner(struct mutex *lock) ++{ ++ lock->owner = NULL; ++} ++ + #define spin_lock_mutex(lock, flags) \ + do { \ + struct mutex *l = container_of(lock, struct mutex, wait_lock); \ +Index: linux-2.6-tip/kernel/mutex.c +=================================================================== +--- linux-2.6-tip.orig/kernel/mutex.c ++++ linux-2.6-tip/kernel/mutex.c +@@ -10,6 +10,11 @@ + * Many thanks to Arjan van de Ven, Thomas Gleixner, Steven Rostedt and + * David Howells for suggestions and improvements. + * ++ * - Adaptive spinning for mutexes by Peter Zijlstra. (Ported to mainline ++ * from the -rt tree, where it was originally implemented for rtmutexes ++ * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale ++ * and Sven Dietrich. ++ * + * Also see Documentation/mutex-design.txt. 
+ */ + #include +@@ -46,6 +51,7 @@ __mutex_init(struct mutex *lock, const c + atomic_set(&lock->count, 1); + spin_lock_init(&lock->wait_lock); + INIT_LIST_HEAD(&lock->wait_list); ++ mutex_clear_owner(lock); + + debug_mutex_init(lock, name, key); + } +@@ -91,6 +97,7 @@ void inline __sched mutex_lock(struct mu + * 'unlocked' into 'locked' state. + */ + __mutex_fastpath_lock(&lock->count, __mutex_lock_slowpath); ++ mutex_set_owner(lock); + } + + EXPORT_SYMBOL(mutex_lock); +@@ -115,6 +122,14 @@ void __sched mutex_unlock(struct mutex * + * The unlocking fastpath is the 0->1 transition from 'locked' + * into 'unlocked' state: + */ ++#ifndef CONFIG_DEBUG_MUTEXES ++ /* ++ * When debugging is enabled we must not clear the owner before time, ++ * the slow path will always be taken, and that clears the owner field ++ * after verifying that it was indeed current. ++ */ ++ mutex_clear_owner(lock); ++#endif + __mutex_fastpath_unlock(&lock->count, __mutex_unlock_slowpath); + } + +@@ -129,21 +144,75 @@ __mutex_lock_common(struct mutex *lock, + { + struct task_struct *task = current; + struct mutex_waiter waiter; +- unsigned int old_val; + unsigned long flags; + ++ preempt_disable(); ++ mutex_acquire(&lock->dep_map, subclass, 0, ip); ++#if defined(CONFIG_SMP) && !defined(CONFIG_DEBUG_MUTEXES) ++ /* ++ * Optimistic spinning. ++ * ++ * We try to spin for acquisition when we find that there are no ++ * pending waiters and the lock owner is currently running on a ++ * (different) CPU. ++ * ++ * The rationale is that if the lock owner is running, it is likely to ++ * release the lock soon. ++ * ++ * Since this needs the lock owner, and this mutex implementation ++ * doesn't track the owner atomically in the lock field, we need to ++ * track it non-atomically. ++ * ++ * We can't do this for DEBUG_MUTEXES because that relies on wait_lock ++ * to serialize everything. ++ */ ++ ++ for (;;) { ++ struct thread_info *owner; ++ ++ /* ++ * If there's an owner, wait for it to either ++ * release the lock or go to sleep. ++ */ ++ owner = ACCESS_ONCE(lock->owner); ++ if (owner && !mutex_spin_on_owner(lock, owner)) ++ break; ++ ++ if (atomic_cmpxchg(&lock->count, 1, 0) == 1) { ++ lock_acquired(&lock->dep_map, ip); ++ mutex_set_owner(lock); ++ preempt_enable(); ++ return 0; ++ } ++ ++ /* ++ * When there's no owner, we might have preempted between the ++ * owner acquiring the lock and setting the owner field. If ++ * we're an RT task that will live-lock because we won't let ++ * the owner complete. ++ */ ++ if (!owner && (need_resched() || rt_task(task))) ++ break; ++ ++ /* ++ * The cpu_relax() call is a compiler barrier which forces ++ * everything in this loop to be re-loaded. We don't need ++ * memory barriers as we'll eventually observe the right ++ * values at the cost of a few extra spins. 
++ */ ++ cpu_relax(); ++ } ++#endif + spin_lock_mutex(&lock->wait_lock, flags); + + debug_mutex_lock_common(lock, &waiter); +- mutex_acquire(&lock->dep_map, subclass, 0, ip); + debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); + + /* add waiting tasks to the end of the waitqueue (FIFO): */ + list_add_tail(&waiter.list, &lock->wait_list); + waiter.task = task; + +- old_val = atomic_xchg(&lock->count, -1); +- if (old_val == 1) ++ if (atomic_xchg(&lock->count, -1) == 1) + goto done; + + lock_contended(&lock->dep_map, ip); +@@ -158,8 +227,7 @@ __mutex_lock_common(struct mutex *lock, + * that when we release the lock, we properly wake up the + * other waiters: + */ +- old_val = atomic_xchg(&lock->count, -1); +- if (old_val == 1) ++ if (atomic_xchg(&lock->count, -1) == 1) + break; + + /* +@@ -173,21 +241,28 @@ __mutex_lock_common(struct mutex *lock, + spin_unlock_mutex(&lock->wait_lock, flags); + + debug_mutex_free_waiter(&waiter); ++ preempt_enable(); + return -EINTR; + } + __set_task_state(task, state); + + /* didnt get the lock, go to sleep: */ + spin_unlock_mutex(&lock->wait_lock, flags); +- schedule(); ++ ++ local_irq_disable(); ++ __preempt_enable_no_resched(); ++ __schedule(); ++ preempt_disable(); ++ local_irq_enable(); ++ + spin_lock_mutex(&lock->wait_lock, flags); + } + + done: + lock_acquired(&lock->dep_map, ip); + /* got the lock - rejoice! */ +- mutex_remove_waiter(lock, &waiter, task_thread_info(task)); +- debug_mutex_set_owner(lock, task_thread_info(task)); ++ mutex_remove_waiter(lock, &waiter, current_thread_info()); ++ mutex_set_owner(lock); + + /* set it to 0 if there are no waiters left: */ + if (likely(list_empty(&lock->wait_list))) +@@ -196,6 +271,7 @@ done: + spin_unlock_mutex(&lock->wait_lock, flags); + + debug_mutex_free_waiter(&waiter); ++ preempt_enable(); + + return 0; + } +@@ -222,7 +298,8 @@ int __sched + mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass) + { + might_sleep(); +- return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, subclass, _RET_IP_); ++ return __mutex_lock_common(lock, TASK_INTERRUPTIBLE, ++ subclass, _RET_IP_); + } + + EXPORT_SYMBOL_GPL(mutex_lock_interruptible_nested); +@@ -260,8 +337,6 @@ __mutex_unlock_common_slowpath(atomic_t + wake_up_process(waiter->task); + } + +- debug_mutex_clear_owner(lock); +- + spin_unlock_mutex(&lock->wait_lock, flags); + } + +@@ -298,18 +373,30 @@ __mutex_lock_interruptible_slowpath(atom + */ + int __sched mutex_lock_interruptible(struct mutex *lock) + { ++ int ret; ++ + might_sleep(); +- return __mutex_fastpath_lock_retval ++ ret = __mutex_fastpath_lock_retval + (&lock->count, __mutex_lock_interruptible_slowpath); ++ if (!ret) ++ mutex_set_owner(lock); ++ ++ return ret; + } + + EXPORT_SYMBOL(mutex_lock_interruptible); + + int __sched mutex_lock_killable(struct mutex *lock) + { ++ int ret; ++ + might_sleep(); +- return __mutex_fastpath_lock_retval ++ ret = __mutex_fastpath_lock_retval + (&lock->count, __mutex_lock_killable_slowpath); ++ if (!ret) ++ mutex_set_owner(lock); ++ ++ return ret; + } + EXPORT_SYMBOL(mutex_lock_killable); + +@@ -352,9 +439,10 @@ static inline int __mutex_trylock_slowpa + + prev = atomic_xchg(&lock->count, -1); + if (likely(prev == 1)) { +- debug_mutex_set_owner(lock, current_thread_info()); ++ mutex_set_owner(lock); + mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); + } ++ + /* Set it back to 0 if there are no waiters: */ + if (likely(list_empty(&lock->wait_list))) + atomic_set(&lock->count, 0); +@@ -380,8 +468,13 @@ static inline int __mutex_trylock_slowpa 
+ */ + int __sched mutex_trylock(struct mutex *lock) + { +- return __mutex_fastpath_trylock(&lock->count, +- __mutex_trylock_slowpath); ++ int ret; ++ ++ ret = __mutex_fastpath_trylock(&lock->count, __mutex_trylock_slowpath); ++ if (ret) ++ mutex_set_owner(lock); ++ ++ return ret; + } + + EXPORT_SYMBOL(mutex_trylock); +Index: linux-2.6-tip/kernel/mutex.h +=================================================================== +--- linux-2.6-tip.orig/kernel/mutex.h ++++ linux-2.6-tip/kernel/mutex.h +@@ -16,8 +16,26 @@ + #define mutex_remove_waiter(lock, waiter, ti) \ + __list_del((waiter)->list.prev, (waiter)->list.next) + +-#define debug_mutex_set_owner(lock, new_owner) do { } while (0) +-#define debug_mutex_clear_owner(lock) do { } while (0) ++#ifdef CONFIG_SMP ++static inline void mutex_set_owner(struct mutex *lock) ++{ ++ lock->owner = current_thread_info(); ++} ++ ++static inline void mutex_clear_owner(struct mutex *lock) ++{ ++ lock->owner = NULL; ++} ++#else ++static inline void mutex_set_owner(struct mutex *lock) ++{ ++} ++ ++static inline void mutex_clear_owner(struct mutex *lock) ++{ ++} ++#endif ++ + #define debug_mutex_wake_waiter(lock, waiter) do { } while (0) + #define debug_mutex_free_waiter(waiter) do { } while (0) + #define debug_mutex_add_waiter(lock, waiter, ti) do { } while (0) +Index: linux-2.6-tip/kernel/panic.c +=================================================================== +--- linux-2.6-tip.orig/kernel/panic.c ++++ linux-2.6-tip/kernel/panic.c +@@ -8,19 +8,19 @@ + * This function is used through-out the kernel (including mm and fs) + * to indicate a major problem. + */ ++#include ++#include ++#include ++#include + #include +-#include +-#include ++#include + #include +-#include +-#include ++#include ++#include ++#include + #include +-#include ++#include + #include +-#include +-#include +-#include +-#include + #include + + int panic_on_oops; +@@ -52,19 +52,15 @@ EXPORT_SYMBOL(panic_blink); + * + * This function never returns. + */ +- + NORET_TYPE void panic(const char * fmt, ...) + { +- long i; + static char buf[1024]; + va_list args; +-#if defined(CONFIG_S390) +- unsigned long caller = (unsigned long) __builtin_return_address(0); +-#endif ++ long i; + + /* +- * It's possible to come here directly from a panic-assertion and not +- * have preempt disabled. Some functions called from here want ++ * It's possible to come here directly from a panic-assertion and ++ * not have preempt disabled. Some functions called from here want + * preempt to be disabled. No point enabling it later though... + */ + preempt_disable(); +@@ -74,7 +70,9 @@ NORET_TYPE void panic(const char * fmt, + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf); +- bust_spinlocks(0); ++#ifdef CONFIG_DEBUG_BUGVERBOSE ++ dump_stack(); ++#endif + + /* + * If we have crashed and we have a crash kernel loaded let it handle +@@ -83,14 +81,12 @@ NORET_TYPE void panic(const char * fmt, + */ + crash_kexec(NULL); + +-#ifdef CONFIG_SMP + /* + * Note smp_send_stop is the usual smp shutdown function, which + * unfortunately means it may not be hardened to work in a panic + * situation. + */ + smp_send_stop(); +-#endif + + atomic_notifier_call_chain(&panic_notifier_list, 0, buf); + +@@ -99,19 +95,21 @@ NORET_TYPE void panic(const char * fmt, + + if (panic_timeout > 0) { + /* +- * Delay timeout seconds before rebooting the machine. +- * We can't use the "normal" timers since we just panicked.. 
+- */ +- printk(KERN_EMERG "Rebooting in %d seconds..",panic_timeout); ++ * Delay timeout seconds before rebooting the machine. ++ * We can't use the "normal" timers since we just panicked. ++ */ ++ printk(KERN_EMERG "Rebooting in %d seconds..", panic_timeout); ++ + for (i = 0; i < panic_timeout*1000; ) { + touch_nmi_watchdog(); + i += panic_blink(i); + mdelay(1); + i++; + } +- /* This will not be a clean reboot, with everything +- * shutting down. But if there is a chance of +- * rebooting the system it will be rebooted. ++ /* ++ * This will not be a clean reboot, with everything ++ * shutting down. But if there is a chance of ++ * rebooting the system it will be rebooted. + */ + emergency_restart(); + } +@@ -124,38 +122,44 @@ NORET_TYPE void panic(const char * fmt, + } + #endif + #if defined(CONFIG_S390) +- disabled_wait(caller); ++ { ++ unsigned long caller; ++ ++ caller = (unsigned long)__builtin_return_address(0); ++ disabled_wait(caller); ++ } + #endif + local_irq_enable(); +- for (i = 0;;) { ++ for (i = 0; ; ) { + touch_softlockup_watchdog(); + i += panic_blink(i); + mdelay(1); + i++; + } ++ bust_spinlocks(0); + } + + EXPORT_SYMBOL(panic); + + + struct tnt { +- u8 bit; +- char true; +- char false; ++ u8 bit; ++ char true; ++ char false; + }; + + static const struct tnt tnts[] = { +- { TAINT_PROPRIETARY_MODULE, 'P', 'G' }, +- { TAINT_FORCED_MODULE, 'F', ' ' }, +- { TAINT_UNSAFE_SMP, 'S', ' ' }, +- { TAINT_FORCED_RMMOD, 'R', ' ' }, +- { TAINT_MACHINE_CHECK, 'M', ' ' }, +- { TAINT_BAD_PAGE, 'B', ' ' }, +- { TAINT_USER, 'U', ' ' }, +- { TAINT_DIE, 'D', ' ' }, +- { TAINT_OVERRIDDEN_ACPI_TABLE, 'A', ' ' }, +- { TAINT_WARN, 'W', ' ' }, +- { TAINT_CRAP, 'C', ' ' }, ++ { TAINT_PROPRIETARY_MODULE, 'P', 'G' }, ++ { TAINT_FORCED_MODULE, 'F', ' ' }, ++ { TAINT_UNSAFE_SMP, 'S', ' ' }, ++ { TAINT_FORCED_RMMOD, 'R', ' ' }, ++ { TAINT_MACHINE_CHECK, 'M', ' ' }, ++ { TAINT_BAD_PAGE, 'B', ' ' }, ++ { TAINT_USER, 'U', ' ' }, ++ { TAINT_DIE, 'D', ' ' }, ++ { TAINT_OVERRIDDEN_ACPI_TABLE, 'A', ' ' }, ++ { TAINT_WARN, 'W', ' ' }, ++ { TAINT_CRAP, 'C', ' ' }, + }; + + /** +@@ -192,7 +196,8 @@ const char *print_tainted(void) + *s = 0; + } else + snprintf(buf, sizeof(buf), "Not tainted"); +- return(buf); ++ ++ return buf; + } + + int test_taint(unsigned flag) +@@ -208,7 +213,8 @@ unsigned long get_taint(void) + + void add_taint(unsigned flag) + { +- debug_locks = 0; /* can't trust the integrity of the kernel anymore */ ++ /* can't trust the integrity of the kernel anymore: */ ++ debug_locks = 0; + set_bit(flag, &tainted_mask); + } + EXPORT_SYMBOL(add_taint); +@@ -263,8 +269,8 @@ static void do_oops_enter_exit(void) + } + + /* +- * Return true if the calling CPU is allowed to print oops-related info. This +- * is a bit racy.. ++ * Return true if the calling CPU is allowed to print oops-related info. ++ * This is a bit racy.. + */ + int oops_may_print(void) + { +@@ -273,20 +279,23 @@ int oops_may_print(void) + + /* + * Called when the architecture enters its oops handler, before it prints +- * anything. If this is the first CPU to oops, and it's oopsing the first time +- * then let it proceed. ++ * anything. If this is the first CPU to oops, and it's oopsing the first ++ * time then let it proceed. + * +- * This is all enabled by the pause_on_oops kernel boot option. We do all this +- * to ensure that oopses don't scroll off the screen. It has the side-effect +- * of preventing later-oopsing CPUs from mucking up the display, too. ++ * This is all enabled by the pause_on_oops kernel boot option. 
We do all ++ * this to ensure that oopses don't scroll off the screen. It has the ++ * side-effect of preventing later-oopsing CPUs from mucking up the display, ++ * too. + * +- * It turns out that the CPU which is allowed to print ends up pausing for the +- * right duration, whereas all the other CPUs pause for twice as long: once in +- * oops_enter(), once in oops_exit(). ++ * It turns out that the CPU which is allowed to print ends up pausing for ++ * the right duration, whereas all the other CPUs pause for twice as long: ++ * once in oops_enter(), once in oops_exit(). + */ + void oops_enter(void) + { +- debug_locks_off(); /* can't trust the integrity of the kernel anymore */ ++ tracing_off(); ++ /* can't trust the integrity of the kernel anymore: */ ++ debug_locks_off(); + do_oops_enter_exit(); + } + +@@ -355,15 +364,18 @@ EXPORT_SYMBOL(warn_slowpath); + #endif + + #ifdef CONFIG_CC_STACKPROTECTOR ++ + /* + * Called when gcc's -fstack-protector feature is used, and + * gcc detects corruption of the on-stack canary value + */ + void __stack_chk_fail(void) + { +- panic("stack-protector: Kernel stack is corrupted"); ++ panic("stack-protector: Kernel stack is corrupted in: %p\n", ++ __builtin_return_address(0)); + } + EXPORT_SYMBOL(__stack_chk_fail); ++ + #endif + + core_param(panic, panic_timeout, int, 0644); +Index: linux-2.6-tip/kernel/perf_counter.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/perf_counter.c +@@ -0,0 +1,2787 @@ ++/* ++ * Performance counter core code ++ * ++ * Copyright(C) 2008 Thomas Gleixner ++ * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar ++ * ++ * ++ * For licensing details see kernel-base/COPYING ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++/* ++ * Each CPU has a list of per CPU counters: ++ */ ++DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); ++ ++int perf_max_counters __read_mostly = 1; ++static int perf_reserved_percpu __read_mostly; ++static int perf_overcommit __read_mostly = 1; ++ ++/* ++ * Mutex for (sysadmin-configurable) counter reservations: ++ */ ++static DEFINE_MUTEX(perf_resource_mutex); ++ ++/* ++ * Architecture provided APIs - weak aliases: ++ */ ++extern __weak const struct hw_perf_counter_ops * ++hw_perf_counter_init(struct perf_counter *counter) ++{ ++ return NULL; ++} ++ ++u64 __weak hw_perf_save_disable(void) { return 0; } ++void __weak hw_perf_restore(u64 ctrl) { barrier(); } ++void __weak hw_perf_counter_setup(int cpu) { barrier(); } ++int __weak hw_perf_group_sched_in(struct perf_counter *group_leader, ++ struct perf_cpu_context *cpuctx, ++ struct perf_counter_context *ctx, int cpu) ++{ ++ return 0; ++} ++ ++void __weak perf_counter_print_debug(void) { } ++ ++static void ++list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx) ++{ ++ struct perf_counter *group_leader = counter->group_leader; ++ ++ /* ++ * Depending on whether it is a standalone or sibling counter, ++ * add it straight to the context's counter list, or to the group ++ * leader's sibling list: ++ */ ++ if (counter->group_leader == counter) ++ list_add_tail(&counter->list_entry, &ctx->counter_list); ++ else { ++ list_add_tail(&counter->list_entry, &group_leader->sibling_list); ++ group_leader->nr_siblings++; ++ } ++ ++ list_add_rcu(&counter->event_entry, &ctx->event_list); ++} ++ ++static void 
++list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx) ++{ ++ struct perf_counter *sibling, *tmp; ++ ++ list_del_init(&counter->list_entry); ++ list_del_rcu(&counter->event_entry); ++ ++ if (counter->group_leader != counter) ++ counter->group_leader->nr_siblings--; ++ ++ /* ++ * If this was a group counter with sibling counters then ++ * upgrade the siblings to singleton counters by adding them ++ * to the context list directly: ++ */ ++ list_for_each_entry_safe(sibling, tmp, ++ &counter->sibling_list, list_entry) { ++ ++ list_move_tail(&sibling->list_entry, &ctx->counter_list); ++ sibling->group_leader = sibling; ++ } ++} ++ ++static void ++counter_sched_out(struct perf_counter *counter, ++ struct perf_cpu_context *cpuctx, ++ struct perf_counter_context *ctx) ++{ ++ if (counter->state != PERF_COUNTER_STATE_ACTIVE) ++ return; ++ ++ counter->state = PERF_COUNTER_STATE_INACTIVE; ++ counter->tstamp_stopped = ctx->time_now; ++ counter->hw_ops->disable(counter); ++ counter->oncpu = -1; ++ ++ if (!is_software_counter(counter)) ++ cpuctx->active_oncpu--; ++ ctx->nr_active--; ++ if (counter->hw_event.exclusive || !cpuctx->active_oncpu) ++ cpuctx->exclusive = 0; ++} ++ ++static void ++group_sched_out(struct perf_counter *group_counter, ++ struct perf_cpu_context *cpuctx, ++ struct perf_counter_context *ctx) ++{ ++ struct perf_counter *counter; ++ ++ if (group_counter->state != PERF_COUNTER_STATE_ACTIVE) ++ return; ++ ++ counter_sched_out(group_counter, cpuctx, ctx); ++ ++ /* ++ * Schedule out siblings (if any): ++ */ ++ list_for_each_entry(counter, &group_counter->sibling_list, list_entry) ++ counter_sched_out(counter, cpuctx, ctx); ++ ++ if (group_counter->hw_event.exclusive) ++ cpuctx->exclusive = 0; ++} ++ ++/* ++ * Cross CPU call to remove a performance counter ++ * ++ * We disable the counter on the hardware level first. After that we ++ * remove it from the context list. ++ */ ++static void __perf_counter_remove_from_context(void *info) ++{ ++ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); ++ struct perf_counter *counter = info; ++ struct perf_counter_context *ctx = counter->ctx; ++ unsigned long flags; ++ u64 perf_flags; ++ ++ /* ++ * If this is a task context, we need to check whether it is ++ * the current task context of this cpu. If not it has been ++ * scheduled out before the smp call arrived. ++ */ ++ if (ctx->task && cpuctx->task_ctx != ctx) ++ return; ++ ++ curr_rq_lock_irq_save(&flags); ++ spin_lock(&ctx->lock); ++ ++ counter_sched_out(counter, cpuctx, ctx); ++ ++ counter->task = NULL; ++ ctx->nr_counters--; ++ ++ /* ++ * Protect the list operation against NMI by disabling the ++ * counters on a global level. NOP for non NMI based counters. ++ */ ++ perf_flags = hw_perf_save_disable(); ++ list_del_counter(counter, ctx); ++ hw_perf_restore(perf_flags); ++ ++ if (!ctx->task) { ++ /* ++ * Allow more per task counters with respect to the ++ * reservation: ++ */ ++ cpuctx->max_pertask = ++ min(perf_max_counters - ctx->nr_counters, ++ perf_max_counters - perf_reserved_percpu); ++ } ++ ++ spin_unlock(&ctx->lock); ++ curr_rq_unlock_irq_restore(&flags); ++} ++ ++ ++/* ++ * Remove the counter from a task's (or a CPU's) list of counters. ++ * ++ * Must be called with counter->mutex and ctx->mutex held. ++ * ++ * CPU counters are removed with a smp call. For task counters we only ++ * call when the task is on a CPU. 
++ */ ++static void perf_counter_remove_from_context(struct perf_counter *counter) ++{ ++ struct perf_counter_context *ctx = counter->ctx; ++ struct task_struct *task = ctx->task; ++ ++ if (!task) { ++ /* ++ * Per cpu counters are removed via an smp call and ++ * the removal is always sucessful. ++ */ ++ smp_call_function_single(counter->cpu, ++ __perf_counter_remove_from_context, ++ counter, 1); ++ return; ++ } ++ ++retry: ++ task_oncpu_function_call(task, __perf_counter_remove_from_context, ++ counter); ++ ++ spin_lock_irq(&ctx->lock); ++ /* ++ * If the context is active we need to retry the smp call. ++ */ ++ if (ctx->nr_active && !list_empty(&counter->list_entry)) { ++ spin_unlock_irq(&ctx->lock); ++ goto retry; ++ } ++ ++ /* ++ * The lock prevents that this context is scheduled in so we ++ * can remove the counter safely, if the call above did not ++ * succeed. ++ */ ++ if (!list_empty(&counter->list_entry)) { ++ ctx->nr_counters--; ++ list_del_counter(counter, ctx); ++ counter->task = NULL; ++ } ++ spin_unlock_irq(&ctx->lock); ++} ++ ++/* ++ * Get the current time for this context. ++ * If this is a task context, we use the task's task clock, ++ * or for a per-cpu context, we use the cpu clock. ++ */ ++static u64 get_context_time(struct perf_counter_context *ctx, int update) ++{ ++ struct task_struct *curr = ctx->task; ++ ++ if (!curr) ++ return cpu_clock(smp_processor_id()); ++ ++ return __task_delta_exec(curr, update) + curr->se.sum_exec_runtime; ++} ++ ++/* ++ * Update the record of the current time in a context. ++ */ ++static void update_context_time(struct perf_counter_context *ctx, int update) ++{ ++ ctx->time_now = get_context_time(ctx, update) - ctx->time_lost; ++} ++ ++/* ++ * Update the total_time_enabled and total_time_running fields for a counter. ++ */ ++static void update_counter_times(struct perf_counter *counter) ++{ ++ struct perf_counter_context *ctx = counter->ctx; ++ u64 run_end; ++ ++ if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { ++ counter->total_time_enabled = ctx->time_now - ++ counter->tstamp_enabled; ++ if (counter->state == PERF_COUNTER_STATE_INACTIVE) ++ run_end = counter->tstamp_stopped; ++ else ++ run_end = ctx->time_now; ++ counter->total_time_running = run_end - counter->tstamp_running; ++ } ++} ++ ++/* ++ * Update total_time_enabled and total_time_running for all counters in a group. ++ */ ++static void update_group_times(struct perf_counter *leader) ++{ ++ struct perf_counter *counter; ++ ++ update_counter_times(leader); ++ list_for_each_entry(counter, &leader->sibling_list, list_entry) ++ update_counter_times(counter); ++} ++ ++/* ++ * Cross CPU call to disable a performance counter ++ */ ++static void __perf_counter_disable(void *info) ++{ ++ struct perf_counter *counter = info; ++ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); ++ struct perf_counter_context *ctx = counter->ctx; ++ unsigned long flags; ++ ++ /* ++ * If this is a per-task counter, need to check whether this ++ * counter's task is the current task on this cpu. ++ */ ++ if (ctx->task && cpuctx->task_ctx != ctx) ++ return; ++ ++ curr_rq_lock_irq_save(&flags); ++ spin_lock(&ctx->lock); ++ ++ /* ++ * If the counter is on, turn it off. ++ * If it is in error state, leave it in error state. 
++ */ ++ if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { ++ update_context_time(ctx, 1); ++ update_counter_times(counter); ++ if (counter == counter->group_leader) ++ group_sched_out(counter, cpuctx, ctx); ++ else ++ counter_sched_out(counter, cpuctx, ctx); ++ counter->state = PERF_COUNTER_STATE_OFF; ++ } ++ ++ spin_unlock(&ctx->lock); ++ curr_rq_unlock_irq_restore(&flags); ++} ++ ++/* ++ * Disable a counter. ++ */ ++static void perf_counter_disable(struct perf_counter *counter) ++{ ++ struct perf_counter_context *ctx = counter->ctx; ++ struct task_struct *task = ctx->task; ++ ++ if (!task) { ++ /* ++ * Disable the counter on the cpu that it's on ++ */ ++ smp_call_function_single(counter->cpu, __perf_counter_disable, ++ counter, 1); ++ return; ++ } ++ ++ retry: ++ task_oncpu_function_call(task, __perf_counter_disable, counter); ++ ++ spin_lock_irq(&ctx->lock); ++ /* ++ * If the counter is still active, we need to retry the cross-call. ++ */ ++ if (counter->state == PERF_COUNTER_STATE_ACTIVE) { ++ spin_unlock_irq(&ctx->lock); ++ goto retry; ++ } ++ ++ /* ++ * Since we have the lock this context can't be scheduled ++ * in, so we can change the state safely. ++ */ ++ if (counter->state == PERF_COUNTER_STATE_INACTIVE) { ++ update_counter_times(counter); ++ counter->state = PERF_COUNTER_STATE_OFF; ++ } ++ ++ spin_unlock_irq(&ctx->lock); ++} ++ ++/* ++ * Disable a counter and all its children. ++ */ ++static void perf_counter_disable_family(struct perf_counter *counter) ++{ ++ struct perf_counter *child; ++ ++ perf_counter_disable(counter); ++ ++ /* ++ * Lock the mutex to protect the list of children ++ */ ++ mutex_lock(&counter->mutex); ++ list_for_each_entry(child, &counter->child_list, child_list) ++ perf_counter_disable(child); ++ mutex_unlock(&counter->mutex); ++} ++ ++static int ++counter_sched_in(struct perf_counter *counter, ++ struct perf_cpu_context *cpuctx, ++ struct perf_counter_context *ctx, ++ int cpu) ++{ ++ if (counter->state <= PERF_COUNTER_STATE_OFF) ++ return 0; ++ ++ counter->state = PERF_COUNTER_STATE_ACTIVE; ++ counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */ ++ /* ++ * The new state must be visible before we turn it on in the hardware: ++ */ ++ smp_wmb(); ++ ++ if (counter->hw_ops->enable(counter)) { ++ counter->state = PERF_COUNTER_STATE_INACTIVE; ++ counter->oncpu = -1; ++ return -EAGAIN; ++ } ++ ++ counter->tstamp_running += ctx->time_now - counter->tstamp_stopped; ++ ++ if (!is_software_counter(counter)) ++ cpuctx->active_oncpu++; ++ ctx->nr_active++; ++ ++ if (counter->hw_event.exclusive) ++ cpuctx->exclusive = 1; ++ ++ return 0; ++} ++ ++/* ++ * Return 1 for a group consisting entirely of software counters, ++ * 0 if the group contains any hardware counters. ++ */ ++static int is_software_only_group(struct perf_counter *leader) ++{ ++ struct perf_counter *counter; ++ ++ if (!is_software_counter(leader)) ++ return 0; ++ ++ list_for_each_entry(counter, &leader->sibling_list, list_entry) ++ if (!is_software_counter(counter)) ++ return 0; ++ ++ return 1; ++} ++ ++/* ++ * Work out whether we can put this counter group on the CPU now. ++ */ ++static int group_can_go_on(struct perf_counter *counter, ++ struct perf_cpu_context *cpuctx, ++ int can_add_hw) ++{ ++ /* ++ * Groups consisting entirely of software counters can always go on. ++ */ ++ if (is_software_only_group(counter)) ++ return 1; ++ /* ++ * If an exclusive group is already on, no other hardware ++ * counters can go on. 
++ */ ++ if (cpuctx->exclusive) ++ return 0; ++ /* ++ * If this group is exclusive and there are already ++ * counters on the CPU, it can't go on. ++ */ ++ if (counter->hw_event.exclusive && cpuctx->active_oncpu) ++ return 0; ++ /* ++ * Otherwise, try to add it if all previous groups were able ++ * to go on. ++ */ ++ return can_add_hw; ++} ++ ++static void add_counter_to_ctx(struct perf_counter *counter, ++ struct perf_counter_context *ctx) ++{ ++ list_add_counter(counter, ctx); ++ ctx->nr_counters++; ++ counter->prev_state = PERF_COUNTER_STATE_OFF; ++ counter->tstamp_enabled = ctx->time_now; ++ counter->tstamp_running = ctx->time_now; ++ counter->tstamp_stopped = ctx->time_now; ++} ++ ++/* ++ * Cross CPU call to install and enable a performance counter ++ */ ++static void __perf_install_in_context(void *info) ++{ ++ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); ++ struct perf_counter *counter = info; ++ struct perf_counter_context *ctx = counter->ctx; ++ struct perf_counter *leader = counter->group_leader; ++ int cpu = smp_processor_id(); ++ unsigned long flags; ++ u64 perf_flags; ++ int err; ++ ++ /* ++ * If this is a task context, we need to check whether it is ++ * the current task context of this cpu. If not it has been ++ * scheduled out before the smp call arrived. ++ */ ++ if (ctx->task && cpuctx->task_ctx != ctx) ++ return; ++ ++ curr_rq_lock_irq_save(&flags); ++ spin_lock(&ctx->lock); ++ update_context_time(ctx, 1); ++ ++ /* ++ * Protect the list operation against NMI by disabling the ++ * counters on a global level. NOP for non NMI based counters. ++ */ ++ perf_flags = hw_perf_save_disable(); ++ ++ add_counter_to_ctx(counter, ctx); ++ ++ /* ++ * Don't put the counter on if it is disabled or if ++ * it is in a group and the group isn't on. ++ */ ++ if (counter->state != PERF_COUNTER_STATE_INACTIVE || ++ (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)) ++ goto unlock; ++ ++ /* ++ * An exclusive counter can't go on if there are already active ++ * hardware counters, and no hardware counter can go on if there ++ * is already an exclusive counter on. ++ */ ++ if (!group_can_go_on(counter, cpuctx, 1)) ++ err = -EEXIST; ++ else ++ err = counter_sched_in(counter, cpuctx, ctx, cpu); ++ ++ if (err) { ++ /* ++ * This counter couldn't go on. If it is in a group ++ * then we have to pull the whole group off. ++ * If the counter group is pinned then put it in error state. ++ */ ++ if (leader != counter) ++ group_sched_out(leader, cpuctx, ctx); ++ if (leader->hw_event.pinned) { ++ update_group_times(leader); ++ leader->state = PERF_COUNTER_STATE_ERROR; ++ } ++ } ++ ++ if (!err && !ctx->task && cpuctx->max_pertask) ++ cpuctx->max_pertask--; ++ ++ unlock: ++ hw_perf_restore(perf_flags); ++ ++ spin_unlock(&ctx->lock); ++ curr_rq_unlock_irq_restore(&flags); ++} ++ ++/* ++ * Attach a performance counter to a context ++ * ++ * First we add the counter to the list with the hardware enable bit ++ * in counter->hw_config cleared. ++ * ++ * If the counter is attached to a task which is on a CPU we use a smp ++ * call to enable it in the task context. The task might have been ++ * scheduled away, but we check this in the smp call again. ++ * ++ * Must be called with ctx->mutex held. 
++ */ ++static void ++perf_install_in_context(struct perf_counter_context *ctx, ++ struct perf_counter *counter, ++ int cpu) ++{ ++ struct task_struct *task = ctx->task; ++ ++ if (!task) { ++ /* ++ * Per cpu counters are installed via an smp call and ++ * the install is always sucessful. ++ */ ++ smp_call_function_single(cpu, __perf_install_in_context, ++ counter, 1); ++ return; ++ } ++ ++ counter->task = task; ++retry: ++ task_oncpu_function_call(task, __perf_install_in_context, ++ counter); ++ ++ spin_lock_irq(&ctx->lock); ++ /* ++ * we need to retry the smp call. ++ */ ++ if (ctx->is_active && list_empty(&counter->list_entry)) { ++ spin_unlock_irq(&ctx->lock); ++ goto retry; ++ } ++ ++ /* ++ * The lock prevents that this context is scheduled in so we ++ * can add the counter safely, if it the call above did not ++ * succeed. ++ */ ++ if (list_empty(&counter->list_entry)) ++ add_counter_to_ctx(counter, ctx); ++ spin_unlock_irq(&ctx->lock); ++} ++ ++/* ++ * Cross CPU call to enable a performance counter ++ */ ++static void __perf_counter_enable(void *info) ++{ ++ struct perf_counter *counter = info; ++ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); ++ struct perf_counter_context *ctx = counter->ctx; ++ struct perf_counter *leader = counter->group_leader; ++ unsigned long flags; ++ int err; ++ ++ /* ++ * If this is a per-task counter, need to check whether this ++ * counter's task is the current task on this cpu. ++ */ ++ if (ctx->task && cpuctx->task_ctx != ctx) ++ return; ++ ++ curr_rq_lock_irq_save(&flags); ++ spin_lock(&ctx->lock); ++ update_context_time(ctx, 1); ++ ++ counter->prev_state = counter->state; ++ if (counter->state >= PERF_COUNTER_STATE_INACTIVE) ++ goto unlock; ++ counter->state = PERF_COUNTER_STATE_INACTIVE; ++ counter->tstamp_enabled = ctx->time_now - counter->total_time_enabled; ++ ++ /* ++ * If the counter is in a group and isn't the group leader, ++ * then don't put it on unless the group is on. ++ */ ++ if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE) ++ goto unlock; ++ ++ if (!group_can_go_on(counter, cpuctx, 1)) ++ err = -EEXIST; ++ else ++ err = counter_sched_in(counter, cpuctx, ctx, ++ smp_processor_id()); ++ ++ if (err) { ++ /* ++ * If this counter can't go on and it's part of a ++ * group, then the whole group has to come off. ++ */ ++ if (leader != counter) ++ group_sched_out(leader, cpuctx, ctx); ++ if (leader->hw_event.pinned) { ++ update_group_times(leader); ++ leader->state = PERF_COUNTER_STATE_ERROR; ++ } ++ } ++ ++ unlock: ++ spin_unlock(&ctx->lock); ++ curr_rq_unlock_irq_restore(&flags); ++} ++ ++/* ++ * Enable a counter. ++ */ ++static void perf_counter_enable(struct perf_counter *counter) ++{ ++ struct perf_counter_context *ctx = counter->ctx; ++ struct task_struct *task = ctx->task; ++ ++ if (!task) { ++ /* ++ * Enable the counter on the cpu that it's on ++ */ ++ smp_call_function_single(counter->cpu, __perf_counter_enable, ++ counter, 1); ++ return; ++ } ++ ++ spin_lock_irq(&ctx->lock); ++ if (counter->state >= PERF_COUNTER_STATE_INACTIVE) ++ goto out; ++ ++ /* ++ * If the counter is in error state, clear that first. ++ * That way, if we see the counter in error state below, we ++ * know that it has gone back into error state, as distinct ++ * from the task having been scheduled away before the ++ * cross-call arrived. 
++ */ ++ if (counter->state == PERF_COUNTER_STATE_ERROR) ++ counter->state = PERF_COUNTER_STATE_OFF; ++ ++ retry: ++ spin_unlock_irq(&ctx->lock); ++ task_oncpu_function_call(task, __perf_counter_enable, counter); ++ ++ spin_lock_irq(&ctx->lock); ++ ++ /* ++ * If the context is active and the counter is still off, ++ * we need to retry the cross-call. ++ */ ++ if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF) ++ goto retry; ++ ++ /* ++ * Since we have the lock this context can't be scheduled ++ * in, so we can change the state safely. ++ */ ++ if (counter->state == PERF_COUNTER_STATE_OFF) { ++ counter->state = PERF_COUNTER_STATE_INACTIVE; ++ counter->tstamp_enabled = ctx->time_now - ++ counter->total_time_enabled; ++ } ++ out: ++ spin_unlock_irq(&ctx->lock); ++} ++ ++/* ++ * Enable a counter and all its children. ++ */ ++static void perf_counter_enable_family(struct perf_counter *counter) ++{ ++ struct perf_counter *child; ++ ++ perf_counter_enable(counter); ++ ++ /* ++ * Lock the mutex to protect the list of children ++ */ ++ mutex_lock(&counter->mutex); ++ list_for_each_entry(child, &counter->child_list, child_list) ++ perf_counter_enable(child); ++ mutex_unlock(&counter->mutex); ++} ++ ++void __perf_counter_sched_out(struct perf_counter_context *ctx, ++ struct perf_cpu_context *cpuctx) ++{ ++ struct perf_counter *counter; ++ u64 flags; ++ ++ spin_lock(&ctx->lock); ++ ctx->is_active = 0; ++ if (likely(!ctx->nr_counters)) ++ goto out; ++ update_context_time(ctx, 0); ++ ++ flags = hw_perf_save_disable(); ++ if (ctx->nr_active) { ++ list_for_each_entry(counter, &ctx->counter_list, list_entry) ++ group_sched_out(counter, cpuctx, ctx); ++ } ++ hw_perf_restore(flags); ++ out: ++ spin_unlock(&ctx->lock); ++} ++ ++/* ++ * Called from scheduler to remove the counters of the current task, ++ * with interrupts disabled. ++ * ++ * We stop each counter and update the counter value in counter->count. ++ * ++ * This does not protect us against NMI, but disable() ++ * sets the disabled bit in the control field of counter _before_ ++ * accessing the counter control register. If a NMI hits, then it will ++ * not restart the counter. ++ */ ++void perf_counter_task_sched_out(struct task_struct *task, int cpu) ++{ ++ struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); ++ struct perf_counter_context *ctx = &task->perf_counter_ctx; ++ struct pt_regs *regs; ++ ++ if (likely(!cpuctx->task_ctx)) ++ return; ++ ++ regs = task_pt_regs(task); ++ perf_swcounter_event(PERF_COUNT_CONTEXT_SWITCHES, 1, 1, regs); ++ __perf_counter_sched_out(ctx, cpuctx); ++ ++ cpuctx->task_ctx = NULL; ++} ++ ++static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx) ++{ ++ __perf_counter_sched_out(&cpuctx->ctx, cpuctx); ++} ++ ++static int ++group_sched_in(struct perf_counter *group_counter, ++ struct perf_cpu_context *cpuctx, ++ struct perf_counter_context *ctx, ++ int cpu) ++{ ++ struct perf_counter *counter, *partial_group; ++ int ret; ++ ++ if (group_counter->state == PERF_COUNTER_STATE_OFF) ++ return 0; ++ ++ ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu); ++ if (ret) ++ return ret < 0 ? 
ret : 0; ++ ++ group_counter->prev_state = group_counter->state; ++ if (counter_sched_in(group_counter, cpuctx, ctx, cpu)) ++ return -EAGAIN; ++ ++ /* ++ * Schedule in siblings as one group (if any): ++ */ ++ list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { ++ counter->prev_state = counter->state; ++ if (counter_sched_in(counter, cpuctx, ctx, cpu)) { ++ partial_group = counter; ++ goto group_error; ++ } ++ } ++ ++ return 0; ++ ++group_error: ++ /* ++ * Groups can be scheduled in as one unit only, so undo any ++ * partial group before returning: ++ */ ++ list_for_each_entry(counter, &group_counter->sibling_list, list_entry) { ++ if (counter == partial_group) ++ break; ++ counter_sched_out(counter, cpuctx, ctx); ++ } ++ counter_sched_out(group_counter, cpuctx, ctx); ++ ++ return -EAGAIN; ++} ++ ++static void ++__perf_counter_sched_in(struct perf_counter_context *ctx, ++ struct perf_cpu_context *cpuctx, int cpu) ++{ ++ struct perf_counter *counter; ++ u64 flags; ++ int can_add_hw = 1; ++ ++ spin_lock(&ctx->lock); ++ ctx->is_active = 1; ++ if (likely(!ctx->nr_counters)) ++ goto out; ++ ++ /* ++ * Add any time since the last sched_out to the lost time ++ * so it doesn't get included in the total_time_enabled and ++ * total_time_running measures for counters in the context. ++ */ ++ ctx->time_lost = get_context_time(ctx, 0) - ctx->time_now; ++ ++ flags = hw_perf_save_disable(); ++ ++ /* ++ * First go through the list and put on any pinned groups ++ * in order to give them the best chance of going on. ++ */ ++ list_for_each_entry(counter, &ctx->counter_list, list_entry) { ++ if (counter->state <= PERF_COUNTER_STATE_OFF || ++ !counter->hw_event.pinned) ++ continue; ++ if (counter->cpu != -1 && counter->cpu != cpu) ++ continue; ++ ++ if (group_can_go_on(counter, cpuctx, 1)) ++ group_sched_in(counter, cpuctx, ctx, cpu); ++ ++ /* ++ * If this pinned group hasn't been scheduled, ++ * put it in error state. ++ */ ++ if (counter->state == PERF_COUNTER_STATE_INACTIVE) { ++ update_group_times(counter); ++ counter->state = PERF_COUNTER_STATE_ERROR; ++ } ++ } ++ ++ list_for_each_entry(counter, &ctx->counter_list, list_entry) { ++ /* ++ * Ignore counters in OFF or ERROR state, and ++ * ignore pinned counters since we did them already. ++ */ ++ if (counter->state <= PERF_COUNTER_STATE_OFF || ++ counter->hw_event.pinned) ++ continue; ++ ++ /* ++ * Listen to the 'cpu' scheduling filter constraint ++ * of counters: ++ */ ++ if (counter->cpu != -1 && counter->cpu != cpu) ++ continue; ++ ++ if (group_can_go_on(counter, cpuctx, can_add_hw)) { ++ if (group_sched_in(counter, cpuctx, ctx, cpu)) ++ can_add_hw = 0; ++ } ++ } ++ hw_perf_restore(flags); ++ out: ++ spin_unlock(&ctx->lock); ++} ++ ++/* ++ * Called from scheduler to add the counters of the current task ++ * with interrupts disabled. ++ * ++ * We restore the counter value and then enable it. ++ * ++ * This does not protect us against NMI, but enable() ++ * sets the enabled bit in the control field of counter _before_ ++ * accessing the counter control register. If a NMI hits, then it will ++ * keep the counter running. 
++ */ ++void perf_counter_task_sched_in(struct task_struct *task, int cpu) ++{ ++ struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); ++ struct perf_counter_context *ctx = &task->perf_counter_ctx; ++ ++ __perf_counter_sched_in(ctx, cpuctx, cpu); ++ cpuctx->task_ctx = ctx; ++} ++ ++static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu) ++{ ++ struct perf_counter_context *ctx = &cpuctx->ctx; ++ ++ __perf_counter_sched_in(ctx, cpuctx, cpu); ++} ++ ++int perf_counter_task_disable(void) ++{ ++ struct task_struct *curr = current; ++ struct perf_counter_context *ctx = &curr->perf_counter_ctx; ++ struct perf_counter *counter; ++ unsigned long flags; ++ u64 perf_flags; ++ int cpu; ++ ++ if (likely(!ctx->nr_counters)) ++ return 0; ++ ++ curr_rq_lock_irq_save(&flags); ++ cpu = smp_processor_id(); ++ ++ /* force the update of the task clock: */ ++ __task_delta_exec(curr, 1); ++ ++ perf_counter_task_sched_out(curr, cpu); ++ ++ spin_lock(&ctx->lock); ++ ++ /* ++ * Disable all the counters: ++ */ ++ perf_flags = hw_perf_save_disable(); ++ ++ list_for_each_entry(counter, &ctx->counter_list, list_entry) { ++ if (counter->state != PERF_COUNTER_STATE_ERROR) { ++ update_group_times(counter); ++ counter->state = PERF_COUNTER_STATE_OFF; ++ } ++ } ++ ++ hw_perf_restore(perf_flags); ++ ++ spin_unlock(&ctx->lock); ++ ++ curr_rq_unlock_irq_restore(&flags); ++ ++ return 0; ++} ++ ++int perf_counter_task_enable(void) ++{ ++ struct task_struct *curr = current; ++ struct perf_counter_context *ctx = &curr->perf_counter_ctx; ++ struct perf_counter *counter; ++ unsigned long flags; ++ u64 perf_flags; ++ int cpu; ++ ++ if (likely(!ctx->nr_counters)) ++ return 0; ++ ++ curr_rq_lock_irq_save(&flags); ++ cpu = smp_processor_id(); ++ ++ /* force the update of the task clock: */ ++ __task_delta_exec(curr, 1); ++ ++ perf_counter_task_sched_out(curr, cpu); ++ ++ spin_lock(&ctx->lock); ++ ++ /* ++ * Disable all the counters: ++ */ ++ perf_flags = hw_perf_save_disable(); ++ ++ list_for_each_entry(counter, &ctx->counter_list, list_entry) { ++ if (counter->state > PERF_COUNTER_STATE_OFF) ++ continue; ++ counter->state = PERF_COUNTER_STATE_INACTIVE; ++ counter->tstamp_enabled = ctx->time_now - ++ counter->total_time_enabled; ++ counter->hw_event.disabled = 0; ++ } ++ hw_perf_restore(perf_flags); ++ ++ spin_unlock(&ctx->lock); ++ ++ perf_counter_task_sched_in(curr, cpu); ++ ++ curr_rq_unlock_irq_restore(&flags); ++ ++ return 0; ++} ++ ++/* ++ * Round-robin a context's counters: ++ */ ++static void rotate_ctx(struct perf_counter_context *ctx) ++{ ++ struct perf_counter *counter; ++ u64 perf_flags; ++ ++ if (!ctx->nr_counters) ++ return; ++ ++ spin_lock(&ctx->lock); ++ /* ++ * Rotate the first entry last (works just fine for group counters too): ++ */ ++ perf_flags = hw_perf_save_disable(); ++ list_for_each_entry(counter, &ctx->counter_list, list_entry) { ++ list_move_tail(&counter->list_entry, &ctx->counter_list); ++ break; ++ } ++ hw_perf_restore(perf_flags); ++ ++ spin_unlock(&ctx->lock); ++} ++ ++void perf_counter_task_tick(struct task_struct *curr, int cpu) ++{ ++ struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); ++ struct perf_counter_context *ctx = &curr->perf_counter_ctx; ++ const int rotate_percpu = 0; ++ ++ if (rotate_percpu) ++ perf_counter_cpu_sched_out(cpuctx); ++ perf_counter_task_sched_out(curr, cpu); ++ ++ if (rotate_percpu) ++ rotate_ctx(&cpuctx->ctx); ++ rotate_ctx(ctx); ++ ++ if (rotate_percpu) ++ perf_counter_cpu_sched_in(cpuctx, cpu); ++ 
perf_counter_task_sched_in(curr, cpu); ++} ++ ++/* ++ * Cross CPU call to read the hardware counter ++ */ ++static void __read(void *info) ++{ ++ struct perf_counter *counter = info; ++ struct perf_counter_context *ctx = counter->ctx; ++ unsigned long flags; ++ ++ curr_rq_lock_irq_save(&flags); ++ if (ctx->is_active) ++ update_context_time(ctx, 1); ++ counter->hw_ops->read(counter); ++ update_counter_times(counter); ++ curr_rq_unlock_irq_restore(&flags); ++} ++ ++static u64 perf_counter_read(struct perf_counter *counter) ++{ ++ /* ++ * If counter is enabled and currently active on a CPU, update the ++ * value in the counter structure: ++ */ ++ if (counter->state == PERF_COUNTER_STATE_ACTIVE) { ++ smp_call_function_single(counter->oncpu, ++ __read, counter, 1); ++ } else if (counter->state == PERF_COUNTER_STATE_INACTIVE) { ++ update_counter_times(counter); ++ } ++ ++ return atomic64_read(&counter->count); ++} ++ ++static void put_context(struct perf_counter_context *ctx) ++{ ++ if (ctx->task) ++ put_task_struct(ctx->task); ++} ++ ++static struct perf_counter_context *find_get_context(pid_t pid, int cpu) ++{ ++ struct perf_cpu_context *cpuctx; ++ struct perf_counter_context *ctx; ++ struct task_struct *task; ++ ++ /* ++ * If cpu is not a wildcard then this is a percpu counter: ++ */ ++ if (cpu != -1) { ++ /* Must be root to operate on a CPU counter: */ ++ if (!capable(CAP_SYS_ADMIN)) ++ return ERR_PTR(-EACCES); ++ ++ if (cpu < 0 || cpu > num_possible_cpus()) ++ return ERR_PTR(-EINVAL); ++ ++ /* ++ * We could be clever and allow to attach a counter to an ++ * offline CPU and activate it when the CPU comes up, but ++ * that's for later. ++ */ ++ if (!cpu_isset(cpu, cpu_online_map)) ++ return ERR_PTR(-ENODEV); ++ ++ cpuctx = &per_cpu(perf_cpu_context, cpu); ++ ctx = &cpuctx->ctx; ++ ++ return ctx; ++ } ++ ++ rcu_read_lock(); ++ if (!pid) ++ task = current; ++ else ++ task = find_task_by_vpid(pid); ++ if (task) ++ get_task_struct(task); ++ rcu_read_unlock(); ++ ++ if (!task) ++ return ERR_PTR(-ESRCH); ++ ++ ctx = &task->perf_counter_ctx; ++ ctx->task = task; ++ ++ /* Reuse ptrace permission checks for now. */ ++ if (!ptrace_may_access(task, PTRACE_MODE_READ)) { ++ put_context(ctx); ++ return ERR_PTR(-EACCES); ++ } ++ ++ return ctx; ++} ++ ++static void free_counter_rcu(struct rcu_head *head) ++{ ++ struct perf_counter *counter; ++ ++ counter = container_of(head, struct perf_counter, rcu_head); ++ kfree(counter); ++} ++ ++static void free_counter(struct perf_counter *counter) ++{ ++ if (counter->destroy) ++ counter->destroy(counter); ++ ++ call_rcu(&counter->rcu_head, free_counter_rcu); ++} ++ ++/* ++ * Called when the last reference to the file is gone. ++ */ ++static int perf_release(struct inode *inode, struct file *file) ++{ ++ struct perf_counter *counter = file->private_data; ++ struct perf_counter_context *ctx = counter->ctx; ++ ++ file->private_data = NULL; ++ ++ mutex_lock(&ctx->mutex); ++ mutex_lock(&counter->mutex); ++ ++ perf_counter_remove_from_context(counter); ++ ++ mutex_unlock(&counter->mutex); ++ mutex_unlock(&ctx->mutex); ++ ++ free_counter(counter); ++ put_context(ctx); ++ ++ return 0; ++} ++ ++/* ++ * Read the performance counter - simple non blocking version for now ++ */ ++static ssize_t ++perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count) ++{ ++ u64 values[3]; ++ int n; ++ ++ /* ++ * Return end-of-file for a read on a counter that is in ++ * error state (i.e. because it was pinned but it couldn't be ++ * scheduled on to the CPU at some point). 
++ */ ++ if (counter->state == PERF_COUNTER_STATE_ERROR) ++ return 0; ++ ++ mutex_lock(&counter->mutex); ++ values[0] = perf_counter_read(counter); ++ n = 1; ++ if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) ++ values[n++] = counter->total_time_enabled + ++ atomic64_read(&counter->child_total_time_enabled); ++ if (counter->hw_event.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) ++ values[n++] = counter->total_time_running + ++ atomic64_read(&counter->child_total_time_running); ++ mutex_unlock(&counter->mutex); ++ ++ if (count < n * sizeof(u64)) ++ return -EINVAL; ++ count = n * sizeof(u64); ++ ++ if (copy_to_user(buf, values, count)) ++ return -EFAULT; ++ ++ return count; ++} ++ ++static ssize_t ++perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) ++{ ++ struct perf_counter *counter = file->private_data; ++ ++ return perf_read_hw(counter, buf, count); ++} ++ ++static unsigned int perf_poll(struct file *file, poll_table *wait) ++{ ++ struct perf_counter *counter = file->private_data; ++ struct perf_mmap_data *data; ++ unsigned int events; ++ ++ rcu_read_lock(); ++ data = rcu_dereference(counter->data); ++ if (data) ++ events = atomic_xchg(&data->wakeup, 0); ++ else ++ events = POLL_HUP; ++ rcu_read_unlock(); ++ ++ poll_wait(file, &counter->waitq, wait); ++ ++ return events; ++} ++ ++static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) ++{ ++ struct perf_counter *counter = file->private_data; ++ int err = 0; ++ ++ switch (cmd) { ++ case PERF_COUNTER_IOC_ENABLE: ++ perf_counter_enable_family(counter); ++ break; ++ case PERF_COUNTER_IOC_DISABLE: ++ perf_counter_disable_family(counter); ++ break; ++ default: ++ err = -ENOTTY; ++ } ++ return err; ++} ++ ++static void __perf_counter_update_userpage(struct perf_counter *counter, ++ struct perf_mmap_data *data) ++{ ++ struct perf_counter_mmap_page *userpg = data->user_page; ++ ++ /* ++ * Disable preemption so as to not let the corresponding user-space ++ * spin too long if we get preempted. 
++ */ ++ preempt_disable(); ++ ++userpg->lock; ++ smp_wmb(); ++ userpg->index = counter->hw.idx; ++ userpg->offset = atomic64_read(&counter->count); ++ if (counter->state == PERF_COUNTER_STATE_ACTIVE) ++ userpg->offset -= atomic64_read(&counter->hw.prev_count); ++ ++ userpg->data_head = atomic_read(&data->head); ++ smp_wmb(); ++ ++userpg->lock; ++ preempt_enable(); ++} ++ ++void perf_counter_update_userpage(struct perf_counter *counter) ++{ ++ struct perf_mmap_data *data; ++ ++ rcu_read_lock(); ++ data = rcu_dereference(counter->data); ++ if (data) ++ __perf_counter_update_userpage(counter, data); ++ rcu_read_unlock(); ++} ++ ++static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) ++{ ++ struct perf_counter *counter = vma->vm_file->private_data; ++ struct perf_mmap_data *data; ++ int ret = VM_FAULT_SIGBUS; ++ ++ rcu_read_lock(); ++ data = rcu_dereference(counter->data); ++ if (!data) ++ goto unlock; ++ ++ if (vmf->pgoff == 0) { ++ vmf->page = virt_to_page(data->user_page); ++ } else { ++ int nr = vmf->pgoff - 1; ++ ++ if ((unsigned)nr > data->nr_pages) ++ goto unlock; ++ ++ vmf->page = virt_to_page(data->data_pages[nr]); ++ } ++ get_page(vmf->page); ++ ret = 0; ++unlock: ++ rcu_read_unlock(); ++ ++ return ret; ++} ++ ++static int perf_mmap_data_alloc(struct perf_counter *counter, int nr_pages) ++{ ++ struct perf_mmap_data *data; ++ unsigned long size; ++ int i; ++ ++ WARN_ON(atomic_read(&counter->mmap_count)); ++ ++ size = sizeof(struct perf_mmap_data); ++ size += nr_pages * sizeof(void *); ++ ++ data = kzalloc(size, GFP_KERNEL); ++ if (!data) ++ goto fail; ++ ++ data->user_page = (void *)get_zeroed_page(GFP_KERNEL); ++ if (!data->user_page) ++ goto fail_user_page; ++ ++ for (i = 0; i < nr_pages; i++) { ++ data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL); ++ if (!data->data_pages[i]) ++ goto fail_data_pages; ++ } ++ ++ data->nr_pages = nr_pages; ++ ++ rcu_assign_pointer(counter->data, data); ++ ++ return 0; ++ ++fail_data_pages: ++ for (i--; i >= 0; i--) ++ free_page((unsigned long)data->data_pages[i]); ++ ++ free_page((unsigned long)data->user_page); ++ ++fail_user_page: ++ kfree(data); ++ ++fail: ++ return -ENOMEM; ++} ++ ++static void __perf_mmap_data_free(struct rcu_head *rcu_head) ++{ ++ struct perf_mmap_data *data = container_of(rcu_head, ++ struct perf_mmap_data, rcu_head); ++ int i; ++ ++ free_page((unsigned long)data->user_page); ++ for (i = 0; i < data->nr_pages; i++) ++ free_page((unsigned long)data->data_pages[i]); ++ kfree(data); ++} ++ ++static void perf_mmap_data_free(struct perf_counter *counter) ++{ ++ struct perf_mmap_data *data = counter->data; ++ ++ WARN_ON(atomic_read(&counter->mmap_count)); ++ ++ rcu_assign_pointer(counter->data, NULL); ++ call_rcu(&data->rcu_head, __perf_mmap_data_free); ++} ++ ++static void perf_mmap_open(struct vm_area_struct *vma) ++{ ++ struct perf_counter *counter = vma->vm_file->private_data; ++ ++ atomic_inc(&counter->mmap_count); ++} ++ ++static void perf_mmap_close(struct vm_area_struct *vma) ++{ ++ struct perf_counter *counter = vma->vm_file->private_data; ++ ++ if (atomic_dec_and_mutex_lock(&counter->mmap_count, ++ &counter->mmap_mutex)) { ++ perf_mmap_data_free(counter); ++ mutex_unlock(&counter->mmap_mutex); ++ } ++} ++ ++static struct vm_operations_struct perf_mmap_vmops = { ++ .open = perf_mmap_open, ++ .close = perf_mmap_close, ++ .fault = perf_mmap_fault, ++}; ++ ++static int perf_mmap(struct file *file, struct vm_area_struct *vma) ++{ ++ struct perf_counter *counter = file->private_data; ++ unsigned 
long vma_size; ++ unsigned long nr_pages; ++ unsigned long locked, lock_limit; ++ int ret = 0; ++ ++ if (!(vma->vm_flags & VM_SHARED) || (vma->vm_flags & VM_WRITE)) ++ return -EINVAL; ++ ++ vma_size = vma->vm_end - vma->vm_start; ++ nr_pages = (vma_size / PAGE_SIZE) - 1; ++ ++ /* ++ * If we have data pages ensure they're a power-of-two number, so we ++ * can do bitmasks instead of modulo. ++ */ ++ if (nr_pages != 0 && !is_power_of_2(nr_pages)) ++ return -EINVAL; ++ ++ if (vma_size != PAGE_SIZE * (1 + nr_pages)) ++ return -EINVAL; ++ ++ if (vma->vm_pgoff != 0) ++ return -EINVAL; ++ ++ locked = vma_size >> PAGE_SHIFT; ++ locked += vma->vm_mm->locked_vm; ++ ++ lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; ++ lock_limit >>= PAGE_SHIFT; ++ ++ if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) ++ return -EPERM; ++ ++ mutex_lock(&counter->mmap_mutex); ++ if (atomic_inc_not_zero(&counter->mmap_count)) ++ goto out; ++ ++ WARN_ON(counter->data); ++ ret = perf_mmap_data_alloc(counter, nr_pages); ++ if (!ret) ++ atomic_set(&counter->mmap_count, 1); ++out: ++ mutex_unlock(&counter->mmap_mutex); ++ ++ vma->vm_flags &= ~VM_MAYWRITE; ++ vma->vm_flags |= VM_RESERVED; ++ vma->vm_ops = &perf_mmap_vmops; ++ ++ return ret; ++} ++ ++static const struct file_operations perf_fops = { ++ .release = perf_release, ++ .read = perf_read, ++ .poll = perf_poll, ++ .unlocked_ioctl = perf_ioctl, ++ .compat_ioctl = perf_ioctl, ++ .mmap = perf_mmap, ++}; ++ ++/* ++ * Output ++ */ ++ ++struct perf_output_handle { ++ struct perf_counter *counter; ++ struct perf_mmap_data *data; ++ unsigned int offset; ++ unsigned int head; ++ int wakeup; ++}; ++ ++static int perf_output_begin(struct perf_output_handle *handle, ++ struct perf_counter *counter, unsigned int size) ++{ ++ struct perf_mmap_data *data; ++ unsigned int offset, head; ++ ++ rcu_read_lock(); ++ data = rcu_dereference(counter->data); ++ if (!data) ++ goto out; ++ ++ if (!data->nr_pages) ++ goto out; ++ ++ do { ++ offset = head = atomic_read(&data->head); ++ head += size; ++ } while (atomic_cmpxchg(&data->head, offset, head) != offset); ++ ++ handle->counter = counter; ++ handle->data = data; ++ handle->offset = offset; ++ handle->head = head; ++ handle->wakeup = (offset >> PAGE_SHIFT) != (head >> PAGE_SHIFT); ++ ++ return 0; ++ ++out: ++ rcu_read_unlock(); ++ ++ return -ENOSPC; ++} ++ ++static void perf_output_copy(struct perf_output_handle *handle, ++ void *buf, unsigned int len) ++{ ++ unsigned int pages_mask; ++ unsigned int offset; ++ unsigned int size; ++ void **pages; ++ ++ offset = handle->offset; ++ pages_mask = handle->data->nr_pages - 1; ++ pages = handle->data->data_pages; ++ ++ do { ++ unsigned int page_offset; ++ int nr; ++ ++ nr = (offset >> PAGE_SHIFT) & pages_mask; ++ page_offset = offset & (PAGE_SIZE - 1); ++ size = min_t(unsigned int, PAGE_SIZE - page_offset, len); ++ ++ memcpy(pages[nr] + page_offset, buf, size); ++ ++ len -= size; ++ buf += size; ++ offset += size; ++ } while (len); ++ ++ handle->offset = offset; ++ ++ WARN_ON_ONCE(handle->offset > handle->head); ++} ++ ++#define perf_output_put(handle, x) \ ++ perf_output_copy((handle), &(x), sizeof(x)) ++ ++static void perf_output_end(struct perf_output_handle *handle, int nmi) ++{ ++ if (handle->wakeup) { ++ (void)atomic_xchg(&handle->data->wakeup, POLL_IN); ++ __perf_counter_update_userpage(handle->counter, handle->data); ++ if (nmi) { ++ handle->counter->wakeup_pending = 1; ++ set_perf_counter_pending(); ++ } else ++ wake_up(&handle->counter->waitq); ++ } ++ rcu_read_unlock(); 
++} ++ ++static int perf_output_write(struct perf_counter *counter, int nmi, ++ void *buf, ssize_t size) ++{ ++ struct perf_output_handle handle; ++ int ret; ++ ++ ret = perf_output_begin(&handle, counter, size); ++ if (ret) ++ goto out; ++ ++ perf_output_copy(&handle, buf, size); ++ perf_output_end(&handle, nmi); ++ ++out: ++ return ret; ++} ++ ++static void perf_output_simple(struct perf_counter *counter, ++ int nmi, struct pt_regs *regs) ++{ ++ unsigned int size; ++ struct { ++ struct perf_event_header header; ++ u64 ip; ++ u32 pid, tid; ++ } event; ++ ++ event.header.type = PERF_EVENT_IP; ++ event.ip = instruction_pointer(regs); ++ ++ size = sizeof(event); ++ ++ if (counter->hw_event.include_tid) { ++ /* namespace issues */ ++ event.pid = current->group_leader->pid; ++ event.tid = current->pid; ++ ++ event.header.type |= __PERF_EVENT_TID; ++ } else ++ size -= sizeof(u64); ++ ++ event.header.size = size; ++ ++ perf_output_write(counter, nmi, &event, size); ++} ++ ++static void perf_output_group(struct perf_counter *counter, int nmi) ++{ ++ struct perf_output_handle handle; ++ struct perf_event_header header; ++ struct perf_counter *leader, *sub; ++ unsigned int size; ++ struct { ++ u64 event; ++ u64 counter; ++ } entry; ++ int ret; ++ ++ size = sizeof(header) + counter->nr_siblings * sizeof(entry); ++ ++ ret = perf_output_begin(&handle, counter, size); ++ if (ret) ++ return; ++ ++ header.type = PERF_EVENT_GROUP; ++ header.size = size; ++ ++ perf_output_put(&handle, header); ++ ++ leader = counter->group_leader; ++ list_for_each_entry(sub, &leader->sibling_list, list_entry) { ++ if (sub != counter) ++ sub->hw_ops->read(sub); ++ ++ entry.event = sub->hw_event.config; ++ entry.counter = atomic64_read(&sub->count); ++ ++ perf_output_put(&handle, entry); ++ } ++ ++ perf_output_end(&handle, nmi); ++} ++ ++void perf_counter_output(struct perf_counter *counter, ++ int nmi, struct pt_regs *regs) ++{ ++ switch (counter->hw_event.record_type) { ++ case PERF_RECORD_SIMPLE: ++ return; ++ ++ case PERF_RECORD_IRQ: ++ perf_output_simple(counter, nmi, regs); ++ break; ++ ++ case PERF_RECORD_GROUP: ++ perf_output_group(counter, nmi); ++ break; ++ } ++} ++ ++/* ++ * Generic software counter infrastructure ++ */ ++ ++static void perf_swcounter_update(struct perf_counter *counter) ++{ ++ struct hw_perf_counter *hwc = &counter->hw; ++ u64 prev, now; ++ s64 delta; ++ ++again: ++ prev = atomic64_read(&hwc->prev_count); ++ now = atomic64_read(&hwc->count); ++ if (atomic64_cmpxchg(&hwc->prev_count, prev, now) != prev) ++ goto again; ++ ++ delta = now - prev; ++ ++ atomic64_add(delta, &counter->count); ++ atomic64_sub(delta, &hwc->period_left); ++} ++ ++static void perf_swcounter_set_period(struct perf_counter *counter) ++{ ++ struct hw_perf_counter *hwc = &counter->hw; ++ s64 left = atomic64_read(&hwc->period_left); ++ s64 period = hwc->irq_period; ++ ++ if (unlikely(left <= -period)) { ++ left = period; ++ atomic64_set(&hwc->period_left, left); ++ } ++ ++ if (unlikely(left <= 0)) { ++ left += period; ++ atomic64_add(period, &hwc->period_left); ++ } ++ ++ atomic64_set(&hwc->prev_count, -left); ++ atomic64_set(&hwc->count, -left); ++} ++ ++static enum hrtimer_restart perf_swcounter_hrtimer(struct hrtimer *hrtimer) ++{ ++ struct perf_counter *counter; ++ struct pt_regs *regs; ++ ++ counter = container_of(hrtimer, struct perf_counter, hw.hrtimer); ++ counter->hw_ops->read(counter); ++ ++ regs = get_irq_regs(); ++ /* ++ * In case we exclude kernel IPs or are somehow not in interrupt ++ * context, provide the next 
best thing, the user IP. ++ */ ++ if ((counter->hw_event.exclude_kernel || !regs) && ++ !counter->hw_event.exclude_user) ++ regs = task_pt_regs(current); ++ ++ if (regs) ++ perf_counter_output(counter, 0, regs); ++ ++ hrtimer_forward_now(hrtimer, ns_to_ktime(counter->hw.irq_period)); ++ ++ return HRTIMER_RESTART; ++} ++ ++static void perf_swcounter_overflow(struct perf_counter *counter, ++ int nmi, struct pt_regs *regs) ++{ ++ perf_swcounter_update(counter); ++ perf_swcounter_set_period(counter); ++ perf_counter_output(counter, nmi, regs); ++} ++ ++static int perf_swcounter_match(struct perf_counter *counter, ++ enum perf_event_types type, ++ u32 event, struct pt_regs *regs) ++{ ++ if (counter->state != PERF_COUNTER_STATE_ACTIVE) ++ return 0; ++ ++ if (perf_event_raw(&counter->hw_event)) ++ return 0; ++ ++ if (perf_event_type(&counter->hw_event) != type) ++ return 0; ++ ++ if (perf_event_id(&counter->hw_event) != event) ++ return 0; ++ ++ if (counter->hw_event.exclude_user && user_mode(regs)) ++ return 0; ++ ++ if (counter->hw_event.exclude_kernel && !user_mode(regs)) ++ return 0; ++ ++ return 1; ++} ++ ++static void perf_swcounter_add(struct perf_counter *counter, u64 nr, ++ int nmi, struct pt_regs *regs) ++{ ++ int neg = atomic64_add_negative(nr, &counter->hw.count); ++ if (counter->hw.irq_period && !neg) ++ perf_swcounter_overflow(counter, nmi, regs); ++} ++ ++static void perf_swcounter_ctx_event(struct perf_counter_context *ctx, ++ enum perf_event_types type, u32 event, ++ u64 nr, int nmi, struct pt_regs *regs) ++{ ++ struct perf_counter *counter; ++ ++ if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) ++ return; ++ ++ rcu_read_lock(); ++ list_for_each_entry_rcu(counter, &ctx->event_list, event_entry) { ++ if (perf_swcounter_match(counter, type, event, regs)) ++ perf_swcounter_add(counter, nr, nmi, regs); ++ } ++ rcu_read_unlock(); ++} ++ ++static int *perf_swcounter_recursion_context(struct perf_cpu_context *cpuctx) ++{ ++ if (in_nmi()) ++ return &cpuctx->recursion[3]; ++ ++ if (in_irq()) ++ return &cpuctx->recursion[2]; ++ ++ if (in_softirq()) ++ return &cpuctx->recursion[1]; ++ ++ return &cpuctx->recursion[0]; ++} ++ ++static void __perf_swcounter_event(enum perf_event_types type, u32 event, ++ u64 nr, int nmi, struct pt_regs *regs) ++{ ++ struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); ++ int *recursion = perf_swcounter_recursion_context(cpuctx); ++ ++ if (*recursion) ++ goto out; ++ ++ (*recursion)++; ++ barrier(); ++ ++ perf_swcounter_ctx_event(&cpuctx->ctx, type, event, nr, nmi, regs); ++ if (cpuctx->task_ctx) { ++ perf_swcounter_ctx_event(cpuctx->task_ctx, type, event, ++ nr, nmi, regs); ++ } ++ ++ barrier(); ++ (*recursion)--; ++ ++out: ++ put_cpu_var(perf_cpu_context); ++} ++ ++void perf_swcounter_event(u32 event, u64 nr, int nmi, struct pt_regs *regs) ++{ ++ __perf_swcounter_event(PERF_TYPE_SOFTWARE, event, nr, nmi, regs); ++} ++ ++static void perf_swcounter_read(struct perf_counter *counter) ++{ ++ perf_swcounter_update(counter); ++} ++ ++static int perf_swcounter_enable(struct perf_counter *counter) ++{ ++ perf_swcounter_set_period(counter); ++ return 0; ++} ++ ++static void perf_swcounter_disable(struct perf_counter *counter) ++{ ++ perf_swcounter_update(counter); ++} ++ ++static const struct hw_perf_counter_ops perf_ops_generic = { ++ .enable = perf_swcounter_enable, ++ .disable = perf_swcounter_disable, ++ .read = perf_swcounter_read, ++}; ++ ++/* ++ * Software counter: cpu wall time clock ++ */ ++ ++static void 
cpu_clock_perf_counter_update(struct perf_counter *counter) ++{ ++ int cpu = raw_smp_processor_id(); ++ s64 prev; ++ u64 now; ++ ++ now = cpu_clock(cpu); ++ prev = atomic64_read(&counter->hw.prev_count); ++ atomic64_set(&counter->hw.prev_count, now); ++ atomic64_add(now - prev, &counter->count); ++} ++ ++static int cpu_clock_perf_counter_enable(struct perf_counter *counter) ++{ ++ struct hw_perf_counter *hwc = &counter->hw; ++ int cpu = raw_smp_processor_id(); ++ ++ atomic64_set(&hwc->prev_count, cpu_clock(cpu)); ++ hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ hwc->hrtimer.function = perf_swcounter_hrtimer; ++ if (hwc->irq_period) { ++ __hrtimer_start_range_ns(&hwc->hrtimer, ++ ns_to_ktime(hwc->irq_period), 0, ++ HRTIMER_MODE_REL, 0); ++ } ++ ++ return 0; ++} ++ ++static void cpu_clock_perf_counter_disable(struct perf_counter *counter) ++{ ++ hrtimer_cancel(&counter->hw.hrtimer); ++ cpu_clock_perf_counter_update(counter); ++} ++ ++static void cpu_clock_perf_counter_read(struct perf_counter *counter) ++{ ++ cpu_clock_perf_counter_update(counter); ++} ++ ++static const struct hw_perf_counter_ops perf_ops_cpu_clock = { ++ .enable = cpu_clock_perf_counter_enable, ++ .disable = cpu_clock_perf_counter_disable, ++ .read = cpu_clock_perf_counter_read, ++}; ++ ++/* ++ * Software counter: task time clock ++ */ ++ ++/* ++ * Called from within the scheduler: ++ */ ++static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update) ++{ ++ struct task_struct *curr = counter->task; ++ u64 delta; ++ ++ delta = __task_delta_exec(curr, update); ++ ++ return curr->se.sum_exec_runtime + delta; ++} ++ ++static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now) ++{ ++ u64 prev; ++ s64 delta; ++ ++ prev = atomic64_read(&counter->hw.prev_count); ++ ++ atomic64_set(&counter->hw.prev_count, now); ++ ++ delta = now - prev; ++ ++ atomic64_add(delta, &counter->count); ++} ++ ++static int task_clock_perf_counter_enable(struct perf_counter *counter) ++{ ++ struct hw_perf_counter *hwc = &counter->hw; ++ ++ atomic64_set(&hwc->prev_count, task_clock_perf_counter_val(counter, 0)); ++ hrtimer_init(&hwc->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ hwc->hrtimer.function = perf_swcounter_hrtimer; ++ if (hwc->irq_period) { ++ __hrtimer_start_range_ns(&hwc->hrtimer, ++ ns_to_ktime(hwc->irq_period), 0, ++ HRTIMER_MODE_REL, 0); ++ } ++ ++ return 0; ++} ++ ++static void task_clock_perf_counter_disable(struct perf_counter *counter) ++{ ++ hrtimer_cancel(&counter->hw.hrtimer); ++ task_clock_perf_counter_update(counter, ++ task_clock_perf_counter_val(counter, 0)); ++} ++ ++static void task_clock_perf_counter_read(struct perf_counter *counter) ++{ ++ task_clock_perf_counter_update(counter, ++ task_clock_perf_counter_val(counter, 1)); ++} ++ ++static const struct hw_perf_counter_ops perf_ops_task_clock = { ++ .enable = task_clock_perf_counter_enable, ++ .disable = task_clock_perf_counter_disable, ++ .read = task_clock_perf_counter_read, ++}; ++ ++/* ++ * Software counter: cpu migrations ++ */ ++ ++static inline u64 get_cpu_migrations(struct perf_counter *counter) ++{ ++ struct task_struct *curr = counter->ctx->task; ++ ++ if (curr) ++ return curr->se.nr_migrations; ++ return cpu_nr_migrations(smp_processor_id()); ++} ++ ++static void cpu_migrations_perf_counter_update(struct perf_counter *counter) ++{ ++ u64 prev, now; ++ s64 delta; ++ ++ prev = atomic64_read(&counter->hw.prev_count); ++ now = get_cpu_migrations(counter); ++ ++ atomic64_set(&counter->hw.prev_count, now); ++ 
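The clock counters above all follow the same idiom: cache the last raw reading in hw.prev_count and, on every update, fold the difference between "now" and that cache into the 64-bit count. A stripped-down, single-threaded sketch of the idiom (clock_ns() and struct swcounter are invented for illustration; the kernel's atomics and hrtimer resampling are left out):

#include <stdint.h>
#include <time.h>

struct swcounter {
    uint64_t prev;     /* last raw reading of the clock source */
    uint64_t count;    /* accumulated delta, i.e. the counter value */
};

static uint64_t clock_ns(void)              /* stand-in for cpu_clock() */
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

static void counter_enable(struct swcounter *c)
{
    c->prev = clock_ns();                   /* snapshot, like ->prev_count */
}

static void counter_update(struct swcounter *c)
{
    uint64_t now = clock_ns();

    c->count += now - c->prev;              /* fold in the delta */
    c->prev   = now;                        /* re-arm for the next read */
}

The hrtimer in the patch simply invokes the counter's read/update path every irq_period nanoseconds so the accumulated value stays fresh between explicit reads.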
++ delta = now - prev; ++ ++ atomic64_add(delta, &counter->count); ++} ++ ++static void cpu_migrations_perf_counter_read(struct perf_counter *counter) ++{ ++ cpu_migrations_perf_counter_update(counter); ++} ++ ++static int cpu_migrations_perf_counter_enable(struct perf_counter *counter) ++{ ++ if (counter->prev_state <= PERF_COUNTER_STATE_OFF) ++ atomic64_set(&counter->hw.prev_count, ++ get_cpu_migrations(counter)); ++ return 0; ++} ++ ++static void cpu_migrations_perf_counter_disable(struct perf_counter *counter) ++{ ++ cpu_migrations_perf_counter_update(counter); ++} ++ ++static const struct hw_perf_counter_ops perf_ops_cpu_migrations = { ++ .enable = cpu_migrations_perf_counter_enable, ++ .disable = cpu_migrations_perf_counter_disable, ++ .read = cpu_migrations_perf_counter_read, ++}; ++ ++#ifdef CONFIG_EVENT_PROFILE ++void perf_tpcounter_event(int event_id) ++{ ++ struct pt_regs *regs = get_irq_regs(); ++ ++ if (!regs) ++ regs = task_pt_regs(current); ++ ++ __perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, regs); ++} ++ ++extern int ftrace_profile_enable(int); ++extern void ftrace_profile_disable(int); ++ ++static void tp_perf_counter_destroy(struct perf_counter *counter) ++{ ++ ftrace_profile_disable(perf_event_id(&counter->hw_event)); ++} ++ ++static const struct hw_perf_counter_ops * ++tp_perf_counter_init(struct perf_counter *counter) ++{ ++ int event_id = perf_event_id(&counter->hw_event); ++ int ret; ++ ++ ret = ftrace_profile_enable(event_id); ++ if (ret) ++ return NULL; ++ ++ counter->destroy = tp_perf_counter_destroy; ++ counter->hw.irq_period = counter->hw_event.irq_period; ++ ++ return &perf_ops_generic; ++} ++#else ++static const struct hw_perf_counter_ops * ++tp_perf_counter_init(struct perf_counter *counter) ++{ ++ return NULL; ++} ++#endif ++ ++static const struct hw_perf_counter_ops * ++sw_perf_counter_init(struct perf_counter *counter) ++{ ++ struct perf_counter_hw_event *hw_event = &counter->hw_event; ++ const struct hw_perf_counter_ops *hw_ops = NULL; ++ struct hw_perf_counter *hwc = &counter->hw; ++ ++ /* ++ * Software counters (currently) can't in general distinguish ++ * between user, kernel and hypervisor events. ++ * However, context switches and cpu migrations are considered ++ * to be kernel events, and page faults are never hypervisor ++ * events. ++ */ ++ switch (perf_event_id(&counter->hw_event)) { ++ case PERF_COUNT_CPU_CLOCK: ++ hw_ops = &perf_ops_cpu_clock; ++ ++ if (hw_event->irq_period && hw_event->irq_period < 10000) ++ hw_event->irq_period = 10000; ++ break; ++ case PERF_COUNT_TASK_CLOCK: ++ /* ++ * If the user instantiates this as a per-cpu counter, ++ * use the cpu_clock counter instead. 
++ */ ++ if (counter->ctx->task) ++ hw_ops = &perf_ops_task_clock; ++ else ++ hw_ops = &perf_ops_cpu_clock; ++ ++ if (hw_event->irq_period && hw_event->irq_period < 10000) ++ hw_event->irq_period = 10000; ++ break; ++ case PERF_COUNT_PAGE_FAULTS: ++ case PERF_COUNT_PAGE_FAULTS_MIN: ++ case PERF_COUNT_PAGE_FAULTS_MAJ: ++ case PERF_COUNT_CONTEXT_SWITCHES: ++ hw_ops = &perf_ops_generic; ++ break; ++ case PERF_COUNT_CPU_MIGRATIONS: ++ if (!counter->hw_event.exclude_kernel) ++ hw_ops = &perf_ops_cpu_migrations; ++ break; ++ } ++ ++ if (hw_ops) ++ hwc->irq_period = hw_event->irq_period; ++ ++ return hw_ops; ++} ++ ++/* ++ * Allocate and initialize a counter structure ++ */ ++static struct perf_counter * ++perf_counter_alloc(struct perf_counter_hw_event *hw_event, ++ int cpu, ++ struct perf_counter_context *ctx, ++ struct perf_counter *group_leader, ++ gfp_t gfpflags) ++{ ++ const struct hw_perf_counter_ops *hw_ops; ++ struct perf_counter *counter; ++ ++ counter = kzalloc(sizeof(*counter), gfpflags); ++ if (!counter) ++ return NULL; ++ ++ /* ++ * Single counters are their own group leaders, with an ++ * empty sibling list: ++ */ ++ if (!group_leader) ++ group_leader = counter; ++ ++ mutex_init(&counter->mutex); ++ INIT_LIST_HEAD(&counter->list_entry); ++ INIT_LIST_HEAD(&counter->event_entry); ++ INIT_LIST_HEAD(&counter->sibling_list); ++ init_waitqueue_head(&counter->waitq); ++ ++ mutex_init(&counter->mmap_mutex); ++ ++ INIT_LIST_HEAD(&counter->child_list); ++ ++ counter->cpu = cpu; ++ counter->hw_event = *hw_event; ++ counter->wakeup_pending = 0; ++ counter->group_leader = group_leader; ++ counter->hw_ops = NULL; ++ counter->ctx = ctx; ++ ++ counter->state = PERF_COUNTER_STATE_INACTIVE; ++ if (hw_event->disabled) ++ counter->state = PERF_COUNTER_STATE_OFF; ++ ++ hw_ops = NULL; ++ ++ if (perf_event_raw(hw_event)) { ++ hw_ops = hw_perf_counter_init(counter); ++ goto done; ++ } ++ ++ switch (perf_event_type(hw_event)) { ++ case PERF_TYPE_HARDWARE: ++ hw_ops = hw_perf_counter_init(counter); ++ break; ++ ++ case PERF_TYPE_SOFTWARE: ++ hw_ops = sw_perf_counter_init(counter); ++ break; ++ ++ case PERF_TYPE_TRACEPOINT: ++ hw_ops = tp_perf_counter_init(counter); ++ break; ++ } ++ ++ if (!hw_ops) { ++ kfree(counter); ++ return NULL; ++ } ++done: ++ counter->hw_ops = hw_ops; ++ ++ return counter; ++} ++ ++/** ++ * sys_perf_counter_open - open a performance counter, associate it to a task/cpu ++ * ++ * @hw_event_uptr: event type attributes for monitoring/sampling ++ * @pid: target pid ++ * @cpu: target cpu ++ * @group_fd: group leader counter fd ++ */ ++SYSCALL_DEFINE5(perf_counter_open, ++ const struct perf_counter_hw_event __user *, hw_event_uptr, ++ pid_t, pid, int, cpu, int, group_fd, unsigned long, flags) ++{ ++ struct perf_counter *counter, *group_leader; ++ struct perf_counter_hw_event hw_event; ++ struct perf_counter_context *ctx; ++ struct file *counter_file = NULL; ++ struct file *group_file = NULL; ++ int fput_needed = 0; ++ int fput_needed2 = 0; ++ int ret; ++ ++ /* for future expandability... 
*/ ++ if (flags) ++ return -EINVAL; ++ ++ if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0) ++ return -EFAULT; ++ ++ /* ++ * Get the target context (task or percpu): ++ */ ++ ctx = find_get_context(pid, cpu); ++ if (IS_ERR(ctx)) ++ return PTR_ERR(ctx); ++ ++ /* ++ * Look up the group leader (we will attach this counter to it): ++ */ ++ group_leader = NULL; ++ if (group_fd != -1) { ++ ret = -EINVAL; ++ group_file = fget_light(group_fd, &fput_needed); ++ if (!group_file) ++ goto err_put_context; ++ if (group_file->f_op != &perf_fops) ++ goto err_put_context; ++ ++ group_leader = group_file->private_data; ++ /* ++ * Do not allow a recursive hierarchy (this new sibling ++ * becoming part of another group-sibling): ++ */ ++ if (group_leader->group_leader != group_leader) ++ goto err_put_context; ++ /* ++ * Do not allow to attach to a group in a different ++ * task or CPU context: ++ */ ++ if (group_leader->ctx != ctx) ++ goto err_put_context; ++ /* ++ * Only a group leader can be exclusive or pinned ++ */ ++ if (hw_event.exclusive || hw_event.pinned) ++ goto err_put_context; ++ } ++ ++ ret = -EINVAL; ++ counter = perf_counter_alloc(&hw_event, cpu, ctx, group_leader, ++ GFP_KERNEL); ++ if (!counter) ++ goto err_put_context; ++ ++ ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0); ++ if (ret < 0) ++ goto err_free_put_context; ++ ++ counter_file = fget_light(ret, &fput_needed2); ++ if (!counter_file) ++ goto err_free_put_context; ++ ++ counter->filp = counter_file; ++ mutex_lock(&ctx->mutex); ++ perf_install_in_context(ctx, counter, cpu); ++ mutex_unlock(&ctx->mutex); ++ ++ fput_light(counter_file, fput_needed2); ++ ++out_fput: ++ fput_light(group_file, fput_needed); ++ ++ return ret; ++ ++err_free_put_context: ++ kfree(counter); ++ ++err_put_context: ++ put_context(ctx); ++ ++ goto out_fput; ++} ++ ++/* ++ * Initialize the perf_counter context in a task_struct: ++ */ ++static void ++__perf_counter_init_context(struct perf_counter_context *ctx, ++ struct task_struct *task) ++{ ++ memset(ctx, 0, sizeof(*ctx)); ++ spin_lock_init(&ctx->lock); ++ mutex_init(&ctx->mutex); ++ INIT_LIST_HEAD(&ctx->counter_list); ++ INIT_LIST_HEAD(&ctx->event_list); ++ ctx->task = task; ++} ++ ++/* ++ * inherit a counter from parent task to child task: ++ */ ++static struct perf_counter * ++inherit_counter(struct perf_counter *parent_counter, ++ struct task_struct *parent, ++ struct perf_counter_context *parent_ctx, ++ struct task_struct *child, ++ struct perf_counter *group_leader, ++ struct perf_counter_context *child_ctx) ++{ ++ struct perf_counter *child_counter; ++ ++ /* ++ * Instead of creating recursive hierarchies of counters, ++ * we link inherited counters back to the original parent, ++ * which has a filp for sure, which we use as the reference ++ * count: ++ */ ++ if (parent_counter->parent) ++ parent_counter = parent_counter->parent; ++ ++ child_counter = perf_counter_alloc(&parent_counter->hw_event, ++ parent_counter->cpu, child_ctx, ++ group_leader, GFP_KERNEL); ++ if (!child_counter) ++ return NULL; ++ ++ /* ++ * Link it up in the child's context: ++ */ ++ child_counter->task = child; ++ add_counter_to_ctx(child_counter, child_ctx); ++ ++ child_counter->parent = parent_counter; ++ /* ++ * inherit into child's child as well: ++ */ ++ child_counter->hw_event.inherit = 1; ++ ++ /* ++ * Get a reference to the parent filp - we will fput it ++ * when the child counter exits. 
This is safe to do because ++ * we are in the parent and we know that the filp still ++ * exists and has a nonzero count: ++ */ ++ atomic_long_inc(&parent_counter->filp->f_count); ++ ++ /* ++ * Link this into the parent counter's child list ++ */ ++ mutex_lock(&parent_counter->mutex); ++ list_add_tail(&child_counter->child_list, &parent_counter->child_list); ++ ++ /* ++ * Make the child state follow the state of the parent counter, ++ * not its hw_event.disabled bit. We hold the parent's mutex, ++ * so we won't race with perf_counter_{en,dis}able_family. ++ */ ++ if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE) ++ child_counter->state = PERF_COUNTER_STATE_INACTIVE; ++ else ++ child_counter->state = PERF_COUNTER_STATE_OFF; ++ ++ mutex_unlock(&parent_counter->mutex); ++ ++ return child_counter; ++} ++ ++static int inherit_group(struct perf_counter *parent_counter, ++ struct task_struct *parent, ++ struct perf_counter_context *parent_ctx, ++ struct task_struct *child, ++ struct perf_counter_context *child_ctx) ++{ ++ struct perf_counter *leader; ++ struct perf_counter *sub; ++ ++ leader = inherit_counter(parent_counter, parent, parent_ctx, ++ child, NULL, child_ctx); ++ if (!leader) ++ return -ENOMEM; ++ list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) { ++ if (!inherit_counter(sub, parent, parent_ctx, ++ child, leader, child_ctx)) ++ return -ENOMEM; ++ } ++ return 0; ++} ++ ++static void sync_child_counter(struct perf_counter *child_counter, ++ struct perf_counter *parent_counter) ++{ ++ u64 parent_val, child_val; ++ ++ parent_val = atomic64_read(&parent_counter->count); ++ child_val = atomic64_read(&child_counter->count); ++ ++ /* ++ * Add back the child's count to the parent's count: ++ */ ++ atomic64_add(child_val, &parent_counter->count); ++ atomic64_add(child_counter->total_time_enabled, ++ &parent_counter->child_total_time_enabled); ++ atomic64_add(child_counter->total_time_running, ++ &parent_counter->child_total_time_running); ++ ++ /* ++ * Remove this counter from the parent's list ++ */ ++ mutex_lock(&parent_counter->mutex); ++ list_del_init(&child_counter->child_list); ++ mutex_unlock(&parent_counter->mutex); ++ ++ /* ++ * Release the parent counter, if this was the last ++ * reference to it. ++ */ ++ fput(parent_counter->filp); ++} ++ ++static void ++__perf_counter_exit_task(struct task_struct *child, ++ struct perf_counter *child_counter, ++ struct perf_counter_context *child_ctx) ++{ ++ struct perf_counter *parent_counter; ++ struct perf_counter *sub, *tmp; ++ ++ /* ++ * If we do not self-reap then we have to wait for the ++ * child task to unschedule (it will happen for sure), ++ * so that its counter is at its final count. (This ++ * condition triggers rarely - child tasks usually get ++ * off their CPU before the parent has a chance to ++ * get this far into the reaping action) ++ */ ++ if (child != current) { ++ wait_task_inactive(child, 0); ++ list_del_init(&child_counter->list_entry); ++ update_counter_times(child_counter); ++ } else { ++ struct perf_cpu_context *cpuctx; ++ unsigned long flags; ++ u64 perf_flags; ++ ++ /* ++ * Disable and unlink this counter. 
++ * ++ * Be careful about zapping the list - IRQ/NMI context ++ * could still be processing it: ++ */ ++ curr_rq_lock_irq_save(&flags); ++ perf_flags = hw_perf_save_disable(); ++ ++ cpuctx = &__get_cpu_var(perf_cpu_context); ++ ++ group_sched_out(child_counter, cpuctx, child_ctx); ++ update_counter_times(child_counter); ++ ++ list_del_init(&child_counter->list_entry); ++ ++ child_ctx->nr_counters--; ++ ++ hw_perf_restore(perf_flags); ++ curr_rq_unlock_irq_restore(&flags); ++ } ++ ++ parent_counter = child_counter->parent; ++ /* ++ * It can happen that parent exits first, and has counters ++ * that are still around due to the child reference. These ++ * counters need to be zapped - but otherwise linger. ++ */ ++ if (parent_counter) { ++ sync_child_counter(child_counter, parent_counter); ++ list_for_each_entry_safe(sub, tmp, &child_counter->sibling_list, ++ list_entry) { ++ if (sub->parent) { ++ sync_child_counter(sub, sub->parent); ++ free_counter(sub); ++ } ++ } ++ free_counter(child_counter); ++ } ++} ++ ++/* ++ * When a child task exits, feed back counter values to parent counters. ++ * ++ * Note: we may be running in child context, but the PID is not hashed ++ * anymore so new counters will not be added. ++ */ ++void perf_counter_exit_task(struct task_struct *child) ++{ ++ struct perf_counter *child_counter, *tmp; ++ struct perf_counter_context *child_ctx; ++ ++ child_ctx = &child->perf_counter_ctx; ++ ++ if (likely(!child_ctx->nr_counters)) ++ return; ++ ++ list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list, ++ list_entry) ++ __perf_counter_exit_task(child, child_counter, child_ctx); ++} ++ ++/* ++ * Initialize the perf_counter context in task_struct ++ */ ++void perf_counter_init_task(struct task_struct *child) ++{ ++ struct perf_counter_context *child_ctx, *parent_ctx; ++ struct perf_counter *counter; ++ struct task_struct *parent = current; ++ ++ child_ctx = &child->perf_counter_ctx; ++ parent_ctx = &parent->perf_counter_ctx; ++ ++ __perf_counter_init_context(child_ctx, child); ++ ++ /* ++ * This is executed from the parent task context, so inherit ++ * counters that have been marked for cloning: ++ */ ++ ++ if (likely(!parent_ctx->nr_counters)) ++ return; ++ ++ /* ++ * Lock the parent list. No need to lock the child - not PID ++ * hashed yet and not running, so nobody can access it. 
++ */ ++ mutex_lock(&parent_ctx->mutex); ++ ++ /* ++ * We dont have to disable NMIs - we are only looking at ++ * the list, not manipulating it: ++ */ ++ list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) { ++ if (!counter->hw_event.inherit) ++ continue; ++ ++ if (inherit_group(counter, parent, ++ parent_ctx, child, child_ctx)) ++ break; ++ } ++ ++ mutex_unlock(&parent_ctx->mutex); ++} ++ ++static void __cpuinit perf_counter_init_cpu(int cpu) ++{ ++ struct perf_cpu_context *cpuctx; ++ ++ cpuctx = &per_cpu(perf_cpu_context, cpu); ++ __perf_counter_init_context(&cpuctx->ctx, NULL); ++ ++ mutex_lock(&perf_resource_mutex); ++ cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu; ++ mutex_unlock(&perf_resource_mutex); ++ ++ hw_perf_counter_setup(cpu); ++} ++ ++#ifdef CONFIG_HOTPLUG_CPU ++static void __perf_counter_exit_cpu(void *info) ++{ ++ struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); ++ struct perf_counter_context *ctx = &cpuctx->ctx; ++ struct perf_counter *counter, *tmp; ++ ++ list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry) ++ __perf_counter_remove_from_context(counter); ++} ++static void perf_counter_exit_cpu(int cpu) ++{ ++ struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); ++ struct perf_counter_context *ctx = &cpuctx->ctx; ++ ++ mutex_lock(&ctx->mutex); ++ smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1); ++ mutex_unlock(&ctx->mutex); ++} ++#else ++static inline void perf_counter_exit_cpu(int cpu) { } ++#endif ++ ++static int __cpuinit ++perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) ++{ ++ unsigned int cpu = (long)hcpu; ++ ++ switch (action) { ++ ++ case CPU_UP_PREPARE: ++ case CPU_UP_PREPARE_FROZEN: ++ perf_counter_init_cpu(cpu); ++ break; ++ ++ case CPU_DOWN_PREPARE: ++ case CPU_DOWN_PREPARE_FROZEN: ++ perf_counter_exit_cpu(cpu); ++ break; ++ ++ default: ++ break; ++ } ++ ++ return NOTIFY_OK; ++} ++ ++static struct notifier_block __cpuinitdata perf_cpu_nb = { ++ .notifier_call = perf_cpu_notify, ++}; ++ ++static int __init perf_counter_init(void) ++{ ++ perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE, ++ (void *)(long)smp_processor_id()); ++ register_cpu_notifier(&perf_cpu_nb); ++ ++ return 0; ++} ++early_initcall(perf_counter_init); ++ ++static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf) ++{ ++ return sprintf(buf, "%d\n", perf_reserved_percpu); ++} ++ ++static ssize_t ++perf_set_reserve_percpu(struct sysdev_class *class, ++ const char *buf, ++ size_t count) ++{ ++ struct perf_cpu_context *cpuctx; ++ unsigned long val; ++ int err, cpu, mpt; ++ ++ err = strict_strtoul(buf, 10, &val); ++ if (err) ++ return err; ++ if (val > perf_max_counters) ++ return -EINVAL; ++ ++ mutex_lock(&perf_resource_mutex); ++ perf_reserved_percpu = val; ++ for_each_online_cpu(cpu) { ++ cpuctx = &per_cpu(perf_cpu_context, cpu); ++ spin_lock_irq(&cpuctx->ctx.lock); ++ mpt = min(perf_max_counters - cpuctx->ctx.nr_counters, ++ perf_max_counters - perf_reserved_percpu); ++ cpuctx->max_pertask = mpt; ++ spin_unlock_irq(&cpuctx->ctx.lock); ++ } ++ mutex_unlock(&perf_resource_mutex); ++ ++ return count; ++} ++ ++static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf) ++{ ++ return sprintf(buf, "%d\n", perf_overcommit); ++} ++ ++static ssize_t ++perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count) ++{ ++ unsigned long val; ++ int err; ++ ++ err = strict_strtoul(buf, 10, &val); ++ if (err) ++ return 
err; ++ if (val > 1) ++ return -EINVAL; ++ ++ mutex_lock(&perf_resource_mutex); ++ perf_overcommit = val; ++ mutex_unlock(&perf_resource_mutex); ++ ++ return count; ++} ++ ++static SYSDEV_CLASS_ATTR( ++ reserve_percpu, ++ 0644, ++ perf_show_reserve_percpu, ++ perf_set_reserve_percpu ++ ); ++ ++static SYSDEV_CLASS_ATTR( ++ overcommit, ++ 0644, ++ perf_show_overcommit, ++ perf_set_overcommit ++ ); ++ ++static struct attribute *perfclass_attrs[] = { ++ &attr_reserve_percpu.attr, ++ &attr_overcommit.attr, ++ NULL ++}; ++ ++static struct attribute_group perfclass_attr_group = { ++ .attrs = perfclass_attrs, ++ .name = "perf_counters", ++}; ++ ++static int __init perf_counter_sysfs_init(void) ++{ ++ return sysfs_create_group(&cpu_sysdev_class.kset.kobj, ++ &perfclass_attr_group); ++} ++device_initcall(perf_counter_sysfs_init); +Index: linux-2.6-tip/kernel/posix-cpu-timers.c +=================================================================== +--- linux-2.6-tip.orig/kernel/posix-cpu-timers.c ++++ linux-2.6-tip/kernel/posix-cpu-timers.c +@@ -558,7 +558,7 @@ static void arm_timer(struct k_itimer *t + p->cpu_timers : p->signal->cpu_timers); + head += CPUCLOCK_WHICH(timer->it_clock); + +- BUG_ON(!irqs_disabled()); ++ BUG_ON_NONRT(!irqs_disabled()); + spin_lock(&p->sighand->siglock); + + listpos = head; +@@ -746,7 +746,7 @@ int posix_cpu_timer_set(struct k_itimer + /* + * Disarm any old timer after extracting its expiry time. + */ +- BUG_ON(!irqs_disabled()); ++ BUG_ON_NONRT(!irqs_disabled()); + + ret = 0; + spin_lock(&p->sighand->siglock); +@@ -1371,7 +1371,8 @@ static inline int fastpath_timer_check(s + if (task_cputime_expired(&group_sample, &sig->cputime_expires)) + return 1; + } +- return 0; ++ ++ return sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY; + } + + /* +@@ -1379,12 +1380,11 @@ static inline int fastpath_timer_check(s + * already updated our counts. We need to check if any timers fire now. + * Interrupts are disabled. 
+ */ +-void run_posix_cpu_timers(struct task_struct *tsk) ++void __run_posix_cpu_timers(struct task_struct *tsk) + { + LIST_HEAD(firing); + struct k_itimer *timer, *next; + +- BUG_ON(!irqs_disabled()); + + /* + * The fast path checks that there are no expired thread or thread +@@ -1436,6 +1436,177 @@ void run_posix_cpu_timers(struct task_st + } + } + ++#include ++#include ++DEFINE_PER_CPU(struct task_struct *, posix_timer_task); ++DEFINE_PER_CPU(struct task_struct *, posix_timer_tasklist); ++ ++static int posix_cpu_timers_thread(void *data) ++{ ++ int cpu = (long)data; ++ ++ BUG_ON(per_cpu(posix_timer_task,cpu) != current); ++ ++ while (!kthread_should_stop()) { ++ struct task_struct *tsk = NULL; ++ struct task_struct *next = NULL; ++ ++ if (cpu_is_offline(cpu)) ++ goto wait_to_die; ++ ++ /* grab task list */ ++ raw_local_irq_disable(); ++ tsk = per_cpu(posix_timer_tasklist, cpu); ++ per_cpu(posix_timer_tasklist, cpu) = NULL; ++ raw_local_irq_enable(); ++ ++ /* its possible the list is empty, just return */ ++ if (!tsk) { ++ set_current_state(TASK_INTERRUPTIBLE); ++ schedule(); ++ __set_current_state(TASK_RUNNING); ++ continue; ++ } ++ ++ /* Process task list */ ++ while (1) { ++ /* save next */ ++ next = tsk->posix_timer_list; ++ ++ /* run the task timers, clear its ptr and ++ * unreference it ++ */ ++ __run_posix_cpu_timers(tsk); ++ tsk->posix_timer_list = NULL; ++ put_task_struct(tsk); ++ ++ /* check if this is the last on the list */ ++ if (next == tsk) ++ break; ++ tsk = next; ++ } ++ } ++ return 0; ++ ++wait_to_die: ++ /* Wait for kthread_stop */ ++ set_current_state(TASK_INTERRUPTIBLE); ++ while (!kthread_should_stop()) { ++ schedule(); ++ set_current_state(TASK_INTERRUPTIBLE); ++ } ++ __set_current_state(TASK_RUNNING); ++ return 0; ++} ++ ++static inline int __fastpath_timer_check(struct task_struct *tsk) ++{ ++ /* tsk == current, ensure it is safe to use ->signal/sighand */ ++ if (unlikely(tsk->exit_state)) ++ return 0; ++ ++ if (!task_cputime_zero(&tsk->cputime_expires)) ++ return 1; ++ ++ if (!task_cputime_zero(&tsk->signal->cputime_expires)) ++ return 1; ++ ++ return 0; ++} ++ ++void run_posix_cpu_timers(struct task_struct *tsk) ++{ ++ unsigned long cpu = smp_processor_id(); ++ struct task_struct *tasklist; ++ ++ BUG_ON(!irqs_disabled()); ++ if(!per_cpu(posix_timer_task, cpu)) ++ return; ++ /* get per-cpu references */ ++ tasklist = per_cpu(posix_timer_tasklist, cpu); ++ ++ /* check to see if we're already queued */ ++ if (!tsk->posix_timer_list && __fastpath_timer_check(tsk)) { ++ get_task_struct(tsk); ++ if (tasklist) { ++ tsk->posix_timer_list = tasklist; ++ } else { ++ /* ++ * The list is terminated by a self-pointing ++ * task_struct ++ */ ++ tsk->posix_timer_list = tsk; ++ } ++ per_cpu(posix_timer_tasklist, cpu) = tsk; ++ ++ wake_up_process(per_cpu(posix_timer_task, cpu)); ++ } ++} ++ ++/* ++ * posix_cpu_thread_call - callback that gets triggered when a CPU is added. ++ * Here we can start up the necessary migration thread for the new CPU. 
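run_posix_cpu_timers() above packs two states into one pointer: a NULL posix_timer_list means "not queued", and the tail of the per-CPU list is marked by a node that points to itself. A generic, single-threaded sketch of that self-terminated list (struct work_item, work_push and work_drain are invented names; the kernel additionally disables interrupts around the enqueue and the list swap):

#include <stddef.h>

struct work_item {
    struct work_item *next;   /* NULL = not queued, self = tail of list */
    int payload;
};

/* Push onto a LIFO list; the first element ever pushed points to itself. */
static void work_push(struct work_item **head, struct work_item *item)
{
    if (item->next)           /* already queued */
        return;
    item->next = *head ? *head : item;
    *head = item;
}

/* Detach and process the whole list, as the per-CPU kthread does. */
static void work_drain(struct work_item **head,
                       void (*fn)(struct work_item *))
{
    struct work_item *item = *head;

    *head = NULL;
    while (item) {
        struct work_item *next = item->next;

        item->next = NULL;    /* mark "not queued" again */
        fn(item);
        if (next == item)     /* self-pointer: that was the tail */
            break;
        item = next;
    }
}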
++ */ ++static int posix_cpu_thread_call(struct notifier_block *nfb, ++ unsigned long action, void *hcpu) ++{ ++ int cpu = (long)hcpu; ++ struct task_struct *p; ++ struct sched_param param; ++ ++ switch (action) { ++ case CPU_UP_PREPARE: ++ p = kthread_create(posix_cpu_timers_thread, hcpu, ++ "posixcputmr/%d",cpu); ++ if (IS_ERR(p)) ++ return NOTIFY_BAD; ++ p->flags |= PF_NOFREEZE; ++ kthread_bind(p, cpu); ++ /* Must be high prio to avoid getting starved */ ++ param.sched_priority = MAX_RT_PRIO-1; ++ sched_setscheduler(p, SCHED_FIFO, ¶m); ++ per_cpu(posix_timer_task,cpu) = p; ++ break; ++ case CPU_ONLINE: ++ /* Strictly unneccessary, as first user will wake it. */ ++ wake_up_process(per_cpu(posix_timer_task,cpu)); ++ break; ++#ifdef CONFIG_HOTPLUG_CPU ++ case CPU_UP_CANCELED: ++ /* Unbind it from offline cpu so it can run. Fall thru. */ ++ kthread_bind(per_cpu(posix_timer_task,cpu), ++ any_online_cpu(cpu_online_map)); ++ kthread_stop(per_cpu(posix_timer_task,cpu)); ++ per_cpu(posix_timer_task,cpu) = NULL; ++ break; ++ case CPU_DEAD: ++ kthread_stop(per_cpu(posix_timer_task,cpu)); ++ per_cpu(posix_timer_task,cpu) = NULL; ++ break; ++#endif ++ } ++ return NOTIFY_OK; ++} ++ ++/* Register at highest priority so that task migration (migrate_all_tasks) ++ * happens before everything else. ++ */ ++static struct notifier_block __devinitdata posix_cpu_thread_notifier = { ++ .notifier_call = posix_cpu_thread_call, ++ .priority = 10 ++}; ++ ++static int __init posix_cpu_thread_init(void) ++{ ++ void *cpu = (void *)(long)smp_processor_id(); ++ /* Start one for boot CPU. */ ++ posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_UP_PREPARE, cpu); ++ posix_cpu_thread_call(&posix_cpu_thread_notifier, CPU_ONLINE, cpu); ++ register_cpu_notifier(&posix_cpu_thread_notifier); ++ return 0; ++} ++early_initcall(posix_cpu_thread_init); ++ + /* + * Set one of the process-wide special case CPU timers. + * The tsk->sighand->siglock must be held by the caller. +@@ -1701,6 +1872,12 @@ static __init int init_posix_cpu_timers( + .nsleep = thread_cpu_nsleep, + .nsleep_restart = thread_cpu_nsleep_restart, + }; ++ unsigned long cpu; ++ ++ /* init the per-cpu posix_timer_tasklets */ ++ for_each_cpu_mask(cpu, cpu_possible_map) { ++ per_cpu(posix_timer_tasklist, cpu) = NULL; ++ } + + register_posix_clock(CLOCK_PROCESS_CPUTIME_ID, &process); + register_posix_clock(CLOCK_THREAD_CPUTIME_ID, &thread); +Index: linux-2.6-tip/kernel/power/snapshot.c +=================================================================== +--- linux-2.6-tip.orig/kernel/power/snapshot.c ++++ linux-2.6-tip/kernel/power/snapshot.c +@@ -486,8 +486,8 @@ static int memory_bm_find_bit(struct mem + + static void memory_bm_set_bit(struct memory_bitmap *bm, unsigned long pfn) + { +- void *addr; +- unsigned int bit; ++ unsigned int bit = 0; ++ void *addr = NULL; + int error; + + error = memory_bm_find_bit(bm, pfn, &addr, &bit); +@@ -520,8 +520,8 @@ static void memory_bm_clear_bit(struct m + + static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) + { +- void *addr; +- unsigned int bit; ++ unsigned int bit = 0; ++ void *addr = NULL; + int error; + + error = memory_bm_find_bit(bm, pfn, &addr, &bit); +Index: linux-2.6-tip/kernel/printk.c +=================================================================== +--- linux-2.6-tip.orig/kernel/printk.c ++++ linux-2.6-tip/kernel/printk.c +@@ -91,7 +91,7 @@ static int console_locked, console_suspe + * It is also used in interesting ways to provide interlocking in + * release_console_sem(). 
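The posixcputmr/%d kthreads created just above are bound to their CPU and promoted to SCHED_FIFO at MAX_RT_PRIO-1 so the timer work cannot be starved by the very tasks whose timers it runs. A plain-userspace analogue of handing a worker thread a real-time FIFO priority might look like this (spawn_fifo_worker and timer_worker are hypothetical; a real caller needs CAP_SYS_NICE or a suitable RLIMIT_RTPRIO):

#include <pthread.h>
#include <sched.h>

static void *timer_worker(void *arg)
{
    (void)arg;
    /* ... process expired timers, then sleep until woken ... */
    return NULL;
}

static int spawn_fifo_worker(pthread_t *tid, int prio)
{
    pthread_attr_t attr;
    struct sched_param sp = { .sched_priority = prio };
    int err;

    pthread_attr_init(&attr);
    /* Use an explicit policy instead of inheriting the creator's. */
    pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
    pthread_attr_setschedpolicy(&attr, SCHED_FIFO);
    pthread_attr_setschedparam(&attr, &sp);

    err = pthread_create(tid, &attr, timer_worker, NULL);
    pthread_attr_destroy(&attr);
    return err;   /* typically EPERM without the right rtprio limits */
}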
+ */ +-static DEFINE_SPINLOCK(logbuf_lock); ++static DEFINE_RAW_SPINLOCK(logbuf_lock); + + #define LOG_BUF_MASK (log_buf_len-1) + #define LOG_BUF(idx) (log_buf[(idx) & LOG_BUF_MASK]) +@@ -395,9 +395,13 @@ static void __call_console_drivers(unsig + + for (con = console_drivers; con; con = con->next) { + if ((con->flags & CON_ENABLED) && con->write && +- (cpu_online(smp_processor_id()) || +- (con->flags & CON_ANYTIME))) ++ console_atomic_safe(con) && ++ (cpu_online(raw_smp_processor_id()) || ++ (con->flags & CON_ANYTIME))) { ++ set_printk_might_sleep(1); + con->write(con, &LOG_BUF(start), end - start); ++ set_printk_might_sleep(0); ++ } + } + } + +@@ -511,6 +515,7 @@ static void zap_locks(void) + spin_lock_init(&logbuf_lock); + /* And make sure that we print immediately */ + init_MUTEX(&console_sem); ++ zap_rt_locks(); + } + + #if defined(CONFIG_PRINTK_TIME) +@@ -592,7 +597,8 @@ static inline int can_use_console(unsign + * interrupts disabled. It should return with 'lockbuf_lock' + * released but interrupts still disabled. + */ +-static int acquire_console_semaphore_for_printk(unsigned int cpu) ++static int acquire_console_semaphore_for_printk(unsigned int cpu, ++ unsigned long flags) + { + int retval = 0; + +@@ -613,6 +619,8 @@ static int acquire_console_semaphore_for + } + printk_cpu = UINT_MAX; + spin_unlock(&logbuf_lock); ++ lockdep_on(); ++ local_irq_restore(flags); + return retval; + } + static const char recursion_bug_msg [] = +@@ -634,7 +642,7 @@ asmlinkage int vprintk(const char *fmt, + preempt_disable(); + /* This stops the holder of console_sem just where we want him */ + raw_local_irq_save(flags); +- this_cpu = smp_processor_id(); ++ this_cpu = raw_smp_processor_id(); + + /* + * Ouch, printk recursed into itself! +@@ -649,7 +657,8 @@ asmlinkage int vprintk(const char *fmt, + */ + if (!oops_in_progress) { + recursion_bug = 1; +- goto out_restore_irqs; ++ raw_local_irq_restore(flags); ++ goto out; + } + zap_locks(); + } +@@ -657,6 +666,7 @@ asmlinkage int vprintk(const char *fmt, + lockdep_off(); + spin_lock(&logbuf_lock); + printk_cpu = this_cpu; ++ preempt_enable(); + + if (recursion_bug) { + recursion_bug = 0; +@@ -726,14 +736,10 @@ asmlinkage int vprintk(const char *fmt, + * will release 'logbuf_lock' regardless of whether it + * actually gets the semaphore or not. + */ +- if (acquire_console_semaphore_for_printk(this_cpu)) ++ if (acquire_console_semaphore_for_printk(this_cpu, flags)) + release_console_sem(); + +- lockdep_on(); +-out_restore_irqs: +- raw_local_irq_restore(flags); +- +- preempt_enable(); ++out: + return printed_len; + } + EXPORT_SYMBOL(printk); +@@ -996,15 +1002,35 @@ void release_console_sem(void) + _con_start = con_start; + _log_end = log_end; + con_start = log_end; /* Flush */ ++ /* ++ * on PREEMPT_RT, call console drivers with ++ * interrupts enabled (if printk was called ++ * with interrupts disabled): ++ */ ++#ifdef CONFIG_PREEMPT_RT ++ spin_unlock_irqrestore(&logbuf_lock, flags); ++#else + spin_unlock(&logbuf_lock); + stop_critical_timings(); /* don't trace print latency */ ++#endif + call_console_drivers(_con_start, _log_end); + start_critical_timings(); ++#ifndef CONFIG_PREEMPT_RT + local_irq_restore(flags); ++#endif + } + console_locked = 0; +- up(&console_sem); + spin_unlock_irqrestore(&logbuf_lock, flags); ++ up(&console_sem); ++ /* ++ * On PREEMPT_RT kernels __wake_up may sleep, so wake syslogd ++ * up only if we are in a preemptible section. We normally dont ++ * printk from non-preemptible sections so this is for the emergency ++ * case only. 
++ */ ++#ifdef CONFIG_PREEMPT_RT ++ if (!in_atomic() && !irqs_disabled()) ++#endif + if (wake_klogd) + wake_up_klogd(); + } +@@ -1280,6 +1306,23 @@ int printk_ratelimit(void) + } + EXPORT_SYMBOL(printk_ratelimit); + ++static DEFINE_RAW_SPINLOCK(warn_lock); ++ ++void __WARN_ON(const char *func, const char *file, const int line) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&warn_lock, flags); ++ printk("%s/%d[CPU#%d]: BUG in %s at %s:%d\n", ++ current->comm, current->pid, raw_smp_processor_id(), ++ func, file, line); ++ dump_stack(); ++ spin_unlock_irqrestore(&warn_lock, flags); ++} ++ ++EXPORT_SYMBOL(__WARN_ON); ++ ++ + /** + * printk_timed_ratelimit - caller-controlled printk ratelimiting + * @caller_jiffies: pointer to caller's state +@@ -1292,8 +1335,11 @@ EXPORT_SYMBOL(printk_ratelimit); + bool printk_timed_ratelimit(unsigned long *caller_jiffies, + unsigned int interval_msecs) + { +- if (*caller_jiffies == 0 || time_after(jiffies, *caller_jiffies)) { +- *caller_jiffies = jiffies + msecs_to_jiffies(interval_msecs); ++ if (*caller_jiffies == 0 ++ || !time_in_range(jiffies, *caller_jiffies, ++ *caller_jiffies ++ + msecs_to_jiffies(interval_msecs))) { ++ *caller_jiffies = jiffies; + return true; + } + return false; +Index: linux-2.6-tip/kernel/profile.c +=================================================================== +--- linux-2.6-tip.orig/kernel/profile.c ++++ linux-2.6-tip/kernel/profile.c +@@ -263,6 +263,7 @@ EXPORT_SYMBOL_GPL(unregister_timer_hook) + * + * -- wli + */ ++#ifdef CONFIG_PROC_FS + static void __profile_flip_buffers(void *unused) + { + int cpu = smp_processor_id(); +@@ -308,57 +309,6 @@ static void profile_discard_flip_buffers + mutex_unlock(&profile_flip_mutex); + } + +-void profile_hits(int type, void *__pc, unsigned int nr_hits) +-{ +- unsigned long primary, secondary, flags, pc = (unsigned long)__pc; +- int i, j, cpu; +- struct profile_hit *hits; +- +- if (prof_on != type || !prof_buffer) +- return; +- pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1); +- i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; +- secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; +- cpu = get_cpu(); +- hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)]; +- if (!hits) { +- put_cpu(); +- return; +- } +- /* +- * We buffer the global profiler buffer into a per-CPU +- * queue and thus reduce the number of global (and possibly +- * NUMA-alien) accesses. 
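The profile_hits() implementation that this hunk relocates keeps a small per-CPU write queue and only touches the shared histogram when the queue cannot absorb another hit, which is what the "self-coalescing" comment refers to. A much-simplified, single-CPU sketch of the same idea, using linear probing instead of the kernel's grouped primary/secondary probe sequence (all names are invented):

#define GLOBAL_SLOTS 1024u
#define QUEUE_SLOTS  16u       /* per-CPU write queue, small on purpose */

struct hit { unsigned long pc; unsigned int hits; };

static unsigned int global_buf[GLOBAL_SLOTS];   /* shared histogram */
static struct hit queue[QUEUE_SLOTS];           /* "per-CPU" queue  */

static void flush_queue(void)
{
    for (unsigned int i = 0; i < QUEUE_SLOTS; i++) {
        if (queue[i].hits)
            global_buf[queue[i].pc % GLOBAL_SLOTS] += queue[i].hits;
        queue[i].pc = 0;
        queue[i].hits = 0;
    }
}

/* Coalesce repeated hits on the same pc locally; touch the shared
 * histogram only when no free or matching local slot remains. */
static void record_hit(unsigned long pc, unsigned int nr_hits)
{
    for (unsigned int i = 0; i < QUEUE_SLOTS; i++) {
        unsigned int slot = (pc + i) % QUEUE_SLOTS;   /* linear probe */

        if (queue[slot].hits && queue[slot].pc == pc) {
            queue[slot].hits += nr_hits;              /* coalesce */
            return;
        }
        if (!queue[slot].hits) {
            queue[slot].pc = pc;                      /* claim empty slot */
            queue[slot].hits = nr_hits;
            return;
        }
    }
    flush_queue();                                    /* no room: drain */
    global_buf[pc % GLOBAL_SLOTS] += nr_hits;
}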
The write-queue is self-coalescing: +- */ +- local_irq_save(flags); +- do { +- for (j = 0; j < PROFILE_GRPSZ; ++j) { +- if (hits[i + j].pc == pc) { +- hits[i + j].hits += nr_hits; +- goto out; +- } else if (!hits[i + j].hits) { +- hits[i + j].pc = pc; +- hits[i + j].hits = nr_hits; +- goto out; +- } +- } +- i = (i + secondary) & (NR_PROFILE_HIT - 1); +- } while (i != primary); +- +- /* +- * Add the current hit(s) and flush the write-queue out +- * to the global buffer: +- */ +- atomic_add(nr_hits, &prof_buffer[pc]); +- for (i = 0; i < NR_PROFILE_HIT; ++i) { +- atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); +- hits[i].pc = hits[i].hits = 0; +- } +-out: +- local_irq_restore(flags); +- put_cpu(); +-} +- + static int __cpuinit profile_cpu_callback(struct notifier_block *info, + unsigned long action, void *__cpu) + { +@@ -417,6 +367,60 @@ out_free: + } + return NOTIFY_OK; + } ++#endif /* CONFIG_PROC_FS */ ++ ++void profile_hits(int type, void *__pc, unsigned int nr_hits) ++{ ++ unsigned long primary, secondary, flags, pc = (unsigned long)__pc; ++ int i, j, cpu; ++ struct profile_hit *hits; ++ ++ if (prof_on != type || !prof_buffer) ++ return; ++ pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1); ++ i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; ++ secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; ++ cpu = get_cpu(); ++ hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)]; ++ if (!hits) { ++ put_cpu(); ++ return; ++ } ++ /* ++ * We buffer the global profiler buffer into a per-CPU ++ * queue and thus reduce the number of global (and possibly ++ * NUMA-alien) accesses. The write-queue is self-coalescing: ++ */ ++ local_irq_save(flags); ++ do { ++ for (j = 0; j < PROFILE_GRPSZ; ++j) { ++ if (hits[i + j].pc == pc) { ++ hits[i + j].hits += nr_hits; ++ goto out; ++ } else if (!hits[i + j].hits) { ++ hits[i + j].pc = pc; ++ hits[i + j].hits = nr_hits; ++ goto out; ++ } ++ } ++ i = (i + secondary) & (NR_PROFILE_HIT - 1); ++ } while (i != primary); ++ ++ /* ++ * Add the current hit(s) and flush the write-queue out ++ * to the global buffer: ++ */ ++ atomic_add(nr_hits, &prof_buffer[pc]); ++ for (i = 0; i < NR_PROFILE_HIT; ++i) { ++ atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); ++ hits[i].pc = hits[i].hits = 0; ++ } ++out: ++ local_irq_restore(flags); ++ put_cpu(); ++} ++ ++ + #else /* !CONFIG_SMP */ + #define profile_flip_buffers() do { } while (0) + #define profile_discard_flip_buffers() do { } while (0) +@@ -610,7 +614,7 @@ out_cleanup: + #define create_hash_tables() ({ 0; }) + #endif + +-int __ref create_proc_profile(void) /* false positive from hotcpu_notifier */ ++int create_proc_profile(void) + { + struct proc_dir_entry *entry; + +Index: linux-2.6-tip/kernel/ptrace.c +=================================================================== +--- linux-2.6-tip.orig/kernel/ptrace.c ++++ linux-2.6-tip/kernel/ptrace.c +@@ -613,8 +613,6 @@ SYSCALL_DEFINE4(ptrace, long, request, l + goto out_put_task_struct; + + ret = arch_ptrace(child, request, addr, data); +- if (ret < 0) +- goto out_put_task_struct; + + out_put_task_struct: + put_task_struct(child); +Index: linux-2.6-tip/kernel/rcuclassic.c +=================================================================== +--- linux-2.6-tip.orig/kernel/rcuclassic.c ++++ linux-2.6-tip/kernel/rcuclassic.c +@@ -65,6 +65,7 @@ static struct rcu_ctrlblk rcu_ctrlblk = + .lock = __SPIN_LOCK_UNLOCKED(&rcu_ctrlblk.lock), + .cpumask = CPU_BITS_NONE, + }; ++ + static struct rcu_ctrlblk rcu_bh_ctrlblk = { 
+ .cur = -300, + .completed = -300, +@@ -73,8 +74,26 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk + .cpumask = CPU_BITS_NONE, + }; + +-DEFINE_PER_CPU(struct rcu_data, rcu_data) = { 0L }; +-DEFINE_PER_CPU(struct rcu_data, rcu_bh_data) = { 0L }; ++static DEFINE_PER_CPU(struct rcu_data, rcu_data); ++static DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); ++ ++/* ++ * Increment the quiescent state counter. ++ * The counter is a bit degenerated: We do not need to know ++ * how many quiescent states passed, just if there was at least ++ * one since the start of the grace period. Thus just a flag. ++ */ ++void rcu_qsctr_inc(int cpu) ++{ ++ struct rcu_data *rdp = &per_cpu(rcu_data, cpu); ++ rdp->passed_quiesc = 1; ++} ++ ++void rcu_bh_qsctr_inc(int cpu) ++{ ++ struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); ++ rdp->passed_quiesc = 1; ++} + + static int blimit = 10; + static int qhimark = 10000; +Index: linux-2.6-tip/kernel/rcupdate.c +=================================================================== +--- linux-2.6-tip.orig/kernel/rcupdate.c ++++ linux-2.6-tip/kernel/rcupdate.c +@@ -122,6 +122,8 @@ static void rcu_barrier_func(void *type) + } + } + ++static inline void wait_migrated_callbacks(void); ++ + /* + * Orchestrate the specified type of RCU barrier, waiting for all + * RCU callbacks of the specified type to complete. +@@ -147,6 +149,7 @@ static void _rcu_barrier(enum rcu_barrie + complete(&rcu_barrier_completion); + wait_for_completion(&rcu_barrier_completion); + mutex_unlock(&rcu_barrier_mutex); ++ wait_migrated_callbacks(); + } + + /** +@@ -176,9 +179,50 @@ void rcu_barrier_sched(void) + } + EXPORT_SYMBOL_GPL(rcu_barrier_sched); + ++static atomic_t rcu_migrate_type_count = ATOMIC_INIT(0); ++static struct rcu_head rcu_migrate_head[3]; ++static DECLARE_WAIT_QUEUE_HEAD(rcu_migrate_wq); ++ ++static void rcu_migrate_callback(struct rcu_head *notused) ++{ ++ if (atomic_dec_and_test(&rcu_migrate_type_count)) ++ wake_up(&rcu_migrate_wq); ++} ++ ++static inline void wait_migrated_callbacks(void) ++{ ++ wait_event(rcu_migrate_wq, !atomic_read(&rcu_migrate_type_count)); ++} ++ ++static int __cpuinit rcu_barrier_cpu_hotplug(struct notifier_block *self, ++ unsigned long action, void *hcpu) ++{ ++ if (action == CPU_DYING) { ++ /* ++ * preempt_disable() in on_each_cpu() prevents stop_machine(), ++ * so when "on_each_cpu(rcu_barrier_func, (void *)type, 1);" ++ * returns, all online cpus have queued rcu_barrier_func(), ++ * and the dead cpu(if it exist) queues rcu_migrate_callback()s. ++ * ++ * These callbacks ensure _rcu_barrier() waits for all ++ * RCU callbacks of the specified type to complete. ++ */ ++ atomic_set(&rcu_migrate_type_count, 3); ++ call_rcu_bh(rcu_migrate_head, rcu_migrate_callback); ++ call_rcu_sched(rcu_migrate_head + 1, rcu_migrate_callback); ++ call_rcu(rcu_migrate_head + 2, rcu_migrate_callback); ++ } else if (action == CPU_POST_DEAD) { ++ /* rcu_migrate_head is protected by cpu_add_remove_lock */ ++ wait_migrated_callbacks(); ++ } ++ ++ return NOTIFY_OK; ++} ++ + void __init rcu_init(void) + { + __rcu_init(); ++ hotcpu_notifier(rcu_barrier_cpu_hotplug, 0); + } + + void rcu_scheduler_starting(void) +Index: linux-2.6-tip/kernel/rcupreempt.c +=================================================================== +--- linux-2.6-tip.orig/kernel/rcupreempt.c ++++ linux-2.6-tip/kernel/rcupreempt.c +@@ -71,7 +71,7 @@ + */ + #define GP_STAGES 2 + struct rcu_data { +- spinlock_t lock; /* Protect rcu_data fields. */ ++ raw_spinlock_t lock; /* Protect rcu_data fields. 
*/ + long completed; /* Number of last completed batch. */ + int waitlistcount; + struct rcu_head *nextlist; +@@ -138,7 +138,7 @@ enum rcu_sched_sleep_states { + }; + + struct rcu_ctrlblk { +- spinlock_t fliplock; /* Protect state-machine transitions. */ ++ raw_spinlock_t fliplock; /* Protect state-machine transitions. */ + long completed; /* Number of last completed batch. */ + enum rcu_try_flip_states rcu_try_flip_state; /* The current state of + the rcu state machine */ +@@ -147,9 +147,53 @@ struct rcu_ctrlblk { + wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */ + }; + ++struct rcu_dyntick_sched { ++ int dynticks; ++ int dynticks_snap; ++ int sched_qs; ++ int sched_qs_snap; ++ int sched_dynticks_snap; ++}; ++ ++static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = { ++ .dynticks = 1, ++}; ++ ++void rcu_qsctr_inc(int cpu) ++{ ++ struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); ++ ++ rdssp->sched_qs++; ++} ++ ++#ifdef CONFIG_NO_HZ ++ ++void rcu_enter_nohz(void) ++{ ++ static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); ++ ++ smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ ++ __get_cpu_var(rcu_dyntick_sched).dynticks++; ++ WARN_ON_RATELIMIT(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1, &rs); ++} ++ ++void rcu_exit_nohz(void) ++{ ++ static DEFINE_RATELIMIT_STATE(rs, 10 * HZ, 1); ++ ++ __get_cpu_var(rcu_dyntick_sched).dynticks++; ++ smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ ++ WARN_ON_RATELIMIT(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1), ++ &rs); ++} ++ ++#endif /* CONFIG_NO_HZ */ ++ ++ + static DEFINE_PER_CPU(struct rcu_data, rcu_data); ++ + static struct rcu_ctrlblk rcu_ctrlblk = { +- .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), ++ .fliplock = RAW_SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), + .completed = 0, + .rcu_try_flip_state = rcu_try_flip_idle_state, + .schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock), +@@ -427,10 +471,6 @@ static void __rcu_advance_callbacks(stru + } + } + +-DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = { +- .dynticks = 1, +-}; +- + #ifdef CONFIG_NO_HZ + static DEFINE_PER_CPU(int, rcu_update_flag); + +Index: linux-2.6-tip/kernel/rcutree.c +=================================================================== +--- linux-2.6-tip.orig/kernel/rcutree.c ++++ linux-2.6-tip/kernel/rcutree.c +@@ -78,6 +78,26 @@ DEFINE_PER_CPU(struct rcu_data, rcu_data + struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); + DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); + ++/* ++ * Increment the quiescent state counter. ++ * The counter is a bit degenerated: We do not need to know ++ * how many quiescent states passed, just if there was at least ++ * one since the start of the grace period. Thus just a flag. 
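Two related tricks appear above: rcu_qsctr_inc() records a quiescent state as a bare flag, since only "at least one since the grace period started" matters, and the rcupreempt dynticks counter encodes the CPU's state in its parity (odd while the CPU is active, even while it sits in dynticks-idle), so a remote CPU can tell from a snapshot whether a quiescent state has passed. A simplified single-counter model of the parity scheme, using C11 atomics in place of the per-CPU variables and explicit smp_mb() calls:

#include <stdatomic.h>
#include <stdbool.h>

/* Parity encodes the state: odd = CPU active, even = CPU idle.  The value
 * only ever increases, so a remote observer can compare against an old
 * snapshot to see whether the CPU was, or has been, idle since then. */
static _Atomic unsigned long dynticks = 1;   /* boot CPU starts active */

static void enter_idle(void)
{
    atomic_thread_fence(memory_order_seq_cst); /* order prior accesses */
    atomic_fetch_add(&dynticks, 1);            /* odd -> even */
}

static void exit_idle(void)
{
    atomic_fetch_add(&dynticks, 1);            /* even -> odd */
    atomic_thread_fence(memory_order_seq_cst); /* order later accesses */
}

/* Remote check: has this CPU been (or gone) idle since 'snap' was taken? */
static bool in_or_through_idle(unsigned long snap)
{
    unsigned long cur = atomic_load(&dynticks);

    return (cur & 1UL) == 0 || cur != snap;
}

If the snapshot comparison succeeds, the CPU cannot still be inside a read-side critical section that began before the snapshot; that is roughly the reasoning the rcupreempt grace-period machinery builds on.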
++ */ ++void rcu_qsctr_inc(int cpu) ++{ ++ struct rcu_data *rdp = &per_cpu(rcu_data, cpu); ++ rdp->passed_quiesc = 1; ++ rdp->passed_quiesc_completed = rdp->completed; ++} ++ ++void rcu_bh_qsctr_inc(int cpu) ++{ ++ struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); ++ rdp->passed_quiesc = 1; ++ rdp->passed_quiesc_completed = rdp->completed; ++} ++ + #ifdef CONFIG_NO_HZ + DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { + .dynticks_nesting = 1, +Index: linux-2.6-tip/kernel/rcutree.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/rcutree.h +@@ -0,0 +1,10 @@ ++ ++/* ++ * RCU implementation internal declarations: ++ */ ++extern struct rcu_state rcu_state; ++DECLARE_PER_CPU(struct rcu_data, rcu_data); ++ ++extern struct rcu_state rcu_bh_state; ++DECLARE_PER_CPU(struct rcu_data, rcu_bh_data); ++ +Index: linux-2.6-tip/kernel/rcutree_trace.c +=================================================================== +--- linux-2.6-tip.orig/kernel/rcutree_trace.c ++++ linux-2.6-tip/kernel/rcutree_trace.c +@@ -43,6 +43,8 @@ + #include + #include + ++#include "rcutree.h" ++ + static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) + { + if (!rdp->beenonline) +Index: linux-2.6-tip/kernel/relay.c +=================================================================== +--- linux-2.6-tip.orig/kernel/relay.c ++++ linux-2.6-tip/kernel/relay.c +@@ -343,6 +343,10 @@ static void wakeup_readers(unsigned long + { + struct rchan_buf *buf = (struct rchan_buf *)data; + wake_up_interruptible(&buf->read_wait); ++ /* ++ * Stupid polling for now: ++ */ ++ mod_timer(&buf->timer, jiffies + 1); + } + + /** +@@ -360,6 +364,7 @@ static void __relay_reset(struct rchan_b + init_waitqueue_head(&buf->read_wait); + kref_init(&buf->kref); + setup_timer(&buf->timer, wakeup_readers, (unsigned long)buf); ++ mod_timer(&buf->timer, jiffies + 1); + } else + del_timer_sync(&buf->timer); + +@@ -677,9 +682,7 @@ int relay_late_setup_files(struct rchan + */ + for_each_online_cpu(i) { + if (unlikely(!chan->buf[i])) { +- printk(KERN_ERR "relay_late_setup_files: CPU %u " +- "has no buffer, it must have!\n", i); +- BUG(); ++ WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n"); + err = -EINVAL; + break; + } +@@ -742,15 +745,6 @@ size_t relay_switch_subbuf(struct rchan_ + else + buf->early_bytes += buf->chan->subbuf_size - + buf->padding[old_subbuf]; +- smp_mb(); +- if (waitqueue_active(&buf->read_wait)) +- /* +- * Calling wake_up_interruptible() from here +- * will deadlock if we happen to be logging +- * from the scheduler (trying to re-grab +- * rq->lock), so defer it. +- */ +- __mod_timer(&buf->timer, jiffies + 1); + } + + old = buf->data; +Index: linux-2.6-tip/kernel/rtmutex.c +=================================================================== +--- linux-2.6-tip.orig/kernel/rtmutex.c ++++ linux-2.6-tip/kernel/rtmutex.c +@@ -8,12 +8,20 @@ + * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt + * Copyright (C) 2006 Esben Nielsen + * ++ * Adaptive Spinlocks: ++ * Copyright (C) 2008 Novell, Inc., Gregory Haskins, Sven Dietrich, ++ * and Peter Morreale, ++ * Adaptive Spinlocks simplification: ++ * Copyright (C) 2008 Red Hat, Inc., Steven Rostedt ++ * + * See Documentation/rt-mutex-design.txt for details. + */ + #include + #include + #include + #include ++#include ++#include + + #include "rtmutex_common.h" + +@@ -97,6 +105,22 @@ static inline void mark_rt_mutex_waiters + } + #endif + ++int pi_initialized; ++ ++/* ++ * we initialize the wait_list runtime. 
(Could be done build-time and/or ++ * boot-time.) ++ */ ++static inline void init_lists(struct rt_mutex *lock) ++{ ++ if (unlikely(!lock->wait_list.prio_list.prev)) { ++ plist_head_init(&lock->wait_list, &lock->wait_lock); ++#ifdef CONFIG_DEBUG_RT_MUTEXES ++ pi_initialized++; ++#endif ++ } ++} ++ + /* + * Calculate task priority from the waiter list priority + * +@@ -253,13 +277,13 @@ static int rt_mutex_adjust_prio_chain(st + plist_add(&waiter->list_entry, &lock->wait_list); + + /* Release the task */ +- spin_unlock_irqrestore(&task->pi_lock, flags); ++ spin_unlock(&task->pi_lock); + put_task_struct(task); + + /* Grab the next task */ + task = rt_mutex_owner(lock); + get_task_struct(task); +- spin_lock_irqsave(&task->pi_lock, flags); ++ spin_lock(&task->pi_lock); + + if (waiter == rt_mutex_top_waiter(lock)) { + /* Boost the owner */ +@@ -277,10 +301,10 @@ static int rt_mutex_adjust_prio_chain(st + __rt_mutex_adjust_prio(task); + } + +- spin_unlock_irqrestore(&task->pi_lock, flags); ++ spin_unlock(&task->pi_lock); + + top_waiter = rt_mutex_top_waiter(lock); +- spin_unlock(&lock->wait_lock); ++ spin_unlock_irqrestore(&lock->wait_lock, flags); + + if (!detect_deadlock && waiter != top_waiter) + goto out_put_task; +@@ -300,21 +324,21 @@ static int rt_mutex_adjust_prio_chain(st + * assigned pending owner [which might not have taken the + * lock yet]: + */ +-static inline int try_to_steal_lock(struct rt_mutex *lock) ++static inline int try_to_steal_lock(struct rt_mutex *lock, ++ struct task_struct *task, int mode) + { + struct task_struct *pendowner = rt_mutex_owner(lock); + struct rt_mutex_waiter *next; +- unsigned long flags; + + if (!rt_mutex_owner_pending(lock)) + return 0; + +- if (pendowner == current) ++ if (pendowner == task) + return 1; + +- spin_lock_irqsave(&pendowner->pi_lock, flags); +- if (current->prio >= pendowner->prio) { +- spin_unlock_irqrestore(&pendowner->pi_lock, flags); ++ spin_lock(&pendowner->pi_lock); ++ if (!lock_is_stealable(task, pendowner, mode)) { ++ spin_unlock(&pendowner->pi_lock); + return 0; + } + +@@ -324,7 +348,7 @@ static inline int try_to_steal_lock(stru + * priority. + */ + if (likely(!rt_mutex_has_waiters(lock))) { +- spin_unlock_irqrestore(&pendowner->pi_lock, flags); ++ spin_unlock(&pendowner->pi_lock); + return 1; + } + +@@ -332,27 +356,27 @@ static inline int try_to_steal_lock(stru + next = rt_mutex_top_waiter(lock); + plist_del(&next->pi_list_entry, &pendowner->pi_waiters); + __rt_mutex_adjust_prio(pendowner); +- spin_unlock_irqrestore(&pendowner->pi_lock, flags); ++ spin_unlock(&pendowner->pi_lock); + + /* + * We are going to steal the lock and a waiter was + * enqueued on the pending owners pi_waiters queue. So + * we have to enqueue this waiter into +- * current->pi_waiters list. This covers the case, +- * where current is boosted because it holds another ++ * task->pi_waiters list. This covers the case, ++ * where task is boosted because it holds another + * lock and gets unboosted because the booster is + * interrupted, so we would delay a waiter with higher +- * priority as current->normal_prio. ++ * priority as task->normal_prio. 
+ * + * Note: in the rare case of a SCHED_OTHER task changing + * its priority and thus stealing the lock, next->task +- * might be current: ++ * might be task: + */ +- if (likely(next->task != current)) { +- spin_lock_irqsave(¤t->pi_lock, flags); +- plist_add(&next->pi_list_entry, ¤t->pi_waiters); +- __rt_mutex_adjust_prio(current); +- spin_unlock_irqrestore(¤t->pi_lock, flags); ++ if (likely(next->task != task)) { ++ spin_lock(&task->pi_lock); ++ plist_add(&next->pi_list_entry, &task->pi_waiters); ++ __rt_mutex_adjust_prio(task); ++ spin_unlock(&task->pi_lock); + } + return 1; + } +@@ -366,7 +390,7 @@ static inline int try_to_steal_lock(stru + * + * Must be called with lock->wait_lock held. + */ +-static int try_to_take_rt_mutex(struct rt_mutex *lock) ++static int do_try_to_take_rt_mutex(struct rt_mutex *lock, int mode) + { + /* + * We have to be careful here if the atomic speedups are +@@ -389,7 +413,7 @@ static int try_to_take_rt_mutex(struct r + */ + mark_rt_mutex_waiters(lock); + +- if (rt_mutex_owner(lock) && !try_to_steal_lock(lock)) ++ if (rt_mutex_owner(lock) && !try_to_steal_lock(lock, current, mode)) + return 0; + + /* We got the lock. */ +@@ -402,6 +426,11 @@ static int try_to_take_rt_mutex(struct r + return 1; + } + ++static inline int try_to_take_rt_mutex(struct rt_mutex *lock) ++{ ++ return do_try_to_take_rt_mutex(lock, STEAL_NORMAL); ++} ++ + /* + * Task blocks on lock. + * +@@ -411,38 +440,38 @@ static int try_to_take_rt_mutex(struct r + */ + static int task_blocks_on_rt_mutex(struct rt_mutex *lock, + struct rt_mutex_waiter *waiter, +- int detect_deadlock) ++ struct task_struct *task, ++ int detect_deadlock, unsigned long flags) + { + struct task_struct *owner = rt_mutex_owner(lock); + struct rt_mutex_waiter *top_waiter = waiter; +- unsigned long flags; + int chain_walk = 0, res; + +- spin_lock_irqsave(¤t->pi_lock, flags); +- __rt_mutex_adjust_prio(current); +- waiter->task = current; ++ spin_lock(&task->pi_lock); ++ __rt_mutex_adjust_prio(task); ++ waiter->task = task; + waiter->lock = lock; +- plist_node_init(&waiter->list_entry, current->prio); +- plist_node_init(&waiter->pi_list_entry, current->prio); ++ plist_node_init(&waiter->list_entry, task->prio); ++ plist_node_init(&waiter->pi_list_entry, task->prio); + + /* Get the top priority waiter on the lock */ + if (rt_mutex_has_waiters(lock)) + top_waiter = rt_mutex_top_waiter(lock); + plist_add(&waiter->list_entry, &lock->wait_list); + +- current->pi_blocked_on = waiter; ++ task->pi_blocked_on = waiter; + +- spin_unlock_irqrestore(¤t->pi_lock, flags); ++ spin_unlock(&task->pi_lock); + + if (waiter == rt_mutex_top_waiter(lock)) { +- spin_lock_irqsave(&owner->pi_lock, flags); ++ spin_lock(&owner->pi_lock); + plist_del(&top_waiter->pi_list_entry, &owner->pi_waiters); + plist_add(&waiter->pi_list_entry, &owner->pi_waiters); + + __rt_mutex_adjust_prio(owner); + if (owner->pi_blocked_on) + chain_walk = 1; +- spin_unlock_irqrestore(&owner->pi_lock, flags); ++ spin_unlock(&owner->pi_lock); + } + else if (debug_rt_mutex_detect_deadlock(waiter, detect_deadlock)) + chain_walk = 1; +@@ -457,12 +486,12 @@ static int task_blocks_on_rt_mutex(struc + */ + get_task_struct(owner); + +- spin_unlock(&lock->wait_lock); ++ spin_unlock_irqrestore(&lock->wait_lock, flags); + + res = rt_mutex_adjust_prio_chain(owner, detect_deadlock, lock, waiter, +- current); ++ task); + +- spin_lock(&lock->wait_lock); ++ spin_lock_irq(&lock->wait_lock); + + return res; + } +@@ -475,13 +504,13 @@ static int task_blocks_on_rt_mutex(struc + * + * Called with 
lock->wait_lock held. + */ +-static void wakeup_next_waiter(struct rt_mutex *lock) ++static void wakeup_next_waiter(struct rt_mutex *lock, int savestate) + { + struct rt_mutex_waiter *waiter; + struct task_struct *pendowner; +- unsigned long flags; ++ struct rt_mutex_waiter *next; + +- spin_lock_irqsave(¤t->pi_lock, flags); ++ spin_lock(¤t->pi_lock); + + waiter = rt_mutex_top_waiter(lock); + plist_del(&waiter->list_entry, &lock->wait_list); +@@ -496,9 +525,44 @@ static void wakeup_next_waiter(struct rt + pendowner = waiter->task; + waiter->task = NULL; + ++ /* ++ * Do the wakeup before the ownership change to give any spinning ++ * waiter grantees a headstart over the other threads that will ++ * trigger once owner changes. ++ */ ++ if (!savestate) ++ wake_up_process(pendowner); ++ else { ++ /* ++ * We can skip the actual (expensive) wakeup if the ++ * waiter is already running, but we have to be careful ++ * of race conditions because they may be about to sleep. ++ * ++ * The waiter-side protocol has the following pattern: ++ * 1: Set state != RUNNING ++ * 2: Conditionally sleep if waiter->task != NULL; ++ * ++ * And the owner-side has the following: ++ * A: Set waiter->task = NULL ++ * B: Conditionally wake if the state != RUNNING ++ * ++ * As long as we ensure 1->2 order, and A->B order, we ++ * will never miss a wakeup. ++ * ++ * Therefore, this barrier ensures that waiter->task = NULL ++ * is visible before we test the pendowner->state. The ++ * corresponding barrier is in the sleep logic. ++ */ ++ smp_mb(); ++ ++ /* If !RUNNING && !RUNNING_MUTEX */ ++ if (pendowner->state & ~TASK_RUNNING_MUTEX) ++ wake_up_process_mutex(pendowner); ++ } ++ + rt_mutex_set_owner(lock, pendowner, RT_MUTEX_OWNER_PENDING); + +- spin_unlock_irqrestore(¤t->pi_lock, flags); ++ spin_unlock(¤t->pi_lock); + + /* + * Clear the pi_blocked_on variable and enqueue a possible +@@ -507,7 +571,13 @@ static void wakeup_next_waiter(struct rt + * waiter with higher priority than pending-owner->normal_prio + * is blocked on the unboosted (pending) owner. 
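The comment block above spells out the owner/waiter handshake: the waiter sets its state to non-running (1) and only then sleeps if waiter->task is still set (2); the owner clears waiter->task (A), issues a full barrier, and only then wakes if the waiter's state is not RUNNING (B). A hedged user-space sketch of that ordering with C11 atomics; the names are illustrative and the seq_cst fence stands in for the patch's smp_mb():

    #include <stdatomic.h>
    #include <stdio.h>
    #include <pthread.h>

    enum { RUNNING, SLEEPING };

    static atomic_int waiter_task  = ATOMIC_VAR_INIT(1);  /* non-zero: still waiting */
    static atomic_int waiter_state = ATOMIC_VAR_INIT(RUNNING);
    static atomic_int wakeups      = ATOMIC_VAR_INIT(0);

    /* Waiter side: 1) set state != RUNNING, 2) sleep only if still waiting. */
    static void *waiter(void *arg)
    {
            atomic_store(&waiter_state, SLEEPING);          /* step 1 */
            atomic_thread_fence(memory_order_seq_cst);      /* pairs with the owner's barrier */
            if (atomic_load(&waiter_task)) {                /* step 2: still waiting? */
                    /* would call schedule() here; spin until woken for the demo */
                    while (!atomic_load(&wakeups))
                            ;
            }
            atomic_store(&waiter_state, RUNNING);           /* back to running */
            return NULL;
    }

    /* Owner side: A) clear waiter->task, B) wake only if waiter is not RUNNING. */
    static void *owner(void *arg)
    {
            atomic_store(&waiter_task, 0);                  /* step A */
            atomic_thread_fence(memory_order_seq_cst);      /* the smp_mb() in the patch */
            if (atomic_load(&waiter_state) != RUNNING)      /* step B */
                    atomic_fetch_add(&wakeups, 1);          /* wake_up_process_mutex() analogue */
            return NULL;
    }

    int main(void)
    {
            pthread_t w, o;

            pthread_create(&w, NULL, waiter, NULL);
            pthread_create(&o, NULL, owner, NULL);
            pthread_join(w, NULL);
            pthread_join(o, NULL);
            /* prints 0 or 1 depending on timing; the waiter can never hang */
            printf("wakeups delivered: %d\n", atomic_load(&wakeups));
            return 0;
    }

With both sides ordered this way, the lost-wakeup case (owner sees RUNNING while the waiter sees the task pointer still set and goes to sleep) is exactly the store-buffering pattern the barriers rule out.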
+ */ +- spin_lock_irqsave(&pendowner->pi_lock, flags); ++ ++ if (rt_mutex_has_waiters(lock)) ++ next = rt_mutex_top_waiter(lock); ++ else ++ next = NULL; ++ ++ spin_lock(&pendowner->pi_lock); + + WARN_ON(!pendowner->pi_blocked_on); + WARN_ON(pendowner->pi_blocked_on != waiter); +@@ -515,15 +585,10 @@ static void wakeup_next_waiter(struct rt + + pendowner->pi_blocked_on = NULL; + +- if (rt_mutex_has_waiters(lock)) { +- struct rt_mutex_waiter *next; +- +- next = rt_mutex_top_waiter(lock); ++ if (next) + plist_add(&next->pi_list_entry, &pendowner->pi_waiters); +- } +- spin_unlock_irqrestore(&pendowner->pi_lock, flags); + +- wake_up_process(pendowner); ++ spin_unlock(&pendowner->pi_lock); + } + + /* +@@ -532,22 +597,22 @@ static void wakeup_next_waiter(struct rt + * Must be called with lock->wait_lock held + */ + static void remove_waiter(struct rt_mutex *lock, +- struct rt_mutex_waiter *waiter) ++ struct rt_mutex_waiter *waiter, ++ unsigned long flags) + { + int first = (waiter == rt_mutex_top_waiter(lock)); + struct task_struct *owner = rt_mutex_owner(lock); +- unsigned long flags; + int chain_walk = 0; + +- spin_lock_irqsave(¤t->pi_lock, flags); ++ spin_lock(¤t->pi_lock); + plist_del(&waiter->list_entry, &lock->wait_list); + waiter->task = NULL; + current->pi_blocked_on = NULL; +- spin_unlock_irqrestore(¤t->pi_lock, flags); ++ spin_unlock(¤t->pi_lock); + + if (first && owner != current) { + +- spin_lock_irqsave(&owner->pi_lock, flags); ++ spin_lock(&owner->pi_lock); + + plist_del(&waiter->pi_list_entry, &owner->pi_waiters); + +@@ -562,7 +627,7 @@ static void remove_waiter(struct rt_mute + if (owner->pi_blocked_on) + chain_walk = 1; + +- spin_unlock_irqrestore(&owner->pi_lock, flags); ++ spin_unlock(&owner->pi_lock); + } + + WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); +@@ -573,11 +638,11 @@ static void remove_waiter(struct rt_mute + /* gets dropped in rt_mutex_adjust_prio_chain()! */ + get_task_struct(owner); + +- spin_unlock(&lock->wait_lock); ++ spin_unlock_irqrestore(&lock->wait_lock, flags); + + rt_mutex_adjust_prio_chain(owner, 0, lock, NULL, current); + +- spin_lock(&lock->wait_lock); ++ spin_lock_irq(&lock->wait_lock); + } + + /* +@@ -598,45 +663,413 @@ void rt_mutex_adjust_pi(struct task_stru + return; + } + +- spin_unlock_irqrestore(&task->pi_lock, flags); +- + /* gets dropped in rt_mutex_adjust_prio_chain()! */ + get_task_struct(task); ++ spin_unlock_irqrestore(&task->pi_lock, flags); ++ + rt_mutex_adjust_prio_chain(task, 0, NULL, NULL, task); + } + + /* +- * Slow path lock function: ++ * preemptible spin_lock functions: + */ +-static int __sched +-rt_mutex_slowlock(struct rt_mutex *lock, int state, +- struct hrtimer_sleeper *timeout, +- int detect_deadlock) ++ ++#ifdef CONFIG_PREEMPT_RT ++ ++static inline void ++rt_spin_lock_fastlock(struct rt_mutex *lock, ++ void (*slowfn)(struct rt_mutex *lock)) ++{ ++ /* Temporary HACK! */ ++ if (likely(!current->in_printk)) ++ might_sleep(); ++ else if (in_atomic() || irqs_disabled()) ++ /* don't grab locks for printk in atomic */ ++ return; ++ ++ if (likely(rt_mutex_cmpxchg(lock, NULL, current))) ++ rt_mutex_deadlock_account_lock(lock, current); ++ else ++ slowfn(lock); ++} ++ ++static inline void ++rt_spin_lock_fastunlock(struct rt_mutex *lock, ++ void (*slowfn)(struct rt_mutex *lock)) ++{ ++ /* Temporary HACK! 
*/ ++ if (unlikely(rt_mutex_owner(lock) != current) && current->in_printk) ++ /* don't grab locks for printk in atomic */ ++ return; ++ ++ if (likely(rt_mutex_cmpxchg(lock, current, NULL))) ++ rt_mutex_deadlock_account_unlock(current); ++ else ++ slowfn(lock); ++} ++ ++ ++#ifdef CONFIG_SMP ++static int adaptive_wait(struct rt_mutex_waiter *waiter, ++ struct task_struct *orig_owner) ++{ ++ for (;;) { ++ ++ /* we are the owner? */ ++ if (!waiter->task) ++ return 0; ++ ++ /* Owner changed? Then lets update the original */ ++ if (orig_owner != rt_mutex_owner(waiter->lock)) ++ return 0; ++ ++ /* Owner went to bed, so should we */ ++ if (!task_is_current(orig_owner)) ++ return 1; ++ ++ cpu_relax(); ++ } ++} ++#else ++static int adaptive_wait(struct rt_mutex_waiter *waiter, ++ struct task_struct *orig_owner) ++{ ++ return 1; ++} ++#endif ++ ++/* ++ * The state setting needs to preserve the original state and needs to ++ * take care of non rtmutex wakeups. ++ * ++ * Called with rtmutex->wait_lock held to serialize against rtmutex ++ * wakeups(). ++ */ ++static inline unsigned long ++rt_set_current_blocked_state(unsigned long saved_state) ++{ ++ unsigned long state, block_state; ++ ++ /* ++ * If state is TASK_INTERRUPTIBLE, then we set the state for ++ * blocking to TASK_INTERRUPTIBLE as well, otherwise we would ++ * miss real wakeups via wake_up_interruptible(). If such a ++ * wakeup happens we see the running state and preserve it in ++ * saved_state. Now we can ignore further wakeups as we will ++ * return in state running from our "spin" sleep. ++ */ ++ if (saved_state == TASK_INTERRUPTIBLE) ++ block_state = TASK_INTERRUPTIBLE; ++ else ++ block_state = TASK_UNINTERRUPTIBLE; ++ ++ state = xchg(¤t->state, block_state); ++ /* ++ * Take care of non rtmutex wakeups. rtmutex wakeups ++ * or TASK_RUNNING_MUTEX to (UN)INTERRUPTIBLE. ++ */ ++ if (state == TASK_RUNNING) ++ saved_state = TASK_RUNNING; ++ ++ return saved_state; ++} ++ ++static inline void rt_restore_current_state(unsigned long saved_state) ++{ ++ unsigned long state = xchg(¤t->state, saved_state); ++ ++ if (state == TASK_RUNNING) ++ current->state = TASK_RUNNING; ++} ++ ++/* ++ * Slow path lock function spin_lock style: this variant is very ++ * careful not to miss any non-lock wakeups. ++ * ++ * The wakeup side uses wake_up_process_mutex, which, combined with ++ * the xchg code of this function is a transparent sleep/wakeup ++ * mechanism nested within any existing sleep/wakeup mechanism. This ++ * enables the seemless use of arbitrary (blocking) spinlocks within ++ * sleep/wakeup event loops. ++ */ ++static void noinline __sched ++rt_spin_lock_slowlock(struct rt_mutex *lock) + { + struct rt_mutex_waiter waiter; +- int ret = 0; ++ unsigned long saved_state, flags; ++ struct task_struct *orig_owner; + + debug_rt_mutex_init_waiter(&waiter); + waiter.task = NULL; + +- spin_lock(&lock->wait_lock); ++ spin_lock_irqsave(&lock->wait_lock, flags); ++ init_lists(lock); + +- /* Try to acquire the lock again: */ +- if (try_to_take_rt_mutex(lock)) { +- spin_unlock(&lock->wait_lock); +- return 0; ++ BUG_ON(rt_mutex_owner(lock) == current); ++ ++ /* ++ * Here we save whatever state the task was in originally, ++ * we'll restore it at the end of the function and we'll take ++ * any intermediate wakeup into account as well, independently ++ * of the lock sleep/wakeup mechanism. When we get a real ++ * wakeup the task->state is TASK_RUNNING and we change ++ * saved_state accordingly. 
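adaptive_wait() above implements the adaptive-spinlock heuristic: keep spinning as long as the lock owner is itself running on a CPU (the lock should be released soon) and the ownership has not changed, and fall back to sleeping as soon as the owner is scheduled out. A rough user-space sketch of that decision loop; owner_is_running() and the waiter/owner fields are stand-ins for the kernel's task_is_current() and rt_mutex state:

    #include <stdbool.h>
    #include <stdio.h>

    struct task { int id; bool on_cpu; };

    struct waiter {
            struct task *task;       /* cleared once we were granted the lock */
            struct task *lock_owner; /* current owner of the contended lock */
    };

    /* Stand-in for task_is_current(): is the owner executing on some CPU? */
    static bool owner_is_running(const struct task *owner)
    {
            return owner && owner->on_cpu;
    }

    /*
     * Returns false if we should retry taking the lock (it was handed to us,
     * or the owner changed), true if we should stop spinning and sleep.
     */
    static bool adaptive_wait(const struct waiter *w, const struct task *orig_owner)
    {
            for (;;) {
                    if (!w->task)                           /* lock was handed to us */
                            return false;
                    if (w->lock_owner != orig_owner)        /* owner changed, re-evaluate */
                            return false;
                    if (!owner_is_running(orig_owner))      /* owner sleeps, so should we */
                            return true;
                    /* cpu_relax() would go here */
            }
    }

    int main(void)
    {
            struct task owner = { 1, false };
            struct task self  = { 2, true };
            struct waiter w   = { &self, &owner };

            /* Owner is off-CPU: stop spinning and sleep. */
            printf("sleep? %d\n", adaptive_wait(&w, &owner));   /* 1 */

            /* Lock was granted (waiter.task cleared): retry the fast path. */
            w.task = NULL;
            printf("sleep? %d\n", adaptive_wait(&w, &owner));   /* 0 */
            return 0;
    }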
If we did not get a real wakeup ++ * then we return with the saved state. We need to be careful ++ * about original state TASK_INTERRUPTIBLE as well, as we ++ * could miss a wakeup_interruptible() ++ */ ++ saved_state = rt_set_current_blocked_state(current->state); ++ ++ for (;;) { ++ unsigned long saved_flags; ++ int saved_lock_depth = current->lock_depth; ++ ++ /* Try to acquire the lock */ ++ if (do_try_to_take_rt_mutex(lock, STEAL_LATERAL)) ++ break; ++ ++ /* ++ * waiter.task is NULL the first time we come here and ++ * when we have been woken up by the previous owner ++ * but the lock got stolen by an higher prio task. ++ */ ++ if (!waiter.task) { ++ task_blocks_on_rt_mutex(lock, &waiter, current, 0, ++ flags); ++ /* Wakeup during boost ? */ ++ if (unlikely(!waiter.task)) ++ continue; ++ } ++ ++ /* ++ * Prevent schedule() to drop BKL, while waiting for ++ * the lock ! We restore lock_depth when we come back. ++ */ ++ saved_flags = current->flags & PF_NOSCHED; ++ current->lock_depth = -1; ++ current->flags &= ~PF_NOSCHED; ++ orig_owner = rt_mutex_owner(lock); ++ get_task_struct(orig_owner); ++ spin_unlock_irqrestore(&lock->wait_lock, flags); ++ ++ debug_rt_mutex_print_deadlock(&waiter); ++ ++ if (adaptive_wait(&waiter, orig_owner)) { ++ put_task_struct(orig_owner); ++ ++ if (waiter.task) ++ schedule_rt_mutex(lock); ++ } else ++ put_task_struct(orig_owner); ++ ++ spin_lock_irqsave(&lock->wait_lock, flags); ++ current->flags |= saved_flags; ++ current->lock_depth = saved_lock_depth; ++ saved_state = rt_set_current_blocked_state(saved_state); + } + +- set_current_state(state); ++ rt_restore_current_state(saved_state); + +- /* Setup the timer, when timeout != NULL */ +- if (unlikely(timeout)) { +- hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); +- if (!hrtimer_active(&timeout->timer)) +- timeout->task = NULL; ++ /* ++ * Extremely rare case, if we got woken up by a non-mutex wakeup, ++ * and we managed to steal the lock despite us not being the ++ * highest-prio waiter (due to SCHED_OTHER changing prio), then we ++ * can end up with a non-NULL waiter.task: ++ */ ++ if (unlikely(waiter.task)) ++ remove_waiter(lock, &waiter, flags); ++ /* ++ * try_to_take_rt_mutex() sets the waiter bit ++ * unconditionally. 
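rt_spin_lock_slowlock() keeps the task's original state across the "spinning sleep" by exchanging it for a blocking state and folding any real wakeup (state became TASK_RUNNING) back into saved_state, as rt_set_current_blocked_state()/rt_restore_current_state() above do. A simplified single-task model of that bookkeeping; the state constants and helper names are illustrative:

    #include <stdatomic.h>
    #include <stdio.h>

    enum { TASK_RUNNING, TASK_INTERRUPTIBLE, TASK_UNINTERRUPTIBLE };

    static atomic_int cur_state = ATOMIC_VAR_INIT(TASK_INTERRUPTIBLE);

    /* Swap in a blocking state, but remember if a real wakeup already hit us. */
    static int set_blocked_state(int saved_state)
    {
            int block = (saved_state == TASK_INTERRUPTIBLE) ?
                            TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
            int old = atomic_exchange(&cur_state, block);

            if (old == TASK_RUNNING)        /* a non-mutex wakeup happened meanwhile */
                    saved_state = TASK_RUNNING;
            return saved_state;
    }

    /* Put the original state back, unless we were made runnable in between. */
    static void restore_state(int saved_state)
    {
            int old = atomic_exchange(&cur_state, saved_state);

            if (old == TASK_RUNNING)
                    atomic_store(&cur_state, TASK_RUNNING);
    }

    int main(void)
    {
            int saved = set_blocked_state(atomic_load(&cur_state));

            /* ... lock loop would run here; pretend a real wakeup arrives ... */
            atomic_store(&cur_state, TASK_RUNNING);

            saved = set_blocked_state(saved);       /* re-block for another loop pass */
            restore_state(saved);
            printf("final state: %d (TASK_RUNNING=%d)\n",
                   atomic_load(&cur_state), TASK_RUNNING);      /* 0 (0) */
            return 0;
    }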
We might have to fix that up: ++ */ ++ fixup_rt_mutex_waiters(lock); ++ ++ spin_unlock_irqrestore(&lock->wait_lock, flags); ++ ++ debug_rt_mutex_free_waiter(&waiter); ++} ++ ++/* ++ * Slow path to release a rt_mutex spin_lock style ++ */ ++static void noinline __sched ++rt_spin_lock_slowunlock(struct rt_mutex *lock) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&lock->wait_lock, flags); ++ ++ debug_rt_mutex_unlock(lock); ++ ++ rt_mutex_deadlock_account_unlock(current); ++ ++ if (!rt_mutex_has_waiters(lock)) { ++ lock->owner = NULL; ++ spin_unlock_irqrestore(&lock->wait_lock, flags); ++ return; + } + ++ wakeup_next_waiter(lock, 1); ++ ++ spin_unlock_irqrestore(&lock->wait_lock, flags); ++ ++ /* Undo pi boosting.when necessary */ ++ rt_mutex_adjust_prio(current); ++} ++ ++void __lockfunc rt_spin_lock(spinlock_t *lock) ++{ ++ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); ++ spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++} ++EXPORT_SYMBOL(rt_spin_lock); ++ ++void __lockfunc __rt_spin_lock(struct rt_mutex *lock) ++{ ++ rt_spin_lock_fastlock(lock, rt_spin_lock_slowlock); ++} ++EXPORT_SYMBOL(__rt_spin_lock); ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ ++void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass) ++{ ++ rt_spin_lock_fastlock(&lock->lock, rt_spin_lock_slowlock); ++ spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); ++} ++EXPORT_SYMBOL(rt_spin_lock_nested); ++ ++#endif ++ ++void __lockfunc rt_spin_unlock(spinlock_t *lock) ++{ ++ /* NOTE: we always pass in '1' for nested, for simplicity */ ++ spin_release(&lock->dep_map, 1, _RET_IP_); ++ rt_spin_lock_fastunlock(&lock->lock, rt_spin_lock_slowunlock); ++} ++EXPORT_SYMBOL(rt_spin_unlock); ++ ++void __lockfunc __rt_spin_unlock(struct rt_mutex *lock) ++{ ++ rt_spin_lock_fastunlock(lock, rt_spin_lock_slowunlock); ++} ++EXPORT_SYMBOL(__rt_spin_unlock); ++ ++/* ++ * Wait for the lock to get unlocked: instead of polling for an unlock ++ * (like raw spinlocks do), we lock and unlock, to force the kernel to ++ * schedule if there's contention: ++ */ ++void __lockfunc rt_spin_unlock_wait(spinlock_t *lock) ++{ ++ spin_lock(lock); ++ spin_unlock(lock); ++} ++EXPORT_SYMBOL(rt_spin_unlock_wait); ++ ++int __lockfunc rt_spin_trylock(spinlock_t *lock) ++{ ++ int ret = rt_mutex_trylock(&lock->lock); ++ ++ if (ret) ++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ ++ return ret; ++} ++EXPORT_SYMBOL(rt_spin_trylock); ++ ++int __lockfunc rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags) ++{ ++ int ret; ++ ++ *flags = 0; ++ ret = rt_mutex_trylock(&lock->lock); ++ if (ret) ++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ ++ return ret; ++} ++EXPORT_SYMBOL(rt_spin_trylock_irqsave); ++ ++int _atomic_dec_and_spin_lock(spinlock_t *lock, atomic_t *atomic) ++{ ++ /* Subtract 1 from counter unless that drops it to 0 (ie. 
it was 1) */ ++ if (atomic_add_unless(atomic, -1, 1)) ++ return 0; ++ rt_spin_lock(lock); ++ if (atomic_dec_and_test(atomic)) ++ return 1; ++ rt_spin_unlock(lock); ++ return 0; ++} ++EXPORT_SYMBOL(_atomic_dec_and_spin_lock); ++ ++void ++__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key) ++{ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ /* ++ * Make sure we are not reinitializing a held lock: ++ */ ++ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); ++ lockdep_init_map(&lock->dep_map, name, key, 0); ++#endif ++ __rt_mutex_init(&lock->lock, name); ++} ++EXPORT_SYMBOL(__rt_spin_lock_init); ++ ++#endif ++ ++static inline int rt_release_bkl(struct rt_mutex *lock, unsigned long flags) ++{ ++ int saved_lock_depth = current->lock_depth; ++ ++#ifdef CONFIG_LOCK_KERNEL ++ current->lock_depth = -1; ++ /* ++ * try_to_take_lock set the waiters, make sure it's ++ * still correct. ++ */ ++ fixup_rt_mutex_waiters(lock); ++ spin_unlock_irqrestore(&lock->wait_lock, flags); ++ ++ up(&kernel_sem); ++ ++ spin_lock_irq(&lock->wait_lock); ++#endif ++ return saved_lock_depth; ++} ++ ++static inline void rt_reacquire_bkl(int saved_lock_depth) ++{ ++#ifdef CONFIG_LOCK_KERNEL ++ down(&kernel_sem); ++ current->lock_depth = saved_lock_depth; ++#endif ++} ++ ++/** ++ * __rt_mutex_slowlock() - Perform the wait-wake-try-to-take loop ++ * @lock: the rt_mutex to take ++ * @state: the state the task should block in (TASK_INTERRUPTIBLE ++ * or TASK_UNINTERRUPTIBLE) ++ * @timeout: the pre-initialized and started timer, or NULL for none ++ * @waiter: the pre-initialized rt_mutex_waiter ++ * @detect_deadlock: passed to task_blocks_on_rt_mutex ++ * ++ * lock->wait_lock must be held by the caller. ++ */ ++static int __sched ++__rt_mutex_slowlock(struct rt_mutex *lock, int state, ++ struct hrtimer_sleeper *timeout, ++ struct rt_mutex_waiter *waiter, ++ int detect_deadlock, unsigned long flags) ++{ ++ int ret = 0; ++ + for (;;) { ++ unsigned long saved_flags; ++ + /* Try to acquire the lock: */ + if (try_to_take_rt_mutex(lock)) + break; +@@ -656,19 +1089,19 @@ rt_mutex_slowlock(struct rt_mutex *lock, + } + + /* +- * waiter.task is NULL the first time we come here and ++ * waiter->task is NULL the first time we come here and + * when we have been woken up by the previous owner + * but the lock got stolen by a higher prio task. + */ +- if (!waiter.task) { +- ret = task_blocks_on_rt_mutex(lock, &waiter, +- detect_deadlock); ++ if (!waiter->task) { ++ ret = task_blocks_on_rt_mutex(lock, waiter, current, ++ detect_deadlock, flags); + /* + * If we got woken up by the owner then start loop + * all over without going into schedule to try + * to get the lock now: + */ +- if (unlikely(!waiter.task)) { ++ if (unlikely(!waiter->task)) { + /* + * Reset the return value. 
We might + * have returned with -EDEADLK and the +@@ -682,21 +1115,72 @@ rt_mutex_slowlock(struct rt_mutex *lock, + break; + } + +- spin_unlock(&lock->wait_lock); ++ saved_flags = current->flags & PF_NOSCHED; ++ current->flags &= ~PF_NOSCHED; + +- debug_rt_mutex_print_deadlock(&waiter); ++ spin_unlock_irq(&lock->wait_lock); ++ ++ debug_rt_mutex_print_deadlock(waiter); + +- if (waiter.task) ++ if (waiter->task) + schedule_rt_mutex(lock); + +- spin_lock(&lock->wait_lock); ++ spin_lock_irq(&lock->wait_lock); ++ ++ current->flags |= saved_flags; + set_current_state(state); + } + ++ return ret; ++} ++ ++/* ++ * Slow path lock function: ++ */ ++static int __sched ++rt_mutex_slowlock(struct rt_mutex *lock, int state, ++ struct hrtimer_sleeper *timeout, ++ int detect_deadlock) ++{ ++ int ret = 0, saved_lock_depth = -1; ++ struct rt_mutex_waiter waiter; ++ unsigned long flags; ++ ++ debug_rt_mutex_init_waiter(&waiter); ++ waiter.task = NULL; ++ ++ spin_lock_irqsave(&lock->wait_lock, flags); ++ init_lists(lock); ++ ++ /* Try to acquire the lock again: */ ++ if (try_to_take_rt_mutex(lock)) { ++ spin_unlock_irqrestore(&lock->wait_lock, flags); ++ return 0; ++ } ++ ++ /* ++ * We drop the BKL here before we go into the wait loop to avoid a ++ * possible deadlock in the scheduler. ++ */ ++ if (unlikely(current->lock_depth >= 0)) ++ saved_lock_depth = rt_release_bkl(lock, flags); ++ ++ set_current_state(state); ++ ++ /* Setup the timer, when timeout != NULL */ ++ if (unlikely(timeout)) { ++ hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS); ++ if (!hrtimer_active(&timeout->timer)) ++ timeout->task = NULL; ++ } ++ ++ ret = __rt_mutex_slowlock(lock, state, timeout, &waiter, ++ detect_deadlock, flags); ++ + set_current_state(TASK_RUNNING); + + if (unlikely(waiter.task)) +- remove_waiter(lock, &waiter); ++ remove_waiter(lock, &waiter, flags); + + /* + * try_to_take_rt_mutex() sets the waiter bit +@@ -704,7 +1188,7 @@ rt_mutex_slowlock(struct rt_mutex *lock, + */ + fixup_rt_mutex_waiters(lock); + +- spin_unlock(&lock->wait_lock); ++ spin_unlock_irqrestore(&lock->wait_lock, flags); + + /* Remove pending timer: */ + if (unlikely(timeout)) +@@ -718,6 +1202,10 @@ rt_mutex_slowlock(struct rt_mutex *lock, + if (unlikely(ret)) + rt_mutex_adjust_prio(current); + ++ /* Must we reaquire the BKL? 
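_atomic_dec_and_spin_lock(), shown just above in this rtmutex.c hunk, is the usual atomic_dec_and_lock() pattern: drop a reference with a lock-free decrement as long as that cannot hit zero, and only take the lock for the final 1 -> 0 transition. A user-space sketch with C11 atomics and a pthread mutex standing in for the sleeping spinlock (names are illustrative):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t obj_lock = PTHREAD_MUTEX_INITIALIZER;
    static atomic_int refcount = ATOMIC_VAR_INIT(2);

    /* Add 'a' to '*v' unless it currently equals 'u'; true if it was changed. */
    static bool atomic_add_unless(atomic_int *v, int a, int u)
    {
            int c = atomic_load(v);

            while (c != u) {
                    if (atomic_compare_exchange_weak(v, &c, c + a))
                            return true;
            }
            return false;
    }

    /*
     * Returns 1 with obj_lock held if this call dropped the count to zero,
     * 0 (lock not held) otherwise -- the caller would then free the object.
     */
    static int dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
    {
            /* Fast path: subtract 1 unless that drops it to 0 (i.e. it was 1). */
            if (atomic_add_unless(cnt, -1, 1))
                    return 0;

            pthread_mutex_lock(lock);
            if (atomic_fetch_sub(cnt, 1) == 1)      /* we did the final decrement */
                    return 1;
            pthread_mutex_unlock(lock);
            return 0;
    }

    int main(void)
    {
            printf("first  put -> last? %d\n", dec_and_lock(&refcount, &obj_lock)); /* 0 */
            printf("second put -> last? %d\n", dec_and_lock(&refcount, &obj_lock)); /* 1 */
            pthread_mutex_unlock(&obj_lock);        /* released after freeing the object */
            return 0;
    }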
*/ ++ if (unlikely(saved_lock_depth >= 0)) ++ rt_reacquire_bkl(saved_lock_depth); ++ + debug_rt_mutex_free_waiter(&waiter); + + return ret; +@@ -729,12 +1217,15 @@ rt_mutex_slowlock(struct rt_mutex *lock, + static inline int + rt_mutex_slowtrylock(struct rt_mutex *lock) + { ++ unsigned long flags; + int ret = 0; + +- spin_lock(&lock->wait_lock); ++ spin_lock_irqsave(&lock->wait_lock, flags); + + if (likely(rt_mutex_owner(lock) != current)) { + ++ init_lists(lock); ++ + ret = try_to_take_rt_mutex(lock); + /* + * try_to_take_rt_mutex() sets the lock waiters +@@ -743,7 +1234,7 @@ rt_mutex_slowtrylock(struct rt_mutex *lo + fixup_rt_mutex_waiters(lock); + } + +- spin_unlock(&lock->wait_lock); ++ spin_unlock_irqrestore(&lock->wait_lock, flags); + + return ret; + } +@@ -754,7 +1245,9 @@ rt_mutex_slowtrylock(struct rt_mutex *lo + static void __sched + rt_mutex_slowunlock(struct rt_mutex *lock) + { +- spin_lock(&lock->wait_lock); ++ unsigned long flags; ++ ++ spin_lock_irqsave(&lock->wait_lock, flags); + + debug_rt_mutex_unlock(lock); + +@@ -762,13 +1255,13 @@ rt_mutex_slowunlock(struct rt_mutex *loc + + if (!rt_mutex_has_waiters(lock)) { + lock->owner = NULL; +- spin_unlock(&lock->wait_lock); ++ spin_unlock_irqrestore(&lock->wait_lock, flags); + return; + } + +- wakeup_next_waiter(lock); ++ wakeup_next_waiter(lock, 0); + +- spin_unlock(&lock->wait_lock); ++ spin_unlock_irqrestore(&lock->wait_lock, flags); + + /* Undo pi boosting if necessary: */ + rt_mutex_adjust_prio(current); +@@ -830,6 +1323,27 @@ rt_mutex_fastunlock(struct rt_mutex *loc + } + + /** ++ * rt_mutex_lock_killable - lock a rt_mutex killable ++ * ++ * @lock: the rt_mutex to be locked ++ * @detect_deadlock: deadlock detection on/off ++ * ++ * Returns: ++ * 0 on success ++ * -EINTR when interrupted by a signal ++ * -EDEADLK when the lock would deadlock (when deadlock detection is on) ++ */ ++int __sched rt_mutex_lock_killable(struct rt_mutex *lock, ++ int detect_deadlock) ++{ ++ might_sleep(); ++ ++ return rt_mutex_fastlock(lock, TASK_KILLABLE, ++ detect_deadlock, rt_mutex_slowlock); ++} ++EXPORT_SYMBOL_GPL(rt_mutex_lock_killable); ++ ++/** + * rt_mutex_lock - lock a rt_mutex + * + * @lock: the rt_mutex to be locked +@@ -986,6 +1500,62 @@ void rt_mutex_proxy_unlock(struct rt_mut + } + + /** ++ * rt_mutex_start_proxy_lock() - Start lock acquisition for another task ++ * @lock: the rt_mutex to take ++ * @waiter: the pre-initialized rt_mutex_waiter ++ * @task: the task to prepare ++ * @detect_deadlock: perform deadlock detection (1) or not (0) ++ * ++ * Returns: ++ * 0 - task blocked on lock ++ * 1 - acquired the lock for task, caller should wake it up ++ * <0 - error ++ * ++ * Special API call for FUTEX_REQUEUE_PI support. ++ */ ++int rt_mutex_start_proxy_lock(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter, ++ struct task_struct *task, int detect_deadlock) ++{ ++ unsigned long flags; ++ int ret; ++ ++ spin_lock_irqsave(&lock->wait_lock, flags); ++ ++ mark_rt_mutex_waiters(lock); ++ ++ if (!rt_mutex_owner(lock) || ++ try_to_steal_lock(lock, task, STEAL_NORMAL)) { ++ /* We got the lock for task. */ ++ debug_rt_mutex_lock(lock); ++ ++ rt_mutex_set_owner(lock, task, 0); ++ ++ rt_mutex_deadlock_account_lock(lock, task); ++ return 1; ++ } ++ ++ ret = task_blocks_on_rt_mutex(lock, waiter, task, detect_deadlock, ++ flags); ++ ++ ++ if (ret && !waiter->task) { ++ /* ++ * Reset the return value. We might have ++ * returned with -EDEADLK and the owner ++ * released the lock while we were walking the ++ * pi chain. 
Let the waiter sort it out. ++ */ ++ ret = 0; ++ } ++ spin_unlock_irqrestore(&lock->wait_lock, flags); ++ ++ debug_rt_mutex_print_deadlock(waiter); ++ ++ return ret; ++} ++ ++/** + * rt_mutex_next_owner - return the next owner of the lock + * + * @lock: the rt lock query +@@ -1004,3 +1574,58 @@ struct task_struct *rt_mutex_next_owner( + + return rt_mutex_top_waiter(lock)->task; + } ++ ++/** ++ * rt_mutex_finish_proxy_lock() - Complete lock acquisition ++ * @lock: the rt_mutex we were woken on ++ * @to: the timeout, null if none. hrtimer should already have ++ * been started. ++ * @waiter: the pre-initialized rt_mutex_waiter ++ * @detect_deadlock: perform deadlock detection (1) or not (0) ++ * ++ * Complete the lock acquisition started our behalf by another thread. ++ * ++ * Returns: ++ * 0 - success ++ * <0 - error, one of -EINTR, -ETIMEDOUT, or -EDEADLK ++ * ++ * Special API call for PI-futex requeue support ++ */ ++int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, ++ struct hrtimer_sleeper *to, ++ struct rt_mutex_waiter *waiter, ++ int detect_deadlock) ++{ ++ unsigned long flags; ++ int ret; ++ ++ spin_lock_irqsave(&lock->wait_lock, flags); ++ ++ set_current_state(TASK_INTERRUPTIBLE); ++ ++ ret = __rt_mutex_slowlock(lock, TASK_INTERRUPTIBLE, to, waiter, ++ detect_deadlock, flags); ++ ++ set_current_state(TASK_RUNNING); ++ ++ if (unlikely(waiter->task)) ++ remove_waiter(lock, waiter, flags); ++ ++ /* ++ * try_to_take_rt_mutex() sets the waiter bit unconditionally. We might ++ * have to fix that up. ++ */ ++ fixup_rt_mutex_waiters(lock); ++ ++ spin_unlock_irqrestore(&lock->wait_lock, flags); ++ ++ /* ++ * Readjust priority, when we did not get the lock. We might have been ++ * the pending owner and boosted. Since we did not take the lock, the ++ * PI boost has to go. 
++ */ ++ if (unlikely(ret)) ++ rt_mutex_adjust_prio(current); ++ ++ return ret; ++} +Index: linux-2.6-tip/kernel/rtmutex_common.h +=================================================================== +--- linux-2.6-tip.orig/kernel/rtmutex_common.h ++++ linux-2.6-tip/kernel/rtmutex_common.h +@@ -120,6 +120,34 @@ extern void rt_mutex_init_proxy_locked(s + struct task_struct *proxy_owner); + extern void rt_mutex_proxy_unlock(struct rt_mutex *lock, + struct task_struct *proxy_owner); ++extern int rt_mutex_start_proxy_lock(struct rt_mutex *lock, ++ struct rt_mutex_waiter *waiter, ++ struct task_struct *task, ++ int detect_deadlock); ++extern int rt_mutex_finish_proxy_lock(struct rt_mutex *lock, ++ struct hrtimer_sleeper *to, ++ struct rt_mutex_waiter *waiter, ++ int detect_deadlock); ++ ++ ++#define STEAL_LATERAL 1 ++#define STEAL_NORMAL 0 ++ ++/* ++ * Note that RT tasks are excluded from lateral-steals to prevent the ++ * introduction of an unbounded latency ++ */ ++static inline int lock_is_stealable(struct task_struct *task, ++ struct task_struct *pendowner, int mode) ++{ ++ if (mode == STEAL_NORMAL || rt_task(task)) { ++ if (task->prio >= pendowner->prio) ++ return 0; ++ } else if (task->prio > pendowner->prio) ++ return 0; ++ ++ return 1; ++} + + #ifdef CONFIG_DEBUG_RT_MUTEXES + # include "rtmutex-debug.h" +Index: linux-2.6-tip/kernel/sched.c +=================================================================== +--- linux-2.6-tip.orig/kernel/sched.c ++++ linux-2.6-tip/kernel/sched.c +@@ -4,6 +4,7 @@ + * Kernel scheduler and related syscalls + * + * Copyright (C) 1991-2002 Linus Torvalds ++ * Copyright (C) 2004 Red Hat, Inc., Ingo Molnar + * + * 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and + * make semaphores SMP safe +@@ -16,6 +17,7 @@ + * by Davide Libenzi, preemptible kernel bits by Robert Love. + * 2003-09-03 Interactivity tuning by Con Kolivas. + * 2004-04-02 Scheduler domains code by Nick Piggin ++ * 2004-10-13 Real-Time Preemption support by Ingo Molnar + * 2007-04-15 Work begun on replacing all interactivity tuning with a + * fair scheduling design by Con Kolivas. 
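The rtmutex_common.h hunk above adds the STEAL_NORMAL/STEAL_LATERAL modes: a normal steal needs strictly higher priority, while a lateral steal also succeeds on equal priority, except that RT tasks never steal laterally so they cannot introduce unbounded latency. A plain C restatement of that policy (lower ->prio numbers mean higher priority); is_rt_task() and the priority values are illustrative:

    #include <stdbool.h>
    #include <stdio.h>

    #define STEAL_LATERAL   1
    #define STEAL_NORMAL    0
    #define MAX_RT_PRIO     100     /* prios 0..99 are real-time, as in the kernel */

    struct task { int prio; };

    static bool is_rt_task(const struct task *t)
    {
            return t->prio < MAX_RT_PRIO;
    }

    /* Mirrors lock_is_stealable(): may 'task' take the lock from 'pendowner'? */
    static int lock_is_stealable(const struct task *task,
                                 const struct task *pendowner, int mode)
    {
            if (mode == STEAL_NORMAL || is_rt_task(task)) {
                    if (task->prio >= pendowner->prio)      /* need strictly higher prio */
                            return 0;
            } else if (task->prio > pendowner->prio) {      /* lateral: equal prio is ok */
                    return 0;
            }
            return 1;
    }

    int main(void)
    {
            struct task fair_a = { 120 }, fair_b = { 120 }, rt = { 50 };

            printf("equal prio, normal : %d\n",
                   lock_is_stealable(&fair_a, &fair_b, STEAL_NORMAL));   /* 0 */
            printf("equal prio, lateral: %d\n",
                   lock_is_stealable(&fair_a, &fair_b, STEAL_LATERAL));  /* 1 */
            printf("rt vs rt,  lateral : %d\n",
                   lock_is_stealable(&rt, &rt, STEAL_LATERAL));          /* 0 */
            return 0;
    }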
+ * 2007-05-05 Load balancing (smp-nice) and other improvements +@@ -60,6 +62,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -105,6 +108,20 @@ + #define NICE_0_LOAD SCHED_LOAD_SCALE + #define NICE_0_SHIFT SCHED_LOAD_SHIFT + ++#if (BITS_PER_LONG < 64) ++#define JIFFIES_TO_NS64(TIME) \ ++ ((unsigned long long)(TIME) * ((unsigned long) (1000000000 / HZ))) ++ ++#define NS64_TO_JIFFIES(TIME) \ ++ ((((unsigned long long)((TIME)) >> BITS_PER_LONG) * \ ++ (1 + NS_TO_JIFFIES(~0UL))) + NS_TO_JIFFIES((unsigned long)(TIME))) ++#else /* BITS_PER_LONG < 64 */ ++ ++#define NS64_TO_JIFFIES(TIME) NS_TO_JIFFIES(TIME) ++#define JIFFIES_TO_NS64(TIME) JIFFIES_TO_NS(TIME) ++ ++#endif /* BITS_PER_LONG < 64 */ ++ + /* + * These are the 'tuning knobs' of the scheduler: + * +@@ -123,6 +140,7 @@ DEFINE_TRACE(sched_wakeup); + DEFINE_TRACE(sched_wakeup_new); + DEFINE_TRACE(sched_switch); + DEFINE_TRACE(sched_migrate_task); ++DEFINE_TRACE(sched_task_setprio); + + #ifdef CONFIG_SMP + +@@ -148,6 +166,32 @@ static inline void sg_inc_cpu_power(stru + } + #endif + ++#define TASK_PREEMPTS_CURR(p, rq) \ ++ ((p)->prio < (rq)->curr->prio) ++ ++/* ++ * Tweaks for current ++ */ ++ ++#ifdef CURRENT_PTR ++struct task_struct * const ___current = &init_task; ++struct task_struct ** const current_ptr = (struct task_struct ** const)&___current; ++struct thread_info * const current_ti = &init_thread_union.thread_info; ++struct thread_info ** const current_ti_ptr = (struct thread_info ** const)¤t_ti; ++ ++EXPORT_SYMBOL(___current); ++EXPORT_SYMBOL(current_ti); ++ ++/* ++ * The scheduler itself doesnt want 'current' to be cached ++ * during context-switches: ++ */ ++# undef current ++# define current __current() ++# undef current_thread_info ++# define current_thread_info() __current_thread_info() ++#endif ++ + static inline int rt_policy(int policy) + { + if (unlikely(policy == SCHED_FIFO || policy == SCHED_RR)) +@@ -170,7 +214,7 @@ struct rt_prio_array { + + struct rt_bandwidth { + /* nests inside the rq lock: */ +- spinlock_t rt_runtime_lock; ++ raw_spinlock_t rt_runtime_lock; + ktime_t rt_period; + u64 rt_runtime; + struct hrtimer rt_period_timer; +@@ -211,6 +255,7 @@ void init_rt_bandwidth(struct rt_bandwid + + hrtimer_init(&rt_b->rt_period_timer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); ++ rt_b->rt_period_timer.irqsafe = 1; + rt_b->rt_period_timer.function = sched_rt_period_timer; + } + +@@ -338,6 +383,13 @@ static DEFINE_PER_CPU(struct rt_rq, init + */ + static DEFINE_SPINLOCK(task_group_lock); + ++#ifdef CONFIG_SMP ++static int root_task_group_empty(void) ++{ ++ return list_empty(&root_task_group.children); ++} ++#endif ++ + #ifdef CONFIG_FAIR_GROUP_SCHED + #ifdef CONFIG_USER_SCHED + # define INIT_TASK_GROUP_LOAD (2*NICE_0_LOAD) +@@ -398,6 +450,13 @@ static inline void set_task_rq(struct ta + + #else + ++#ifdef CONFIG_SMP ++static int root_task_group_empty(void) ++{ ++ return 1; ++} ++#endif ++ + static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { } + static inline struct task_group *task_group(struct task_struct *p) + { +@@ -474,17 +533,24 @@ struct rt_rq { + struct rt_prio_array active; + unsigned long rt_nr_running; + #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED +- int highest_prio; /* highest queued rt task prio */ ++ struct { ++ int curr; /* highest queued rt task prio */ ++#ifdef CONFIG_SMP ++ int next; /* next highest */ ++#endif ++ } highest_prio; + #endif + #ifdef CONFIG_SMP + unsigned long rt_nr_migratory; + int overloaded; ++ struct plist_head 
pushable_tasks; + #endif ++ unsigned long rt_nr_uninterruptible; + int rt_throttled; + u64 rt_time; + u64 rt_runtime; + /* Nests inside the rq lock: */ +- spinlock_t rt_runtime_lock; ++ raw_spinlock_t rt_runtime_lock; + + #ifdef CONFIG_RT_GROUP_SCHED + unsigned long rt_nr_boosted; +@@ -547,7 +613,7 @@ static struct root_domain def_root_domai + */ + struct rq { + /* runqueue lock: */ +- spinlock_t lock; ++ raw_spinlock_t lock; + + /* + * nr_running and cpu_load should be in the same cacheline because +@@ -556,7 +622,6 @@ struct rq { + unsigned long nr_running; + #define CPU_LOAD_IDX_MAX 5 + unsigned long cpu_load[CPU_LOAD_IDX_MAX]; +- unsigned char idle_at_tick; + #ifdef CONFIG_NO_HZ + unsigned long last_tick_seen; + unsigned char in_nohz_recently; +@@ -565,6 +630,7 @@ struct rq { + struct load_weight load; + unsigned long nr_load_updates; + u64 nr_switches; ++ u64 nr_migrations_in; + + struct cfs_rq cfs; + struct rt_rq rt; +@@ -585,6 +651,8 @@ struct rq { + */ + unsigned long nr_uninterruptible; + ++ unsigned long switch_timestamp; ++ unsigned long slice_avg; + struct task_struct *curr, *idle; + unsigned long next_balance; + struct mm_struct *prev_mm; +@@ -597,6 +665,7 @@ struct rq { + struct root_domain *rd; + struct sched_domain *sd; + ++ unsigned char idle_at_tick; + /* For active balancing */ + int active_balance; + int push_cpu; +@@ -610,6 +679,10 @@ struct rq { + struct list_head migration_queue; + #endif + ++ /* calc_load related fields */ ++ unsigned long calc_load_update; ++ long calc_load_active; ++ + #ifdef CONFIG_SCHED_HRTICK + #ifdef CONFIG_SMP + int hrtick_csd_pending; +@@ -625,9 +698,6 @@ struct rq { + /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */ + + /* sys_sched_yield() stats */ +- unsigned int yld_exp_empty; +- unsigned int yld_act_empty; +- unsigned int yld_both_empty; + unsigned int yld_count; + + /* schedule() stats */ +@@ -641,6 +711,13 @@ struct rq { + + /* BKL stats */ + unsigned int bkl_count; ++ ++ /* RT-overload stats: */ ++ unsigned long rto_schedule; ++ unsigned long rto_schedule_tail; ++ unsigned long rto_wakeup; ++ unsigned long rto_pulled; ++ unsigned long rto_pushed; + #endif + }; + +@@ -675,11 +752,18 @@ static inline int cpu_of(struct rq *rq) + #define task_rq(p) cpu_rq(task_cpu(p)) + #define cpu_curr(cpu) (cpu_rq(cpu)->curr) + +-static inline void update_rq_clock(struct rq *rq) ++inline void update_rq_clock(struct rq *rq) + { + rq->clock = sched_clock_cpu(cpu_of(rq)); + } + ++#ifndef CONFIG_SMP ++int task_is_current(struct task_struct *task) ++{ ++ return task_rq(task)->curr == task; ++} ++#endif ++ + /* + * Tunables that become constants when CONFIG_SCHED_DEBUG is off: + */ +@@ -868,11 +952,23 @@ static inline u64 global_rt_runtime(void + return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; + } + ++/* ++ * We really dont want to do anything complex within switch_to() ++ * on PREEMPT_RT - this check enforces this. 
++ */ ++#ifdef prepare_arch_switch ++# ifdef CONFIG_PREEMPT_RT ++# error FIXME ++# else ++# define _finish_arch_switch finish_arch_switch ++# endif ++#endif ++ + #ifndef prepare_arch_switch + # define prepare_arch_switch(next) do { } while (0) + #endif + #ifndef finish_arch_switch +-# define finish_arch_switch(prev) do { } while (0) ++# define _finish_arch_switch(prev) do { } while (0) + #endif + + static inline int task_current(struct rq *rq, struct task_struct *p) +@@ -880,18 +976,39 @@ static inline int task_current(struct rq + return rq->curr == p; + } + +-#ifndef __ARCH_WANT_UNLOCKED_CTXSW + static inline int task_running(struct rq *rq, struct task_struct *p) + { ++#ifdef CONFIG_SMP ++ return p->oncpu; ++#else + return task_current(rq, p); ++#endif + } + ++#ifndef __ARCH_WANT_UNLOCKED_CTXSW + static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) + { ++#ifdef CONFIG_SMP ++ /* ++ * We can optimise this out completely for !SMP, because the ++ * SMP rebalancing from interrupt is the only thing that cares ++ * here. ++ */ ++ next->oncpu = 1; ++#endif + } + + static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev) + { ++#ifdef CONFIG_SMP ++ /* ++ * After ->oncpu is cleared, the task can be moved to a different CPU. ++ * We must ensure this doesn't happen until the switch is completely ++ * finished. ++ */ ++ smp_wmb(); ++ prev->oncpu = 0; ++#endif + #ifdef CONFIG_DEBUG_SPINLOCK + /* this is a valid case when another task releases the spinlock */ + rq->lock.owner = current; +@@ -903,18 +1020,10 @@ static inline void finish_lock_switch(st + */ + spin_acquire(&rq->lock.dep_map, 0, 0, _THIS_IP_); + +- spin_unlock_irq(&rq->lock); ++ spin_unlock(&rq->lock); + } + + #else /* __ARCH_WANT_UNLOCKED_CTXSW */ +-static inline int task_running(struct rq *rq, struct task_struct *p) +-{ +-#ifdef CONFIG_SMP +- return p->oncpu; +-#else +- return task_current(rq, p); +-#endif +-} + + static inline void prepare_lock_switch(struct rq *rq, struct task_struct *next) + { +@@ -944,8 +1053,8 @@ static inline void finish_lock_switch(st + smp_wmb(); + prev->oncpu = 0; + #endif +-#ifndef __ARCH_WANT_INTERRUPTS_ON_CTXSW +- local_irq_enable(); ++#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW ++ local_irq_disable(); + #endif + } + #endif /* __ARCH_WANT_UNLOCKED_CTXSW */ +@@ -986,6 +1095,26 @@ static struct rq *task_rq_lock(struct ta + } + } + ++void curr_rq_lock_irq_save(unsigned long *flags) ++ __acquires(rq->lock) ++{ ++ struct rq *rq; ++ ++ local_irq_save(*flags); ++ rq = cpu_rq(smp_processor_id()); ++ spin_lock(&rq->lock); ++} ++ ++void curr_rq_unlock_irq_restore(unsigned long *flags) ++ __releases(rq->lock) ++{ ++ struct rq *rq; ++ ++ rq = cpu_rq(smp_processor_id()); ++ spin_unlock(&rq->lock); ++ local_irq_restore(*flags); ++} ++ + void task_rq_unlock_wait(struct task_struct *p) + { + struct rq *rq = task_rq(p); +@@ -1100,7 +1229,7 @@ static void hrtick_start(struct rq *rq, + if (rq == this_rq()) { + hrtimer_restart(timer); + } else if (!rq->hrtick_csd_pending) { +- __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd); ++ __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0); + rq->hrtick_csd_pending = 1; + } + } +@@ -1157,6 +1286,7 @@ static void init_rq_hrtick(struct rq *rq + + hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + rq->hrtick_timer.function = hrtick; ++ rq->hrtick_timer.irqsafe = 1; + } + #else /* CONFIG_SCHED_HRTICK */ + static inline void hrtick_clear(struct rq *rq) +@@ -1191,10 +1321,10 @@ static void resched_task(struct task_str + 
+ assert_spin_locked(&task_rq(p)->lock); + +- if (unlikely(test_tsk_thread_flag(p, TIF_NEED_RESCHED))) ++ if (test_tsk_need_resched(p)) + return; + +- set_tsk_thread_flag(p, TIF_NEED_RESCHED); ++ set_tsk_need_resched(p); + + cpu = task_cpu(p); + if (cpu == smp_processor_id()) +@@ -1232,7 +1362,7 @@ void wake_up_idle_cpu(int cpu) + { + struct rq *rq = cpu_rq(cpu); + +- if (cpu == smp_processor_id()) ++ if (cpu == raw_smp_processor_id()) + return; + + /* +@@ -1250,7 +1380,7 @@ void wake_up_idle_cpu(int cpu) + * lockless. The worst case is that the other CPU runs the + * idle task through an additional NOOP schedule() + */ +- set_tsk_thread_flag(rq->idle, TIF_NEED_RESCHED); ++ set_tsk_need_resched(rq->idle); + + /* NEED_RESCHED must be visible before we test polling */ + smp_mb(); +@@ -1618,21 +1748,42 @@ static inline void update_shares_locked( + + #endif + ++#ifdef CONFIG_PREEMPT ++ + /* +- * double_lock_balance - lock the busiest runqueue, this_rq is locked already. ++ * fair double_lock_balance: Safely acquires both rq->locks in a fair ++ * way at the expense of forcing extra atomic operations in all ++ * invocations. This assures that the double_lock is acquired using the ++ * same underlying policy as the spinlock_t on this architecture, which ++ * reduces latency compared to the unfair variant below. However, it ++ * also adds more overhead and therefore may reduce throughput. + */ +-static int double_lock_balance(struct rq *this_rq, struct rq *busiest) ++static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest) ++ __releases(this_rq->lock) ++ __acquires(busiest->lock) ++ __acquires(this_rq->lock) ++{ ++ spin_unlock(&this_rq->lock); ++ double_rq_lock(this_rq, busiest); ++ ++ return 1; ++} ++ ++#else ++/* ++ * Unfair double_lock_balance: Optimizes throughput at the expense of ++ * latency by eliminating extra atomic operations when the locks are ++ * already in proper order on entry. This favors lower cpu-ids and will ++ * grant the double lock to lower cpus over higher ids under contention, ++ * regardless of entry order into the function. ++ */ ++static int _double_lock_balance(struct rq *this_rq, struct rq *busiest) + __releases(this_rq->lock) + __acquires(busiest->lock) + __acquires(this_rq->lock) + { + int ret = 0; + +- if (unlikely(!irqs_disabled())) { +- /* printk() doesn't work good under rq->lock */ +- spin_unlock(&this_rq->lock); +- BUG_ON(1); +- } + if (unlikely(!spin_trylock(&busiest->lock))) { + if (busiest < this_rq) { + spin_unlock(&this_rq->lock); +@@ -1645,6 +1796,22 @@ static int double_lock_balance(struct rq + return ret; + } + ++#endif /* CONFIG_PREEMPT */ ++ ++/* ++ * double_lock_balance - lock the busiest runqueue, this_rq is locked already. 
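The non-preemptible _double_lock_balance() above avoids ABBA deadlock by trying the second runqueue lock opportunistically and, if that fails while the locks are in the wrong order, dropping the held lock and re-taking both lowest-address first; the CONFIG_PREEMPT variant instead always drops and re-locks both to stay fair. A user-space sketch of the same address-ordering idea with pthread mutexes (function name and locks are illustrative, and part of the fallback path is an assumption since the hunk above is abbreviated):

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    static pthread_mutex_t rq_a = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t rq_b = PTHREAD_MUTEX_INITIALIZER;

    /*
     * Caller already holds 'this_rq'; acquire 'busiest' too without risking
     * an ABBA deadlock. Returns 1 if 'this_rq' was dropped and re-taken, so
     * the caller must re-validate any state it derived under the lock.
     */
    static int double_lock_balance(pthread_mutex_t *this_rq, pthread_mutex_t *busiest)
    {
            int ret = 0;

            if (pthread_mutex_trylock(busiest) != 0) {
                    if ((uintptr_t)busiest < (uintptr_t)this_rq) {
                            /* Wrong order: drop ours, take both lowest address first. */
                            pthread_mutex_unlock(this_rq);
                            pthread_mutex_lock(busiest);
                            pthread_mutex_lock(this_rq);
                            ret = 1;
                    } else {
                            pthread_mutex_lock(busiest);    /* already in safe order */
                    }
            }
            return ret;
    }

    int main(void)
    {
            pthread_mutex_lock(&rq_a);
            /* trylock succeeds here since nothing else holds rq_b */
            int dropped = double_lock_balance(&rq_a, &rq_b);
            printf("dropped and re-took first lock: %d\n", dropped);
            pthread_mutex_unlock(&rq_b);
            pthread_mutex_unlock(&rq_a);
            return 0;
    }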
++ */ ++static int double_lock_balance(struct rq *this_rq, struct rq *busiest) ++{ ++ if (unlikely(!irqs_disabled())) { ++ /* printk() doesn't work good under rq->lock */ ++ spin_unlock(&this_rq->lock); ++ BUG_ON(1); ++ } ++ ++ return _double_lock_balance(this_rq, busiest); ++} ++ + static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest) + __releases(busiest->lock) + { +@@ -1662,6 +1829,8 @@ static void cfs_rq_set_shares(struct cfs + } + #endif + ++static void calc_load_account_active(struct rq *this_rq); ++ + #include "sched_stats.h" + #include "sched_idletask.c" + #include "sched_fair.c" +@@ -1713,6 +1882,9 @@ static void update_avg(u64 *avg, u64 sam + + static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup) + { ++ if (wakeup) ++ p->se.start_runtime = p->se.sum_exec_runtime; ++ + sched_info_queued(p); + p->sched_class->enqueue_task(rq, p, wakeup); + p->se.on_rq = 1; +@@ -1720,10 +1892,15 @@ static void enqueue_task(struct rq *rq, + + static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep) + { +- if (sleep && p->se.last_wakeup) { +- update_avg(&p->se.avg_overlap, +- p->se.sum_exec_runtime - p->se.last_wakeup); +- p->se.last_wakeup = 0; ++ if (sleep) { ++ if (p->se.last_wakeup) { ++ update_avg(&p->se.avg_overlap, ++ p->se.sum_exec_runtime - p->se.last_wakeup); ++ p->se.last_wakeup = 0; ++ } else { ++ update_avg(&p->se.avg_wakeup, ++ sysctl_sched_wakeup_granularity); ++ } + } + + sched_info_dequeued(p); +@@ -1754,6 +1931,8 @@ static inline int normal_prio(struct tas + prio = MAX_RT_PRIO-1 - p->rt_priority; + else + prio = __normal_prio(p); ++ ++// trace_special_pid(p->pid, PRIO(p), __PRIO(prio)); + return prio; + } + +@@ -1893,12 +2072,15 @@ void set_task_cpu(struct task_struct *p, + p->se.sleep_start -= clock_offset; + if (p->se.block_start) + p->se.block_start -= clock_offset; ++#endif + if (old_cpu != new_cpu) { +- schedstat_inc(p, se.nr_migrations); ++ p->se.nr_migrations++; ++ new_rq->nr_migrations_in++; ++#ifdef CONFIG_SCHEDSTATS + if (task_hot(p, old_rq->clock, NULL)) + schedstat_inc(p, se.nr_forced2_migrations); +- } + #endif ++ } + p->se.vruntime -= old_cfsrq->min_vruntime - + new_cfsrq->min_vruntime; + +@@ -2025,7 +2207,7 @@ unsigned long wait_task_inactive(struct + * it must be off the runqueue _entirely_, and not + * preempted! + * +- * So if it wa still runnable (but just not actively ++ * So if it was still runnable (but just not actively + * running right now), it's preempted, and we should + * yield - it could be a while. + */ +@@ -2250,6 +2432,47 @@ static int sched_balance_self(int cpu, i + + #endif /* CONFIG_SMP */ + ++#ifdef CONFIG_DEBUG_PREEMPT ++void notrace preempt_enable_no_resched(void) ++{ ++ static int once = 1; ++ ++ barrier(); ++ dec_preempt_count(); ++ ++ if (once && !preempt_count()) { ++ once = 0; ++ printk(KERN_ERR "BUG: %s:%d task might have lost a preemption check!\n", ++ current->comm, current->pid); ++ dump_stack(); ++ } ++} ++ ++EXPORT_SYMBOL(preempt_enable_no_resched); ++#endif ++ ++ ++/** ++ * task_oncpu_function_call - call a function on the cpu on which a task runs ++ * @p: the task to evaluate ++ * @func: the function to be called ++ * @info: the function call argument ++ * ++ * Calls the function @func when the task is currently running. 
This might ++ * be on the current CPU, which just calls the function directly ++ */ ++void task_oncpu_function_call(struct task_struct *p, ++ void (*func) (void *info), void *info) ++{ ++ int cpu; ++ ++ preempt_disable(); ++ cpu = task_cpu(p); ++ if (task_curr(p)) ++ smp_call_function_single(cpu, func, info, 1); ++ preempt_enable(); ++} ++ + /*** + * try_to_wake_up - wake up a thread + * @p: the to-be-woken-up thread +@@ -2264,7 +2487,8 @@ static int sched_balance_self(int cpu, i + * + * returns failure only if the task is already active. + */ +-static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync) ++static int ++try_to_wake_up(struct task_struct *p, unsigned int state, int sync, int mutex) + { + int cpu, orig_cpu, this_cpu, success = 0; + unsigned long flags; +@@ -2275,7 +2499,7 @@ static int try_to_wake_up(struct task_st + sync = 0; + + #ifdef CONFIG_SMP +- if (sched_feat(LB_WAKEUP_UPDATE)) { ++ if (sched_feat(LB_WAKEUP_UPDATE) && !root_task_group_empty()) { + struct sched_domain *sd; + + this_cpu = raw_smp_processor_id(); +@@ -2290,6 +2514,13 @@ static int try_to_wake_up(struct task_st + } + #endif + ++#ifdef CONFIG_PREEMPT_RT ++ /* ++ * sync wakeups can increase wakeup latencies: ++ */ ++ if (rt_task(p)) ++ sync = 0; ++#endif + smp_wmb(); + rq = task_rq_lock(p, &flags); + update_rq_clock(rq); +@@ -2353,18 +2584,43 @@ out_activate: + activate_task(rq, p, 1); + success = 1; + ++ /* ++ * Only attribute actual wakeups done by this task. ++ */ ++ if (!in_interrupt()) { ++ struct sched_entity *se = ¤t->se; ++ u64 sample = se->sum_exec_runtime; ++ ++ if (se->last_wakeup) ++ sample -= se->last_wakeup; ++ else ++ sample -= se->start_runtime; ++ update_avg(&se->avg_wakeup, sample); ++ ++ se->last_wakeup = se->sum_exec_runtime; ++ } ++ + out_running: + trace_sched_wakeup(rq, p, success); + check_preempt_curr(rq, p, sync); + +- p->state = TASK_RUNNING; ++ /* ++ * For a mutex wakeup we or TASK_RUNNING_MUTEX to the task ++ * state to preserve the original state, so a real wakeup ++ * still can see the (UN)INTERRUPTIBLE bits in the state check ++ * above. We dont have to worry about the | TASK_RUNNING_MUTEX ++ * here. The waiter is serialized by the mutex lock and nobody ++ * else can fiddle with p->state as we hold rq lock. 
++ */ ++ if (mutex) ++ p->state |= TASK_RUNNING_MUTEX; ++ else ++ p->state = TASK_RUNNING; + #ifdef CONFIG_SMP + if (p->sched_class->task_wake_up) + p->sched_class->task_wake_up(rq, p); + #endif + out: +- current->se.last_wakeup = current->se.sum_exec_runtime; +- + task_rq_unlock(rq, &flags); + + return success; +@@ -2372,13 +2628,31 @@ out: + + int wake_up_process(struct task_struct *p) + { +- return try_to_wake_up(p, TASK_ALL, 0); ++ return try_to_wake_up(p, TASK_ALL, 0, 0); + } + EXPORT_SYMBOL(wake_up_process); + ++int wake_up_process_sync(struct task_struct * p) ++{ ++ return try_to_wake_up(p, TASK_ALL, 1, 0); ++} ++EXPORT_SYMBOL(wake_up_process_sync); ++ ++int wake_up_process_mutex(struct task_struct * p) ++{ ++ return try_to_wake_up(p, TASK_ALL, 0, 1); ++} ++EXPORT_SYMBOL(wake_up_process_mutex); ++ ++int wake_up_process_mutex_sync(struct task_struct * p) ++{ ++ return try_to_wake_up(p, TASK_ALL, 1, 1); ++} ++EXPORT_SYMBOL(wake_up_process_mutex_sync); ++ + int wake_up_state(struct task_struct *p, unsigned int state) + { +- return try_to_wake_up(p, state, 0); ++ return try_to_wake_up(p, state, 0, 0); + } + + /* +@@ -2392,8 +2666,11 @@ static void __sched_fork(struct task_str + p->se.exec_start = 0; + p->se.sum_exec_runtime = 0; + p->se.prev_sum_exec_runtime = 0; ++ p->se.nr_migrations = 0; + p->se.last_wakeup = 0; + p->se.avg_overlap = 0; ++ p->se.start_runtime = 0; ++ p->se.avg_wakeup = sysctl_sched_wakeup_granularity; + + #ifdef CONFIG_SCHEDSTATS + p->se.wait_start = 0; +@@ -2449,13 +2726,15 @@ void sched_fork(struct task_struct *p, i + if (likely(sched_info_on())) + memset(&p->sched_info, 0, sizeof(p->sched_info)); + #endif +-#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) ++#if defined(CONFIG_SMP) + p->oncpu = 0; + #endif + #ifdef CONFIG_PREEMPT + /* Want to start with kernel preemption disabled. */ + task_thread_info(p)->preempt_count = 1; + #endif ++ plist_node_init(&p->pushable_tasks, MAX_PRIO); ++ + put_cpu(); + } + +@@ -2499,7 +2778,7 @@ void wake_up_new_task(struct task_struct + #ifdef CONFIG_PREEMPT_NOTIFIERS + + /** +- * preempt_notifier_register - tell me when current is being being preempted & rescheduled ++ * preempt_notifier_register - tell me when current is being preempted & rescheduled + * @notifier: notifier struct to register + */ + void preempt_notifier_register(struct preempt_notifier *notifier) +@@ -2525,8 +2804,17 @@ static void fire_sched_in_preempt_notifi + struct preempt_notifier *notifier; + struct hlist_node *node; + ++ if (hlist_empty(&curr->preempt_notifiers)) ++ return; ++ ++ /* ++ * The KVM sched in notifier expects to be called with ++ * interrupts enabled. 
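For a mutex wakeup, try_to_wake_up() above ORs TASK_RUNNING_MUTEX into p->state instead of overwriting it, so a later real wakeup can still see the original (UN)INTERRUPTIBLE bits. A small bit-flag demo of why OR-ing preserves that information; the flag values here are illustrative, not the kernel's:

    #include <stdio.h>

    #define TASK_RUNNING            0x0000
    #define TASK_INTERRUPTIBLE      0x0001
    #define TASK_UNINTERRUPTIBLE    0x0002
    #define TASK_RUNNING_MUTEX      0x0100  /* illustrative value */

    int main(void)
    {
            unsigned int state = TASK_INTERRUPTIBLE;  /* task slept interruptibly */

            /* Mutex wakeup: mark runnable for the rtmutex code, keep the old bits. */
            state |= TASK_RUNNING_MUTEX;
            printf("interruptible bit still visible: %d\n",
                   !!(state & TASK_INTERRUPTIBLE));            /* 1 */

            /* A real wakeup simply overwrites the state. */
            state = TASK_RUNNING;
            printf("state after real wakeup: %#x\n", state);   /* 0 */
            return 0;
    }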
++ */ ++ local_irq_enable(); + hlist_for_each_entry(notifier, node, &curr->preempt_notifiers, link) + notifier->ops->sched_in(notifier, raw_smp_processor_id()); ++ local_irq_disable(); + } + + static void +@@ -2596,6 +2884,12 @@ static void finish_task_switch(struct rq + { + struct mm_struct *mm = rq->prev_mm; + long prev_state; ++#ifdef CONFIG_SMP ++ int post_schedule = 0; ++ ++ if (current->sched_class->needs_post_schedule) ++ post_schedule = current->sched_class->needs_post_schedule(rq); ++#endif + + rq->prev_mm = NULL; + +@@ -2611,16 +2905,21 @@ static void finish_task_switch(struct rq + * Manfred Spraul + */ + prev_state = prev->state; +- finish_arch_switch(prev); ++ _finish_arch_switch(prev); ++ perf_counter_task_sched_in(current, cpu_of(rq)); + finish_lock_switch(rq, prev); + #ifdef CONFIG_SMP +- if (current->sched_class->post_schedule) ++ if (post_schedule) + current->sched_class->post_schedule(rq); + #endif + + fire_sched_in_preempt_notifiers(current); ++ /* ++ * Delay the final freeing of the mm or task, so that we dont have ++ * to do complex work from within the scheduler: ++ */ + if (mm) +- mmdrop(mm); ++ mmdrop_delayed(mm); + if (unlikely(prev_state == TASK_DEAD)) { + /* + * Remove function-return probe instances associated with this +@@ -2638,12 +2937,15 @@ static void finish_task_switch(struct rq + asmlinkage void schedule_tail(struct task_struct *prev) + __releases(rq->lock) + { +- struct rq *rq = this_rq(); +- +- finish_task_switch(rq, prev); ++ preempt_disable(); ++ finish_task_switch(this_rq(), prev); ++ __preempt_enable_no_resched(); ++ local_irq_enable(); + #ifdef __ARCH_WANT_UNLOCKED_CTXSW + /* In this case, finish_task_switch does not reenable preemption */ + preempt_enable(); ++#else ++ preempt_check_resched(); + #endif + if (current->set_child_tid) + put_user(task_pid_vnr(current), current->set_child_tid); +@@ -2691,6 +2993,11 @@ context_switch(struct rq *rq, struct tas + spin_release(&rq->lock.dep_map, 1, _THIS_IP_); + #endif + ++#ifdef CURRENT_PTR ++ barrier(); ++ *current_ptr = next; ++ *current_ti_ptr = next->thread_info; ++#endif + /* Here we just switch the register state and the stack. */ + switch_to(prev, next, prev); + +@@ -2737,6 +3044,11 @@ unsigned long nr_uninterruptible(void) + return sum; + } + ++unsigned long nr_uninterruptible_cpu(int cpu) ++{ ++ return cpu_rq(cpu)->nr_uninterruptible; ++} ++ + unsigned long long nr_context_switches(void) + { + int i; +@@ -2755,22 +3067,91 @@ unsigned long nr_iowait(void) + for_each_possible_cpu(i) + sum += atomic_read(&cpu_rq(i)->nr_iowait); + ++ /* ++ * Since we read the counters lockless, it might be slightly ++ * inaccurate. Do not allow it to go below zero though: ++ */ ++ if (unlikely((long)sum < 0)) ++ sum = 0; ++ + return sum; + } + +-unsigned long nr_active(void) ++/* Variables and functions for calc_load */ ++static atomic_long_t calc_load_tasks; ++static unsigned long calc_load_update; ++unsigned long avenrun[3]; ++EXPORT_SYMBOL(avenrun); ++ ++/** ++ * get_avenrun - get the load average array ++ * @loads: pointer to dest load array ++ * @offset: offset to add ++ * @shift: shift count to shift the result left ++ * ++ * These values are estimates at best, so no need for locking. 
++ */ ++void get_avenrun(unsigned long *loads, unsigned long offset, int shift) ++{ ++ loads[0] = (avenrun[0] + offset) << shift; ++ loads[1] = (avenrun[1] + offset) << shift; ++ loads[2] = (avenrun[2] + offset) << shift; ++} ++ ++static unsigned long ++calc_load(unsigned long load, unsigned long exp, unsigned long active) + { +- unsigned long i, running = 0, uninterruptible = 0; ++ load *= exp; ++ load += active * (FIXED_1 - exp); ++ return load >> FSHIFT; ++} + +- for_each_online_cpu(i) { +- running += cpu_rq(i)->nr_running; +- uninterruptible += cpu_rq(i)->nr_uninterruptible; +- } ++/* ++ * calc_load - update the avenrun load estimates 10 ticks after the ++ * CPUs have updated calc_load_tasks. ++ */ ++void calc_global_load(void) ++{ ++ unsigned long upd = calc_load_update + 10; ++ long active; ++ ++ if (time_before(jiffies, upd)) ++ return; ++ ++ active = atomic_long_read(&calc_load_tasks); ++ active = active > 0 ? active * FIXED_1 : 0; ++ ++ avenrun[0] = calc_load(avenrun[0], EXP_1, active); ++ avenrun[1] = calc_load(avenrun[1], EXP_5, active); ++ avenrun[2] = calc_load(avenrun[2], EXP_15, active); ++ ++ calc_load_update += LOAD_FREQ; ++} ++ ++/* ++ * Either called from update_cpu_load() or from a cpu going idle ++ */ ++static void calc_load_account_active(struct rq *this_rq) ++{ ++ long nr_active, delta; ++ ++ nr_active = this_rq->nr_running; ++ nr_active += (long) this_rq->nr_uninterruptible; + +- if (unlikely((long)uninterruptible < 0)) +- uninterruptible = 0; ++ if (nr_active != this_rq->calc_load_active) { ++ delta = nr_active - this_rq->calc_load_active; ++ this_rq->calc_load_active = nr_active; ++ atomic_long_add(delta, &calc_load_tasks); ++ } ++} + +- return running + uninterruptible; ++/* ++ * Externally visible per-cpu scheduler statistics: ++ * cpu_nr_migrations(cpu) - number of migrations into that cpu ++ */ ++u64 cpu_nr_migrations(int cpu) ++{ ++ return cpu_rq(cpu)->nr_migrations_in; + } + + /* +@@ -2801,6 +3182,11 @@ static void update_cpu_load(struct rq *t + new_load += scale-1; + this_rq->cpu_load[i] = (old_load*(scale-1) + new_load) >> i; + } ++ ++ if (time_after_eq(jiffies, this_rq->calc_load_update)) { ++ this_rq->calc_load_update += LOAD_FREQ; ++ calc_load_account_active(this_rq); ++ } + } + + #ifdef CONFIG_SMP +@@ -2921,6 +3307,7 @@ int can_migrate_task(struct task_struct + struct sched_domain *sd, enum cpu_idle_type idle, + int *all_pinned) + { ++ int tsk_cache_hot = 0; + /* + * We do not migrate tasks that are: + * 1) running (obviously), or +@@ -2944,10 +3331,11 @@ int can_migrate_task(struct task_struct + * 2) too many balance attempts have failed. + */ + +- if (!task_hot(p, rq->clock, sd) || +- sd->nr_balance_failed > sd->cache_nice_tries) { ++ tsk_cache_hot = task_hot(p, rq->clock, sd); ++ if (!tsk_cache_hot || ++ sd->nr_balance_failed > sd->cache_nice_tries) { + #ifdef CONFIG_SCHEDSTATS +- if (task_hot(p, rq->clock, sd)) { ++ if (tsk_cache_hot) { + schedstat_inc(sd, lb_hot_gained[idle]); + schedstat_inc(p, se.nr_forced_migrations); + } +@@ -2955,7 +3343,7 @@ int can_migrate_task(struct task_struct + return 1; + } + +- if (task_hot(p, rq->clock, sd)) { ++ if (tsk_cache_hot) { + schedstat_inc(p, se.nr_failed_migrations_hot); + return 0; + } +@@ -2995,6 +3383,16 @@ next: + pulled++; + rem_load_move -= p->se.load.weight; + ++#ifdef CONFIG_PREEMPT ++ /* ++ * NEWIDLE balancing is a source of latency, so preemptible kernels ++ * will stop after the first task is pulled to minimize the critical ++ * section. 
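calc_global_load() above updates the 1/5/15-minute load averages with the fixed-point recurrence load = (load*exp + active*(FIXED_1 - exp)) >> FSHIFT, fed every LOAD_FREQ from the per-runqueue calc_load_tasks deltas. A standalone sketch of that arithmetic; calc_load() is taken from the hunk itself, while the EXP_* constants and the LOAD_INT/LOAD_FRAC presentation macros are the usual kernel values (FSHIFT = 11, 5-second LOAD_FREQ) assumed here rather than shown in this hunk:

    #include <stdio.h>

    #define FSHIFT  11                      /* bits of fractional precision */
    #define FIXED_1 (1 << FSHIFT)           /* 1.0 in fixed point */
    #define EXP_1   1884                    /* 1/exp(5s/1min) in fixed point */
    #define EXP_5   2014                    /* 1/exp(5s/5min) */
    #define EXP_15  2037                    /* 1/exp(5s/15min) */

    #define LOAD_INT(x)  ((x) >> FSHIFT)
    #define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

    static unsigned long calc_load(unsigned long load, unsigned long exp,
                                   unsigned long active)
    {
            load *= exp;
            load += active * (FIXED_1 - exp);
            return load >> FSHIFT;
    }

    int main(void)
    {
            unsigned long avenrun[3] = { 0, 0, 0 };
            long nr_active = 3;             /* running + uninterruptible tasks */
            unsigned long active = nr_active * FIXED_1;

            /* Simulate one minute of 5-second LOAD_FREQ ticks at constant load. */
            for (int i = 0; i < 12; i++) {
                    avenrun[0] = calc_load(avenrun[0], EXP_1, active);
                    avenrun[1] = calc_load(avenrun[1], EXP_5, active);
                    avenrun[2] = calc_load(avenrun[2], EXP_15, active);
            }

            printf("loadavg: %lu.%02lu %lu.%02lu %lu.%02lu\n",
                   LOAD_INT(avenrun[0]), LOAD_FRAC(avenrun[0]),
                   LOAD_INT(avenrun[1]), LOAD_FRAC(avenrun[1]),
                   LOAD_INT(avenrun[2]), LOAD_FRAC(avenrun[2]));
            return 0;
    }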
++ */ ++ if (idle == CPU_NEWLY_IDLE) ++ goto out; ++#endif ++ + /* + * We only want to steal up to the prescribed amount of weighted load. + */ +@@ -3041,9 +3439,15 @@ static int move_tasks(struct rq *this_rq + sd, idle, all_pinned, &this_best_prio); + class = class->next; + ++#ifdef CONFIG_PREEMPT ++ /* ++ * NEWIDLE balancing is a source of latency, so preemptible ++ * kernels will stop after the first task is pulled to minimize ++ * the critical section. ++ */ + if (idle == CPU_NEWLY_IDLE && this_rq->nr_running) + break; +- ++#endif + } while (class && max_load_move > total_load_moved); + + return total_load_moved > 0; +@@ -3093,246 +3497,479 @@ static int move_one_task(struct rq *this + + return 0; + } +- +-/* +- * find_busiest_group finds and returns the busiest CPU group within the +- * domain. It calculates and returns the amount of weighted load which +- * should be moved to restore balance via the imbalance parameter. ++/********** Helpers for find_busiest_group ************************/ ++/** ++ * sd_lb_stats - Structure to store the statistics of a sched_domain ++ * during load balancing. + */ +-static struct sched_group * +-find_busiest_group(struct sched_domain *sd, int this_cpu, +- unsigned long *imbalance, enum cpu_idle_type idle, +- int *sd_idle, const struct cpumask *cpus, int *balance) +-{ +- struct sched_group *busiest = NULL, *this = NULL, *group = sd->groups; +- unsigned long max_load, avg_load, total_load, this_load, total_pwr; +- unsigned long max_pull; +- unsigned long busiest_load_per_task, busiest_nr_running; +- unsigned long this_load_per_task, this_nr_running; +- int load_idx, group_imb = 0; ++struct sd_lb_stats { ++ struct sched_group *busiest; /* Busiest group in this sd */ ++ struct sched_group *this; /* Local group in this sd */ ++ unsigned long total_load; /* Total load of all groups in sd */ ++ unsigned long total_pwr; /* Total power of all groups in sd */ ++ unsigned long avg_load; /* Average load across all groups in sd */ ++ ++ /** Statistics of this group */ ++ unsigned long this_load; ++ unsigned long this_load_per_task; ++ unsigned long this_nr_running; ++ ++ /* Statistics of the busiest group */ ++ unsigned long max_load; ++ unsigned long busiest_load_per_task; ++ unsigned long busiest_nr_running; ++ ++ int group_imb; /* Is there imbalance in this sd */ + #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) +- int power_savings_balance = 1; +- unsigned long leader_nr_running = 0, min_load_per_task = 0; +- unsigned long min_nr_running = ULONG_MAX; +- struct sched_group *group_min = NULL, *group_leader = NULL; ++ int power_savings_balance; /* Is powersave balance needed for this sd */ ++ struct sched_group *group_min; /* Least loaded group in sd */ ++ struct sched_group *group_leader; /* Group which relieves group_min */ ++ unsigned long min_load_per_task; /* load_per_task in group_min */ ++ unsigned long leader_nr_running; /* Nr running of group_leader */ ++ unsigned long min_nr_running; /* Nr running of group_min */ + #endif ++}; + +- max_load = this_load = total_load = total_pwr = 0; +- busiest_load_per_task = busiest_nr_running = 0; +- this_load_per_task = this_nr_running = 0; ++/** ++ * sg_lb_stats - stats of a sched_group required for load_balancing ++ */ ++struct sg_lb_stats { ++ unsigned long avg_load; /*Avg load across the CPUs of the group */ ++ unsigned long group_load; /* Total load over the CPUs of the group */ ++ unsigned long sum_nr_running; /* Nr tasks running in the group */ ++ unsigned long sum_weighted_load; /* Weighted load of 
group's tasks */ ++ unsigned long group_capacity; ++ int group_imb; /* Is there an imbalance in the group ? */ ++}; ++ ++/** ++ * group_first_cpu - Returns the first cpu in the cpumask of a sched_group. ++ * @group: The group whose first cpu is to be returned. ++ */ ++static inline unsigned int group_first_cpu(struct sched_group *group) ++{ ++ return cpumask_first(sched_group_cpus(group)); ++} ++ ++/** ++ * get_sd_load_idx - Obtain the load index for a given sched domain. ++ * @sd: The sched_domain whose load_idx is to be obtained. ++ * @idle: The Idle status of the CPU for whose sd load_icx is obtained. ++ */ ++static inline int get_sd_load_idx(struct sched_domain *sd, ++ enum cpu_idle_type idle) ++{ ++ int load_idx; + +- if (idle == CPU_NOT_IDLE) ++ switch (idle) { ++ case CPU_NOT_IDLE: + load_idx = sd->busy_idx; +- else if (idle == CPU_NEWLY_IDLE) ++ break; ++ ++ case CPU_NEWLY_IDLE: + load_idx = sd->newidle_idx; +- else ++ break; ++ default: + load_idx = sd->idle_idx; ++ break; ++ } + +- do { +- unsigned long load, group_capacity, max_cpu_load, min_cpu_load; +- int local_group; +- int i; +- int __group_imb = 0; +- unsigned int balance_cpu = -1, first_idle_cpu = 0; +- unsigned long sum_nr_running, sum_weighted_load; +- unsigned long sum_avg_load_per_task; +- unsigned long avg_load_per_task; ++ return load_idx; ++} + +- local_group = cpumask_test_cpu(this_cpu, +- sched_group_cpus(group)); + +- if (local_group) +- balance_cpu = cpumask_first(sched_group_cpus(group)); ++#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) ++/** ++ * init_sd_power_savings_stats - Initialize power savings statistics for ++ * the given sched_domain, during load balancing. ++ * ++ * @sd: Sched domain whose power-savings statistics are to be initialized. ++ * @sds: Variable containing the statistics for sd. ++ * @idle: Idle status of the CPU at which we're performing load-balancing. ++ */ ++static inline void init_sd_power_savings_stats(struct sched_domain *sd, ++ struct sd_lb_stats *sds, enum cpu_idle_type idle) ++{ ++ /* ++ * Busy processors will not participate in power savings ++ * balance. ++ */ ++ if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE)) ++ sds->power_savings_balance = 0; ++ else { ++ sds->power_savings_balance = 1; ++ sds->min_nr_running = ULONG_MAX; ++ sds->leader_nr_running = 0; ++ } ++} + +- /* Tally up the load of all CPUs in the group */ +- sum_weighted_load = sum_nr_running = avg_load = 0; +- sum_avg_load_per_task = avg_load_per_task = 0; ++/** ++ * update_sd_power_savings_stats - Update the power saving stats for a ++ * sched_domain while performing load balancing. ++ * ++ * @group: sched_group belonging to the sched_domain under consideration. ++ * @sds: Variable containing the statistics of the sched_domain ++ * @local_group: Does group contain the CPU for which we're performing ++ * load balancing ? ++ * @sgs: Variable containing the statistics of the group. 
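++ *
++ * Tracks two candidates in @sds: group_min, the least loaded non-idle
++ * group (whose tasks could be migrated away so its CPUs may idle),
++ * and group_leader, the nearly full group best placed to take that
++ * load.  Does nothing once sds->power_savings_balance is cleared.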
++ */ ++static inline void update_sd_power_savings_stats(struct sched_group *group, ++ struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs) ++{ + +- max_cpu_load = 0; +- min_cpu_load = ~0UL; ++ if (!sds->power_savings_balance) ++ return; + +- for_each_cpu_and(i, sched_group_cpus(group), cpus) { +- struct rq *rq = cpu_rq(i); ++ /* ++ * If the local group is idle or completely loaded ++ * no need to do power savings balance at this domain ++ */ ++ if (local_group && (sds->this_nr_running >= sgs->group_capacity || ++ !sds->this_nr_running)) ++ sds->power_savings_balance = 0; + +- if (*sd_idle && rq->nr_running) +- *sd_idle = 0; ++ /* ++ * If a group is already running at full capacity or idle, ++ * don't include that group in power savings calculations ++ */ ++ if (!sds->power_savings_balance || ++ sgs->sum_nr_running >= sgs->group_capacity || ++ !sgs->sum_nr_running) ++ return; + +- /* Bias balancing toward cpus of our domain */ +- if (local_group) { +- if (idle_cpu(i) && !first_idle_cpu) { +- first_idle_cpu = 1; +- balance_cpu = i; +- } ++ /* ++ * Calculate the group which has the least non-idle load. ++ * This is the group from where we need to pick up the load ++ * for saving power ++ */ ++ if ((sgs->sum_nr_running < sds->min_nr_running) || ++ (sgs->sum_nr_running == sds->min_nr_running && ++ group_first_cpu(group) > group_first_cpu(sds->group_min))) { ++ sds->group_min = group; ++ sds->min_nr_running = sgs->sum_nr_running; ++ sds->min_load_per_task = sgs->sum_weighted_load / ++ sgs->sum_nr_running; ++ } + +- load = target_load(i, load_idx); +- } else { +- load = source_load(i, load_idx); +- if (load > max_cpu_load) +- max_cpu_load = load; +- if (min_cpu_load > load) +- min_cpu_load = load; +- } ++ /* ++ * Calculate the group which is almost near its ++ * capacity but still has some space to pick up some load ++ * from other group and save more power ++ */ ++ if (sgs->sum_nr_running > sgs->group_capacity - 1) ++ return; + +- avg_load += load; +- sum_nr_running += rq->nr_running; +- sum_weighted_load += weighted_cpuload(i); ++ if (sgs->sum_nr_running > sds->leader_nr_running || ++ (sgs->sum_nr_running == sds->leader_nr_running && ++ group_first_cpu(group) < group_first_cpu(sds->group_leader))) { ++ sds->group_leader = group; ++ sds->leader_nr_running = sgs->sum_nr_running; ++ } ++} + +- sum_avg_load_per_task += cpu_avg_load_per_task(i); +- } ++/** ++ * check_power_save_busiest_group - Check if we have potential to perform ++ * some power-savings balance. If yes, set the busiest group to be ++ * the least loaded group in the sched_domain, so that it's CPUs can ++ * be put to idle. ++ * ++ * @sds: Variable containing the statistics of the sched_domain ++ * under consideration. ++ * @this_cpu: Cpu at which we're currently performing load-balancing. ++ * @imbalance: Variable to store the imbalance. ++ * ++ * Returns 1 if there is potential to perform power-savings balance. ++ * Else returns 0. ++ */ ++static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, ++ int this_cpu, unsigned long *imbalance) ++{ ++ if (!sds->power_savings_balance) ++ return 0; + +- /* +- * First idle cpu or the first cpu(busiest) in this sched group +- * is eligible for doing load balancing at this and above +- * domains. In the newly idle case, we will allow all the cpu's +- * to do the newly idle load balance. 
+- */ +- if (idle != CPU_NEWLY_IDLE && local_group && +- balance_cpu != this_cpu && balance) { +- *balance = 0; +- goto ret; +- } ++ if (sds->this != sds->group_leader || ++ sds->group_leader == sds->group_min) ++ return 0; + +- total_load += avg_load; +- total_pwr += group->__cpu_power; ++ *imbalance = sds->min_load_per_task; ++ sds->busiest = sds->group_min; + +- /* Adjust by relative CPU power of the group */ +- avg_load = sg_div_cpu_power(group, +- avg_load * SCHED_LOAD_SCALE); ++ if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) { ++ cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu = ++ group_first_cpu(sds->group_leader); ++ } + ++ return 1; + +- /* +- * Consider the group unbalanced when the imbalance is larger +- * than the average weight of two tasks. +- * +- * APZ: with cgroup the avg task weight can vary wildly and +- * might not be a suitable number - should we keep a +- * normalized nr_running number somewhere that negates +- * the hierarchy? +- */ +- avg_load_per_task = sg_div_cpu_power(group, +- sum_avg_load_per_task * SCHED_LOAD_SCALE); ++} ++#else /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ ++static inline void init_sd_power_savings_stats(struct sched_domain *sd, ++ struct sd_lb_stats *sds, enum cpu_idle_type idle) ++{ ++ return; ++} ++ ++static inline void update_sd_power_savings_stats(struct sched_group *group, ++ struct sd_lb_stats *sds, int local_group, struct sg_lb_stats *sgs) ++{ ++ return; ++} ++ ++static inline int check_power_save_busiest_group(struct sd_lb_stats *sds, ++ int this_cpu, unsigned long *imbalance) ++{ ++ return 0; ++} ++#endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */ ++ ++ ++/** ++ * update_sg_lb_stats - Update sched_group's statistics for load balancing. ++ * @group: sched_group whose statistics are to be updated. ++ * @this_cpu: Cpu for which load balance is currently performed. ++ * @idle: Idle status of this_cpu ++ * @load_idx: Load index of sched_domain of this_cpu for load calc. ++ * @sd_idle: Idle status of the sched_domain containing group. ++ * @local_group: Does group contain this_cpu. ++ * @cpus: Set of cpus considered for load balancing. ++ * @balance: Should we balance. ++ * @sgs: variable to hold the statistics for this group. 
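++ *
++ * In effect (sg_div_cpu_power() is a reciprocal-based divide by
++ * group->__cpu_power), the derived figures are:
++ *
++ *    sgs->avg_load       = sgs->group_load * SCHED_LOAD_SCALE
++ *                              / group->__cpu_power;
++ *    sgs->group_capacity = group->__cpu_power / SCHED_LOAD_SCALE;
++ *    sgs->group_imb      = (max_cpu_load - min_cpu_load)
++ *                              > 2 * avg_load_per_task;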
++ */ ++static inline void update_sg_lb_stats(struct sched_group *group, int this_cpu, ++ enum cpu_idle_type idle, int load_idx, int *sd_idle, ++ int local_group, const struct cpumask *cpus, ++ int *balance, struct sg_lb_stats *sgs) ++{ ++ unsigned long load, max_cpu_load, min_cpu_load; ++ int i; ++ unsigned int balance_cpu = -1, first_idle_cpu = 0; ++ unsigned long sum_avg_load_per_task; ++ unsigned long avg_load_per_task; ++ ++ if (local_group) ++ balance_cpu = group_first_cpu(group); + +- if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task) +- __group_imb = 1; ++ /* Tally up the load of all CPUs in the group */ ++ sum_avg_load_per_task = avg_load_per_task = 0; ++ max_cpu_load = 0; ++ min_cpu_load = ~0UL; + +- group_capacity = group->__cpu_power / SCHED_LOAD_SCALE; ++ for_each_cpu_and(i, sched_group_cpus(group), cpus) { ++ struct rq *rq = cpu_rq(i); + ++ if (*sd_idle && rq->nr_running) ++ *sd_idle = 0; ++ ++ /* Bias balancing toward cpus of our domain */ + if (local_group) { +- this_load = avg_load; +- this = group; +- this_nr_running = sum_nr_running; +- this_load_per_task = sum_weighted_load; +- } else if (avg_load > max_load && +- (sum_nr_running > group_capacity || __group_imb)) { +- max_load = avg_load; +- busiest = group; +- busiest_nr_running = sum_nr_running; +- busiest_load_per_task = sum_weighted_load; +- group_imb = __group_imb; ++ if (idle_cpu(i) && !first_idle_cpu) { ++ first_idle_cpu = 1; ++ balance_cpu = i; ++ } ++ ++ load = target_load(i, load_idx); ++ } else { ++ load = source_load(i, load_idx); ++ if (load > max_cpu_load) ++ max_cpu_load = load; ++ if (min_cpu_load > load) ++ min_cpu_load = load; + } + +-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) +- /* +- * Busy processors will not participate in power savings +- * balance. +- */ +- if (idle == CPU_NOT_IDLE || +- !(sd->flags & SD_POWERSAVINGS_BALANCE)) +- goto group_next; ++ sgs->group_load += load; ++ sgs->sum_nr_running += rq->nr_running; ++ sgs->sum_weighted_load += weighted_cpuload(i); + +- /* +- * If the local group is idle or completely loaded +- * no need to do power savings balance at this domain +- */ +- if (local_group && (this_nr_running >= group_capacity || +- !this_nr_running)) +- power_savings_balance = 0; ++ sum_avg_load_per_task += cpu_avg_load_per_task(i); ++ } + +- /* +- * If a group is already running at full capacity or idle, +- * don't include that group in power savings calculations +- */ +- if (!power_savings_balance || sum_nr_running >= group_capacity +- || !sum_nr_running) +- goto group_next; ++ /* ++ * First idle cpu or the first cpu(busiest) in this sched group ++ * is eligible for doing load balancing at this and above ++ * domains. In the newly idle case, we will allow all the cpu's ++ * to do the newly idle load balance. ++ */ ++ if (idle != CPU_NEWLY_IDLE && local_group && ++ balance_cpu != this_cpu && balance) { ++ *balance = 0; ++ return; ++ } + +- /* +- * Calculate the group which has the least non-idle load. 
+- * This is the group from where we need to pick up the load +- * for saving power +- */ +- if ((sum_nr_running < min_nr_running) || +- (sum_nr_running == min_nr_running && +- cpumask_first(sched_group_cpus(group)) > +- cpumask_first(sched_group_cpus(group_min)))) { +- group_min = group; +- min_nr_running = sum_nr_running; +- min_load_per_task = sum_weighted_load / +- sum_nr_running; +- } ++ /* Adjust by relative CPU power of the group */ ++ sgs->avg_load = sg_div_cpu_power(group, ++ sgs->group_load * SCHED_LOAD_SCALE); + +- /* +- * Calculate the group which is almost near its +- * capacity but still has some space to pick up some load +- * from other group and save more power +- */ +- if (sum_nr_running <= group_capacity - 1) { +- if (sum_nr_running > leader_nr_running || +- (sum_nr_running == leader_nr_running && +- cpumask_first(sched_group_cpus(group)) < +- cpumask_first(sched_group_cpus(group_leader)))) { +- group_leader = group; +- leader_nr_running = sum_nr_running; +- } ++ ++ /* ++ * Consider the group unbalanced when the imbalance is larger ++ * than the average weight of two tasks. ++ * ++ * APZ: with cgroup the avg task weight can vary wildly and ++ * might not be a suitable number - should we keep a ++ * normalized nr_running number somewhere that negates ++ * the hierarchy? ++ */ ++ avg_load_per_task = sg_div_cpu_power(group, ++ sum_avg_load_per_task * SCHED_LOAD_SCALE); ++ ++ if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task) ++ sgs->group_imb = 1; ++ ++ sgs->group_capacity = group->__cpu_power / SCHED_LOAD_SCALE; ++ ++} ++ ++/** ++ * update_sd_lb_stats - Update sched_group's statistics for load balancing. ++ * @sd: sched_domain whose statistics are to be updated. ++ * @this_cpu: Cpu for which load balance is currently performed. ++ * @idle: Idle status of this_cpu ++ * @sd_idle: Idle status of the sched_domain containing group. ++ * @cpus: Set of cpus considered for load balancing. ++ * @balance: Should we balance. ++ * @sds: variable to hold the statistics for this sched_domain. 
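++ *
++ * Walks the circular list starting at sd->groups exactly once,
++ * computing a fresh sg_lb_stats for each group and folding it into
++ * @sds: the local group ends up in sds->this*, while the most loaded
++ * overloaded (or internally imbalanced) remote group ends up in
++ * sds->busiest*.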
++ */ ++static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, ++ enum cpu_idle_type idle, int *sd_idle, ++ const struct cpumask *cpus, int *balance, ++ struct sd_lb_stats *sds) ++{ ++ struct sched_group *group = sd->groups; ++ struct sg_lb_stats sgs; ++ int load_idx; ++ ++ init_sd_power_savings_stats(sd, sds, idle); ++ load_idx = get_sd_load_idx(sd, idle); ++ ++ do { ++ int local_group; ++ ++ local_group = cpumask_test_cpu(this_cpu, ++ sched_group_cpus(group)); ++ memset(&sgs, 0, sizeof(sgs)); ++ update_sg_lb_stats(group, this_cpu, idle, load_idx, sd_idle, ++ local_group, cpus, balance, &sgs); ++ ++ if (local_group && balance && !(*balance)) ++ return; ++ ++ sds->total_load += sgs.group_load; ++ sds->total_pwr += group->__cpu_power; ++ ++ if (local_group) { ++ sds->this_load = sgs.avg_load; ++ sds->this = group; ++ sds->this_nr_running = sgs.sum_nr_running; ++ sds->this_load_per_task = sgs.sum_weighted_load; ++ } else if (sgs.avg_load > sds->max_load && ++ (sgs.sum_nr_running > sgs.group_capacity || ++ sgs.group_imb)) { ++ sds->max_load = sgs.avg_load; ++ sds->busiest = group; ++ sds->busiest_nr_running = sgs.sum_nr_running; ++ sds->busiest_load_per_task = sgs.sum_weighted_load; ++ sds->group_imb = sgs.group_imb; + } +-group_next: +-#endif ++ ++ update_sd_power_savings_stats(group, sds, local_group, &sgs); + group = group->next; + } while (group != sd->groups); + +- if (!busiest || this_load >= max_load || busiest_nr_running == 0) +- goto out_balanced; +- +- avg_load = (SCHED_LOAD_SCALE * total_load) / total_pwr; ++} + +- if (this_load >= avg_load || +- 100*max_load <= sd->imbalance_pct*this_load) +- goto out_balanced; ++/** ++ * fix_small_imbalance - Calculate the minor imbalance that exists ++ * amongst the groups of a sched_domain, during ++ * load balancing. ++ * @sds: Statistics of the sched_domain whose imbalance is to be calculated. ++ * @this_cpu: The cpu at whose sched_domain we're performing load-balance. ++ * @imbalance: Variable to store the imbalance. ++ */ ++static inline void fix_small_imbalance(struct sd_lb_stats *sds, ++ int this_cpu, unsigned long *imbalance) ++{ ++ unsigned long tmp, pwr_now = 0, pwr_move = 0; ++ unsigned int imbn = 2; ++ ++ if (sds->this_nr_running) { ++ sds->this_load_per_task /= sds->this_nr_running; ++ if (sds->busiest_load_per_task > ++ sds->this_load_per_task) ++ imbn = 1; ++ } else ++ sds->this_load_per_task = ++ cpu_avg_load_per_task(this_cpu); + +- busiest_load_per_task /= busiest_nr_running; +- if (group_imb) +- busiest_load_per_task = min(busiest_load_per_task, avg_load); ++ if (sds->max_load - sds->this_load + sds->busiest_load_per_task >= ++ sds->busiest_load_per_task * imbn) { ++ *imbalance = sds->busiest_load_per_task; ++ return; ++ } + + /* +- * We're trying to get all the cpus to the average_load, so we don't +- * want to push ourselves above the average load, nor do we wish to +- * reduce the max loaded cpu below the average load, as either of these +- * actions would just result in more rebalancing later, and ping-pong +- * tasks around. Thus we look for the minimum possible imbalance. +- * Negative imbalances (*we* are more loaded than anyone else) will +- * be counted as no imbalance for these purposes -- we can't fix that +- * by pulling tasks to us. Be careful of negative numbers as they'll +- * appear as very large values with unsigned longs. 
+- */ +- if (max_load <= busiest_load_per_task) +- goto out_balanced; ++ * OK, we don't have enough imbalance to justify moving tasks, ++ * however we may be able to increase total CPU power used by ++ * moving them. ++ */ ++ ++ pwr_now += sds->busiest->__cpu_power * ++ min(sds->busiest_load_per_task, sds->max_load); ++ pwr_now += sds->this->__cpu_power * ++ min(sds->this_load_per_task, sds->this_load); ++ pwr_now /= SCHED_LOAD_SCALE; ++ ++ /* Amount of load we'd subtract */ ++ tmp = sg_div_cpu_power(sds->busiest, ++ sds->busiest_load_per_task * SCHED_LOAD_SCALE); ++ if (sds->max_load > tmp) ++ pwr_move += sds->busiest->__cpu_power * ++ min(sds->busiest_load_per_task, sds->max_load - tmp); ++ ++ /* Amount of load we'd add */ ++ if (sds->max_load * sds->busiest->__cpu_power < ++ sds->busiest_load_per_task * SCHED_LOAD_SCALE) ++ tmp = sg_div_cpu_power(sds->this, ++ sds->max_load * sds->busiest->__cpu_power); ++ else ++ tmp = sg_div_cpu_power(sds->this, ++ sds->busiest_load_per_task * SCHED_LOAD_SCALE); ++ pwr_move += sds->this->__cpu_power * ++ min(sds->this_load_per_task, sds->this_load + tmp); ++ pwr_move /= SCHED_LOAD_SCALE; ++ ++ /* Move if we gain throughput */ ++ if (pwr_move > pwr_now) ++ *imbalance = sds->busiest_load_per_task; ++} + ++/** ++ * calculate_imbalance - Calculate the amount of imbalance present within the ++ * groups of a given sched_domain during load balance. ++ * @sds: statistics of the sched_domain whose imbalance is to be calculated. ++ * @this_cpu: Cpu for which currently load balance is being performed. ++ * @imbalance: The variable to store the imbalance. ++ */ ++static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, ++ unsigned long *imbalance) ++{ ++ unsigned long max_pull; + /* + * In the presence of smp nice balancing, certain scenarios can have + * max load less than avg load(as we skip the groups at or below + * its cpu_power, while calculating max_load..) 
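++ *
++ * In that case *imbalance is reset to zero and fix_small_imbalance()
++ * decides whether moving a single busiest_load_per_task worth of
++ * load would still be a net throughput win.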
+ */ +- if (max_load < avg_load) { ++ if (sds->max_load < sds->avg_load) { + *imbalance = 0; +- goto small_imbalance; ++ return fix_small_imbalance(sds, this_cpu, imbalance); + } + + /* Don't want to pull so many tasks that a group would go idle */ +- max_pull = min(max_load - avg_load, max_load - busiest_load_per_task); ++ max_pull = min(sds->max_load - sds->avg_load, ++ sds->max_load - sds->busiest_load_per_task); + + /* How much load to actually move to equalise the imbalance */ +- *imbalance = min(max_pull * busiest->__cpu_power, +- (avg_load - this_load) * this->__cpu_power) ++ *imbalance = min(max_pull * sds->busiest->__cpu_power, ++ (sds->avg_load - sds->this_load) * sds->this->__cpu_power) + / SCHED_LOAD_SCALE; + + /* +@@ -3341,78 +3978,110 @@ group_next: + * a think about bumping its value to force at least one task to be + * moved + */ +- if (*imbalance < busiest_load_per_task) { +- unsigned long tmp, pwr_now, pwr_move; +- unsigned int imbn; +- +-small_imbalance: +- pwr_move = pwr_now = 0; +- imbn = 2; +- if (this_nr_running) { +- this_load_per_task /= this_nr_running; +- if (busiest_load_per_task > this_load_per_task) +- imbn = 1; +- } else +- this_load_per_task = cpu_avg_load_per_task(this_cpu); ++ if (*imbalance < sds->busiest_load_per_task) ++ return fix_small_imbalance(sds, this_cpu, imbalance); + +- if (max_load - this_load + busiest_load_per_task >= +- busiest_load_per_task * imbn) { +- *imbalance = busiest_load_per_task; +- return busiest; +- } ++} ++/******* find_busiest_group() helpers end here *********************/ + +- /* +- * OK, we don't have enough imbalance to justify moving tasks, +- * however we may be able to increase total CPU power used by +- * moving them. +- */ +- +- pwr_now += busiest->__cpu_power * +- min(busiest_load_per_task, max_load); +- pwr_now += this->__cpu_power * +- min(this_load_per_task, this_load); +- pwr_now /= SCHED_LOAD_SCALE; +- +- /* Amount of load we'd subtract */ +- tmp = sg_div_cpu_power(busiest, +- busiest_load_per_task * SCHED_LOAD_SCALE); +- if (max_load > tmp) +- pwr_move += busiest->__cpu_power * +- min(busiest_load_per_task, max_load - tmp); +- +- /* Amount of load we'd add */ +- if (max_load * busiest->__cpu_power < +- busiest_load_per_task * SCHED_LOAD_SCALE) +- tmp = sg_div_cpu_power(this, +- max_load * busiest->__cpu_power); +- else +- tmp = sg_div_cpu_power(this, +- busiest_load_per_task * SCHED_LOAD_SCALE); +- pwr_move += this->__cpu_power * +- min(this_load_per_task, this_load + tmp); +- pwr_move /= SCHED_LOAD_SCALE; +- +- /* Move if we gain throughput */ +- if (pwr_move > pwr_now) +- *imbalance = busiest_load_per_task; +- } ++/** ++ * find_busiest_group - Returns the busiest group within the sched_domain ++ * if there is an imbalance. If there isn't an imbalance, and ++ * the user has opted for power-savings, it returns a group whose ++ * CPUs can be put to idle by rebalancing those tasks elsewhere, if ++ * such a group exists. ++ * ++ * Also calculates the amount of weighted load which should be moved ++ * to restore balance. ++ * ++ * @sd: The sched_domain whose busiest group is to be returned. ++ * @this_cpu: The cpu for which load balancing is currently being performed. ++ * @imbalance: Variable which stores amount of weighted load which should ++ * be moved to restore balance/put a group to idle. ++ * @idle: The idle status of this_cpu. ++ * @sd_idle: The idleness of sd ++ * @cpus: The set of CPUs under consideration for load-balancing. 
++ * @balance: Pointer to a variable indicating if this_cpu ++ * is the appropriate cpu to perform load balancing at this_level. ++ * ++ * Returns: - the busiest group if imbalance exists. ++ * - If no imbalance and user has opted for power-savings balance, ++ * return the least loaded group whose CPUs can be ++ * put to idle by rebalancing its tasks onto our group. ++ */ ++static struct sched_group * ++find_busiest_group(struct sched_domain *sd, int this_cpu, ++ unsigned long *imbalance, enum cpu_idle_type idle, ++ int *sd_idle, const struct cpumask *cpus, int *balance) ++{ ++ struct sd_lb_stats sds; + +- return busiest; ++ memset(&sds, 0, sizeof(sds)); + +-out_balanced: +-#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT) +- if (idle == CPU_NOT_IDLE || !(sd->flags & SD_POWERSAVINGS_BALANCE)) ++ /* ++ * Compute the various statistics relavent for load balancing at ++ * this level. ++ */ ++ update_sd_lb_stats(sd, this_cpu, idle, sd_idle, cpus, ++ balance, &sds); ++ ++ /* Cases where imbalance does not exist from POV of this_cpu */ ++ /* 1) this_cpu is not the appropriate cpu to perform load balancing ++ * at this level. ++ * 2) There is no busy sibling group to pull from. ++ * 3) This group is the busiest group. ++ * 4) This group is more busy than the avg busieness at this ++ * sched_domain. ++ * 5) The imbalance is within the specified limit. ++ * 6) Any rebalance would lead to ping-pong ++ */ ++ if (balance && !(*balance)) + goto ret; + +- if (this == group_leader && group_leader != group_min) { +- *imbalance = min_load_per_task; +- if (sched_mc_power_savings >= POWERSAVINGS_BALANCE_WAKEUP) { +- cpu_rq(this_cpu)->rd->sched_mc_preferred_wakeup_cpu = +- cpumask_first(sched_group_cpus(group_leader)); +- } +- return group_min; +- } +-#endif ++ if (!sds.busiest || sds.busiest_nr_running == 0) ++ goto out_balanced; ++ ++ if (sds.this_load >= sds.max_load) ++ goto out_balanced; ++ ++ sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; ++ ++ if (sds.this_load >= sds.avg_load) ++ goto out_balanced; ++ ++ if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) ++ goto out_balanced; ++ ++ sds.busiest_load_per_task /= sds.busiest_nr_running; ++ if (sds.group_imb) ++ sds.busiest_load_per_task = ++ min(sds.busiest_load_per_task, sds.avg_load); ++ ++ /* ++ * We're trying to get all the cpus to the average_load, so we don't ++ * want to push ourselves above the average load, nor do we wish to ++ * reduce the max loaded cpu below the average load, as either of these ++ * actions would just result in more rebalancing later, and ping-pong ++ * tasks around. Thus we look for the minimum possible imbalance. ++ * Negative imbalances (*we* are more loaded than anyone else) will ++ * be counted as no imbalance for these purposes -- we can't fix that ++ * by pulling tasks to us. Be careful of negative numbers as they'll ++ * appear as very large values with unsigned longs. ++ */ ++ if (sds.max_load <= sds.busiest_load_per_task) ++ goto out_balanced; ++ ++ /* Looks like there is an imbalance. Compute it */ ++ calculate_imbalance(&sds, this_cpu, imbalance); ++ return sds.busiest; ++ ++out_balanced: ++ /* ++ * There is no obvious imbalance. But check if we can do some balancing ++ * to save power. 
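++ *
++ * check_power_save_busiest_group() only reports a group when the
++ * local group is the designated group_leader and that leader is not
++ * itself group_min; when it does, the group handed back is group_min,
++ * whose CPUs can then be idled.  Otherwise *imbalance stays zero and
++ * NULL is returned below.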
++ */ ++ if (check_power_save_busiest_group(&sds, this_cpu, imbalance)) ++ return sds.busiest; + ret: + *imbalance = 0; + return NULL; +@@ -3456,19 +4125,23 @@ find_busiest_queue(struct sched_group *g + */ + #define MAX_PINNED_INTERVAL 512 + ++/* Working cpumask for load_balance and load_balance_newidle. */ ++static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask); ++ + /* + * Check this_cpu to ensure it is balanced within domain. Attempt to move + * tasks if there is an imbalance. + */ + static int load_balance(int this_cpu, struct rq *this_rq, + struct sched_domain *sd, enum cpu_idle_type idle, +- int *balance, struct cpumask *cpus) ++ int *balance) + { + int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; + struct sched_group *group; + unsigned long imbalance; + struct rq *busiest; + unsigned long flags; ++ struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); + + cpumask_setall(cpus); + +@@ -3623,8 +4296,7 @@ out: + * this_rq is locked. + */ + static int +-load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd, +- struct cpumask *cpus) ++load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd) + { + struct sched_group *group; + struct rq *busiest = NULL; +@@ -3632,6 +4304,7 @@ load_balance_newidle(int this_cpu, struc + int ld_moved = 0; + int sd_idle = 0; + int all_pinned = 0; ++ struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask); + + cpumask_setall(cpus); + +@@ -3772,10 +4445,6 @@ static void idle_balance(int this_cpu, s + struct sched_domain *sd; + int pulled_task = 0; + unsigned long next_balance = jiffies + HZ; +- cpumask_var_t tmpmask; +- +- if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC)) +- return; + + for_each_domain(this_cpu, sd) { + unsigned long interval; +@@ -3786,7 +4455,7 @@ static void idle_balance(int this_cpu, s + if (sd->flags & SD_BALANCE_NEWIDLE) + /* If we've pulled tasks over stop searching: */ + pulled_task = load_balance_newidle(this_cpu, this_rq, +- sd, tmpmask); ++ sd); + + interval = msecs_to_jiffies(sd->balance_interval); + if (time_after(next_balance, sd->last_balance + interval)) +@@ -3801,7 +4470,6 @@ static void idle_balance(int this_cpu, s + */ + this_rq->next_balance = next_balance; + } +- free_cpumask_var(tmpmask); + } + + /* +@@ -3951,11 +4619,6 @@ static void rebalance_domains(int cpu, e + unsigned long next_balance = jiffies + 60*HZ; + int update_next_balance = 0; + int need_serialize; +- cpumask_var_t tmp; +- +- /* Fails alloc? Rebalancing probably not a priority right now. */ +- if (!alloc_cpumask_var(&tmp, GFP_ATOMIC)) +- return; + + for_each_domain(cpu, sd) { + if (!(sd->flags & SD_LOAD_BALANCE)) +@@ -3980,7 +4643,7 @@ static void rebalance_domains(int cpu, e + } + + if (time_after_eq(jiffies, sd->last_balance + interval)) { +- if (load_balance(cpu, rq, sd, idle, &balance, tmp)) { ++ if (load_balance(cpu, rq, sd, idle, &balance)) { + /* + * We've pulled tasks over so either we're no + * longer idle, or one of our SMT siblings is +@@ -4014,8 +4677,6 @@ out: + */ + if (likely(update_next_balance)) + rq->next_balance = next_balance; +- +- free_cpumask_var(tmp); + } + + /* +@@ -4025,7 +4686,7 @@ out: + */ + static void run_rebalance_domains(struct softirq_action *h) + { +- int this_cpu = smp_processor_id(); ++ int this_cpu = raw_smp_processor_id(); + struct rq *this_rq = cpu_rq(this_cpu); + enum cpu_idle_type idle = this_rq->idle_at_tick ? 
+ CPU_IDLE : CPU_NOT_IDLE; +@@ -4065,6 +4726,11 @@ static void run_rebalance_domains(struct + #endif + } + ++static inline int on_null_domain(int cpu) ++{ ++ return !rcu_dereference(cpu_rq(cpu)->sd); ++} ++ + /* + * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing. + * +@@ -4122,7 +4788,9 @@ static inline void trigger_load_balance( + cpumask_test_cpu(cpu, nohz.cpu_mask)) + return; + #endif +- if (time_after_eq(jiffies, rq->next_balance)) ++ /* Don't need to rebalance while attached to NULL domain */ ++ if (time_after_eq(jiffies, rq->next_balance) && ++ likely(!on_null_domain(cpu))) + raise_softirq(SCHED_SOFTIRQ); + } + +@@ -4161,6 +4829,29 @@ static u64 do_task_delta_exec(struct tas + return ns; + } + ++unsigned long long __task_delta_exec(struct task_struct *p, int update) ++{ ++ s64 delta_exec; ++ struct rq *rq; ++ ++ rq = task_rq(p); ++ WARN_ON_ONCE(!runqueue_is_locked()); ++ WARN_ON_ONCE(!task_current(rq, p)); ++ ++ if (update) ++ update_rq_clock(rq); ++ ++ delta_exec = rq->clock - p->se.exec_start; ++ ++ WARN_ON_ONCE(delta_exec < 0); ++ ++ return delta_exec; ++} ++ ++/* ++ * Return any ns on the sched_clock that have not yet been banked in ++ * @p in case that task is currently running. ++ */ + unsigned long long task_delta_exec(struct task_struct *p) + { + unsigned long flags; +@@ -4235,7 +4926,9 @@ void account_user_time(struct task_struc + + /* Add user time to cpustat. */ + tmp = cputime_to_cputime64(cputime); +- if (TASK_NICE(p) > 0) ++ if (rt_task(p)) ++ cpustat->user_rt = cputime64_add(cpustat->user_rt, tmp); ++ else if (TASK_NICE(p) > 0) + cpustat->nice = cputime64_add(cpustat->nice, tmp); + else + cpustat->user = cputime64_add(cpustat->user, tmp); +@@ -4293,10 +4986,12 @@ void account_system_time(struct task_str + + /* Add system time to cpustat. 
*/ + tmp = cputime_to_cputime64(cputime); +- if (hardirq_count() - hardirq_offset) ++ if (hardirq_count() - hardirq_offset || (p->flags & PF_HARDIRQ)) + cpustat->irq = cputime64_add(cpustat->irq, tmp); +- else if (softirq_count()) ++ else if (softirq_count() || (p->flags & PF_SOFTIRQ)) + cpustat->softirq = cputime64_add(cpustat->softirq, tmp); ++ else if (rt_task(p)) ++ cpustat->system_rt = cputime64_add(cpustat->system_rt, tmp); + else + cpustat->system = cputime64_add(cpustat->system, tmp); + +@@ -4449,10 +5144,14 @@ void scheduler_tick(void) + + sched_clock_tick(); + ++ BUG_ON(!irqs_disabled()); ++ + spin_lock(&rq->lock); + update_rq_clock(rq); + update_cpu_load(rq); +- curr->sched_class->task_tick(rq, curr, 0); ++ if (curr != rq->idle && curr->se.on_rq) ++ curr->sched_class->task_tick(rq, curr, 0); ++ perf_counter_task_tick(curr, cpu); + spin_unlock(&rq->lock); + + #ifdef CONFIG_SMP +@@ -4461,10 +5160,7 @@ void scheduler_tick(void) + #endif + } + +-#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ +- defined(CONFIG_PREEMPT_TRACER)) +- +-static inline unsigned long get_parent_ip(unsigned long addr) ++unsigned long notrace get_parent_ip(unsigned long addr) + { + if (in_lock_functions(addr)) { + addr = CALLER_ADDR2; +@@ -4474,6 +5170,9 @@ static inline unsigned long get_parent_i + return addr; + } + ++#if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \ ++ defined(CONFIG_PREEMPT_TRACER)) ++ + void __kprobes add_preempt_count(int val) + { + #ifdef CONFIG_DEBUG_PREEMPT +@@ -4527,8 +5226,8 @@ static noinline void __schedule_bug(stru + { + struct pt_regs *regs = get_irq_regs(); + +- printk(KERN_ERR "BUG: scheduling while atomic: %s/%d/0x%08x\n", +- prev->comm, prev->pid, preempt_count()); ++ printk(KERN_ERR "BUG: scheduling while atomic: %s/0x%08x/%d, CPU#%d\n", ++ prev->comm, preempt_count(), prev->pid, smp_processor_id()); + + debug_show_held_locks(prev); + print_modules(); +@@ -4546,12 +5245,14 @@ static noinline void __schedule_bug(stru + */ + static inline void schedule_debug(struct task_struct *prev) + { ++// WARN_ON(system_state == SYSTEM_BOOTING); ++ + /* + * Test if we are atomic. Since do_exit() needs to call into + * schedule() atomically, we ignore that path for now. + * Otherwise, whine if we are scheduling when we should not be. + */ +- if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) ++ if (unlikely(in_atomic() && !prev->exit_state)) + __schedule_bug(prev); + + profile_hit(SCHED_PROFILING, __builtin_return_address(0)); +@@ -4565,11 +5266,33 @@ static inline void schedule_debug(struct + #endif + } + ++static void put_prev_task(struct rq *rq, struct task_struct *prev) ++{ ++ if (prev->state == TASK_RUNNING) { ++ u64 runtime = prev->se.sum_exec_runtime; ++ ++ runtime -= prev->se.prev_sum_exec_runtime; ++ runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost); ++ ++ /* ++ * In order to avoid avg_overlap growing stale when we are ++ * indeed overlapping and hence not getting put to sleep, grow ++ * the avg_overlap on preemption. ++ * ++ * We use the average preemption runtime because that ++ * correlates to the amount of cache footprint a task can ++ * build up. 
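++ *
++ * In effect (a sketch, assuming update_avg() keeps the 1/8-weight
++ * running average defined elsewhere in this file):
++ *
++ *    prev->se.avg_overlap +=
++ *            ((s64)(runtime - prev->se.avg_overlap)) >> 3;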
++ */ ++ update_avg(&prev->se.avg_overlap, runtime); ++ } ++ prev->sched_class->put_prev_task(rq, prev); ++} ++ + /* + * Pick up the highest-prio task: + */ + static inline struct task_struct * +-pick_next_task(struct rq *rq, struct task_struct *prev) ++pick_next_task(struct rq *rq) + { + const struct sched_class *class; + struct task_struct *p; +@@ -4600,15 +5323,13 @@ pick_next_task(struct rq *rq, struct tas + /* + * schedule() is the main scheduler function. + */ +-asmlinkage void __sched schedule(void) ++asmlinkage void __sched __schedule(void) + { + struct task_struct *prev, *next; + unsigned long *switch_count; + struct rq *rq; + int cpu; + +-need_resched: +- preempt_disable(); + cpu = smp_processor_id(); + rq = cpu_rq(cpu); + rcu_qsctr_inc(cpu); +@@ -4616,10 +5337,11 @@ need_resched: + switch_count = &prev->nivcsw; + + release_kernel_lock(prev); +-need_resched_nonpreemptible: + + schedule_debug(prev); + ++ preempt_disable(); ++ + if (sched_feat(HRTICK)) + hrtick_clear(rq); + +@@ -4627,52 +5349,158 @@ need_resched_nonpreemptible: + update_rq_clock(rq); + clear_tsk_need_resched(prev); + +- if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { ++ if (!(prev->state & TASK_RUNNING_MUTEX) && prev->state && ++ !(preempt_count() & PREEMPT_ACTIVE)) { + if (unlikely(signal_pending_state(prev->state, prev))) + prev->state = TASK_RUNNING; +- else ++ else { ++ touch_softlockup_watchdog(); + deactivate_task(rq, prev, 1); ++ } + switch_count = &prev->nvcsw; + } + ++ if (preempt_count() & PREEMPT_ACTIVE) ++ sub_preempt_count(PREEMPT_ACTIVE); ++ + #ifdef CONFIG_SMP + if (prev->sched_class->pre_schedule) + prev->sched_class->pre_schedule(rq, prev); + #endif + +- if (unlikely(!rq->nr_running)) +- idle_balance(cpu, rq); +- +- prev->sched_class->put_prev_task(rq, prev); +- next = pick_next_task(rq, prev); ++ if (unlikely(!rq->nr_running)) ++ idle_balance(cpu, rq); ++ ++ put_prev_task(rq, prev); ++ next = pick_next_task(rq); ++ ++ if (likely(prev != next)) { ++ sched_info_switch(prev, next); ++ perf_counter_task_sched_out(prev, cpu); ++ ++ rq->nr_switches++; ++ rq->curr = next; ++ ++*switch_count; ++ ++ context_switch(rq, prev, next); /* unlocks the rq */ ++ /* ++ * the context switch might have flipped the stack from under ++ * us, hence refresh the local variables. ++ */ ++ cpu = smp_processor_id(); ++ rq = cpu_rq(cpu); ++ __preempt_enable_no_resched(); ++ } else { ++ __preempt_enable_no_resched(); ++ spin_unlock(&rq->lock); ++ } ++ ++ reacquire_kernel_lock(current); ++} ++ ++asmlinkage void __sched schedule(void) ++{ ++need_resched: ++ local_irq_disable(); ++ __schedule(); ++ local_irq_enable(); ++ ++ if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) ++ goto need_resched; ++} ++EXPORT_SYMBOL(schedule); ++ ++#if defined(CONFIG_SMP) && !defined(CONFIG_PREEMPT_RT) ++/* ++ * Look out! "owner" is an entirely speculative pointer ++ * access and not reliable. ++ */ ++int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner) ++{ ++ unsigned int cpu; ++ struct rq *rq; ++ ++ if (!sched_feat(OWNER_SPIN)) ++ return 0; ++ ++#ifdef CONFIG_DEBUG_PAGEALLOC ++ /* ++ * Need to access the cpu field knowing that ++ * DEBUG_PAGEALLOC could have unmapped it if ++ * the mutex owner just released it and exited. ++ */ ++ if (probe_kernel_address(&owner->cpu, cpu)) ++ goto out; ++#else ++ cpu = owner->cpu; ++#endif ++ ++ /* ++ * Even if the access succeeded (likely case), ++ * the cpu field may no longer be valid. 
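++ *
++ * A stale value is tolerable: the range and cpu_online() checks below
++ * only have to keep us off a bogus runqueue, and the spin loop
++ * re-checks lock->owner on every iteration anyway.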
++ */ ++ if (cpu >= nr_cpumask_bits) ++ goto out; + +- if (likely(prev != next)) { +- sched_info_switch(prev, next); ++ /* ++ * We need to validate that we can do a ++ * get_cpu() and that we have the percpu area. ++ */ ++ if (!cpu_online(cpu)) ++ goto out; + +- rq->nr_switches++; +- rq->curr = next; +- ++*switch_count; ++ rq = cpu_rq(cpu); + +- context_switch(rq, prev, next); /* unlocks the rq */ ++ for (;;) { + /* +- * the context switch might have flipped the stack from under +- * us, hence refresh the local variables. ++ * Owner changed, break to re-assess state. + */ +- cpu = smp_processor_id(); +- rq = cpu_rq(cpu); +- } else +- spin_unlock_irq(&rq->lock); ++ if (lock->owner != owner) ++ break; + +- if (unlikely(reacquire_kernel_lock(current) < 0)) +- goto need_resched_nonpreemptible; ++ /* ++ * Is that owner really running on that cpu? ++ */ ++ if (task_thread_info(rq->curr) != owner || need_resched()) ++ return 0; + +- preempt_enable_no_resched(); +- if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) +- goto need_resched; ++ cpu_relax(); ++ } ++out: ++ return 1; + } +-EXPORT_SYMBOL(schedule); ++#endif + + #ifdef CONFIG_PREEMPT ++ ++/* ++ * Global flag to turn preemption off on a CONFIG_PREEMPT kernel: ++ */ ++int kernel_preemption = 1; ++ ++static int __init preempt_setup (char *str) ++{ ++ if (!strncmp(str, "off", 3)) { ++ if (kernel_preemption) { ++ printk(KERN_INFO "turning off kernel preemption!\n"); ++ kernel_preemption = 0; ++ } ++ return 1; ++ } ++ if (!strncmp(str, "on", 2)) { ++ if (!kernel_preemption) { ++ printk(KERN_INFO "turning on kernel preemption!\n"); ++ kernel_preemption = 1; ++ } ++ return 1; ++ } ++ get_option(&str, &kernel_preemption); ++ ++ return 1; ++} ++ ++__setup("preempt=", preempt_setup); ++ + /* + * this is the entry point to schedule() from in-kernel preemption + * off of preempt_enable. Kernel preemptions off return from interrupt +@@ -4681,7 +5509,11 @@ EXPORT_SYMBOL(schedule); + asmlinkage void __sched preempt_schedule(void) + { + struct thread_info *ti = current_thread_info(); ++ struct task_struct *task = current; ++ int saved_lock_depth; + ++ if (!kernel_preemption) ++ return; + /* + * If there is a non-zero preempt_count or interrupts are disabled, + * we do not want to preempt the current task. Just return.. +@@ -4690,45 +5522,71 @@ asmlinkage void __sched preempt_schedule + return; + + do { ++ local_irq_disable(); + add_preempt_count(PREEMPT_ACTIVE); +- schedule(); +- sub_preempt_count(PREEMPT_ACTIVE); ++ ++ /* ++ * We keep the big kernel semaphore locked, but we ++ * clear ->lock_depth so that schedule() doesnt ++ * auto-release the semaphore: ++ */ ++ saved_lock_depth = task->lock_depth; ++ task->lock_depth = -1; ++ __schedule(); ++ task->lock_depth = saved_lock_depth; ++ local_irq_enable(); + + /* + * Check again in case we missed a preemption opportunity + * between schedule and now. + */ + barrier(); +- } while (unlikely(test_thread_flag(TIF_NEED_RESCHED))); ++ } while (need_resched()); + } + EXPORT_SYMBOL(preempt_schedule); + + /* +- * this is the entry point to schedule() from kernel preemption +- * off of irq context. +- * Note, that this is called and return with irqs disabled. This will +- * protect us against recursive calling from irq. ++ * this is is the entry point for the IRQ return path. Called with ++ * interrupts disabled. To avoid infinite irq-entry recursion problems ++ * with fast-paced IRQ sources we do all of this carefully to never ++ * enable interrupts again. 
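++ *
++ * Both the entry to and the exit from the loop below therefore run
++ * with interrupts disabled; __schedule() is entered with them off and
++ * they are switched off again immediately after it returns.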
+ */ + asmlinkage void __sched preempt_schedule_irq(void) + { + struct thread_info *ti = current_thread_info(); ++ struct task_struct *task = current; ++ int saved_lock_depth; + +- /* Catch callers which need to be fixed */ +- BUG_ON(ti->preempt_count || !irqs_disabled()); ++ if (!kernel_preemption) ++ return; ++ /* ++ * If there is a non-zero preempt_count then just return. ++ * (interrupts are disabled) ++ */ ++ if (unlikely(ti->preempt_count)) ++ return; + + do { ++ local_irq_disable(); + add_preempt_count(PREEMPT_ACTIVE); +- local_irq_enable(); +- schedule(); ++ ++ /* ++ * We keep the big kernel semaphore locked, but we ++ * clear ->lock_depth so that schedule() doesnt ++ * auto-release the semaphore: ++ */ ++ saved_lock_depth = task->lock_depth; ++ task->lock_depth = -1; ++ __schedule(); + local_irq_disable(); +- sub_preempt_count(PREEMPT_ACTIVE); ++ task->lock_depth = saved_lock_depth; + + /* + * Check again in case we missed a preemption opportunity + * between schedule and now. + */ + barrier(); +- } while (unlikely(test_thread_flag(TIF_NEED_RESCHED))); ++ } while (need_resched()); + } + + #endif /* CONFIG_PREEMPT */ +@@ -4736,7 +5594,7 @@ asmlinkage void __sched preempt_schedule + int default_wake_function(wait_queue_t *curr, unsigned mode, int sync, + void *key) + { +- return try_to_wake_up(curr->private, mode, sync); ++ return try_to_wake_up(curr->private, mode, sync, 0); + } + EXPORT_SYMBOL(default_wake_function); + +@@ -4776,7 +5634,7 @@ void __wake_up(wait_queue_head_t *q, uns + unsigned long flags; + + spin_lock_irqsave(&q->lock, flags); +- __wake_up_common(q, mode, nr_exclusive, 0, key); ++ __wake_up_common(q, mode, nr_exclusive, 1, key); + spin_unlock_irqrestore(&q->lock, flags); + } + EXPORT_SYMBOL(__wake_up); +@@ -4835,7 +5693,7 @@ void complete(struct completion *x) + + spin_lock_irqsave(&x->wait.lock, flags); + x->done++; +- __wake_up_common(&x->wait, TASK_NORMAL, 1, 0, NULL); ++ __wake_up_common(&x->wait, TASK_NORMAL, 1, 1, NULL); + spin_unlock_irqrestore(&x->wait.lock, flags); + } + EXPORT_SYMBOL(complete); +@@ -4852,7 +5710,7 @@ void complete_all(struct completion *x) + + spin_lock_irqsave(&x->wait.lock, flags); + x->done += UINT_MAX/2; +- __wake_up_common(&x->wait, TASK_NORMAL, 0, 0, NULL); ++ __wake_up_common(&x->wait, TASK_NORMAL, 0, 1, NULL); + spin_unlock_irqrestore(&x->wait.lock, flags); + } + EXPORT_SYMBOL(complete_all); +@@ -5066,19 +5924,19 @@ long __sched sleep_on_timeout(wait_queue + } + EXPORT_SYMBOL(sleep_on_timeout); + +-#ifdef CONFIG_RT_MUTEXES +- + /* +- * rt_mutex_setprio - set the current priority of a task ++ * task_setprio - set the current priority of a task + * @p: task + * @prio: prio value (kernel-internal form) + * + * This function changes the 'effective' priority of a task. It does + * not touch ->normal_prio like __setscheduler(). + * +- * Used by the rt_mutex code to implement priority inheritance logic. ++ * Used by the rt_mutex code to implement priority inheritance logic ++ * and by rcupreempt-boost to boost priorities of tasks sleeping ++ * with rcu locks. + */ +-void rt_mutex_setprio(struct task_struct *p, int prio) ++void task_setprio(struct task_struct *p, int prio) + { + unsigned long flags; + int oldprio, on_rq, running; +@@ -5088,6 +5946,25 @@ void rt_mutex_setprio(struct task_struct + BUG_ON(prio < 0 || prio > MAX_PRIO); + + rq = task_rq_lock(p, &flags); ++ ++ /* ++ * Idle task boosting is a nono in general. 
There is one ++ * exception, when NOHZ is active: ++ * ++ * The idle task calls get_next_timer_interrupt() and holds ++ * the timer wheel base->lock on the CPU and another CPU wants ++ * to access the timer (probably to cancel it). We can safely ++ * ignore the boosting request, as the idle CPU runs this code ++ * with interrupts disabled and will complete the lock ++ * protected section without being interrupted. So there is no ++ * real need to boost. ++ */ ++ if (unlikely(p == rq->idle)) { ++ WARN_ON(p != rq->curr); ++ WARN_ON(p->pi_blocked_on); ++ goto out_unlock; ++ } ++ + update_rq_clock(rq); + + oldprio = p->prio; +@@ -5105,6 +5982,8 @@ void rt_mutex_setprio(struct task_struct + + p->prio = prio; + ++ trace_sched_task_setprio(rq, p, oldprio); ++ + if (running) + p->sched_class->set_curr_task(rq); + if (on_rq) { +@@ -5112,11 +5991,11 @@ void rt_mutex_setprio(struct task_struct + + check_class_changed(rq, p, prev_class, oldprio, running); + } ++ ++out_unlock: + task_rq_unlock(rq, &flags); + } + +-#endif +- + void set_user_nice(struct task_struct *p, long nice) + { + int old_prio, delta, on_rq; +@@ -5202,7 +6081,7 @@ SYSCALL_DEFINE1(nice, int, increment) + if (increment > 40) + increment = 40; + +- nice = PRIO_TO_NICE(current->static_prio) + increment; ++ nice = TASK_NICE(current) + increment; + if (nice < -20) + nice = -20; + if (nice > 19) +@@ -5751,19 +6630,53 @@ SYSCALL_DEFINE0(sched_yield) + * Since we are going to call schedule() anyway, there's + * no need to preempt or enable interrupts: + */ +- __release(rq->lock); +- spin_release(&rq->lock.dep_map, 1, _THIS_IP_); +- _raw_spin_unlock(&rq->lock); +- preempt_enable_no_resched(); ++ spin_unlock_no_resched(&rq->lock); + +- schedule(); ++ __schedule(); ++ ++ local_irq_enable(); ++ preempt_check_resched(); + + return 0; + } + ++#if defined(CONFIG_DEBUG_SPINLOCK_SLEEP) || defined(CONFIG_DEBUG_PREEMPT) ++void __might_sleep(char *file, int line) ++{ ++#ifdef in_atomic ++ static unsigned long prev_jiffy; /* ratelimiting */ ++ ++ if ((!in_atomic() && !irqs_disabled()) || ++ system_state != SYSTEM_RUNNING || oops_in_progress) ++ return; ++ ++ if (debug_direct_keyboard && hardirq_count()) ++ return; ++ ++ if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) ++ return; ++ prev_jiffy = jiffies; ++ ++ printk(KERN_ERR ++ "BUG: sleeping function called from invalid context at %s:%d\n", ++ file, line); ++ printk(KERN_ERR ++ "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", ++ in_atomic(), irqs_disabled(), ++ current->pid, current->comm); ++ ++ debug_show_held_locks(current); ++ if (irqs_disabled()) ++ print_irqtrace_events(current); ++ dump_stack(); ++#endif ++} ++EXPORT_SYMBOL(__might_sleep); ++#endif ++ + static void __cond_resched(void) + { +-#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP ++#if defined(CONFIG_DEBUG_SPINLOCK_SLEEP) || defined(CONFIG_DEBUG_PREEMPT) + __might_sleep(__FILE__, __LINE__); + #endif + /* +@@ -5772,10 +6685,11 @@ static void __cond_resched(void) + * cond_resched() call. + */ + do { ++ local_irq_disable(); + add_preempt_count(PREEMPT_ACTIVE); +- schedule(); +- sub_preempt_count(PREEMPT_ACTIVE); ++ __schedule(); + } while (need_resched()); ++ local_irq_enable(); + } + + int __sched _cond_resched(void) +@@ -5797,13 +6711,13 @@ EXPORT_SYMBOL(_cond_resched); + * operations here to prevent schedule() from being called twice (once via + * spin_unlock(), once by hand). 
+ */ +-int cond_resched_lock(spinlock_t *lock) ++int __cond_resched_raw_spinlock(raw_spinlock_t *lock) + { + int resched = need_resched() && system_state == SYSTEM_RUNNING; + int ret = 0; + + if (spin_needbreak(lock) || resched) { +- spin_unlock(lock); ++ spin_unlock_no_resched(lock); + if (resched && need_resched()) + __cond_resched(); + else +@@ -5813,12 +6727,36 @@ int cond_resched_lock(spinlock_t *lock) + } + return ret; + } +-EXPORT_SYMBOL(cond_resched_lock); ++EXPORT_SYMBOL(__cond_resched_raw_spinlock); + +-int __sched cond_resched_softirq(void) ++#ifdef CONFIG_PREEMPT_RT ++ ++int __cond_resched_spinlock(spinlock_t *lock) + { +- BUG_ON(!in_softirq()); ++ int resched = need_resched() && system_state == SYSTEM_RUNNING; + ++ if (spin_needbreak(lock) || resched) { ++ spin_unlock_no_resched(lock); ++ __cond_resched(); ++ spin_lock(lock); ++ return 1; ++ } ++ return 0; ++} ++EXPORT_SYMBOL(__cond_resched_spinlock); ++ ++#endif ++ ++/* ++ * Voluntarily preempt a process context that has softirqs disabled: ++ */ ++int __sched cond_resched_softirq(void) ++{ ++#ifndef CONFIG_PREEMPT_SOFTIRQS ++ WARN_ON_ONCE(!in_softirq()); ++ if (!in_softirq()) ++ return 0; ++#endif + if (need_resched() && system_state == SYSTEM_RUNNING) { + local_bh_enable(); + __cond_resched(); +@@ -5829,17 +6767,102 @@ int __sched cond_resched_softirq(void) + } + EXPORT_SYMBOL(cond_resched_softirq); + ++/* ++ * Voluntarily preempt a softirq context (possible with softirq threading): ++ */ ++int __sched cond_resched_softirq_context(void) ++{ ++ WARN_ON_ONCE(!in_softirq()); ++ ++ if (softirq_need_resched() && system_state == SYSTEM_RUNNING) { ++ raw_local_irq_disable(); ++ _local_bh_enable(); ++ raw_local_irq_enable(); ++ __cond_resched(); ++ local_bh_disable(); ++ return 1; ++ } ++ return 0; ++} ++EXPORT_SYMBOL(cond_resched_softirq_context); ++ ++/* ++ * Preempt a hardirq context if necessary (possible with hardirq threading): ++ */ ++int cond_resched_hardirq_context(void) ++{ ++ WARN_ON_ONCE(!in_irq()); ++ WARN_ON_ONCE(!irqs_disabled()); ++ ++ if (hardirq_need_resched()) { ++#ifndef CONFIG_PREEMPT_RT ++ irq_exit(); ++#endif ++ local_irq_enable(); ++ __cond_resched(); ++#ifndef CONFIG_PREEMPT_RT ++ local_irq_disable(); ++ __irq_enter(); ++#endif ++ ++ return 1; ++ } ++ return 0; ++} ++EXPORT_SYMBOL(cond_resched_hardirq_context); ++ ++#ifdef CONFIG_PREEMPT_VOLUNTARY ++ ++int voluntary_preemption = 1; ++ ++EXPORT_SYMBOL(voluntary_preemption); ++ ++static int __init voluntary_preempt_setup (char *str) ++{ ++ if (!strncmp(str, "off", 3)) ++ voluntary_preemption = 0; ++ else ++ get_option(&str, &voluntary_preemption); ++ if (!voluntary_preemption) ++ printk("turning off voluntary preemption!\n"); ++ ++ return 1; ++} ++ ++__setup("voluntary-preempt=", voluntary_preempt_setup); ++ ++#endif ++ + /** + * yield - yield the current processor to other threads. + * + * This is a shortcut for kernel-space yielding - it marks the + * thread runnable and calls sys_sched_yield(). + */ +-void __sched yield(void) ++void __sched __yield(void) + { + set_current_state(TASK_RUNNING); + sys_sched_yield(); + } ++ ++void __sched yield(void) ++{ ++ static int once = 1; ++ ++ /* ++ * it's a bug to rely on yield() with RT priorities. We print ++ * the first occurance after bootup ... 
this will still give ++ * us an idea about the scope of the problem, without spamming ++ * the syslog: ++ */ ++ if (once && rt_task(current)) { ++ once = 0; ++ printk(KERN_ERR "BUG: %s:%d RT task yield()-ing!\n", ++ current->comm, current->pid); ++ dump_stack(); ++ } ++ __yield(); ++} + EXPORT_SYMBOL(yield); + + /* +@@ -5987,26 +7010,26 @@ void sched_show_task(struct task_struct + unsigned state; + + state = p->state ? __ffs(p->state) + 1 : 0; +- printk(KERN_INFO "%-13.13s %c", p->comm, +- state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?'); ++ printk("%-13.13s %c (%03lx) [%p]", p->comm, ++ state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?', ++ (unsigned long) p->state, p); + #if BITS_PER_LONG == 32 +- if (state == TASK_RUNNING) ++ if (0 && (state == TASK_RUNNING)) + printk(KERN_CONT " running "); + else + printk(KERN_CONT " %08lx ", thread_saved_pc(p)); + #else +- if (state == TASK_RUNNING) ++ if (0 && (state == TASK_RUNNING)) + printk(KERN_CONT " running task "); + else + printk(KERN_CONT " %016lx ", thread_saved_pc(p)); + #endif ++ if (task_curr(p)) ++ printk("[curr] "); ++ else if (p->se.on_rq) ++ printk("[on rq #%d] ", task_cpu(p)); + #ifdef CONFIG_DEBUG_STACK_USAGE +- { +- unsigned long *n = end_of_stack(p); +- while (!*n) +- n++; +- free = (unsigned long)n - (unsigned long)end_of_stack(p); +- } ++ free = stack_not_used(p); + #endif + printk(KERN_CONT "%5lu %5d %6d\n", free, + task_pid_nr(p), task_pid_nr(p->real_parent)); +@@ -6017,6 +7040,7 @@ void sched_show_task(struct task_struct + void show_state_filter(unsigned long state_filter) + { + struct task_struct *g, *p; ++ int do_unlock = 1; + + #if BITS_PER_LONG == 32 + printk(KERN_INFO +@@ -6025,7 +7049,16 @@ void show_state_filter(unsigned long sta + printk(KERN_INFO + " task PC stack pid father\n"); + #endif ++#ifdef CONFIG_PREEMPT_RT ++ if (!read_trylock(&tasklist_lock)) { ++ printk("hm, tasklist_lock write-locked.\n"); ++ printk("ignoring ...\n"); ++ do_unlock = 0; ++ } ++#else + read_lock(&tasklist_lock); ++#endif ++ + do_each_thread(g, p) { + /* + * reset the NMI-timeout, listing all files on a slow +@@ -6041,7 +7074,8 @@ void show_state_filter(unsigned long sta + #ifdef CONFIG_SCHED_DEBUG + sysrq_sched_debug_show(); + #endif +- read_unlock(&tasklist_lock); ++ if (do_unlock) ++ read_unlock(&tasklist_lock); + /* + * Only show locks if all tasks are dumped: + */ +@@ -6077,17 +7111,14 @@ void __cpuinit init_idle(struct task_str + __set_task_cpu(idle, cpu); + + rq->curr = rq->idle = idle; +-#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW) ++#if defined(CONFIG_SMP) + idle->oncpu = 1; + #endif + spin_unlock_irqrestore(&rq->lock, flags); + + /* Set the preempt count _outside_ the spinlocks! 
*/ +-#if defined(CONFIG_PREEMPT) +- task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0); +-#else + task_thread_info(idle)->preempt_count = 0; +-#endif ++ + /* + * The idle tasks have their own, simple scheduling class: + */ +@@ -6216,11 +7247,18 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr); + static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu) + { + struct rq *rq_dest, *rq_src; ++ unsigned long flags; + int ret = 0, on_rq; + + if (unlikely(!cpu_active(dest_cpu))) + return ret; + ++ /* ++ * PREEMPT_RT: this relies on write_lock_irq(&tasklist_lock) ++ * disabling interrupts - which on PREEMPT_RT does not do: ++ */ ++ local_irq_save(flags); ++ + rq_src = cpu_rq(src_cpu); + rq_dest = cpu_rq(dest_cpu); + +@@ -6245,6 +7283,8 @@ done: + ret = 1; + fail: + double_rq_unlock(rq_src, rq_dest); ++ local_irq_restore(flags); ++ + return ret; + } + +@@ -6442,7 +7482,11 @@ void idle_task_exit(void) + + if (mm != &init_mm) + switch_mm(mm, &init_mm, current); ++#ifdef CONFIG_PREEMPT_RT ++ mmdrop_delayed(mm); ++#else + mmdrop(mm); ++#endif + } + + /* called under rq->lock with disabled interrupts */ +@@ -6480,7 +7524,7 @@ static void migrate_dead_tasks(unsigned + if (!rq->nr_running) + break; + update_rq_clock(rq); +- next = pick_next_task(rq, rq->curr); ++ next = pick_next_task(rq); + if (!next) + break; + next->sched_class->put_prev_task(rq, next); +@@ -6488,6 +7532,14 @@ static void migrate_dead_tasks(unsigned + + } + } ++ ++/* ++ * remove the tasks which were accounted by rq from calc_load_tasks. ++ */ ++static void calc_global_load_remove(struct rq *rq) ++{ ++ atomic_long_sub(rq->calc_load_active, &calc_load_tasks); ++} + #endif /* CONFIG_HOTPLUG_CPU */ + + #if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL) +@@ -6722,6 +7774,8 @@ migration_call(struct notifier_block *nf + /* Update our root-domain */ + rq = cpu_rq(cpu); + spin_lock_irqsave(&rq->lock, flags); ++ rq->calc_load_update = calc_load_update; ++ rq->calc_load_active = 0; + if (rq->rd) { + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + +@@ -6761,7 +7815,7 @@ migration_call(struct notifier_block *nf + cpuset_unlock(); + migrate_nr_uninterruptible(rq); + BUG_ON(rq->nr_running != 0); +- ++ calc_global_load_remove(rq); + /* + * No need to migrate the tasks: it was best-effort if + * they didn't take sched_hotcpu_mutex. 
Just wake up +@@ -7311,7 +8365,7 @@ cpu_to_core_group(int cpu, const struct + { + int group; + +- cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map); ++ cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); + group = cpumask_first(mask); + if (sg) + *sg = &per_cpu(sched_group_core, group).sg; +@@ -7340,7 +8394,7 @@ cpu_to_phys_group(int cpu, const struct + cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map); + group = cpumask_first(mask); + #elif defined(CONFIG_SCHED_SMT) +- cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map); ++ cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map); + group = cpumask_first(mask); + #else + group = cpu; +@@ -7683,7 +8737,7 @@ static int __build_sched_domains(const s + SD_INIT(sd, SIBLING); + set_domain_attribute(sd, attr); + cpumask_and(sched_domain_span(sd), +- &per_cpu(cpu_sibling_map, i), cpu_map); ++ topology_thread_cpumask(i), cpu_map); + sd->parent = p; + p->child = sd; + cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask); +@@ -7694,7 +8748,7 @@ static int __build_sched_domains(const s + /* Set up CPU (sibling) groups */ + for_each_cpu(i, cpu_map) { + cpumask_and(this_sibling_map, +- &per_cpu(cpu_sibling_map, i), cpu_map); ++ topology_thread_cpumask(i), cpu_map); + if (i != cpumask_first(this_sibling_map)) + continue; + +@@ -8275,11 +9329,15 @@ static void init_rt_rq(struct rt_rq *rt_ + __set_bit(MAX_RT_PRIO, array->bitmap); + + #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED +- rt_rq->highest_prio = MAX_RT_PRIO; ++ rt_rq->highest_prio.curr = MAX_RT_PRIO; ++#ifdef CONFIG_SMP ++ rt_rq->highest_prio.next = MAX_RT_PRIO; ++#endif + #endif + #ifdef CONFIG_SMP + rt_rq->rt_nr_migratory = 0; + rt_rq->overloaded = 0; ++ plist_head_init(&rq->rt.pushable_tasks, &rq->lock); + #endif + + rt_rq->rt_time = 0; +@@ -8366,6 +9424,9 @@ void __init sched_init(void) + #ifdef CONFIG_USER_SCHED + alloc_size *= 2; + #endif ++#ifdef CONFIG_CPUMASK_OFFSTACK ++ alloc_size += num_possible_cpus() * cpumask_size(); ++#endif + /* + * As sched_init() is called before page_alloc is setup, + * we use alloc_bootmem(). +@@ -8403,6 +9464,12 @@ void __init sched_init(void) + ptr += nr_cpu_ids * sizeof(void **); + #endif /* CONFIG_USER_SCHED */ + #endif /* CONFIG_RT_GROUP_SCHED */ ++#ifdef CONFIG_CPUMASK_OFFSTACK ++ for_each_possible_cpu(i) { ++ per_cpu(load_balance_tmpmask, i) = (void *)ptr; ++ ptr += cpumask_size(); ++ } ++#endif /* CONFIG_CPUMASK_OFFSTACK */ + } + + #ifdef CONFIG_SMP +@@ -8438,6 +9505,8 @@ void __init sched_init(void) + rq = cpu_rq(i); + spin_lock_init(&rq->lock); + rq->nr_running = 0; ++ rq->calc_load_active = 0; ++ rq->calc_load_update = jiffies + LOAD_FREQ; + init_cfs_rq(&rq->cfs, rq); + init_rt_rq(&rq->rt, rq); + #ifdef CONFIG_FAIR_GROUP_SCHED +@@ -8538,6 +9607,9 @@ void __init sched_init(void) + atomic_inc(&init_mm.mm_count); + enter_lazy_tlb(&init_mm, current); + ++#ifdef CONFIG_PREEMPT_RT ++ printk("Real-Time Preemption Support (C) 2004-2007 Ingo Molnar\n"); ++#endif + /* + * Make us the idle thread. Technically, schedule() should not be + * called from this thread, however somewhere below it might be, +@@ -8545,6 +9617,9 @@ void __init sched_init(void) + * when this runqueue becomes "idle". 
+ */ + init_idle(current, smp_processor_id()); ++ ++ calc_load_update = jiffies + LOAD_FREQ; ++ + /* + * During early bootup we pretend to be a normal task: + */ +@@ -8562,36 +9637,6 @@ void __init sched_init(void) + scheduler_running = 1; + } + +-#ifdef CONFIG_DEBUG_SPINLOCK_SLEEP +-void __might_sleep(char *file, int line) +-{ +-#ifdef in_atomic +- static unsigned long prev_jiffy; /* ratelimiting */ +- +- if ((!in_atomic() && !irqs_disabled()) || +- system_state != SYSTEM_RUNNING || oops_in_progress) +- return; +- if (time_before(jiffies, prev_jiffy + HZ) && prev_jiffy) +- return; +- prev_jiffy = jiffies; +- +- printk(KERN_ERR +- "BUG: sleeping function called from invalid context at %s:%d\n", +- file, line); +- printk(KERN_ERR +- "in_atomic(): %d, irqs_disabled(): %d, pid: %d, name: %s\n", +- in_atomic(), irqs_disabled(), +- current->pid, current->comm); +- +- debug_show_held_locks(current); +- if (irqs_disabled()) +- print_irqtrace_events(current); +- dump_stack(); +-#endif +-} +-EXPORT_SYMBOL(__might_sleep); +-#endif +- + #ifdef CONFIG_MAGIC_SYSRQ + static void normalize_task(struct rq *rq, struct task_struct *p) + { +@@ -9547,7 +10592,7 @@ cpuacct_destroy(struct cgroup_subsys *ss + + static u64 cpuacct_cpuusage_read(struct cpuacct *ca, int cpu) + { +- u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu); ++ u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); + u64 data; + + #ifndef CONFIG_64BIT +@@ -9566,7 +10611,7 @@ static u64 cpuacct_cpuusage_read(struct + + static void cpuacct_cpuusage_write(struct cpuacct *ca, int cpu, u64 val) + { +- u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu); ++ u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); + + #ifndef CONFIG_64BIT + /* +@@ -9655,16 +10700,20 @@ static void cpuacct_charge(struct task_s + struct cpuacct *ca; + int cpu; + +- if (!cpuacct_subsys.active) ++ if (unlikely(!cpuacct_subsys.active)) + return; + + cpu = task_cpu(tsk); ++ ++ rcu_read_lock(); + ca = task_ca(tsk); + +- for (; ca; ca = ca->parent) { +- u64 *cpuusage = percpu_ptr(ca->cpuusage, cpu); ++ do { ++ u64 *cpuusage = per_cpu_ptr(ca->cpuusage, cpu); + *cpuusage += cputime; +- } ++ ca = ca->parent; ++ } while (ca); ++ rcu_read_unlock(); + } + + struct cgroup_subsys cpuacct_subsys = { +Index: linux-2.6-tip/kernel/sched_clock.c +=================================================================== +--- linux-2.6-tip.orig/kernel/sched_clock.c ++++ linux-2.6-tip/kernel/sched_clock.c +@@ -24,11 +24,12 @@ + * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat + * consistent between cpus (never more than 2 jiffies difference). + */ +-#include +-#include + #include +-#include ++#include + #include ++#include ++#include ++#include + + /* + * Scheduler clock - returns current time in nanosec units. +@@ -43,6 +44,7 @@ unsigned long long __attribute__((weak)) + static __read_mostly int sched_clock_running; + + #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK ++__read_mostly int sched_clock_stable; + + struct sched_clock_data { + /* +@@ -50,7 +52,7 @@ struct sched_clock_data { + * from within instrumentation code so we dont want to do any + * instrumentation ourselves. 
+ */ +- raw_spinlock_t lock; ++ __raw_spinlock_t lock; + + u64 tick_raw; + u64 tick_gtod; +@@ -77,7 +79,7 @@ void sched_clock_init(void) + for_each_possible_cpu(cpu) { + struct sched_clock_data *scd = cpu_sdc(cpu); + +- scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; ++ scd->lock = (__raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + scd->tick_raw = 0; + scd->tick_gtod = ktime_now; + scd->clock = ktime_now; +@@ -87,7 +89,7 @@ void sched_clock_init(void) + } + + /* +- * min,max except they take wrapping into account ++ * min, max except they take wrapping into account + */ + + static inline u64 wrap_min(u64 x, u64 y) +@@ -111,15 +113,13 @@ static u64 __update_sched_clock(struct s + s64 delta = now - scd->tick_raw; + u64 clock, min_clock, max_clock; + +- WARN_ON_ONCE(!irqs_disabled()); +- + if (unlikely(delta < 0)) + delta = 0; + + /* + * scd->clock = clamp(scd->tick_gtod + delta, +- * max(scd->tick_gtod, scd->clock), +- * scd->tick_gtod + TICK_NSEC); ++ * max(scd->tick_gtod, scd->clock), ++ * scd->tick_gtod + TICK_NSEC); + */ + + clock = scd->tick_gtod + delta; +@@ -148,8 +148,20 @@ static void lock_double_clock(struct sch + + u64 sched_clock_cpu(int cpu) + { +- struct sched_clock_data *scd = cpu_sdc(cpu); + u64 now, clock, this_clock, remote_clock; ++ struct sched_clock_data *scd; ++ ++ if (sched_clock_stable) ++ return sched_clock(); ++ ++ scd = cpu_sdc(cpu); ++ ++ /* ++ * Normally this is not called in NMI context - but if it is, ++ * trying to do any locking here is totally lethal. ++ */ ++ if (unlikely(in_nmi())) ++ return scd->clock; + + if (unlikely(!sched_clock_running)) + return 0ull; +@@ -195,14 +207,18 @@ u64 sched_clock_cpu(int cpu) + + void sched_clock_tick(void) + { +- struct sched_clock_data *scd = this_scd(); ++ struct sched_clock_data *scd; + u64 now, now_gtod; + ++ if (sched_clock_stable) ++ return; ++ + if (unlikely(!sched_clock_running)) + return; + + WARN_ON_ONCE(!irqs_disabled()); + ++ scd = this_scd(); + now_gtod = ktime_to_ns(ktime_get()); + now = sched_clock(); + +@@ -250,7 +266,7 @@ u64 sched_clock_cpu(int cpu) + return sched_clock(); + } + +-#endif ++#endif /* CONFIG_HAVE_UNSTABLE_SCHED_CLOCK */ + + unsigned long long cpu_clock(int cpu) + { +Index: linux-2.6-tip/kernel/sched_debug.c +=================================================================== +--- linux-2.6-tip.orig/kernel/sched_debug.c ++++ linux-2.6-tip/kernel/sched_debug.c +@@ -272,7 +272,6 @@ static void print_cpu(struct seq_file *m + P(nr_switches); + P(nr_load_updates); + P(nr_uninterruptible); +- SEQ_printf(m, " .%-30s: %lu\n", "jiffies", jiffies); + PN(next_balance); + P(curr->pid); + PN(clock); +@@ -281,15 +280,25 @@ static void print_cpu(struct seq_file *m + P(cpu_load[2]); + P(cpu_load[3]); + P(cpu_load[4]); ++#ifdef CONFIG_PREEMPT_RT ++ /* Print rt related rq stats */ ++ P(rt.rt_nr_running); ++ P(rt.rt_nr_uninterruptible); ++# ifdef CONFIG_SCHEDSTATS ++ P(rto_schedule); ++ P(rto_schedule_tail); ++ P(rto_wakeup); ++ P(rto_pulled); ++ P(rto_pushed); ++# endif ++#endif ++ + #undef P + #undef PN + + #ifdef CONFIG_SCHEDSTATS + #define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n); + +- P(yld_exp_empty); +- P(yld_act_empty); +- P(yld_both_empty); + P(yld_count); + + P(sched_switch); +@@ -314,7 +323,7 @@ static int sched_debug_show(struct seq_f + u64 now = ktime_to_ns(ktime_get()); + int cpu; + +- SEQ_printf(m, "Sched Debug Version: v0.08, %s %.*s\n", ++ SEQ_printf(m, "Sched Debug Version: v0.09, %s %.*s\n", + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + 
init_utsname()->version); +@@ -325,6 +334,7 @@ static int sched_debug_show(struct seq_f + SEQ_printf(m, " .%-40s: %Ld\n", #x, (long long)(x)) + #define PN(x) \ + SEQ_printf(m, " .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x)) ++ P(jiffies); + PN(sysctl_sched_latency); + PN(sysctl_sched_min_granularity); + PN(sysctl_sched_wakeup_granularity); +@@ -397,6 +407,7 @@ void proc_sched_show_task(struct task_st + PN(se.vruntime); + PN(se.sum_exec_runtime); + PN(se.avg_overlap); ++ PN(se.avg_wakeup); + + nr_switches = p->nvcsw + p->nivcsw; + +Index: linux-2.6-tip/kernel/sched_fair.c +=================================================================== +--- linux-2.6-tip.orig/kernel/sched_fair.c ++++ linux-2.6-tip/kernel/sched_fair.c +@@ -1314,16 +1314,63 @@ out: + } + #endif /* CONFIG_SMP */ + +-static unsigned long wakeup_gran(struct sched_entity *se) ++/* ++ * Adaptive granularity ++ * ++ * se->avg_wakeup gives the average time a task runs until it does a wakeup, ++ * with the limit of wakeup_gran -- when it never does a wakeup. ++ * ++ * So the smaller avg_wakeup is the faster we want this task to preempt, ++ * but we don't want to treat the preemptee unfairly and therefore allow it ++ * to run for at least the amount of time we'd like to run. ++ * ++ * NOTE: we use 2*avg_wakeup to increase the probability of actually doing one ++ * ++ * NOTE: we use *nr_running to scale with load, this nicely matches the ++ * degrading latency on load. ++ */ ++static unsigned long ++adaptive_gran(struct sched_entity *curr, struct sched_entity *se) ++{ ++ u64 this_run = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; ++ u64 expected_wakeup = 2*se->avg_wakeup * cfs_rq_of(se)->nr_running; ++ u64 gran = 0; ++ ++ if (this_run < expected_wakeup) ++ gran = expected_wakeup - this_run; ++ ++ return min_t(s64, gran, sysctl_sched_wakeup_granularity); ++} ++ ++static unsigned long ++wakeup_gran(struct sched_entity *curr, struct sched_entity *se) + { + unsigned long gran = sysctl_sched_wakeup_granularity; + ++ if (cfs_rq_of(curr)->curr && sched_feat(ADAPTIVE_GRAN)) ++ gran = adaptive_gran(curr, se); ++ + /* +- * More easily preempt - nice tasks, while not making it harder for +- * + nice tasks. ++ * Since its curr running now, convert the gran from real-time ++ * to virtual-time in his units. + */ +- if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD) +- gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se); ++ if (sched_feat(ASYM_GRAN)) { ++ /* ++ * By using 'se' instead of 'curr' we penalize light tasks, so ++ * they get preempted easier. That is, if 'se' < 'curr' then ++ * the resulting gran will be larger, therefore penalizing the ++ * lighter, if otoh 'se' > 'curr' then the resulting gran will ++ * be smaller, again penalizing the lighter task. ++ * ++ * This is especially important for buddies when the leftmost ++ * task is higher priority than the buddy. 
++ */ ++ if (unlikely(se->load.weight != NICE_0_LOAD)) ++ gran = calc_delta_fair(gran, se); ++ } else { ++ if (unlikely(curr->load.weight != NICE_0_LOAD)) ++ gran = calc_delta_fair(gran, curr); ++ } + + return gran; + } +@@ -1350,7 +1397,7 @@ wakeup_preempt_entity(struct sched_entit + if (vdiff <= 0) + return -1; + +- gran = wakeup_gran(curr); ++ gran = wakeup_gran(curr, se); + if (vdiff > gran) + return 1; + +Index: linux-2.6-tip/kernel/sched_features.h +=================================================================== +--- linux-2.6-tip.orig/kernel/sched_features.h ++++ linux-2.6-tip/kernel/sched_features.h +@@ -1,5 +1,6 @@ + SCHED_FEAT(NEW_FAIR_SLEEPERS, 1) +-SCHED_FEAT(NORMALIZED_SLEEPER, 1) ++SCHED_FEAT(NORMALIZED_SLEEPER, 0) ++SCHED_FEAT(ADAPTIVE_GRAN, 1) + SCHED_FEAT(WAKEUP_PREEMPT, 1) + SCHED_FEAT(START_DEBIT, 1) + SCHED_FEAT(AFFINE_WAKEUPS, 1) +@@ -13,3 +14,4 @@ SCHED_FEAT(LB_WAKEUP_UPDATE, 1) + SCHED_FEAT(ASYM_EFF_LOAD, 1) + SCHED_FEAT(WAKEUP_OVERLAP, 0) + SCHED_FEAT(LAST_BUDDY, 1) ++SCHED_FEAT(OWNER_SPIN, 1) +Index: linux-2.6-tip/kernel/sched_idletask.c +=================================================================== +--- linux-2.6-tip.orig/kernel/sched_idletask.c ++++ linux-2.6-tip/kernel/sched_idletask.c +@@ -22,7 +22,8 @@ static void check_preempt_curr_idle(stru + static struct task_struct *pick_next_task_idle(struct rq *rq) + { + schedstat_inc(rq, sched_goidle); +- ++ /* adjust the active tasks as we might go into a long sleep */ ++ calc_load_account_active(rq); + return rq->idle; + } + +Index: linux-2.6-tip/kernel/sched_rt.c +=================================================================== +--- linux-2.6-tip.orig/kernel/sched_rt.c ++++ linux-2.6-tip/kernel/sched_rt.c +@@ -3,6 +3,44 @@ + * policies) + */ + ++static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se) ++{ ++ return container_of(rt_se, struct task_struct, rt); ++} ++ ++#ifdef CONFIG_RT_GROUP_SCHED ++ ++#define rt_entity_is_task(rt_se) (!(rt_se)->my_q) ++ ++static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) ++{ ++ return rt_rq->rq; ++} ++ ++static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) ++{ ++ return rt_se->rt_rq; ++} ++ ++#else /* CONFIG_RT_GROUP_SCHED */ ++ ++#define rt_entity_is_task(rt_se) (1) ++ ++static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) ++{ ++ return container_of(rt_rq, struct rq, rt); ++} ++ ++static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) ++{ ++ struct task_struct *p = rt_task_of(rt_se); ++ struct rq *rq = task_rq(p); ++ ++ return &rq->rt; ++} ++ ++#endif /* CONFIG_RT_GROUP_SCHED */ ++ + #ifdef CONFIG_SMP + + static inline int rt_overloaded(struct rq *rq) +@@ -37,25 +75,79 @@ static inline void rt_clear_overload(str + cpumask_clear_cpu(rq->cpu, rq->rd->rto_mask); + } + +-static void update_rt_migration(struct rq *rq) ++static void update_rt_migration(struct rt_rq *rt_rq) + { +- if (rq->rt.rt_nr_migratory && (rq->rt.rt_nr_running > 1)) { +- if (!rq->rt.overloaded) { +- rt_set_overload(rq); +- rq->rt.overloaded = 1; ++ if (rt_rq->rt_nr_migratory > 1) { ++ if (!rt_rq->overloaded) { ++ rt_set_overload(rq_of_rt_rq(rt_rq)); ++ rt_rq->overloaded = 1; + } +- } else if (rq->rt.overloaded) { +- rt_clear_overload(rq); +- rq->rt.overloaded = 0; ++ } else if (rt_rq->overloaded) { ++ rt_clear_overload(rq_of_rt_rq(rt_rq)); ++ rt_rq->overloaded = 0; + } + } +-#endif /* CONFIG_SMP */ + +-static inline struct task_struct *rt_task_of(struct sched_rt_entity *rt_se) ++static void inc_rt_migration(struct sched_rt_entity *rt_se, 
struct rt_rq *rt_rq) + { +- return container_of(rt_se, struct task_struct, rt); ++ if (!rt_entity_is_task(rt_se)) ++ return; ++ ++ rt_rq = &rq_of_rt_rq(rt_rq)->rt; ++ ++ if (rt_se->nr_cpus_allowed > 1) ++ rt_rq->rt_nr_migratory++; ++ ++ update_rt_migration(rt_rq); ++} ++ ++static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) ++{ ++ if (!rt_entity_is_task(rt_se)) ++ return; ++ ++ rt_rq = &rq_of_rt_rq(rt_rq)->rt; ++ ++ if (rt_se->nr_cpus_allowed > 1) ++ rt_rq->rt_nr_migratory--; ++ ++ update_rt_migration(rt_rq); ++} ++ ++static void enqueue_pushable_task(struct rq *rq, struct task_struct *p) ++{ ++ plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); ++ plist_node_init(&p->pushable_tasks, p->prio); ++ plist_add(&p->pushable_tasks, &rq->rt.pushable_tasks); ++} ++ ++static void dequeue_pushable_task(struct rq *rq, struct task_struct *p) ++{ ++ plist_del(&p->pushable_tasks, &rq->rt.pushable_tasks); + } + ++#else ++ ++static inline void enqueue_pushable_task(struct rq *rq, struct task_struct *p) ++{ ++} ++ ++static inline void dequeue_pushable_task(struct rq *rq, struct task_struct *p) ++{ ++} ++ ++static inline ++void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) ++{ ++} ++ ++static inline ++void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) ++{ ++} ++ ++#endif /* CONFIG_SMP */ ++ + static inline int on_rt_rq(struct sched_rt_entity *rt_se) + { + return !list_empty(&rt_se->run_list); +@@ -79,16 +171,6 @@ static inline u64 sched_rt_period(struct + #define for_each_leaf_rt_rq(rt_rq, rq) \ + list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list) + +-static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) +-{ +- return rt_rq->rq; +-} +- +-static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) +-{ +- return rt_se->rt_rq; +-} +- + #define for_each_sched_rt_entity(rt_se) \ + for (; rt_se; rt_se = rt_se->parent) + +@@ -108,7 +190,7 @@ static void sched_rt_rq_enqueue(struct r + if (rt_rq->rt_nr_running) { + if (rt_se && !on_rt_rq(rt_se)) + enqueue_rt_entity(rt_se); +- if (rt_rq->highest_prio < curr->prio) ++ if (rt_rq->highest_prio.curr < curr->prio) + resched_task(curr); + } + } +@@ -176,19 +258,6 @@ static inline u64 sched_rt_period(struct + #define for_each_leaf_rt_rq(rt_rq, rq) \ + for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL) + +-static inline struct rq *rq_of_rt_rq(struct rt_rq *rt_rq) +-{ +- return container_of(rt_rq, struct rq, rt); +-} +- +-static inline struct rt_rq *rt_rq_of_se(struct sched_rt_entity *rt_se) +-{ +- struct task_struct *p = rt_task_of(rt_se); +- struct rq *rq = task_rq(p); +- +- return &rq->rt; +-} +- + #define for_each_sched_rt_entity(rt_se) \ + for (; rt_se; rt_se = NULL) + +@@ -473,7 +542,7 @@ static inline int rt_se_prio(struct sche + struct rt_rq *rt_rq = group_rt_rq(rt_se); + + if (rt_rq) +- return rt_rq->highest_prio; ++ return rt_rq->highest_prio.curr; + #endif + + return rt_task_of(rt_se)->prio; +@@ -547,91 +616,174 @@ static void update_curr_rt(struct rq *rq + } + } + +-static inline +-void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) ++#if defined CONFIG_SMP ++ ++static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu); ++ ++static inline int next_prio(struct rq *rq) + { +- WARN_ON(!rt_prio(rt_se_prio(rt_se))); +- rt_rq->rt_nr_running++; +-#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED +- if (rt_se_prio(rt_se) < rt_rq->highest_prio) { +-#ifdef CONFIG_SMP +- struct rq *rq = rq_of_rt_rq(rt_rq); +-#endif ++ struct 
task_struct *next = pick_next_highest_task_rt(rq, rq->cpu); ++ ++ if (next && rt_prio(next->prio)) ++ return next->prio; ++ else ++ return MAX_RT_PRIO; ++} ++ ++static void ++inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) ++{ ++ struct rq *rq = rq_of_rt_rq(rt_rq); ++ ++ if (prio < prev_prio) { ++ ++ /* ++ * If the new task is higher in priority than anything on the ++ * run-queue, we know that the previous high becomes our ++ * next-highest. ++ */ ++ rt_rq->highest_prio.next = prev_prio; + +- rt_rq->highest_prio = rt_se_prio(rt_se); +-#ifdef CONFIG_SMP + if (rq->online) +- cpupri_set(&rq->rd->cpupri, rq->cpu, +- rt_se_prio(rt_se)); +-#endif +- } +-#endif +-#ifdef CONFIG_SMP +- if (rt_se->nr_cpus_allowed > 1) { +- struct rq *rq = rq_of_rt_rq(rt_rq); ++ cpupri_set(&rq->rd->cpupri, rq->cpu, prio); + +- rq->rt.rt_nr_migratory++; +- } ++ } else if (prio == rt_rq->highest_prio.curr) ++ /* ++ * If the next task is equal in priority to the highest on ++ * the run-queue, then we implicitly know that the next highest ++ * task cannot be any lower than current ++ */ ++ rt_rq->highest_prio.next = prio; ++ else if (prio < rt_rq->highest_prio.next) ++ /* ++ * Otherwise, we need to recompute next-highest ++ */ ++ rt_rq->highest_prio.next = next_prio(rq); ++} + +- update_rt_migration(rq_of_rt_rq(rt_rq)); +-#endif +-#ifdef CONFIG_RT_GROUP_SCHED +- if (rt_se_boosted(rt_se)) +- rt_rq->rt_nr_boosted++; ++static void ++dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) ++{ ++ struct rq *rq = rq_of_rt_rq(rt_rq); + +- if (rt_rq->tg) +- start_rt_bandwidth(&rt_rq->tg->rt_bandwidth); +-#else +- start_rt_bandwidth(&def_rt_bandwidth); +-#endif ++ if (rt_rq->rt_nr_running && (prio <= rt_rq->highest_prio.next)) ++ rt_rq->highest_prio.next = next_prio(rq); ++ ++ if (rq->online && rt_rq->highest_prio.curr != prev_prio) ++ cpupri_set(&rq->rd->cpupri, rq->cpu, rt_rq->highest_prio.curr); + } + ++#else /* CONFIG_SMP */ ++ + static inline +-void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) +-{ +-#ifdef CONFIG_SMP +- int highest_prio = rt_rq->highest_prio; +-#endif ++void inc_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {} ++static inline ++void dec_rt_prio_smp(struct rt_rq *rt_rq, int prio, int prev_prio) {} ++ ++#endif /* CONFIG_SMP */ + +- WARN_ON(!rt_prio(rt_se_prio(rt_se))); +- WARN_ON(!rt_rq->rt_nr_running); +- rt_rq->rt_nr_running--; + #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED ++static void ++inc_rt_prio(struct rt_rq *rt_rq, int prio) ++{ ++ int prev_prio = rt_rq->highest_prio.curr; ++ ++ if (prio < prev_prio) ++ rt_rq->highest_prio.curr = prio; ++ ++ inc_rt_prio_smp(rt_rq, prio, prev_prio); ++} ++ ++static void ++dec_rt_prio(struct rt_rq *rt_rq, int prio) ++{ ++ int prev_prio = rt_rq->highest_prio.curr; ++ + if (rt_rq->rt_nr_running) { +- struct rt_prio_array *array; + +- WARN_ON(rt_se_prio(rt_se) < rt_rq->highest_prio); +- if (rt_se_prio(rt_se) == rt_rq->highest_prio) { +- /* recalculate */ +- array = &rt_rq->active; +- rt_rq->highest_prio = ++ WARN_ON(prio < prev_prio); ++ ++ /* ++ * This may have been our highest task, and therefore ++ * we may have some recomputation to do ++ */ ++ if (prio == prev_prio) { ++ struct rt_prio_array *array = &rt_rq->active; ++ ++ rt_rq->highest_prio.curr = + sched_find_first_bit(array->bitmap); +- } /* otherwise leave rq->highest prio alone */ ++ } ++ + } else +- rt_rq->highest_prio = MAX_RT_PRIO; +-#endif +-#ifdef CONFIG_SMP +- if (rt_se->nr_cpus_allowed > 1) { +- struct rq *rq = rq_of_rt_rq(rt_rq); +- 
rq->rt.rt_nr_migratory--; +- } ++ rt_rq->highest_prio.curr = MAX_RT_PRIO; + +- if (rt_rq->highest_prio != highest_prio) { +- struct rq *rq = rq_of_rt_rq(rt_rq); ++ dec_rt_prio_smp(rt_rq, prio, prev_prio); ++} + +- if (rq->online) +- cpupri_set(&rq->rd->cpupri, rq->cpu, +- rt_rq->highest_prio); +- } ++#else ++ ++static inline void inc_rt_prio(struct rt_rq *rt_rq, int prio) {} ++static inline void dec_rt_prio(struct rt_rq *rt_rq, int prio) {} ++ ++#endif /* CONFIG_SMP || CONFIG_RT_GROUP_SCHED */ + +- update_rt_migration(rq_of_rt_rq(rt_rq)); +-#endif /* CONFIG_SMP */ + #ifdef CONFIG_RT_GROUP_SCHED ++ ++static void ++inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) ++{ ++ if (rt_se_boosted(rt_se)) ++ rt_rq->rt_nr_boosted++; ++ ++ if (rt_rq->tg) ++ start_rt_bandwidth(&rt_rq->tg->rt_bandwidth); ++} ++ ++static void ++dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) ++{ + if (rt_se_boosted(rt_se)) + rt_rq->rt_nr_boosted--; + + WARN_ON(!rt_rq->rt_nr_running && rt_rq->rt_nr_boosted); +-#endif ++} ++ ++#else /* CONFIG_RT_GROUP_SCHED */ ++ ++static void ++inc_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) ++{ ++ start_rt_bandwidth(&def_rt_bandwidth); ++} ++ ++static inline ++void dec_rt_group(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) {} ++ ++#endif /* CONFIG_RT_GROUP_SCHED */ ++ ++static inline ++void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) ++{ ++ int prio = rt_se_prio(rt_se); ++ ++ WARN_ON(!rt_prio(prio)); ++ rt_rq->rt_nr_running++; ++ ++ inc_rt_prio(rt_rq, prio); ++ inc_rt_migration(rt_se, rt_rq); ++ inc_rt_group(rt_se, rt_rq); ++} ++ ++static inline ++void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) ++{ ++ WARN_ON(!rt_prio(rt_se_prio(rt_se))); ++ WARN_ON(!rt_rq->rt_nr_running); ++ rt_rq->rt_nr_running--; ++ ++ dec_rt_prio(rt_rq, rt_se_prio(rt_se)); ++ dec_rt_migration(rt_se, rt_rq); ++ dec_rt_group(rt_se, rt_rq); + } + + static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) +@@ -706,6 +858,55 @@ static void dequeue_rt_entity(struct sch + } + } + ++static inline void incr_rt_nr_uninterruptible(struct task_struct *p, ++ struct rq *rq) ++{ ++ rq->rt.rt_nr_uninterruptible++; ++} ++ ++static inline void decr_rt_nr_uninterruptible(struct task_struct *p, ++ struct rq *rq) ++{ ++ rq->rt.rt_nr_uninterruptible--; ++} ++ ++unsigned long rt_nr_running(void) ++{ ++ unsigned long i, sum = 0; ++ ++ for_each_online_cpu(i) ++ sum += cpu_rq(i)->rt.rt_nr_running; ++ ++ return sum; ++} ++ ++unsigned long rt_nr_running_cpu(int cpu) ++{ ++ return cpu_rq(cpu)->rt.rt_nr_running; ++} ++ ++unsigned long rt_nr_uninterruptible(void) ++{ ++ unsigned long i, sum = 0; ++ ++ for_each_online_cpu(i) ++ sum += cpu_rq(i)->rt.rt_nr_uninterruptible; ++ ++ /* ++ * Since we read the counters lockless, it might be slightly ++ * inaccurate. 
Do not allow it to go below zero though: ++ */ ++ if (unlikely((long)sum < 0)) ++ sum = 0; ++ ++ return sum; ++} ++ ++unsigned long rt_nr_uninterruptible_cpu(int cpu) ++{ ++ return cpu_rq(cpu)->rt.rt_nr_uninterruptible; ++} ++ + /* + * Adding/removing a task to/from a priority array: + */ +@@ -718,6 +919,12 @@ static void enqueue_task_rt(struct rq *r + + enqueue_rt_entity(rt_se); + ++ if (p->state == TASK_UNINTERRUPTIBLE) ++ decr_rt_nr_uninterruptible(p, rq); ++ ++ if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1) ++ enqueue_pushable_task(rq, p); ++ + inc_cpu_load(rq, p->se.load.weight); + } + +@@ -726,8 +933,14 @@ static void dequeue_task_rt(struct rq *r + struct sched_rt_entity *rt_se = &p->rt; + + update_curr_rt(rq); ++ ++ if (p->state == TASK_UNINTERRUPTIBLE) ++ incr_rt_nr_uninterruptible(p, rq); ++ + dequeue_rt_entity(rt_se); + ++ dequeue_pushable_task(rq, p); ++ + dec_cpu_load(rq, p->se.load.weight); + } + +@@ -878,7 +1091,7 @@ static struct sched_rt_entity *pick_next + return next; + } + +-static struct task_struct *pick_next_task_rt(struct rq *rq) ++static struct task_struct *_pick_next_task_rt(struct rq *rq) + { + struct sched_rt_entity *rt_se; + struct task_struct *p; +@@ -900,6 +1113,18 @@ static struct task_struct *pick_next_tas + + p = rt_task_of(rt_se); + p->se.exec_start = rq->clock; ++ ++ return p; ++} ++ ++static struct task_struct *pick_next_task_rt(struct rq *rq) ++{ ++ struct task_struct *p = _pick_next_task_rt(rq); ++ ++ /* The running task is never eligible for pushing */ ++ if (p) ++ dequeue_pushable_task(rq, p); ++ + return p; + } + +@@ -907,6 +1132,13 @@ static void put_prev_task_rt(struct rq * + { + update_curr_rt(rq); + p->se.exec_start = 0; ++ ++ /* ++ * The previous task needs to be made eligible for pushing ++ * if it is still active ++ */ ++ if (p->se.on_rq && p->rt.nr_cpus_allowed > 1) ++ enqueue_pushable_task(rq, p); + } + + #ifdef CONFIG_SMP +@@ -960,12 +1192,13 @@ static struct task_struct *pick_next_hig + + static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); + +-static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) ++static inline int pick_optimal_cpu(int this_cpu, ++ const struct cpumask *mask) + { + int first; + + /* "this_cpu" is cheaper to preempt than a remote processor */ +- if ((this_cpu != -1) && cpu_isset(this_cpu, *mask)) ++ if ((this_cpu != -1) && cpumask_test_cpu(this_cpu, mask)) + return this_cpu; + + first = cpumask_first(mask); +@@ -981,6 +1214,7 @@ static int find_lowest_rq(struct task_st + struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); + int this_cpu = smp_processor_id(); + int cpu = task_cpu(task); ++ cpumask_var_t domain_mask; + + if (task->rt.nr_cpus_allowed == 1) + return -1; /* No other targets possible */ +@@ -1013,19 +1247,25 @@ static int find_lowest_rq(struct task_st + if (this_cpu == cpu) + this_cpu = -1; /* Skip this_cpu opt if the same */ + +- for_each_domain(cpu, sd) { +- if (sd->flags & SD_WAKE_AFFINE) { +- cpumask_t domain_mask; +- int best_cpu; +- +- cpumask_and(&domain_mask, sched_domain_span(sd), +- lowest_mask); +- +- best_cpu = pick_optimal_cpu(this_cpu, +- &domain_mask); +- if (best_cpu != -1) +- return best_cpu; ++ if (alloc_cpumask_var(&domain_mask, GFP_ATOMIC)) { ++ for_each_domain(cpu, sd) { ++ if (sd->flags & SD_WAKE_AFFINE) { ++ int best_cpu; ++ ++ cpumask_and(domain_mask, ++ sched_domain_span(sd), ++ lowest_mask); ++ ++ best_cpu = pick_optimal_cpu(this_cpu, ++ domain_mask); ++ ++ if (best_cpu != -1) { ++ free_cpumask_var(domain_mask); ++ return best_cpu; ++ } ++ } + } ++ 
free_cpumask_var(domain_mask); + } + + /* +@@ -1072,7 +1312,7 @@ static struct rq *find_lock_lowest_rq(st + } + + /* If this rq is still suitable use it. */ +- if (lowest_rq->rt.highest_prio > task->prio) ++ if (lowest_rq->rt.highest_prio.curr > task->prio) + break; + + /* try again */ +@@ -1083,6 +1323,31 @@ static struct rq *find_lock_lowest_rq(st + return lowest_rq; + } + ++static inline int has_pushable_tasks(struct rq *rq) ++{ ++ return !plist_head_empty(&rq->rt.pushable_tasks); ++} ++ ++static struct task_struct *pick_next_pushable_task(struct rq *rq) ++{ ++ struct task_struct *p; ++ ++ if (!has_pushable_tasks(rq)) ++ return NULL; ++ ++ p = plist_first_entry(&rq->rt.pushable_tasks, ++ struct task_struct, pushable_tasks); ++ ++ BUG_ON(rq->cpu != task_cpu(p)); ++ BUG_ON(task_current(rq, p)); ++ BUG_ON(p->rt.nr_cpus_allowed <= 1); ++ ++ BUG_ON(!p->se.on_rq); ++ BUG_ON(!rt_task(p)); ++ ++ return p; ++} ++ + /* + * If the current CPU has more than one RT task, see if the non + * running task can migrate over to a CPU that is running a task +@@ -1092,13 +1357,11 @@ static int push_rt_task(struct rq *rq) + { + struct task_struct *next_task; + struct rq *lowest_rq; +- int ret = 0; +- int paranoid = RT_MAX_TRIES; + + if (!rq->rt.overloaded) + return 0; + +- next_task = pick_next_highest_task_rt(rq, -1); ++ next_task = pick_next_pushable_task(rq); + if (!next_task) + return 0; + +@@ -1127,16 +1390,34 @@ static int push_rt_task(struct rq *rq) + struct task_struct *task; + /* + * find lock_lowest_rq releases rq->lock +- * so it is possible that next_task has changed. +- * If it has, then try again. ++ * so it is possible that next_task has migrated. ++ * ++ * We need to make sure that the task is still on the same ++ * run-queue and is also still the next task eligible for ++ * pushing. + */ +- task = pick_next_highest_task_rt(rq, -1); +- if (unlikely(task != next_task) && task && paranoid--) { +- put_task_struct(next_task); +- next_task = task; +- goto retry; ++ task = pick_next_pushable_task(rq); ++ if (task_cpu(next_task) == rq->cpu && task == next_task) { ++ /* ++ * If we get here, the task hasnt moved at all, but ++ * it has failed to push. We will not try again, ++ * since the other cpus will pull from us when they ++ * are ready. ++ */ ++ dequeue_pushable_task(rq, next_task); ++ goto out; + } +- goto out; ++ ++ if (!task) ++ /* No more tasks, just exit */ ++ goto out; ++ ++ /* ++ * Something has shifted, try again. ++ */ ++ put_task_struct(next_task); ++ next_task = task; ++ goto retry; + } + + deactivate_task(rq, next_task, 0); +@@ -1147,23 +1428,12 @@ static int push_rt_task(struct rq *rq) + + double_unlock_balance(rq, lowest_rq); + +- ret = 1; + out: + put_task_struct(next_task); + +- return ret; ++ return 1; + } + +-/* +- * TODO: Currently we just use the second highest prio task on +- * the queue, and stop when it can't migrate (or there's +- * no more RT tasks). There may be a case where a lower +- * priority RT task has a different affinity than the +- * higher RT task. In this case the lower RT task could +- * possibly be able to migrate where as the higher priority +- * RT task could not. We currently ignore this issue. +- * Enhancements are welcome! 
+- */ + static void push_rt_tasks(struct rq *rq) + { + /* push_rt_task will return true if it moved an RT */ +@@ -1174,33 +1444,35 @@ static void push_rt_tasks(struct rq *rq) + static int pull_rt_task(struct rq *this_rq) + { + int this_cpu = this_rq->cpu, ret = 0, cpu; +- struct task_struct *p, *next; ++ struct task_struct *p; + struct rq *src_rq; + + if (likely(!rt_overloaded(this_rq))) + return 0; + +- next = pick_next_task_rt(this_rq); +- + for_each_cpu(cpu, this_rq->rd->rto_mask) { + if (this_cpu == cpu) + continue; + + src_rq = cpu_rq(cpu); ++ ++ /* ++ * Don't bother taking the src_rq->lock if the next highest ++ * task is known to be lower-priority than our current task. ++ * This may look racy, but if this value is about to go ++ * logically higher, the src_rq will push this task away. ++ * And if its going logically lower, we do not care ++ */ ++ if (src_rq->rt.highest_prio.next >= ++ this_rq->rt.highest_prio.curr) ++ continue; ++ + /* + * We can potentially drop this_rq's lock in + * double_lock_balance, and another CPU could +- * steal our next task - hence we must cause +- * the caller to recalculate the next task +- * in that case: ++ * alter this_rq + */ +- if (double_lock_balance(this_rq, src_rq)) { +- struct task_struct *old_next = next; +- +- next = pick_next_task_rt(this_rq); +- if (next != old_next) +- ret = 1; +- } ++ double_lock_balance(this_rq, src_rq); + + /* + * Are there still pullable RT tasks? +@@ -1214,7 +1486,7 @@ static int pull_rt_task(struct rq *this_ + * Do we have an RT task that preempts + * the to-be-scheduled task? + */ +- if (p && (!next || (p->prio < next->prio))) { ++ if (p && (p->prio < this_rq->rt.highest_prio.curr)) { + WARN_ON(p == src_rq->curr); + WARN_ON(!p->se.on_rq); + +@@ -1224,12 +1496,9 @@ static int pull_rt_task(struct rq *this_ + * This is just that p is wakeing up and hasn't + * had a chance to schedule. We only pull + * p if it is lower in priority than the +- * current task on the run queue or +- * this_rq next task is lower in prio than +- * the current task on that rq. ++ * current task on the run queue + */ +- if (p->prio < src_rq->curr->prio || +- (next && next->prio < src_rq->curr->prio)) ++ if (p->prio < src_rq->curr->prio) + goto skip; + + ret = 1; +@@ -1242,13 +1511,7 @@ static int pull_rt_task(struct rq *this_ + * case there's an even higher prio task + * in another runqueue. (low likelyhood + * but possible) +- * +- * Update next so that we won't pick a task +- * on another cpu with a priority lower (or equal) +- * than the one we just picked. + */ +- next = p; +- + } + skip: + double_unlock_balance(this_rq, src_rq); +@@ -1260,24 +1523,29 @@ static int pull_rt_task(struct rq *this_ + static void pre_schedule_rt(struct rq *rq, struct task_struct *prev) + { + /* Try to pull RT tasks here if we lower this rq's prio */ +- if (unlikely(rt_task(prev)) && rq->rt.highest_prio > prev->prio) ++ if (unlikely(rt_task(prev)) && rq->rt.highest_prio.curr > prev->prio) { + pull_rt_task(rq); ++ schedstat_inc(rq, rto_schedule); ++ } ++} ++ ++/* ++ * assumes rq->lock is held ++ */ ++static int needs_post_schedule_rt(struct rq *rq) ++{ ++ return has_pushable_tasks(rq); + } + + static void post_schedule_rt(struct rq *rq) + { + /* +- * If we have more than one rt_task queued, then +- * see if we can push the other rt_tasks off to other CPUS. +- * Note we may release the rq lock, and since +- * the lock was owned by prev, we need to release it +- * first via finish_lock_switch and then reaquire it here. 
++ * This is only called if needs_post_schedule_rt() indicates that ++ * we need to push tasks away + */ +- if (unlikely(rq->rt.overloaded)) { +- spin_lock_irq(&rq->lock); +- push_rt_tasks(rq); +- spin_unlock_irq(&rq->lock); +- } ++ spin_lock_irq(&rq->lock); ++ push_rt_tasks(rq); ++ spin_unlock_irq(&rq->lock); + } + + /* +@@ -1288,7 +1556,8 @@ static void task_wake_up_rt(struct rq *r + { + if (!task_running(rq, p) && + !test_tsk_need_resched(rq->curr) && +- rq->rt.overloaded) ++ has_pushable_tasks(rq) && ++ p->rt.nr_cpus_allowed > 1) + push_rt_tasks(rq); + } + +@@ -1324,6 +1593,23 @@ static void set_cpus_allowed_rt(struct t + if (p->se.on_rq && (weight != p->rt.nr_cpus_allowed)) { + struct rq *rq = task_rq(p); + ++ if (!task_current(rq, p)) { ++ /* ++ * Make sure we dequeue this task from the pushable list ++ * before going further. It will either remain off of ++ * the list because we are no longer pushable, or it ++ * will be requeued. ++ */ ++ if (p->rt.nr_cpus_allowed > 1) ++ dequeue_pushable_task(rq, p); ++ ++ /* ++ * Requeue if our weight is changing and still > 1 ++ */ ++ if (weight > 1) ++ enqueue_pushable_task(rq, p); ++ } ++ + if ((p->rt.nr_cpus_allowed <= 1) && (weight > 1)) { + rq->rt.rt_nr_migratory++; + } else if ((p->rt.nr_cpus_allowed > 1) && (weight <= 1)) { +@@ -1331,7 +1617,7 @@ static void set_cpus_allowed_rt(struct t + rq->rt.rt_nr_migratory--; + } + +- update_rt_migration(rq); ++ update_rt_migration(&rq->rt); + } + + cpumask_copy(&p->cpus_allowed, new_mask); +@@ -1346,7 +1632,7 @@ static void rq_online_rt(struct rq *rq) + + __enable_runtime(rq); + +- cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio); ++ cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio.curr); + } + + /* Assumes rq->lock is held */ +@@ -1438,7 +1724,7 @@ static void prio_changed_rt(struct rq *r + * can release the rq lock and p could migrate. + * Only reschedule if p is still on the same runqueue. 
+ */ +- if (p->prio > rq->rt.highest_prio && rq->curr == p) ++ if (p->prio > rq->rt.highest_prio.curr && rq->curr == p) + resched_task(p); + #else + /* For UP simply resched on drop of prio */ +@@ -1509,6 +1795,9 @@ static void set_curr_task_rt(struct rq * + struct task_struct *p = rq->curr; + + p->se.exec_start = rq->clock; ++ ++ /* The running task is never eligible for pushing */ ++ dequeue_pushable_task(rq, p); + } + + static const struct sched_class rt_sched_class = { +@@ -1531,6 +1820,7 @@ static const struct sched_class rt_sched + .rq_online = rq_online_rt, + .rq_offline = rq_offline_rt, + .pre_schedule = pre_schedule_rt, ++ .needs_post_schedule = needs_post_schedule_rt, + .post_schedule = post_schedule_rt, + .task_wake_up = task_wake_up_rt, + .switched_from = switched_from_rt, +Index: linux-2.6-tip/kernel/sched_stats.h +=================================================================== +--- linux-2.6-tip.orig/kernel/sched_stats.h ++++ linux-2.6-tip/kernel/sched_stats.h +@@ -4,7 +4,7 @@ + * bump this up when changing the output format or the meaning of an existing + * format, so that tools can adapt (or abort) + */ +-#define SCHEDSTAT_VERSION 14 ++#define SCHEDSTAT_VERSION 15 + + static int show_schedstat(struct seq_file *seq, void *v) + { +@@ -26,9 +26,8 @@ static int show_schedstat(struct seq_fil + + /* runqueue-specific stats */ + seq_printf(seq, +- "cpu%d %u %u %u %u %u %u %u %u %u %llu %llu %lu", +- cpu, rq->yld_both_empty, +- rq->yld_act_empty, rq->yld_exp_empty, rq->yld_count, ++ "cpu%d %u %u %u %u %u %u %llu %llu %lu", ++ cpu, rq->yld_count, + rq->sched_switch, rq->sched_count, rq->sched_goidle, + rq->ttwu_count, rq->ttwu_local, + rq->rq_cpu_time, +Index: linux-2.6-tip/kernel/signal.c +=================================================================== +--- linux-2.6-tip.orig/kernel/signal.c ++++ linux-2.6-tip/kernel/signal.c +@@ -179,13 +179,46 @@ int next_signal(struct sigpending *pendi + return sig; + } + ++#ifdef __HAVE_ARCH_CMPXCHG ++static inline struct sigqueue *get_task_cache(struct task_struct *t) ++{ ++ struct sigqueue *q = t->sigqueue_cache; ++ ++ if (cmpxchg(&t->sigqueue_cache, q, NULL) != q) ++ return NULL; ++ ++ return q; ++} ++ ++static inline int put_task_cache(struct task_struct *t, struct sigqueue *q) ++{ ++ if (cmpxchg(&t->sigqueue_cache, NULL, q) == NULL) ++ return 0; ++ ++ return 1; ++} ++ ++#else ++ ++static inline struct sigqueue *get_task_cache(struct task_struct *t) ++{ ++ return NULL; ++} ++ ++static inline int put_task_cache(struct task_struct *t, struct sigqueue *q) ++{ ++ return 1; ++} ++ ++#endif ++ + /* + * allocate a new signal queue record + * - this may be called without locks if and only if t == current, otherwise an + * appopriate lock must be held to stop the target task from exiting + */ +-static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, +- int override_rlimit) ++static struct sigqueue *__sigqueue_do_alloc(struct task_struct *t, gfp_t flags, ++ int override_rlimit, int fromslab) + { + struct sigqueue *q = NULL; + struct user_struct *user; +@@ -200,8 +233,14 @@ static struct sigqueue *__sigqueue_alloc + atomic_inc(&user->sigpending); + if (override_rlimit || + atomic_read(&user->sigpending) <= +- t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) +- q = kmem_cache_alloc(sigqueue_cachep, flags); ++ t->signal->rlim[RLIMIT_SIGPENDING].rlim_cur) { ++ ++ if (!fromslab) ++ q = get_task_cache(t); ++ if (!q) ++ q = kmem_cache_alloc(sigqueue_cachep, flags); ++ } ++ + if (unlikely(q == NULL)) { + 
atomic_dec(&user->sigpending); + free_uid(user); +@@ -214,6 +253,12 @@ static struct sigqueue *__sigqueue_alloc + return q; + } + ++static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, ++ int override_rlimit) ++{ ++ return __sigqueue_do_alloc(t, flags, override_rlimit, 0); ++} ++ + static void __sigqueue_free(struct sigqueue *q) + { + if (q->flags & SIGQUEUE_PREALLOC) +@@ -223,6 +268,21 @@ static void __sigqueue_free(struct sigqu + kmem_cache_free(sigqueue_cachep, q); + } + ++static void sigqueue_free_current(struct sigqueue *q) ++{ ++ struct user_struct *up; ++ ++ if (q->flags & SIGQUEUE_PREALLOC) ++ return; ++ ++ up = q->user; ++ if (rt_prio(current->normal_prio) && !put_task_cache(current, q)) { ++ atomic_dec(&up->sigpending); ++ free_uid(up); ++ } else ++ __sigqueue_free(q); ++} ++ + void flush_sigqueue(struct sigpending *queue) + { + struct sigqueue *q; +@@ -236,6 +296,21 @@ void flush_sigqueue(struct sigpending *q + } + + /* ++ * Called from __exit_signal. Flush tsk->pending and ++ * tsk->sigqueue_cache ++ */ ++void flush_task_sigqueue(struct task_struct *tsk) ++{ ++ struct sigqueue *q; ++ ++ flush_sigqueue(&tsk->pending); ++ ++ q = get_task_cache(tsk); ++ if (q) ++ kmem_cache_free(sigqueue_cachep, q); ++} ++ ++/* + * Flush all pending signals for a task. + */ + void flush_signals(struct task_struct *t) +@@ -378,7 +453,7 @@ static void collect_signal(int sig, stru + still_pending: + list_del_init(&first->list); + copy_siginfo(info, &first->info); +- __sigqueue_free(first); ++ sigqueue_free_current(first); + } else { + /* Ok, it wasn't in the queue. This must be + a fast-pathed signal or we must have been +@@ -423,6 +498,8 @@ int dequeue_signal(struct task_struct *t + { + int signr; + ++ WARN_ON_ONCE(tsk != current); ++ + /* We only dequeue private signals from ourselves, we don't let + * signalfd steal them + */ +@@ -505,6 +582,9 @@ void signal_wake_up(struct task_struct * + + set_tsk_thread_flag(t, TIF_SIGPENDING); + ++ if (unlikely(t == current)) ++ return; ++ + /* + * For SIGKILL, we want to wake it up in the stopped/traced/killable + * case. We don't check t->state here because there is a race with it +@@ -821,7 +901,9 @@ static int send_signal(int sig, struct s + + trace_sched_signal_send(sig, t); + ++#ifdef CONFIG_SMP + assert_spin_locked(&t->sighand->siglock); ++#endif + if (!prepare_signal(sig, t)) + return 0; + +@@ -1276,7 +1358,8 @@ struct sigqueue *sigqueue_alloc(void) + { + struct sigqueue *q; + +- if ((q = __sigqueue_alloc(current, GFP_KERNEL, 0))) ++ /* Preallocated sigqueue objects always from the slabcache ! */ ++ if ((q = __sigqueue_do_alloc(current, GFP_KERNEL, 0, 1))) + q->flags |= SIGQUEUE_PREALLOC; + return(q); + } +@@ -1575,15 +1658,9 @@ static void ptrace_stop(int exit_code, i + read_lock(&tasklist_lock); + if (may_ptrace_stop()) { + do_notify_parent_cldstop(current, CLD_TRAPPED); +- /* +- * Don't want to allow preemption here, because +- * sys_ptrace() needs this task to be inactive. +- * +- * XXX: implement read_unlock_no_resched(). 
+- */ +- preempt_disable(); ++ ++ current->flags &= ~PF_NOSCHED; + read_unlock(&tasklist_lock); +- preempt_enable_no_resched(); + schedule(); + } else { + /* +@@ -1652,6 +1729,7 @@ finish_stop(int stop_count) + } + + do { ++ current->flags &= ~PF_NOSCHED; + schedule(); + } while (try_to_freeze()); + /* +@@ -2243,24 +2321,17 @@ SYSCALL_DEFINE2(kill, pid_t, pid, int, s + return kill_something_info(sig, &info, pid); + } + +-static int do_tkill(pid_t tgid, pid_t pid, int sig) ++static int ++do_send_specific(pid_t tgid, pid_t pid, int sig, struct siginfo *info) + { +- int error; +- struct siginfo info; + struct task_struct *p; + unsigned long flags; +- +- error = -ESRCH; +- info.si_signo = sig; +- info.si_errno = 0; +- info.si_code = SI_TKILL; +- info.si_pid = task_tgid_vnr(current); +- info.si_uid = current_uid(); ++ int error = -ESRCH; + + rcu_read_lock(); + p = find_task_by_vpid(pid); + if (p && (tgid <= 0 || task_tgid_vnr(p) == tgid)) { +- error = check_kill_permission(sig, &info, p); ++ error = check_kill_permission(sig, info, p); + /* + * The null signal is a permissions and process existence + * probe. No signal is actually delivered. +@@ -2270,7 +2341,7 @@ static int do_tkill(pid_t tgid, pid_t pi + * signal is private anyway. + */ + if (!error && sig && lock_task_sighand(p, &flags)) { +- error = specific_send_sig_info(sig, &info, p); ++ error = specific_send_sig_info(sig, info, p); + unlock_task_sighand(p, &flags); + } + } +@@ -2279,6 +2350,19 @@ static int do_tkill(pid_t tgid, pid_t pi + return error; + } + ++static int do_tkill(pid_t tgid, pid_t pid, int sig) ++{ ++ struct siginfo info; ++ ++ info.si_signo = sig; ++ info.si_errno = 0; ++ info.si_code = SI_TKILL; ++ info.si_pid = task_tgid_vnr(current); ++ info.si_uid = current_uid(); ++ ++ return do_send_specific(tgid, pid, sig, &info); ++} ++ + /** + * sys_tgkill - send signal to one specific thread + * @tgid: the thread group ID of the thread +@@ -2328,6 +2412,32 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, + return kill_proc_info(sig, &info, pid); + } + ++long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info) ++{ ++ /* This is only valid for single tasks */ ++ if (pid <= 0 || tgid <= 0) ++ return -EINVAL; ++ ++ /* Not even root can pretend to send signals from the kernel. ++ Nor can they impersonate a kill(), which adds source info. 
*/ ++ if (info->si_code >= 0) ++ return -EPERM; ++ info->si_signo = sig; ++ ++ return do_send_specific(tgid, pid, sig, info); ++} ++ ++SYSCALL_DEFINE4(rt_tgsigqueueinfo, pid_t, tgid, pid_t, pid, int, sig, ++ siginfo_t __user *, uinfo) ++{ ++ siginfo_t info; ++ ++ if (copy_from_user(&info, uinfo, sizeof(siginfo_t))) ++ return -EFAULT; ++ ++ return do_rt_tgsigqueueinfo(tgid, pid, sig, &info); ++} ++ + int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact) + { + struct task_struct *t = current; +Index: linux-2.6-tip/kernel/smp.c +=================================================================== +--- linux-2.6-tip.orig/kernel/smp.c ++++ linux-2.6-tip/kernel/smp.c +@@ -2,40 +2,82 @@ + * Generic helpers for smp ipi calls + * + * (C) Jens Axboe 2008 +- * + */ +-#include +-#include +-#include + #include + #include ++#include ++#include ++#include ++#include + #include ++#include + + static DEFINE_PER_CPU(struct call_single_queue, call_single_queue); +-static LIST_HEAD(call_function_queue); +-__cacheline_aligned_in_smp DEFINE_SPINLOCK(call_function_lock); ++ ++static struct { ++ struct list_head queue; ++ raw_spinlock_t lock; ++} call_function __cacheline_aligned_in_smp = ++ { ++ .queue = LIST_HEAD_INIT(call_function.queue), ++ .lock = RAW_SPIN_LOCK_UNLOCKED(call_function.lock), ++ }; + + enum { +- CSD_FLAG_WAIT = 0x01, +- CSD_FLAG_ALLOC = 0x02, +- CSD_FLAG_LOCK = 0x04, ++ CSD_FLAG_LOCK = 0x01, + }; + + struct call_function_data { +- struct call_single_data csd; +- spinlock_t lock; +- unsigned int refs; +- struct rcu_head rcu_head; +- unsigned long cpumask_bits[]; ++ struct call_single_data csd; ++ raw_spinlock_t lock; ++ unsigned int refs; ++ cpumask_var_t cpumask; + }; + + struct call_single_queue { +- struct list_head list; +- spinlock_t lock; ++ struct list_head list; ++ raw_spinlock_t lock; ++}; ++ ++static DEFINE_PER_CPU(struct call_function_data, cfd_data) = { ++ .lock = RAW_SPIN_LOCK_UNLOCKED(cfd_data.lock), ++}; ++ ++static int ++hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu) ++{ ++ long cpu = (long)hcpu; ++ struct call_function_data *cfd = &per_cpu(cfd_data, cpu); ++ ++ switch (action) { ++ case CPU_UP_PREPARE: ++ case CPU_UP_PREPARE_FROZEN: ++ if (!alloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL, ++ cpu_to_node(cpu))) ++ return NOTIFY_BAD; ++ break; ++ ++#ifdef CONFIG_CPU_HOTPLUG ++ case CPU_UP_CANCELED: ++ case CPU_UP_CANCELED_FROZEN: ++ ++ case CPU_DEAD: ++ case CPU_DEAD_FROZEN: ++ free_cpumask_var(cfd->cpumask); ++ break; ++#endif ++ }; ++ ++ return NOTIFY_OK; ++} ++ ++static struct notifier_block __cpuinitdata hotplug_cfd_notifier = { ++ .notifier_call = hotplug_cfd, + }; + + static int __cpuinit init_call_single_data(void) + { ++ void *cpu = (void *)(long)smp_processor_id(); + int i; + + for_each_possible_cpu(i) { +@@ -44,29 +86,63 @@ static int __cpuinit init_call_single_da + spin_lock_init(&q->lock); + INIT_LIST_HEAD(&q->list); + } ++ ++ hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu); ++ register_cpu_notifier(&hotplug_cfd_notifier); ++ + return 0; + } + early_initcall(init_call_single_data); + +-static void csd_flag_wait(struct call_single_data *data) ++/* ++ * csd_lock/csd_unlock used to serialize access to per-cpu csd resources ++ * ++ * For non-synchronous ipi calls the csd can still be in use by the ++ * previous function call. For multi-cpu calls its even more interesting ++ * as we'll have to ensure no other cpu is observing our csd. 
++ */ ++static void csd_lock_wait(struct call_single_data *data) + { +- /* Wait for response */ +- do { +- if (!(data->flags & CSD_FLAG_WAIT)) +- break; ++ while (data->flags & CSD_FLAG_LOCK) + cpu_relax(); +- } while (1); ++} ++ ++static void csd_lock(struct call_single_data *data) ++{ ++ csd_lock_wait(data); ++ data->flags = CSD_FLAG_LOCK; ++ ++ /* ++ * prevent CPU from reordering the above assignment ++ * to ->flags with any subsequent assignments to other ++ * fields of the specified call_single_data structure: ++ */ ++ smp_mb(); ++} ++ ++static void csd_unlock(struct call_single_data *data) ++{ ++ WARN_ON(!(data->flags & CSD_FLAG_LOCK)); ++ ++ /* ++ * ensure we're all done before releasing data: ++ */ ++ smp_mb(); ++ ++ data->flags &= ~CSD_FLAG_LOCK; + } + + /* +- * Insert a previously allocated call_single_data element for execution +- * on the given CPU. data must already have ->func, ->info, and ->flags set. ++ * Insert a previously allocated call_single_data element ++ * for execution on the given CPU. data must already have ++ * ->func, ->info, and ->flags set. + */ +-static void generic_exec_single(int cpu, struct call_single_data *data) ++static ++void generic_exec_single(int cpu, struct call_single_data *data, int wait) + { + struct call_single_queue *dst = &per_cpu(call_single_queue, cpu); +- int wait = data->flags & CSD_FLAG_WAIT, ipi; + unsigned long flags; ++ int ipi; + + spin_lock_irqsave(&dst->lock, flags); + ipi = list_empty(&dst->list); +@@ -74,24 +150,21 @@ static void generic_exec_single(int cpu, + spin_unlock_irqrestore(&dst->lock, flags); + + /* +- * Make the list addition visible before sending the ipi. ++ * The list addition should be visible before sending the IPI ++ * handler locks the list to pull the entry off it because of ++ * normal cache coherency rules implied by spinlocks. ++ * ++ * If IPIs can go out of order to the cache coherency protocol ++ * in an architecture, sufficient synchronisation should be added ++ * to arch code to make it appear to obey cache coherency WRT ++ * locking and barrier primitives. Generic code isn't really ++ * equipped to do the right thing... + */ +- smp_mb(); +- + if (ipi) + arch_send_call_function_single_ipi(cpu); + + if (wait) +- csd_flag_wait(data); +-} +- +-static void rcu_free_call_data(struct rcu_head *head) +-{ +- struct call_function_data *data; +- +- data = container_of(head, struct call_function_data, rcu_head); +- +- kfree(data); ++ csd_lock_wait(data); + } + + /* +@@ -104,99 +177,83 @@ void generic_smp_call_function_interrupt + int cpu = get_cpu(); + + /* +- * It's ok to use list_for_each_rcu() here even though we may delete +- * 'pos', since list_del_rcu() doesn't clear ->next ++ * Ensure entry is visible on call_function_queue after we have ++ * entered the IPI. See comment in smp_call_function_many. ++ * If we don't have this, then we may miss an entry on the list ++ * and never get another IPI to process it. 
++ */ ++ smp_mb(); ++ ++ /* ++ * It's ok to use list_for_each_rcu() here even though we may ++ * delete 'pos', since list_del_rcu() doesn't clear ->next + */ +- rcu_read_lock(); +- list_for_each_entry_rcu(data, &call_function_queue, csd.list) { ++ list_for_each_entry_rcu(data, &call_function.queue, csd.list) { + int refs; + +- if (!cpumask_test_cpu(cpu, to_cpumask(data->cpumask_bits))) ++ spin_lock(&data->lock); ++ if (!cpumask_test_cpu(cpu, data->cpumask)) { ++ spin_unlock(&data->lock); + continue; ++ } ++ cpumask_clear_cpu(cpu, data->cpumask); ++ spin_unlock(&data->lock); + + data->csd.func(data->csd.info); + + spin_lock(&data->lock); +- cpumask_clear_cpu(cpu, to_cpumask(data->cpumask_bits)); + WARN_ON(data->refs == 0); +- data->refs--; +- refs = data->refs; ++ refs = --data->refs; ++ if (!refs) { ++ spin_lock(&call_function.lock); ++ list_del_rcu(&data->csd.list); ++ spin_unlock(&call_function.lock); ++ } + spin_unlock(&data->lock); + + if (refs) + continue; + +- spin_lock(&call_function_lock); +- list_del_rcu(&data->csd.list); +- spin_unlock(&call_function_lock); +- +- if (data->csd.flags & CSD_FLAG_WAIT) { +- /* +- * serialize stores to data with the flag clear +- * and wakeup +- */ +- smp_wmb(); +- data->csd.flags &= ~CSD_FLAG_WAIT; +- } +- if (data->csd.flags & CSD_FLAG_ALLOC) +- call_rcu(&data->rcu_head, rcu_free_call_data); ++ csd_unlock(&data->csd); + } +- rcu_read_unlock(); + + put_cpu(); + } + + /* +- * Invoked by arch to handle an IPI for call function single. Must be called +- * from the arch with interrupts disabled. ++ * Invoked by arch to handle an IPI for call function single. Must be ++ * called from the arch with interrupts disabled. + */ + void generic_smp_call_function_single_interrupt(void) + { + struct call_single_queue *q = &__get_cpu_var(call_single_queue); ++ unsigned int data_flags; + LIST_HEAD(list); + +- /* +- * Need to see other stores to list head for checking whether +- * list is empty without holding q->lock +- */ +- smp_read_barrier_depends(); +- while (!list_empty(&q->list)) { +- unsigned int data_flags; +- +- spin_lock(&q->lock); +- list_replace_init(&q->list, &list); +- spin_unlock(&q->lock); +- +- while (!list_empty(&list)) { +- struct call_single_data *data; +- +- data = list_entry(list.next, struct call_single_data, +- list); +- list_del(&data->list); +- +- /* +- * 'data' can be invalid after this call if +- * flags == 0 (when called through +- * generic_exec_single(), so save them away before +- * making the call. 
+- */ +- data_flags = data->flags; +- +- data->func(data->info); +- +- if (data_flags & CSD_FLAG_WAIT) { +- smp_wmb(); +- data->flags &= ~CSD_FLAG_WAIT; +- } else if (data_flags & CSD_FLAG_LOCK) { +- smp_wmb(); +- data->flags &= ~CSD_FLAG_LOCK; +- } else if (data_flags & CSD_FLAG_ALLOC) +- kfree(data); +- } ++ spin_lock(&q->lock); ++ list_replace_init(&q->list, &list); ++ spin_unlock(&q->lock); ++ ++ while (!list_empty(&list)) { ++ struct call_single_data *data; ++ ++ data = list_entry(list.next, struct call_single_data, list); ++ list_del(&data->list); ++ + /* +- * See comment on outer loop ++ * 'data' can be invalid after this call if flags == 0 ++ * (when called through generic_exec_single()), ++ * so save them away before making the call: + */ +- smp_read_barrier_depends(); ++ data_flags = data->flags; ++ ++ data->func(data->info); ++ ++ /* ++ * Unlocked CSDs are valid through generic_exec_single(): ++ */ ++ if (data_flags & CSD_FLAG_LOCK) ++ csd_unlock(data); + } + } + +@@ -215,65 +272,45 @@ static DEFINE_PER_CPU(struct call_single + int smp_call_function_single(int cpu, void (*func) (void *info), void *info, + int wait) + { +- struct call_single_data d; ++ struct call_single_data d = { ++ .flags = 0, ++ }; + unsigned long flags; +- /* prevent preemption and reschedule on another processor, +- as well as CPU removal */ +- int me = get_cpu(); ++ int this_cpu; + int err = 0; + ++ /* ++ * prevent preemption and reschedule on another processor, ++ * as well as CPU removal ++ */ ++ this_cpu = get_cpu(); ++ + /* Can deadlock when called with interrupts disabled */ +- WARN_ON(irqs_disabled()); ++ WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); + +- if (cpu == me) { ++ if (cpu == this_cpu) { + local_irq_save(flags); + func(info); + local_irq_restore(flags); +- } else if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { +- struct call_single_data *data; ++ } else { ++ if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) { ++ struct call_single_data *data = &d; ++ ++ if (!wait) ++ data = &__get_cpu_var(csd_data); ++ ++ csd_lock(data); + +- if (!wait) { +- /* +- * We are calling a function on a single CPU +- * and we are not going to wait for it to finish. +- * We first try to allocate the data, but if we +- * fail, we fall back to use a per cpu data to pass +- * the information to that CPU. Since all callers +- * of this code will use the same data, we must +- * synchronize the callers to prevent a new caller +- * from corrupting the data before the callee +- * can access it. +- * +- * The CSD_FLAG_LOCK is used to let us know when +- * the IPI handler is done with the data. +- * The first caller will set it, and the callee +- * will clear it. The next caller must wait for +- * it to clear before we set it again. This +- * will make sure the callee is done with the +- * data before a new caller will use it. 
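For callers, smp_call_function_single() keeps its signature through this rework; only the internal bookkeeping changes. A hedged usage sketch follows, assuming a 2.6.29-era tree; bump_counter, my_counter and bump_on_cpu are invented names, not part of the patch:

#include <linux/smp.h>
#include <asm/atomic.h>

static atomic_t my_counter = ATOMIC_INIT(0);

static void bump_counter(void *info)
{
    atomic_inc(&my_counter);    /* runs on the target CPU with IRQs off */
}

static int bump_on_cpu(int cpu)
{
    /* wait=1: returns only after bump_counter() ran on @cpu, or -ENXIO
     * if the CPU is not online */
    return smp_call_function_single(cpu, bump_counter, NULL, 1);
}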
+- */ +- data = kmalloc(sizeof(*data), GFP_ATOMIC); +- if (data) +- data->flags = CSD_FLAG_ALLOC; +- else { +- data = &per_cpu(csd_data, me); +- while (data->flags & CSD_FLAG_LOCK) +- cpu_relax(); +- data->flags = CSD_FLAG_LOCK; +- } ++ data->func = func; ++ data->info = info; ++ generic_exec_single(cpu, data, wait); + } else { +- data = &d; +- data->flags = CSD_FLAG_WAIT; ++ err = -ENXIO; /* CPU not online */ + } +- +- data->func = func; +- data->info = info; +- generic_exec_single(cpu, data); +- } else { +- err = -ENXIO; /* CPU not online */ + } + + put_cpu(); ++ + return err; + } + EXPORT_SYMBOL(smp_call_function_single); +@@ -283,23 +320,26 @@ EXPORT_SYMBOL(smp_call_function_single); + * @cpu: The CPU to run on. + * @data: Pre-allocated and setup data structure + * +- * Like smp_call_function_single(), but allow caller to pass in a pre-allocated +- * data structure. Useful for embedding @data inside other structures, for +- * instance. +- * ++ * Like smp_call_function_single(), but allow caller to pass in a ++ * pre-allocated data structure. Useful for embedding @data inside ++ * other structures, for instance. + */ +-void __smp_call_function_single(int cpu, struct call_single_data *data) ++void __smp_call_function_single(int cpu, struct call_single_data *data, ++ int wait) + { ++ csd_lock(data); ++ + /* Can deadlock when called with interrupts disabled */ +- WARN_ON((data->flags & CSD_FLAG_WAIT) && irqs_disabled()); ++ WARN_ON_ONCE(wait && irqs_disabled() && !oops_in_progress); + +- generic_exec_single(cpu, data); ++ generic_exec_single(cpu, data, wait); + } + +-/* FIXME: Shim for archs using old arch_send_call_function_ipi API. */ ++/* Deprecated: shim for archs using old arch_send_call_function_ipi API. */ ++ + #ifndef arch_send_call_function_ipi_mask +-#define arch_send_call_function_ipi_mask(maskp) \ +- arch_send_call_function_ipi(*(maskp)) ++# define arch_send_call_function_ipi_mask(maskp) \ ++ arch_send_call_function_ipi(*(maskp)) + #endif + + /** +@@ -307,7 +347,8 @@ void __smp_call_function_single(int cpu, + * @mask: The set of cpus to run on (only runs on online subset). + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. +- * @wait: If true, wait (atomically) until function has completed on other CPUs. ++ * @wait: If true, wait (atomically) until function has completed ++ * on other CPUs. + * + * If @wait is true, then returns once @func has returned. Note that @wait + * will be implicitly turned on in case of allocation failures, since +@@ -318,27 +359,27 @@ void __smp_call_function_single(int cpu, + * must be disabled when calling this function. + */ + void smp_call_function_many(const struct cpumask *mask, +- void (*func)(void *), void *info, +- bool wait) ++ void (*func)(void *), void *info, bool wait) + { + struct call_function_data *data; + unsigned long flags; +- int cpu, next_cpu; ++ int cpu, next_cpu, this_cpu = smp_processor_id(); + + /* Can deadlock when called with interrupts disabled */ +- WARN_ON(irqs_disabled()); ++ WARN_ON_ONCE(irqs_disabled() && !oops_in_progress); + +- /* So, what's a CPU they want? Ignoring this one. */ ++ /* So, what's a CPU they want? Ignoring this one. */ + cpu = cpumask_first_and(mask, cpu_online_mask); +- if (cpu == smp_processor_id()) ++ if (cpu == this_cpu) + cpu = cpumask_next_and(cpu, mask, cpu_online_mask); ++ + /* No online cpus? We're done. */ + if (cpu >= nr_cpu_ids) + return; + + /* Do we have another CPU which isn't us? 
*/ + next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask); +- if (next_cpu == smp_processor_id()) ++ if (next_cpu == this_cpu) + next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask); + + /* Fastpath: do that cpu by itself. */ +@@ -347,43 +388,40 @@ void smp_call_function_many(const struct + return; + } + +- data = kmalloc(sizeof(*data) + cpumask_size(), GFP_ATOMIC); +- if (unlikely(!data)) { +- /* Slow path. */ +- for_each_online_cpu(cpu) { +- if (cpu == smp_processor_id()) +- continue; +- if (cpumask_test_cpu(cpu, mask)) +- smp_call_function_single(cpu, func, info, wait); +- } +- return; +- } ++ data = &__get_cpu_var(cfd_data); ++ csd_lock(&data->csd); + +- spin_lock_init(&data->lock); +- data->csd.flags = CSD_FLAG_ALLOC; +- if (wait) +- data->csd.flags |= CSD_FLAG_WAIT; ++ spin_lock_irqsave(&data->lock, flags); + data->csd.func = func; + data->csd.info = info; +- cpumask_and(to_cpumask(data->cpumask_bits), mask, cpu_online_mask); +- cpumask_clear_cpu(smp_processor_id(), to_cpumask(data->cpumask_bits)); +- data->refs = cpumask_weight(to_cpumask(data->cpumask_bits)); +- +- spin_lock_irqsave(&call_function_lock, flags); +- list_add_tail_rcu(&data->csd.list, &call_function_queue); +- spin_unlock_irqrestore(&call_function_lock, flags); ++ cpumask_and(data->cpumask, mask, cpu_online_mask); ++ cpumask_clear_cpu(this_cpu, data->cpumask); ++ data->refs = cpumask_weight(data->cpumask); ++ ++ spin_lock(&call_function.lock); ++ /* ++ * Place entry at the _HEAD_ of the list, so that any cpu still ++ * observing the entry in generic_smp_call_function_interrupt() ++ * will not miss any other list entries: ++ */ ++ list_add_rcu(&data->csd.list, &call_function.queue); ++ spin_unlock(&call_function.lock); ++ ++ spin_unlock_irqrestore(&data->lock, flags); + + /* + * Make the list addition visible before sending the ipi. ++ * (IPIs must obey or appear to obey normal Linux cache ++ * coherency rules -- see comment in generic_exec_single). + */ + smp_mb(); + + /* Send a message to all CPUs in the map */ +- arch_send_call_function_ipi_mask(to_cpumask(data->cpumask_bits)); ++ arch_send_call_function_ipi_mask(data->cpumask); + +- /* optionally wait for the CPUs to complete */ ++ /* Optionally wait for the CPUs to complete */ + if (wait) +- csd_flag_wait(&data->csd); ++ csd_lock_wait(&data->csd); + } + EXPORT_SYMBOL(smp_call_function_many); + +@@ -391,7 +429,8 @@ EXPORT_SYMBOL(smp_call_function_many); + * smp_call_function(): Run a function on all other CPUs. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. +- * @wait: If true, wait (atomically) until function has completed on other CPUs. ++ * @wait: If true, wait (atomically) until function has completed ++ * on other CPUs. + * + * Returns 0. 
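smp_call_function_many() likewise keeps its external contract: preemption must be disabled, interrupts must be enabled, and wait=true blocks until every targeted CPU has run the function. A hedged usage sketch under those assumptions (drain_local_cache and drain_other_cpus are invented names):

#include <linux/smp.h>
#include <linux/cpumask.h>
#include <linux/slab.h>

static void drain_local_cache(void *unused)
{
    /* per-CPU work goes here; must be fast and non-blocking */
}

static int drain_other_cpus(void)
{
    cpumask_var_t mask;

    if (!alloc_cpumask_var(&mask, GFP_KERNEL))
        return -ENOMEM;

    preempt_disable();                  /* caller must not migrate */
    cpumask_copy(mask, cpu_online_mask);
    cpumask_clear_cpu(smp_processor_id(), mask);
    smp_call_function_many(mask, drain_local_cache, NULL, true);
    preempt_enable();

    free_cpumask_var(mask);
    return 0;
}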
+ * +@@ -407,26 +446,27 @@ int smp_call_function(void (*func)(void + preempt_disable(); + smp_call_function_many(cpu_online_mask, func, info, wait); + preempt_enable(); ++ + return 0; + } + EXPORT_SYMBOL(smp_call_function); + + void ipi_call_lock(void) + { +- spin_lock(&call_function_lock); ++ spin_lock(&call_function.lock); + } + + void ipi_call_unlock(void) + { +- spin_unlock(&call_function_lock); ++ spin_unlock(&call_function.lock); + } + + void ipi_call_lock_irq(void) + { +- spin_lock_irq(&call_function_lock); ++ spin_lock_irq(&call_function.lock); + } + + void ipi_call_unlock_irq(void) + { +- spin_unlock_irq(&call_function_lock); ++ spin_unlock_irq(&call_function.lock); + } +Index: linux-2.6-tip/kernel/softirq.c +=================================================================== +--- linux-2.6-tip.orig/kernel/softirq.c ++++ linux-2.6-tip/kernel/softirq.c +@@ -8,21 +8,31 @@ + * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903) + * + * Remote softirq infrastructure is by Jens Axboe. ++ * ++ * Softirq-split implemetation by ++ * Copyright (C) 2005 Thomas Gleixner, Ingo Molnar + */ + + #include ++#include ++#include ++#include + #include + #include + #include ++#include + #include + #include + #include ++#include + #include + #include + #include + #include ++#include + #include + #include ++#include + + #include + /* +@@ -50,7 +60,116 @@ EXPORT_SYMBOL(irq_stat); + + static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp; + +-static DEFINE_PER_CPU(struct task_struct *, ksoftirqd); ++struct softirqdata { ++ int nr; ++ unsigned long cpu; ++ struct task_struct *tsk; ++#ifdef CONFIG_PREEMPT_SOFTIRQS ++ wait_queue_head_t wait; ++ int running; ++#endif ++}; ++ ++static DEFINE_PER_CPU(struct softirqdata [MAX_SOFTIRQ], ksoftirqd); ++ ++#ifdef CONFIG_PREEMPT_SOFTIRQS ++/* ++ * Preempting the softirq causes cases that would not be a ++ * problem when the softirq is not preempted. That is a ++ * process may have code to spin while waiting for a softirq ++ * to finish on another CPU. But if it happens that the ++ * process has preempted the softirq, this could cause a ++ * deadlock. ++ */ ++void wait_for_softirq(int softirq) ++{ ++ struct softirqdata *data = &__get_cpu_var(ksoftirqd)[softirq]; ++ ++ if (data->running) { ++ DECLARE_WAITQUEUE(wait, current); ++ ++ set_current_state(TASK_UNINTERRUPTIBLE); ++ add_wait_queue(&data->wait, &wait); ++ if (data->running) ++ schedule(); ++ remove_wait_queue(&data->wait, &wait); ++ __set_current_state(TASK_RUNNING); ++ } ++} ++#endif ++ ++#ifdef CONFIG_PREEMPT_RT ++/* ++ * On preempt-rt a softirq might be blocked on a lock. There might be ++ * no other runnable task on this CPU because the lock owner runs on ++ * some other CPU. So we have to go into idle with the pending bit ++ * set. Therefor we need to check this otherwise we warn about false ++ * positives which confuses users and defeats the whole purpose of ++ * this test. ++ * ++ * This code is called with interrupts disabled. ++ */ ++void softirq_check_pending_idle(void) ++{ ++ static int rate_limit; ++ u32 warnpending = 0, pending = local_softirq_pending(); ++ int curr = 0; ++ ++ if (rate_limit >= 10) ++ return; ++ ++ while (pending) { ++ if (pending & 1) { ++ struct task_struct *tsk; ++ ++ tsk = __get_cpu_var(ksoftirqd)[curr].tsk; ++ /* ++ * The wakeup code in rtmutex.c wakes up the ++ * task _before_ it sets pi_blocked_on to NULL ++ * under tsk->pi_lock. So we need to check for ++ * both: state and pi_blocked_on. 
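wait_for_softirq() above lets a task sleep on the per-softirq wait queue instead of spinning while the (now preemptible) softirq thread finishes. A userspace model of that running-flag/wait-queue handshake, using a condition variable in place of the kernel wait queue and a mutex in place of the lock-free check:

#include <pthread.h>
#include <stdio.h>

struct softirq_model {
    int running;                        /* data->running in the patch */
    pthread_mutex_t lock;
    pthread_cond_t wait;                /* data->wait in the patch */
};

static void model_wait_for_softirq(struct softirq_model *d)
{
    pthread_mutex_lock(&d->lock);
    while (d->running)                  /* sleep instead of busy-waiting */
        pthread_cond_wait(&d->wait, &d->lock);
    pthread_mutex_unlock(&d->lock);
}

static void model_softirq_done(struct softirq_model *d)
{
    pthread_mutex_lock(&d->lock);
    d->running = 0;
    pthread_cond_broadcast(&d->wait);   /* wake_up(&data->wait) */
    pthread_mutex_unlock(&d->lock);
}

int main(void)
{
    struct softirq_model d = {
        .running = 0,
        .lock    = PTHREAD_MUTEX_INITIALIZER,
        .wait    = PTHREAD_COND_INITIALIZER,
    };

    model_softirq_done(&d);             /* no-op wake */
    model_wait_for_softirq(&d);         /* returns immediately */
    printf("softirq idle\n");
    return 0;
}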
++ */ ++ spin_lock(&tsk->pi_lock); ++ ++ if (!tsk->pi_blocked_on && ++ !(tsk->state == TASK_RUNNING) && ++ !(tsk->state & TASK_RUNNING_MUTEX)) ++ warnpending |= 1 << curr; ++ ++ spin_unlock(&tsk->pi_lock); ++ } ++ pending >>= 1; ++ curr++; ++ } ++ ++ if (warnpending) { ++ printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", ++ warnpending); ++ rate_limit++; ++ } ++} ++ ++#else ++/* ++ * On !PREEMPT_RT we just printk rate limited: ++ */ ++void softirq_check_pending_idle(void) ++{ ++ static int ratelimit; ++ ++ if (ratelimit < 10) { ++ printk(KERN_ERR "NOHZ: local_softirq_pending %02x\n", ++ local_softirq_pending()); ++ rate_limit++; ++ } ++} ++ ++#endif ++ ++char *softirq_to_name[NR_SOFTIRQS] = { ++ "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", ++ "TASKLET", "SCHED", "HRTIMER", "RCU" ++}; + + /* + * we cannot loop indefinitely here to avoid userspace starvation, +@@ -58,16 +177,34 @@ static DEFINE_PER_CPU(struct task_struct + * to the pending events, so lets the scheduler to balance + * the softirq load for us. + */ +-void wakeup_softirqd(void) ++static void wakeup_softirqd(int softirq) + { + /* Interrupts are disabled: no need to stop preemption */ +- struct task_struct *tsk = __get_cpu_var(ksoftirqd); ++ struct task_struct *tsk = __get_cpu_var(ksoftirqd)[softirq].tsk; + + if (tsk && tsk->state != TASK_RUNNING) + wake_up_process(tsk); + } + + /* ++ * Wake up the softirq threads which have work ++ */ ++static void trigger_softirqs(void) ++{ ++ u32 pending = local_softirq_pending(); ++ int curr = 0; ++ ++ while (pending) { ++ if (pending & 1) ++ wakeup_softirqd(curr); ++ pending >>= 1; ++ curr++; ++ } ++} ++ ++#ifndef CONFIG_PREEMPT_HARDIRQS ++ ++/* + * This one is for softirq.c-internal use, + * where hardirqs are disabled legitimately: + */ +@@ -79,13 +216,23 @@ static void __local_bh_disable(unsigned + WARN_ON_ONCE(in_irq()); + + raw_local_irq_save(flags); +- add_preempt_count(SOFTIRQ_OFFSET); ++ /* ++ * The preempt tracer hooks into add_preempt_count and will break ++ * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET ++ * is set and before current->softirq_enabled is cleared. ++ * We must manually increment preempt_count here and manually ++ * call the trace_preempt_off later. 
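trigger_softirqs() simply walks the pending bitmask and wakes one thread per set bit. (As an aside, the !PREEMPT_RT softirq_check_pending_idle() above declares a local counter named ratelimit but increments rate_limit, which looks like it was meant to be the same variable.) A userspace model of the bit-walk, reusing the softirq names from the patch:

#include <stdio.h>
#include <stdint.h>

static const char *softirq_to_name[] = {
    "HI", "TIMER", "NET_TX", "NET_RX", "BLOCK",
    "TASKLET", "SCHED", "HRTIMER", "RCU"
};

static void wakeup_softirqd(int nr)
{
    printf("wake sirq-%s thread\n", softirq_to_name[nr]);
}

/* Walk the pending mask, lowest bit first, and wake each owning thread. */
static void trigger_softirqs(uint32_t pending)
{
    int curr = 0;

    while (pending) {
        if (pending & 1)
            wakeup_softirqd(curr);
        pending >>= 1;
        curr++;
    }
}

int main(void)
{
    trigger_softirqs((1u << 1) | (1u << 3));    /* TIMER and NET_RX pending */
    return 0;
}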
++ */ ++ preempt_count() += SOFTIRQ_OFFSET; + /* + * Were softirqs turned off above: + */ + if (softirq_count() == SOFTIRQ_OFFSET) + trace_softirqs_off(ip); + raw_local_irq_restore(flags); ++ ++ if (preempt_count() == SOFTIRQ_OFFSET) ++ trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1)); + } + #else /* !CONFIG_TRACE_IRQFLAGS */ + static inline void __local_bh_disable(unsigned long ip) +@@ -109,7 +256,6 @@ EXPORT_SYMBOL(local_bh_disable); + */ + void _local_bh_enable(void) + { +- WARN_ON_ONCE(in_irq()); + WARN_ON_ONCE(!irqs_disabled()); + + if (softirq_count() == SOFTIRQ_OFFSET) +@@ -119,17 +265,22 @@ void _local_bh_enable(void) + + EXPORT_SYMBOL(_local_bh_enable); + +-static inline void _local_bh_enable_ip(unsigned long ip) ++void local_bh_enable(void) + { +- WARN_ON_ONCE(in_irq() || irqs_disabled()); + #ifdef CONFIG_TRACE_IRQFLAGS +- local_irq_disable(); ++ unsigned long flags; ++ ++ WARN_ON_ONCE(in_irq()); ++#endif ++ ++#ifdef CONFIG_TRACE_IRQFLAGS ++ local_irq_save(flags); + #endif + /* + * Are softirqs going to be turned on now: + */ + if (softirq_count() == SOFTIRQ_OFFSET) +- trace_softirqs_on(ip); ++ trace_softirqs_on((unsigned long)__builtin_return_address(0)); + /* + * Keep preemption disabled until we are done with + * softirq processing: +@@ -141,23 +292,45 @@ static inline void _local_bh_enable_ip(u + + dec_preempt_count(); + #ifdef CONFIG_TRACE_IRQFLAGS +- local_irq_enable(); ++ local_irq_restore(flags); + #endif + preempt_check_resched(); + } +- +-void local_bh_enable(void) +-{ +- _local_bh_enable_ip((unsigned long)__builtin_return_address(0)); +-} + EXPORT_SYMBOL(local_bh_enable); + + void local_bh_enable_ip(unsigned long ip) + { +- _local_bh_enable_ip(ip); ++#ifdef CONFIG_TRACE_IRQFLAGS ++ unsigned long flags; ++ ++ WARN_ON_ONCE(in_irq()); ++ ++ local_irq_save(flags); ++#endif ++ /* ++ * Are softirqs going to be turned on now: ++ */ ++ if (softirq_count() == SOFTIRQ_OFFSET) ++ trace_softirqs_on(ip); ++ /* ++ * Keep preemption disabled until we are done with ++ * softirq processing: ++ */ ++ sub_preempt_count(SOFTIRQ_OFFSET - 1); ++ ++ if (unlikely(!in_interrupt() && local_softirq_pending())) ++ do_softirq(); ++ ++ dec_preempt_count(); ++#ifdef CONFIG_TRACE_IRQFLAGS ++ local_irq_restore(flags); ++#endif ++ preempt_check_resched(); + } + EXPORT_SYMBOL(local_bh_enable_ip); + ++#endif ++ + /* + * We restart softirq processing MAX_SOFTIRQ_RESTART times, + * and we fall back to softirqd after that. +@@ -167,63 +340,146 @@ EXPORT_SYMBOL(local_bh_enable_ip); + * we want to handle softirqs as soon as possible, but they + * should not be able to lock up the box. 
+ */ +-#define MAX_SOFTIRQ_RESTART 10 ++#define MAX_SOFTIRQ_RESTART 20 + +-asmlinkage void __do_softirq(void) ++DEFINE_TRACE(softirq_entry); ++DEFINE_TRACE(softirq_exit); ++ ++static DEFINE_PER_CPU(u32, softirq_running); ++ ++/* ++ * Debug check for leaking preempt counts in h->action handlers: ++ */ ++ ++static inline void debug_check_preempt_count_start(__u32 *preempt_count) + { +- struct softirq_action *h; +- __u32 pending; ++#ifdef CONFIG_DEBUG_PREEMPT ++ *preempt_count = preempt_count(); ++#endif ++} ++ ++static inline void ++ debug_check_preempt_count_stop(__u32 *preempt_count, struct softirq_action *h) ++{ ++#ifdef CONFIG_DEBUG_PREEMPT ++ if (*preempt_count == preempt_count()) ++ return; ++ ++ print_symbol("BUG: %Ps exited with wrong preemption count!\n", ++ (unsigned long)h->action); ++ printk("=> enter: %08x, exit: %08x.\n", *preempt_count, preempt_count()); ++ preempt_count() = *preempt_count; ++#endif ++} ++ ++/* ++ * Execute softirq handlers: ++ */ ++static void ___do_softirq(const int same_prio_only) ++{ ++ __u32 pending, available_mask, same_prio_skipped, preempt_count; + int max_restart = MAX_SOFTIRQ_RESTART; +- int cpu; ++ struct softirq_action *h; ++ int cpu, softirq; + + pending = local_softirq_pending(); + account_system_vtime(current); + +- __local_bh_disable((unsigned long)__builtin_return_address(0)); +- trace_softirq_enter(); +- + cpu = smp_processor_id(); + restart: ++ available_mask = -1; ++ softirq = 0; ++ same_prio_skipped = 0; + /* Reset the pending bitmask before enabling irqs */ + set_softirq_pending(0); + +- local_irq_enable(); +- + h = softirq_vec; + + do { +- if (pending & 1) { +- int prev_count = preempt_count(); ++ u32 softirq_mask = 1 << softirq; + +- h->action(h); ++ if (!(pending & 1)) ++ goto next; + +- if (unlikely(prev_count != preempt_count())) { +- printk(KERN_ERR "huh, entered softirq %td %p" +- "with preempt_count %08x," +- " exited with %08x?\n", h - softirq_vec, +- h->action, prev_count, preempt_count()); +- preempt_count() = prev_count; +- } ++ debug_check_preempt_count_start(&preempt_count); + +- rcu_bh_qsctr_inc(cpu); ++#if defined(CONFIG_PREEMPT_SOFTIRQS) && defined(CONFIG_PREEMPT_HARDIRQS) ++ /* ++ * If executed by a same-prio hardirq thread ++ * then skip pending softirqs that belong ++ * to softirq threads with different priority: ++ */ ++ if (same_prio_only) { ++ struct task_struct *tsk; ++ ++ tsk = __get_cpu_var(ksoftirqd)[softirq].tsk; ++ if (tsk && tsk->normal_prio != current->normal_prio) { ++ same_prio_skipped |= softirq_mask; ++ available_mask &= ~softirq_mask; ++ goto next; ++ } + } ++#endif ++ /* ++ * Is this softirq already being processed? ++ */ ++ if (per_cpu(softirq_running, cpu) & softirq_mask) { ++ available_mask &= ~softirq_mask; ++ goto next; ++ } ++ per_cpu(softirq_running, cpu) |= softirq_mask; ++ local_irq_enable(); ++ ++ h->action(h); ++ ++ debug_check_preempt_count_stop(&preempt_count, h); ++ ++ rcu_bh_qsctr_inc(cpu); ++ cond_resched_softirq_context(); ++ local_irq_disable(); ++ per_cpu(softirq_running, cpu) &= ~softirq_mask; ++next: + h++; ++ softirq++; + pending >>= 1; + } while (pending); + +- local_irq_disable(); +- ++ or_softirq_pending(same_prio_skipped); + pending = local_softirq_pending(); +- if (pending && --max_restart) +- goto restart; ++ if (pending & available_mask) { ++ if (--max_restart) ++ goto restart; ++ } + + if (pending) +- wakeup_softirqd(); ++ trigger_softirqs(); ++} ++ ++asmlinkage void __do_softirq(void) ++{ ++#ifdef CONFIG_PREEMPT_SOFTIRQS ++ /* ++ * 'preempt harder'. 
Push all softirq processing off to ksoftirqd. ++ */ ++ if (softirq_preemption) { ++ if (local_softirq_pending()) ++ trigger_softirqs(); ++ return; ++ } ++#endif ++ /* ++ * 'immediate' softirq execution: ++ */ ++ __local_bh_disable((unsigned long)__builtin_return_address(0)); ++ lockdep_softirq_enter(); ++ ++ ___do_softirq(0); + +- trace_softirq_exit(); ++ lockdep_softirq_exit(); + + account_system_vtime(current); + _local_bh_enable(); ++ + } + + #ifndef __ARCH_HAS_DO_SOFTIRQ +@@ -286,7 +542,7 @@ void irq_exit(void) + if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched()) + tick_nohz_stop_sched_tick(0); + #endif +- preempt_enable_no_resched(); ++ __preempt_enable_no_resched(); + } + + /* +@@ -294,19 +550,11 @@ void irq_exit(void) + */ + inline void raise_softirq_irqoff(unsigned int nr) + { +- __raise_softirq_irqoff(nr); ++ __do_raise_softirq_irqoff(nr); + +- /* +- * If we're in an interrupt or softirq, we're done +- * (this also catches softirq-disabled code). We will +- * actually run the softirq once we return from +- * the irq or softirq. +- * +- * Otherwise we wake up ksoftirqd to make sure we +- * schedule the softirq soon. +- */ +- if (!in_interrupt()) +- wakeup_softirqd(); ++#ifdef CONFIG_PREEMPT_SOFTIRQS ++ wakeup_softirqd(nr); ++#endif + } + + void raise_softirq(unsigned int nr) +@@ -333,15 +581,45 @@ struct tasklet_head + static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec); + static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec); + ++static void inline ++__tasklet_common_schedule(struct tasklet_struct *t, struct tasklet_head *head, unsigned int nr) ++{ ++ if (tasklet_trylock(t)) { ++again: ++ /* We may have been preempted before tasklet_trylock ++ * and __tasklet_action may have already run. ++ * So double check the sched bit while the takslet ++ * is locked before adding it to the list. ++ */ ++ if (test_bit(TASKLET_STATE_SCHED, &t->state)) { ++ t->next = NULL; ++ *head->tail = t; ++ head->tail = &(t->next); ++ raise_softirq_irqoff(nr); ++ tasklet_unlock(t); ++ } else { ++ /* This is subtle. If we hit the corner case above ++ * It is possible that we get preempted right here, ++ * and another task has successfully called ++ * tasklet_schedule(), then this function, and ++ * failed on the trylock. Thus we must be sure ++ * before releasing the tasklet lock, that the ++ * SCHED_BIT is clear. 
Otherwise the tasklet ++ * may get its SCHED_BIT set, but not added to the ++ * list ++ */ ++ if (!tasklet_tryunlock(t)) ++ goto again; ++ } ++ } ++} ++ + void __tasklet_schedule(struct tasklet_struct *t) + { + unsigned long flags; + + local_irq_save(flags); +- t->next = NULL; +- *__get_cpu_var(tasklet_vec).tail = t; +- __get_cpu_var(tasklet_vec).tail = &(t->next); +- raise_softirq_irqoff(TASKLET_SOFTIRQ); ++ __tasklet_common_schedule(t, &__get_cpu_var(tasklet_vec), TASKLET_SOFTIRQ); + local_irq_restore(flags); + } + +@@ -352,50 +630,127 @@ void __tasklet_hi_schedule(struct taskle + unsigned long flags; + + local_irq_save(flags); +- t->next = NULL; +- *__get_cpu_var(tasklet_hi_vec).tail = t; +- __get_cpu_var(tasklet_hi_vec).tail = &(t->next); +- raise_softirq_irqoff(HI_SOFTIRQ); ++ __tasklet_common_schedule(t, &__get_cpu_var(tasklet_hi_vec), HI_SOFTIRQ); + local_irq_restore(flags); + } + + EXPORT_SYMBOL(__tasklet_hi_schedule); + +-static void tasklet_action(struct softirq_action *a) ++void __tasklet_hi_schedule_first(struct tasklet_struct *t) + { +- struct tasklet_struct *list; ++ __tasklet_hi_schedule(t); ++} + +- local_irq_disable(); +- list = __get_cpu_var(tasklet_vec).head; +- __get_cpu_var(tasklet_vec).head = NULL; +- __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head; +- local_irq_enable(); ++EXPORT_SYMBOL(__tasklet_hi_schedule_first); ++ ++void tasklet_enable(struct tasklet_struct *t) ++{ ++ if (!atomic_dec_and_test(&t->count)) ++ return; ++ if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state)) ++ tasklet_schedule(t); ++} ++ ++EXPORT_SYMBOL(tasklet_enable); ++ ++void tasklet_hi_enable(struct tasklet_struct *t) ++{ ++ if (!atomic_dec_and_test(&t->count)) ++ return; ++ if (test_and_clear_bit(TASKLET_STATE_PENDING, &t->state)) ++ tasklet_hi_schedule(t); ++} ++ ++EXPORT_SYMBOL(tasklet_hi_enable); ++ ++static void ++__tasklet_action(struct softirq_action *a, struct tasklet_struct *list) ++{ ++ int loops = 1000000; + + while (list) { + struct tasklet_struct *t = list; + + list = list->next; + +- if (tasklet_trylock(t)) { +- if (!atomic_read(&t->count)) { +- if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) +- BUG(); +- t->func(t->data); +- tasklet_unlock(t); +- continue; +- } +- tasklet_unlock(t); ++ /* ++ * Should always succeed - after a tasklist got on the ++ * list (after getting the SCHED bit set from 0 to 1), ++ * nothing but the tasklet softirq it got queued to can ++ * lock it: ++ */ ++ if (!tasklet_trylock(t)) { ++ WARN_ON(1); ++ continue; + } + +- local_irq_disable(); + t->next = NULL; +- *__get_cpu_var(tasklet_vec).tail = t; +- __get_cpu_var(tasklet_vec).tail = &(t->next); +- __raise_softirq_irqoff(TASKLET_SOFTIRQ); +- local_irq_enable(); ++ ++ /* ++ * If we cannot handle the tasklet because it's disabled, ++ * mark it as pending. tasklet_enable() will later ++ * re-schedule the tasklet. ++ */ ++ if (unlikely(atomic_read(&t->count))) { ++out_disabled: ++ /* implicit unlock: */ ++ wmb(); ++ t->state = TASKLET_STATEF_PENDING; ++ continue; ++ } ++ ++ /* ++ * After this point on the tasklet might be rescheduled ++ * on another CPU, but it can only be added to another ++ * CPU's tasklet list if we unlock the tasklet (which we ++ * dont do yet). ++ */ ++ if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) ++ WARN_ON(1); ++ ++again: ++ t->func(t->data); ++ ++ /* ++ * Try to unlock the tasklet. We must use cmpxchg, because ++ * another CPU might have scheduled or disabled the tasklet. ++ * We only allow the STATE_RUN -> 0 transition here. 
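The rewritten __tasklet_action() relies on two rules: tasklet_trylock() succeeds only if the RUN bit was clear, and tasklet_tryunlock() may only perform the exact RUN -> 0 transition, so a concurrent reschedule or disable is never lost. A userspace model of that state machine with C11 atomics (only the state word is modelled; the disable count and the lists are left out):

#include <stdatomic.h>
#include <stdio.h>

#define TASKLET_STATE_SCHED  (1u << 0)
#define TASKLET_STATE_RUN    (1u << 1)

struct tasklet_model { atomic_uint state; };

static int model_trylock(struct tasklet_model *t)
{
    /* set RUN; succeed only if it was previously clear */
    return !(atomic_fetch_or(&t->state, TASKLET_STATE_RUN) & TASKLET_STATE_RUN);
}

static int model_tryunlock(struct tasklet_model *t)
{
    /* only the exact RUN -> 0 transition is allowed, as in the patch */
    unsigned int expected = TASKLET_STATE_RUN;

    return atomic_compare_exchange_strong(&t->state, &expected, 0);
}

int main(void)
{
    struct tasklet_model t = { .state = TASKLET_STATE_SCHED };

    printf("trylock: %d\n", model_trylock(&t));                    /* 1: we own RUN */
    printf("tryunlock, SCHED still set: %d\n", model_tryunlock(&t)); /* 0: must re-run */
    atomic_fetch_and(&t.state, ~TASKLET_STATE_SCHED);
    printf("tryunlock after SCHED cleared: %d\n", model_tryunlock(&t)); /* 1 */
    return 0;
}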
++ */ ++ while (!tasklet_tryunlock(t)) { ++ /* ++ * If it got disabled meanwhile, bail out: ++ */ ++ if (atomic_read(&t->count)) ++ goto out_disabled; ++ /* ++ * If it got scheduled meanwhile, re-execute ++ * the tasklet function: ++ */ ++ if (test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) ++ goto again; ++ if (!--loops) { ++ printk("hm, tasklet state: %08lx\n", t->state); ++ WARN_ON(1); ++ tasklet_unlock(t); ++ break; ++ } ++ } + } + } + ++static void tasklet_action(struct softirq_action *a) ++{ ++ struct tasklet_struct *list; ++ ++ local_irq_disable(); ++ list = __get_cpu_var(tasklet_vec).head; ++ __get_cpu_var(tasklet_vec).head = NULL; ++ __get_cpu_var(tasklet_vec).tail = &__get_cpu_var(tasklet_vec).head; ++ local_irq_enable(); ++ ++ __tasklet_action(a, list); ++} ++ + static void tasklet_hi_action(struct softirq_action *a) + { + struct tasklet_struct *list; +@@ -406,29 +761,7 @@ static void tasklet_hi_action(struct sof + __get_cpu_var(tasklet_hi_vec).tail = &__get_cpu_var(tasklet_hi_vec).head; + local_irq_enable(); + +- while (list) { +- struct tasklet_struct *t = list; +- +- list = list->next; +- +- if (tasklet_trylock(t)) { +- if (!atomic_read(&t->count)) { +- if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state)) +- BUG(); +- t->func(t->data); +- tasklet_unlock(t); +- continue; +- } +- tasklet_unlock(t); +- } +- +- local_irq_disable(); +- t->next = NULL; +- *__get_cpu_var(tasklet_hi_vec).tail = t; +- __get_cpu_var(tasklet_hi_vec).tail = &(t->next); +- __raise_softirq_irqoff(HI_SOFTIRQ); +- local_irq_enable(); +- } ++ __tasklet_action(a, list); + } + + +@@ -451,7 +784,7 @@ void tasklet_kill(struct tasklet_struct + + while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) { + do +- yield(); ++ msleep(1); + while (test_bit(TASKLET_STATE_SCHED, &t->state)); + } + tasklet_unlock_wait(t); +@@ -496,7 +829,7 @@ static int __try_remote_softirq(struct c + cp->flags = 0; + cp->priv = softirq; + +- __smp_call_function_single(cpu, cp); ++ __smp_call_function_single(cpu, cp, 0); + return 0; + } + return 1; +@@ -602,34 +935,100 @@ void __init softirq_init(void) + open_softirq(HI_SOFTIRQ, tasklet_hi_action); + } + +-static int ksoftirqd(void * __bind_cpu) ++#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT) ++ ++void tasklet_unlock_wait(struct tasklet_struct *t) + { ++ while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { ++ /* ++ * Hack for now to avoid this busy-loop: ++ */ ++#ifdef CONFIG_PREEMPT_RT ++ msleep(1); ++#else ++ barrier(); ++#endif ++ } ++} ++EXPORT_SYMBOL(tasklet_unlock_wait); ++ ++#endif ++ ++static int ksoftirqd(void * __data) ++{ ++ /* Priority needs to be below hardirqs */ ++ struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2 - 1}; ++ struct softirqdata *data = __data; ++ u32 softirq_mask = (1 << data->nr); ++ struct softirq_action *h; ++ int cpu = data->cpu; ++ ++#ifdef CONFIG_PREEMPT_SOFTIRQS ++ init_waitqueue_head(&data->wait); ++#endif ++ ++ sys_sched_setscheduler(current->pid, SCHED_FIFO, ¶m); ++ current->flags |= PF_SOFTIRQ; + set_current_state(TASK_INTERRUPTIBLE); + + while (!kthread_should_stop()) { + preempt_disable(); +- if (!local_softirq_pending()) { +- preempt_enable_no_resched(); ++ if (!(local_softirq_pending() & softirq_mask)) { ++sleep_more: ++ __preempt_enable_no_resched(); + schedule(); + preempt_disable(); + } + + __set_current_state(TASK_RUNNING); + +- while (local_softirq_pending()) { ++#ifdef CONFIG_PREEMPT_SOFTIRQS ++ data->running = 1; ++#endif ++ ++ while (local_softirq_pending() & softirq_mask) { + /* Preempt disable stops cpu 
going offline. + If already offline, we'll be on wrong CPU: + don't process */ +- if (cpu_is_offline((long)__bind_cpu)) ++ if (cpu_is_offline(cpu)) + goto wait_to_die; +- do_softirq(); +- preempt_enable_no_resched(); ++ ++ local_irq_disable(); ++ /* ++ * Is the softirq already being executed by ++ * a hardirq context? ++ */ ++ if (per_cpu(softirq_running, cpu) & softirq_mask) { ++ local_irq_enable(); ++ set_current_state(TASK_INTERRUPTIBLE); ++ goto sleep_more; ++ } ++ per_cpu(softirq_running, cpu) |= softirq_mask; ++ __preempt_enable_no_resched(); ++ set_softirq_pending(local_softirq_pending() & ~softirq_mask); ++ local_bh_disable(); ++ local_irq_enable(); ++ ++ h = &softirq_vec[data->nr]; ++ if (h) ++ h->action(h); ++ rcu_bh_qsctr_inc(data->cpu); ++ ++ local_irq_disable(); ++ per_cpu(softirq_running, cpu) &= ~softirq_mask; ++ _local_bh_enable(); ++ local_irq_enable(); ++ + cond_resched(); + preempt_disable(); +- rcu_qsctr_inc((long)__bind_cpu); ++ rcu_qsctr_inc(data->cpu); + } + preempt_enable(); + set_current_state(TASK_INTERRUPTIBLE); ++#ifdef CONFIG_PREEMPT_SOFTIRQS ++ data->running = 0; ++ wake_up(&data->wait); ++#endif + } + __set_current_state(TASK_RUNNING); + return 0; +@@ -679,7 +1078,7 @@ void tasklet_kill_immediate(struct taskl + BUG(); + } + +-static void takeover_tasklets(unsigned int cpu) ++void takeover_tasklets(unsigned int cpu) + { + /* CPU is dead, so no lock needed. */ + local_irq_disable(); +@@ -705,49 +1104,83 @@ static void takeover_tasklets(unsigned i + } + #endif /* CONFIG_HOTPLUG_CPU */ + ++static const char *softirq_names [] = ++{ ++ [HI_SOFTIRQ] = "high", ++ [SCHED_SOFTIRQ] = "sched", ++ [TIMER_SOFTIRQ] = "timer", ++ [NET_TX_SOFTIRQ] = "net-tx", ++ [NET_RX_SOFTIRQ] = "net-rx", ++ [BLOCK_SOFTIRQ] = "block", ++ [TASKLET_SOFTIRQ] = "tasklet", ++#ifdef CONFIG_HIGH_RES_TIMERS ++ [HRTIMER_SOFTIRQ] = "hrtimer", ++#endif ++ [RCU_SOFTIRQ] = "rcu", ++}; ++ + static int __cpuinit cpu_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) + { +- int hotcpu = (unsigned long)hcpu; ++ int hotcpu = (unsigned long)hcpu, i; + struct task_struct *p; + + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: +- p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu); +- if (IS_ERR(p)) { +- printk("ksoftirqd for %i failed\n", hotcpu); +- return NOTIFY_BAD; +- } +- kthread_bind(p, hotcpu); +- per_cpu(ksoftirqd, hotcpu) = p; +- break; ++ for (i = 0; i < MAX_SOFTIRQ; i++) { ++ per_cpu(ksoftirqd, hotcpu)[i].nr = i; ++ per_cpu(ksoftirqd, hotcpu)[i].cpu = hotcpu; ++ per_cpu(ksoftirqd, hotcpu)[i].tsk = NULL; ++ } ++ for (i = 0; i < MAX_SOFTIRQ; i++) { ++ p = kthread_create(ksoftirqd, ++ &per_cpu(ksoftirqd, hotcpu)[i], ++ "sirq-%s/%d", softirq_names[i], ++ hotcpu); ++ if (IS_ERR(p)) { ++ printk("ksoftirqd %d for %i failed\n", i, ++ hotcpu); ++ return NOTIFY_BAD; ++ } ++ kthread_bind(p, hotcpu); ++ per_cpu(ksoftirqd, hotcpu)[i].tsk = p; ++ } ++ break; ++ break; + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: +- wake_up_process(per_cpu(ksoftirqd, hotcpu)); ++ for (i = 0; i < MAX_SOFTIRQ; i++) ++ wake_up_process(per_cpu(ksoftirqd, hotcpu)[i].tsk); + break; + #ifdef CONFIG_HOTPLUG_CPU + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: +- if (!per_cpu(ksoftirqd, hotcpu)) +- break; +- /* Unbind so it can run. Fall thru. 
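The CPU_UP_PREPARE case above creates one bound, SCHED_FIFO "sirq-<name>/<cpu>" thread per softirq. A hedged kernel-style sketch of that create/bind/wake sequence, assuming a 2.6.29-era tree; my_softirq_thread and spawn_one are invented names and the thread body is a placeholder:

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>

static int my_softirq_thread(void *data)
{
    struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO/2 - 1 };

    sched_setscheduler(current, SCHED_FIFO, &param);
    while (!kthread_should_stop())
        schedule_timeout_interruptible(HZ);     /* real code waits for work */
    return 0;
}

static int spawn_one(const char *name, int cpu)
{
    struct task_struct *p;

    p = kthread_create(my_softirq_thread, NULL, "sirq-%s/%d", name, cpu);
    if (IS_ERR(p))
        return PTR_ERR(p);
    kthread_bind(p, cpu);                       /* bind before first wakeup */
    wake_up_process(p);
    return 0;
}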
*/ +- kthread_bind(per_cpu(ksoftirqd, hotcpu), +- cpumask_any(cpu_online_mask)); ++#if 0 ++ for (i = 0; i < MAX_SOFTIRQ; i++) { ++ if (!per_cpu(ksoftirqd, hotcpu)[i].tsk) ++ continue; ++ kthread_bind(per_cpu(ksoftirqd, hotcpu)[i].tsk, ++ any_online_cpu(cpu_online_map)); ++ } ++#endif + case CPU_DEAD: + case CPU_DEAD_FROZEN: { +- struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; ++ struct sched_param param; + +- p = per_cpu(ksoftirqd, hotcpu); +- per_cpu(ksoftirqd, hotcpu) = NULL; +- sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); +- kthread_stop(p); ++ for (i = 0; i < MAX_SOFTIRQ; i++) { ++ param.sched_priority = MAX_RT_PRIO-1; ++ p = per_cpu(ksoftirqd, hotcpu)[i].tsk; ++ sched_setscheduler(p, SCHED_FIFO, ¶m); ++ per_cpu(ksoftirqd, hotcpu)[i].tsk = NULL; ++ kthread_stop(p); ++ } + takeover_tasklets(hotcpu); + break; +- } +-#endif /* CONFIG_HOTPLUG_CPU */ + } ++#endif /* CONFIG_HOTPLUG_CPU */ ++ } + return NOTIFY_OK; + } + +@@ -767,6 +1200,34 @@ static __init int spawn_ksoftirqd(void) + } + early_initcall(spawn_ksoftirqd); + ++ ++#ifdef CONFIG_PREEMPT_SOFTIRQS ++ ++int softirq_preemption = 1; ++ ++EXPORT_SYMBOL(softirq_preemption); ++ ++/* ++ * Real-Time Preemption depends on softirq threading: ++ */ ++#ifndef CONFIG_PREEMPT_RT ++ ++static int __init softirq_preempt_setup (char *str) ++{ ++ if (!strncmp(str, "off", 3)) ++ softirq_preemption = 0; ++ else ++ get_option(&str, &softirq_preemption); ++ if (!softirq_preemption) ++ printk("turning off softirq preemption!\n"); ++ ++ return 1; ++} ++ ++__setup("softirq-preempt=", softirq_preempt_setup); ++#endif ++#endif ++ + #ifdef CONFIG_SMP + /* + * Call a function on all processors +@@ -796,6 +1257,11 @@ int __init __weak early_irq_init(void) + return 0; + } + ++int __init __weak arch_probe_nr_irqs(void) ++{ ++ return 0; ++} ++ + int __init __weak arch_early_irq_init(void) + { + return 0; +Index: linux-2.6-tip/kernel/softlockup.c +=================================================================== +--- linux-2.6-tip.orig/kernel/softlockup.c ++++ linux-2.6-tip/kernel/softlockup.c +@@ -20,7 +20,7 @@ + + #include + +-static DEFINE_SPINLOCK(print_lock); ++static DEFINE_RAW_SPINLOCK(print_lock); + + static DEFINE_PER_CPU(unsigned long, touch_timestamp); + static DEFINE_PER_CPU(unsigned long, print_timestamp); +@@ -166,97 +166,11 @@ void softlockup_tick(void) + } + + /* +- * Have a reasonable limit on the number of tasks checked: +- */ +-unsigned long __read_mostly sysctl_hung_task_check_count = 1024; +- +-/* +- * Zero means infinite timeout - no checking done: +- */ +-unsigned long __read_mostly sysctl_hung_task_timeout_secs = 480; +- +-unsigned long __read_mostly sysctl_hung_task_warnings = 10; +- +-/* +- * Only do the hung-tasks check on one CPU: +- */ +-static int check_cpu __read_mostly = -1; +- +-static void check_hung_task(struct task_struct *t, unsigned long now) +-{ +- unsigned long switch_count = t->nvcsw + t->nivcsw; +- +- if (t->flags & PF_FROZEN) +- return; +- +- if (switch_count != t->last_switch_count || !t->last_switch_timestamp) { +- t->last_switch_count = switch_count; +- t->last_switch_timestamp = now; +- return; +- } +- if ((long)(now - t->last_switch_timestamp) < +- sysctl_hung_task_timeout_secs) +- return; +- if (!sysctl_hung_task_warnings) +- return; +- sysctl_hung_task_warnings--; +- +- /* +- * Ok, the task did not get scheduled for more than 2 minutes, +- * complain: +- */ +- printk(KERN_ERR "INFO: task %s:%d blocked for more than " +- "%ld seconds.\n", t->comm, t->pid, +- sysctl_hung_task_timeout_secs); +- 
printk(KERN_ERR "\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" +- " disables this message.\n"); +- sched_show_task(t); +- __debug_show_held_locks(t); +- +- t->last_switch_timestamp = now; +- touch_nmi_watchdog(); +- +- if (softlockup_panic) +- panic("softlockup: blocked tasks"); +-} +- +-/* +- * Check whether a TASK_UNINTERRUPTIBLE does not get woken up for +- * a really long time (120 seconds). If that happens, print out +- * a warning. +- */ +-static void check_hung_uninterruptible_tasks(int this_cpu) +-{ +- int max_count = sysctl_hung_task_check_count; +- unsigned long now = get_timestamp(this_cpu); +- struct task_struct *g, *t; +- +- /* +- * If the system crashed already then all bets are off, +- * do not report extra hung tasks: +- */ +- if (test_taint(TAINT_DIE) || did_panic) +- return; +- +- read_lock(&tasklist_lock); +- do_each_thread(g, t) { +- if (!--max_count) +- goto unlock; +- /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ +- if (t->state == TASK_UNINTERRUPTIBLE) +- check_hung_task(t, now); +- } while_each_thread(g, t); +- unlock: +- read_unlock(&tasklist_lock); +-} +- +-/* + * The watchdog thread - runs every second and touches the timestamp. + */ + static int watchdog(void *__bind_cpu) + { + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; +- int this_cpu = (long)__bind_cpu; + + sched_setscheduler(current, SCHED_FIFO, ¶m); + +@@ -276,11 +190,6 @@ static int watchdog(void *__bind_cpu) + if (kthread_should_stop()) + break; + +- if (this_cpu == check_cpu) { +- if (sysctl_hung_task_timeout_secs) +- check_hung_uninterruptible_tasks(this_cpu); +- } +- + set_current_state(TASK_INTERRUPTIBLE); + } + __set_current_state(TASK_RUNNING); +@@ -312,18 +221,9 @@ cpu_callback(struct notifier_block *nfb, + break; + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: +- check_cpu = cpumask_any(cpu_online_mask); + wake_up_process(per_cpu(watchdog_task, hotcpu)); + break; + #ifdef CONFIG_HOTPLUG_CPU +- case CPU_DOWN_PREPARE: +- case CPU_DOWN_PREPARE_FROZEN: +- if (hotcpu == check_cpu) { +- /* Pick any other online cpu. */ +- check_cpu = cpumask_any_but(cpu_online_mask, hotcpu); +- } +- break; +- + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: + if (!per_cpu(watchdog_task, hotcpu)) +Index: linux-2.6-tip/kernel/stop_machine.c +=================================================================== +--- linux-2.6-tip.orig/kernel/stop_machine.c ++++ linux-2.6-tip/kernel/stop_machine.c +@@ -40,6 +40,8 @@ static atomic_t thread_ack; + static DEFINE_MUTEX(lock); + /* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */ + static DEFINE_MUTEX(setup_lock); ++/* do not start up until all worklets have been placed: */ ++static DEFINE_MUTEX(startup_lock); + /* Users of stop_machine. */ + static int refcount; + static struct workqueue_struct *stop_machine_wq; +@@ -71,6 +73,15 @@ static void stop_cpu(struct work_struct + int cpu = smp_processor_id(); + int err; + ++ /* ++ * Wait for the startup loop to finish: ++ */ ++ mutex_lock(&startup_lock); ++ /* ++ * Let other threads continue too: ++ */ ++ mutex_unlock(&startup_lock); ++ + if (!active_cpus) { + if (cpu == cpumask_first(cpu_online_mask)) + smdata = &active; +@@ -166,16 +177,21 @@ int __stop_machine(int (*fn)(void *), vo + + set_state(STOPMACHINE_PREPARE); + +- /* Schedule the stop_cpu work on all cpus: hold this CPU so one +- * doesn't hit this CPU until we're ready. 
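The startup_lock added to stop_machine.c is a simple gate: every stop_cpu() worker blocks on it first, and __stop_machine() releases it only after a work item has been queued on every CPU, so no CPU can start the state machine early. A userspace model of that gate with a plain mutex:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t startup_lock = PTHREAD_MUTEX_INITIALIZER;

static void *stop_cpu_model(void *arg)
{
    /* Wait for the startup loop to finish, then let the others continue */
    pthread_mutex_lock(&startup_lock);
    pthread_mutex_unlock(&startup_lock);

    printf("worker %ld running\n", (long)arg);
    return NULL;
}

int main(void)
{
    pthread_t workers[4];

    pthread_mutex_lock(&startup_lock);          /* hold the gate */
    for (long i = 0; i < 4; i++)
        pthread_create(&workers[i], NULL, stop_cpu_model, (void *)i);
    pthread_mutex_unlock(&startup_lock);        /* release all workers at once */

    for (int i = 0; i < 4; i++)
        pthread_join(workers[i], NULL);
    return 0;
}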
*/ +- get_cpu(); ++ /* ++ * Schedule the stop_cpu work on all cpus before allowing any ++ * of the CPUs to execute it: ++ */ ++ mutex_lock(&startup_lock); ++ + for_each_online_cpu(i) { +- sm_work = percpu_ptr(stop_machine_work, i); ++ sm_work = per_cpu_ptr(stop_machine_work, i); + INIT_WORK(sm_work, stop_cpu); + queue_work_on(i, stop_machine_wq, sm_work); + } +- /* This will release the thread on our CPU. */ +- put_cpu(); ++ ++ /* This will release the thread on all CPUs: */ ++ mutex_unlock(&startup_lock); ++ + flush_workqueue(stop_machine_wq); + ret = active.fnret; + mutex_unlock(&lock); +Index: linux-2.6-tip/kernel/sys.c +=================================================================== +--- linux-2.6-tip.orig/kernel/sys.c ++++ linux-2.6-tip/kernel/sys.c +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -32,11 +33,13 @@ + #include + #include + #include ++#include + #include + #include + + #include + #include ++#include + #include + #include + +@@ -278,6 +281,15 @@ out_unlock: + */ + void emergency_restart(void) + { ++ /* ++ * Call the notifier chain if we are not in an ++ * atomic context: ++ */ ++#ifdef CONFIG_PREEMPT ++ if (!in_atomic() && !irqs_disabled()) ++ blocking_notifier_call_chain(&reboot_notifier_list, ++ SYS_RESTART, NULL); ++#endif + machine_emergency_restart(); + } + EXPORT_SYMBOL_GPL(emergency_restart); +@@ -1800,6 +1812,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsi + case PR_SET_TSC: + error = SET_TSC_CTL(arg2); + break; ++ case PR_TASK_PERF_COUNTERS_DISABLE: ++ error = perf_counter_task_disable(); ++ break; ++ case PR_TASK_PERF_COUNTERS_ENABLE: ++ error = perf_counter_task_enable(); ++ break; + case PR_GET_TIMERSLACK: + error = current->timer_slack_ns; + break; +Index: linux-2.6-tip/kernel/sys_ni.c +=================================================================== +--- linux-2.6-tip.orig/kernel/sys_ni.c ++++ linux-2.6-tip/kernel/sys_ni.c +@@ -175,3 +175,6 @@ cond_syscall(compat_sys_timerfd_settime) + cond_syscall(compat_sys_timerfd_gettime); + cond_syscall(sys_eventfd); + cond_syscall(sys_eventfd2); ++ ++/* performance counters: */ ++cond_syscall(sys_perf_counter_open); +Index: linux-2.6-tip/kernel/sysctl.c +=================================================================== +--- linux-2.6-tip.orig/kernel/sysctl.c ++++ linux-2.6-tip/kernel/sysctl.c +@@ -27,6 +27,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -812,6 +813,19 @@ static struct ctl_table kern_table[] = { + .extra1 = &neg_one, + .extra2 = &sixty, + }, ++#endif ++#ifdef CONFIG_DETECT_HUNG_TASK ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "hung_task_panic", ++ .data = &sysctl_hung_task_panic, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec_minmax, ++ .strategy = &sysctl_intvec, ++ .extra1 = &zero, ++ .extra2 = &one, ++ }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "hung_task_check_count", +@@ -827,7 +841,7 @@ static struct ctl_table kern_table[] = { + .data = &sysctl_hung_task_timeout_secs, + .maxlen = sizeof(unsigned long), + .mode = 0644, +- .proc_handler = &proc_doulongvec_minmax, ++ .proc_handler = &proc_dohung_task_timeout_secs, + .strategy = &sysctl_intvec, + }, + { +@@ -887,6 +901,16 @@ static struct ctl_table kern_table[] = { + .proc_handler = &proc_dointvec, + }, + #endif ++#ifdef CONFIG_KMEMCHECK ++ { ++ .ctl_name = CTL_UNNUMBERED, ++ .procname = "kmemcheck", ++ .data = &kmemcheck_enabled, ++ .maxlen = sizeof(int), ++ .mode = 0644, ++ .proc_handler = &proc_dointvec, ++ 
}, ++#endif + #ifdef CONFIG_UNEVICTABLE_LRU + { + .ctl_name = CTL_UNNUMBERED, +Index: linux-2.6-tip/kernel/time/clockevents.c +=================================================================== +--- linux-2.6-tip.orig/kernel/time/clockevents.c ++++ linux-2.6-tip/kernel/time/clockevents.c +@@ -27,7 +27,7 @@ static LIST_HEAD(clockevents_released); + static RAW_NOTIFIER_HEAD(clockevents_chain); + + /* Protection for the above */ +-static DEFINE_SPINLOCK(clockevents_lock); ++static DEFINE_RAW_SPINLOCK(clockevents_lock); + + /** + * clockevents_delta2ns - Convert a latch value (device ticks) to nanoseconds +@@ -68,6 +68,17 @@ void clockevents_set_mode(struct clock_e + if (dev->mode != mode) { + dev->set_mode(mode, dev); + dev->mode = mode; ++ ++ /* ++ * A nsec2cyc multiplicator of 0 is invalid and we'd crash ++ * on it, so fix it up and emit a warning: ++ */ ++ if (mode == CLOCK_EVT_MODE_ONESHOT) { ++ if (unlikely(!dev->mult)) { ++ dev->mult = 1; ++ WARN_ON(1); ++ } ++ } + } + } + +@@ -168,15 +179,6 @@ void clockevents_register_device(struct + BUG_ON(dev->mode != CLOCK_EVT_MODE_UNUSED); + BUG_ON(!dev->cpumask); + +- /* +- * A nsec2cyc multiplicator of 0 is invalid and we'd crash +- * on it, so fix it up and emit a warning: +- */ +- if (unlikely(!dev->mult)) { +- dev->mult = 1; +- WARN_ON(1); +- } +- + spin_lock(&clockevents_lock); + + list_add(&dev->list, &clockevent_devices); +Index: linux-2.6-tip/kernel/time/ntp.c +=================================================================== +--- linux-2.6-tip.orig/kernel/time/ntp.c ++++ linux-2.6-tip/kernel/time/ntp.c +@@ -1,71 +1,129 @@ + /* +- * linux/kernel/time/ntp.c +- * + * NTP state machine interfaces and logic. + * + * This code was mainly moved from kernel/timer.c and kernel/time.c + * Please see those files for relevant copyright info and historical + * changelogs. 
+ */ +- +-#include +-#include +-#include +-#include +-#include + #include +-#include + #include + #include +-#include ++#include ++#include ++#include ++#include ++#include ++#include + + /* +- * Timekeeping variables ++ * NTP timekeeping variables: + */ +-unsigned long tick_usec = TICK_USEC; /* USER_HZ period (usec) */ +-unsigned long tick_nsec; /* ACTHZ period (nsec) */ +-u64 tick_length; +-static u64 tick_length_base; + +-static struct hrtimer leap_timer; ++/* USER_HZ period (usecs): */ ++unsigned long tick_usec = TICK_USEC; ++ ++/* ACTHZ period (nsecs): */ ++unsigned long tick_nsec; ++ ++u64 tick_length; ++static u64 tick_length_base; ++ ++static struct hrtimer leap_timer; + +-#define MAX_TICKADJ 500 /* microsecs */ +-#define MAX_TICKADJ_SCALED (((u64)(MAX_TICKADJ * NSEC_PER_USEC) << \ +- NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) ++#define MAX_TICKADJ 500LL /* usecs */ ++#define MAX_TICKADJ_SCALED \ ++ (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) + + /* + * phase-lock loop variables + */ +-/* TIME_ERROR prevents overwriting the CMOS clock */ +-static int time_state = TIME_OK; /* clock synchronization status */ +-int time_status = STA_UNSYNC; /* clock status bits */ +-static long time_tai; /* TAI offset (s) */ +-static s64 time_offset; /* time adjustment (ns) */ +-static long time_constant = 2; /* pll time constant */ +-long time_maxerror = NTP_PHASE_LIMIT; /* maximum error (us) */ +-long time_esterror = NTP_PHASE_LIMIT; /* estimated error (us) */ +-static s64 time_freq; /* frequency offset (scaled ns/s)*/ +-static long time_reftime; /* time at last adjustment (s) */ +-long time_adjust; +-static long ntp_tick_adj; + ++/* ++ * clock synchronization status ++ * ++ * (TIME_ERROR prevents overwriting the CMOS clock) ++ */ ++static int time_state = TIME_OK; ++ ++/* clock status bits: */ ++int time_status = STA_UNSYNC; ++ ++/* TAI offset (secs): */ ++static long time_tai; ++ ++/* time adjustment (nsecs): */ ++static s64 time_offset; ++ ++/* pll time constant: */ ++static long time_constant = 2; ++ ++/* maximum error (usecs): */ ++long time_maxerror = NTP_PHASE_LIMIT; ++ ++/* estimated error (usecs): */ ++long time_esterror = NTP_PHASE_LIMIT; ++ ++/* frequency offset (scaled nsecs/secs): */ ++static s64 time_freq; ++ ++/* time at last adjustment (secs): */ ++static long time_reftime; ++ ++long time_adjust; ++ ++/* constant (boot-param configurable) NTP tick adjustment (upscaled) */ ++static s64 ntp_tick_adj; ++ ++/* ++ * NTP methods: ++ */ ++ ++/* ++ * Update (tick_length, tick_length_base, tick_nsec), based ++ * on (tick_usec, ntp_tick_adj, time_freq): ++ */ + static void ntp_update_frequency(void) + { +- u64 second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) +- << NTP_SCALE_SHIFT; +- second_length += (s64)ntp_tick_adj << NTP_SCALE_SHIFT; +- second_length += time_freq; ++ u64 second_length; ++ u64 new_base; ++ ++ second_length = (u64)(tick_usec * NSEC_PER_USEC * USER_HZ) ++ << NTP_SCALE_SHIFT; + +- tick_length_base = second_length; ++ second_length += ntp_tick_adj; ++ second_length += time_freq; + +- tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT; +- tick_length_base = div_u64(tick_length_base, NTP_INTERVAL_FREQ); ++ tick_nsec = div_u64(second_length, HZ) >> NTP_SCALE_SHIFT; ++ new_base = div_u64(second_length, NTP_INTERVAL_FREQ); ++ ++ /* ++ * Don't wait for the next second_overflow, apply ++ * the change to the tick length immediately: ++ */ ++ tick_length += new_base - tick_length_base; ++ tick_length_base = new_base; ++} ++ ++static inline s64 
ntp_update_offset_fll(s64 offset64, long secs) ++{ ++ time_status &= ~STA_MODE; ++ ++ if (secs < MINSEC) ++ return 0; ++ ++ if (!(time_status & STA_FLL) && (secs <= MAXSEC)) ++ return 0; ++ ++ time_status |= STA_MODE; ++ ++ return div_s64(offset64 << (NTP_SCALE_SHIFT - SHIFT_FLL), secs); + } + + static void ntp_update_offset(long offset) + { +- long mtemp; + s64 freq_adj; ++ s64 offset64; ++ long secs; + + if (!(time_status & STA_PLL)) + return; +@@ -84,24 +142,23 @@ static void ntp_update_offset(long offse + * Select how the frequency is to be controlled + * and in which mode (PLL or FLL). + */ +- if (time_status & STA_FREQHOLD || time_reftime == 0) +- time_reftime = xtime.tv_sec; +- mtemp = xtime.tv_sec - time_reftime; ++ secs = xtime.tv_sec - time_reftime; ++ if (unlikely(time_status & STA_FREQHOLD)) ++ secs = 0; ++ + time_reftime = xtime.tv_sec; + +- freq_adj = (s64)offset * mtemp; +- freq_adj <<= NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant); +- time_status &= ~STA_MODE; +- if (mtemp >= MINSEC && (time_status & STA_FLL || mtemp > MAXSEC)) { +- freq_adj += div_s64((s64)offset << (NTP_SCALE_SHIFT - SHIFT_FLL), +- mtemp); +- time_status |= STA_MODE; +- } +- freq_adj += time_freq; +- freq_adj = min(freq_adj, MAXFREQ_SCALED); +- time_freq = max(freq_adj, -MAXFREQ_SCALED); ++ offset64 = offset; ++ freq_adj = (offset64 * secs) << ++ (NTP_SCALE_SHIFT - 2 * (SHIFT_PLL + 2 + time_constant)); + +- time_offset = div_s64((s64)offset << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ); ++ freq_adj += ntp_update_offset_fll(offset64, secs); ++ ++ freq_adj = min(freq_adj + time_freq, MAXFREQ_SCALED); ++ ++ time_freq = max(freq_adj, -MAXFREQ_SCALED); ++ ++ time_offset = div_s64(offset64 << NTP_SCALE_SHIFT, NTP_INTERVAL_FREQ); + } + + /** +@@ -111,15 +168,15 @@ static void ntp_update_offset(long offse + */ + void ntp_clear(void) + { +- time_adjust = 0; /* stop active adjtime() */ +- time_status |= STA_UNSYNC; +- time_maxerror = NTP_PHASE_LIMIT; +- time_esterror = NTP_PHASE_LIMIT; ++ time_adjust = 0; /* stop active adjtime() */ ++ time_status |= STA_UNSYNC; ++ time_maxerror = NTP_PHASE_LIMIT; ++ time_esterror = NTP_PHASE_LIMIT; + + ntp_update_frequency(); + +- tick_length = tick_length_base; +- time_offset = 0; ++ tick_length = tick_length_base; ++ time_offset = 0; + } + + /* +@@ -140,8 +197,8 @@ static enum hrtimer_restart ntp_leap_sec + xtime.tv_sec--; + wall_to_monotonic.tv_sec++; + time_state = TIME_OOP; +- printk(KERN_NOTICE "Clock: " +- "inserting leap second 23:59:60 UTC\n"); ++ printk(KERN_NOTICE ++ "Clock: inserting leap second 23:59:60 UTC\n"); + hrtimer_add_expires_ns(&leap_timer, NSEC_PER_SEC); + res = HRTIMER_RESTART; + break; +@@ -150,8 +207,8 @@ static enum hrtimer_restart ntp_leap_sec + time_tai--; + wall_to_monotonic.tv_sec--; + time_state = TIME_WAIT; +- printk(KERN_NOTICE "Clock: " +- "deleting leap second 23:59:59 UTC\n"); ++ printk(KERN_NOTICE ++ "Clock: deleting leap second 23:59:59 UTC\n"); + break; + case TIME_OOP: + time_tai++; +@@ -179,7 +236,7 @@ static enum hrtimer_restart ntp_leap_sec + */ + void second_overflow(void) + { +- s64 time_adj; ++ s64 delta; + + /* Bump the maxerror field */ + time_maxerror += MAXFREQ / NSEC_PER_USEC; +@@ -192,24 +249,30 @@ void second_overflow(void) + * Compute the phase adjustment for the next second. The offset is + * reduced by a fixed factor times the time constant. 
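ntp_update_offset_fll() contributes a frequency-lock term only when enough time has passed since the last adjustment. A small model of that decision and of the scaled division it returns; the constants mirror include/linux/timex.h of that era and are assumed here, not taken from this patch:

#include <stdio.h>
#include <stdint.h>

#define NTP_SCALE_SHIFT 32
#define SHIFT_FLL       2
#define MINSEC          256
#define MAXSEC          2048

static int64_t fll_term(int64_t offset_ns, long secs, int sta_fll)
{
    if (secs < MINSEC)
        return 0;                       /* too soon since the last update */
    if (!sta_fll && secs <= MAXSEC)
        return 0;                       /* PLL mode and interval short enough */

    /* scaled ns/s contribution folded into freq_adj by the caller */
    return (offset_ns << (NTP_SCALE_SHIFT - SHIFT_FLL)) / secs;
}

int main(void)
{
    /* 1 ms offset, 4096 s since the last adjustment, PLL mode */
    printf("FLL term: %lld\n", (long long)fll_term(1000000, 4096, 0));
    return 0;
}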
+ */ +- tick_length = tick_length_base; +- time_adj = shift_right(time_offset, SHIFT_PLL + time_constant); +- time_offset -= time_adj; +- tick_length += time_adj; +- +- if (unlikely(time_adjust)) { +- if (time_adjust > MAX_TICKADJ) { +- time_adjust -= MAX_TICKADJ; +- tick_length += MAX_TICKADJ_SCALED; +- } else if (time_adjust < -MAX_TICKADJ) { +- time_adjust += MAX_TICKADJ; +- tick_length -= MAX_TICKADJ_SCALED; +- } else { +- tick_length += (s64)(time_adjust * NSEC_PER_USEC / +- NTP_INTERVAL_FREQ) << NTP_SCALE_SHIFT; +- time_adjust = 0; +- } ++ tick_length = tick_length_base; ++ ++ delta = shift_right(time_offset, SHIFT_PLL + time_constant); ++ time_offset -= delta; ++ tick_length += delta; ++ ++ if (!time_adjust) ++ return; ++ ++ if (time_adjust > MAX_TICKADJ) { ++ time_adjust -= MAX_TICKADJ; ++ tick_length += MAX_TICKADJ_SCALED; ++ return; + } ++ ++ if (time_adjust < -MAX_TICKADJ) { ++ time_adjust += MAX_TICKADJ; ++ tick_length -= MAX_TICKADJ_SCALED; ++ return; ++ } ++ ++ tick_length += (s64)(time_adjust * NSEC_PER_USEC / NTP_INTERVAL_FREQ) ++ << NTP_SCALE_SHIFT; ++ time_adjust = 0; + } + + #ifdef CONFIG_GENERIC_CMOS_UPDATE +@@ -233,12 +296,13 @@ static void sync_cmos_clock(struct work_ + * This code is run on a timer. If the clock is set, that timer + * may not expire at the correct time. Thus, we adjust... + */ +- if (!ntp_synced()) ++ if (!ntp_synced()) { + /* + * Not synced, exit, do not restart a timer (if one is + * running, let it run out). + */ + return; ++ } + + getnstimeofday(&now); + if (abs(now.tv_nsec - (NSEC_PER_SEC / 2)) <= tick_nsec / 2) +@@ -270,7 +334,116 @@ static void notify_cmos_timer(void) + static inline void notify_cmos_timer(void) { } + #endif + +-/* adjtimex mainly allows reading (and writing, if superuser) of ++/* ++ * Start the leap seconds timer: ++ */ ++static inline void ntp_start_leap_timer(struct timespec *ts) ++{ ++ long now = ts->tv_sec; ++ ++ if (time_status & STA_INS) { ++ time_state = TIME_INS; ++ now += 86400 - now % 86400; ++ hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); ++ ++ return; ++ } ++ ++ if (time_status & STA_DEL) { ++ time_state = TIME_DEL; ++ now += 86400 - (now + 1) % 86400; ++ hrtimer_start(&leap_timer, ktime_set(now, 0), HRTIMER_MODE_ABS); ++ } ++} ++ ++/* ++ * Propagate a new txc->status value into the NTP state: ++ */ ++static inline void process_adj_status(struct timex *txc, struct timespec *ts) ++{ ++ if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) { ++ time_state = TIME_OK; ++ time_status = STA_UNSYNC; ++ } ++ ++ /* ++ * If we turn on PLL adjustments then reset the ++ * reference time to current time. 
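The straightened-out second_overflow() applies at most MAX_TICKADJ microseconds of an outstanding adjtime() request per second. A userspace model of that slewing, with the NTP_SCALE_SHIFT scaling left out for clarity:

#include <stdio.h>

#define MAX_TICKADJ 500                 /* usecs per second, as in the patch */

static long time_adjust;                /* usecs still to slew */

static long slew_one_second(void)
{
    long applied;

    if (time_adjust > MAX_TICKADJ)
        applied = MAX_TICKADJ;
    else if (time_adjust < -MAX_TICKADJ)
        applied = -MAX_TICKADJ;
    else
        applied = time_adjust;          /* final partial step */

    time_adjust -= applied;
    return applied;                     /* usecs added to this second's ticks */
}

int main(void)
{
    time_adjust = 1200;                 /* e.g. adjtime() asked for +1.2 ms */
    while (time_adjust)
        printf("apply %ld us this second\n", slew_one_second());
    return 0;
}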
++ */ ++ if (!(time_status & STA_PLL) && (txc->status & STA_PLL)) ++ time_reftime = xtime.tv_sec; ++ ++ /* only set allowed bits */ ++ time_status &= STA_RONLY; ++ time_status |= txc->status & ~STA_RONLY; ++ ++ switch (time_state) { ++ case TIME_OK: ++ ntp_start_leap_timer(ts); ++ break; ++ case TIME_INS: ++ case TIME_DEL: ++ time_state = TIME_OK; ++ ntp_start_leap_timer(ts); ++ case TIME_WAIT: ++ if (!(time_status & (STA_INS | STA_DEL))) ++ time_state = TIME_OK; ++ break; ++ case TIME_OOP: ++ hrtimer_restart(&leap_timer); ++ break; ++ } ++} ++/* ++ * Called with the xtime lock held, so we can access and modify ++ * all the global NTP state: ++ */ ++static inline void process_adjtimex_modes(struct timex *txc, struct timespec *ts) ++{ ++ if (txc->modes & ADJ_STATUS) ++ process_adj_status(txc, ts); ++ ++ if (txc->modes & ADJ_NANO) ++ time_status |= STA_NANO; ++ ++ if (txc->modes & ADJ_MICRO) ++ time_status &= ~STA_NANO; ++ ++ if (txc->modes & ADJ_FREQUENCY) { ++ time_freq = txc->freq * PPM_SCALE; ++ time_freq = min(time_freq, MAXFREQ_SCALED); ++ time_freq = max(time_freq, -MAXFREQ_SCALED); ++ } ++ ++ if (txc->modes & ADJ_MAXERROR) ++ time_maxerror = txc->maxerror; ++ ++ if (txc->modes & ADJ_ESTERROR) ++ time_esterror = txc->esterror; ++ ++ if (txc->modes & ADJ_TIMECONST) { ++ time_constant = txc->constant; ++ if (!(time_status & STA_NANO)) ++ time_constant += 4; ++ time_constant = min(time_constant, (long)MAXTC); ++ time_constant = max(time_constant, 0l); ++ } ++ ++ if (txc->modes & ADJ_TAI && txc->constant > 0) ++ time_tai = txc->constant; ++ ++ if (txc->modes & ADJ_OFFSET) ++ ntp_update_offset(txc->offset); ++ ++ if (txc->modes & ADJ_TICK) ++ tick_usec = txc->tick; ++ ++ if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET)) ++ ntp_update_frequency(); ++} ++ ++/* ++ * adjtimex mainly allows reading (and writing, if superuser) of + * kernel time-keeping variables. used by xntpd. + */ + int do_adjtimex(struct timex *txc) +@@ -291,11 +464,14 @@ int do_adjtimex(struct timex *txc) + if (txc->modes && !capable(CAP_SYS_TIME)) + return -EPERM; + +- /* if the quartz is off by more than 10% something is VERY wrong! */ ++ /* ++ * if the quartz is off by more than 10% then ++ * something is VERY wrong! 
++ */ + if (txc->modes & ADJ_TICK && + (txc->tick < 900000/USER_HZ || + txc->tick > 1100000/USER_HZ)) +- return -EINVAL; ++ return -EINVAL; + + if (txc->modes & ADJ_STATUS && time_state != TIME_OK) + hrtimer_cancel(&leap_timer); +@@ -305,7 +481,6 @@ int do_adjtimex(struct timex *txc) + + write_seqlock_irq(&xtime_lock); + +- /* If there are input parameters, then process them */ + if (txc->modes & ADJ_ADJTIME) { + long save_adjust = time_adjust; + +@@ -315,98 +490,24 @@ int do_adjtimex(struct timex *txc) + ntp_update_frequency(); + } + txc->offset = save_adjust; +- goto adj_done; +- } +- if (txc->modes) { +- long sec; +- +- if (txc->modes & ADJ_STATUS) { +- if ((time_status & STA_PLL) && +- !(txc->status & STA_PLL)) { +- time_state = TIME_OK; +- time_status = STA_UNSYNC; +- } +- /* only set allowed bits */ +- time_status &= STA_RONLY; +- time_status |= txc->status & ~STA_RONLY; +- +- switch (time_state) { +- case TIME_OK: +- start_timer: +- sec = ts.tv_sec; +- if (time_status & STA_INS) { +- time_state = TIME_INS; +- sec += 86400 - sec % 86400; +- hrtimer_start(&leap_timer, ktime_set(sec, 0), HRTIMER_MODE_ABS); +- } else if (time_status & STA_DEL) { +- time_state = TIME_DEL; +- sec += 86400 - (sec + 1) % 86400; +- hrtimer_start(&leap_timer, ktime_set(sec, 0), HRTIMER_MODE_ABS); +- } +- break; +- case TIME_INS: +- case TIME_DEL: +- time_state = TIME_OK; +- goto start_timer; +- break; +- case TIME_WAIT: +- if (!(time_status & (STA_INS | STA_DEL))) +- time_state = TIME_OK; +- break; +- case TIME_OOP: +- hrtimer_restart(&leap_timer); +- break; +- } +- } +- +- if (txc->modes & ADJ_NANO) +- time_status |= STA_NANO; +- if (txc->modes & ADJ_MICRO) +- time_status &= ~STA_NANO; +- +- if (txc->modes & ADJ_FREQUENCY) { +- time_freq = (s64)txc->freq * PPM_SCALE; +- time_freq = min(time_freq, MAXFREQ_SCALED); +- time_freq = max(time_freq, -MAXFREQ_SCALED); +- } +- +- if (txc->modes & ADJ_MAXERROR) +- time_maxerror = txc->maxerror; +- if (txc->modes & ADJ_ESTERROR) +- time_esterror = txc->esterror; +- +- if (txc->modes & ADJ_TIMECONST) { +- time_constant = txc->constant; +- if (!(time_status & STA_NANO)) +- time_constant += 4; +- time_constant = min(time_constant, (long)MAXTC); +- time_constant = max(time_constant, 0l); +- } +- +- if (txc->modes & ADJ_TAI && txc->constant > 0) +- time_tai = txc->constant; +- +- if (txc->modes & ADJ_OFFSET) +- ntp_update_offset(txc->offset); +- if (txc->modes & ADJ_TICK) +- tick_usec = txc->tick; ++ } else { + +- if (txc->modes & (ADJ_TICK|ADJ_FREQUENCY|ADJ_OFFSET)) +- ntp_update_frequency(); +- } ++ /* If there are input parameters, then process them: */ ++ if (txc->modes) ++ process_adjtimex_modes(txc, &ts); + +- txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, ++ txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ, + NTP_SCALE_SHIFT); +- if (!(time_status & STA_NANO)) +- txc->offset /= NSEC_PER_USEC; ++ if (!(time_status & STA_NANO)) ++ txc->offset /= NSEC_PER_USEC; ++ } + +-adj_done: + result = time_state; /* mostly `TIME_OK' */ + if (time_status & (STA_UNSYNC|STA_CLOCKERR)) + result = TIME_ERROR; + + txc->freq = shift_right((time_freq >> PPM_SCALE_INV_SHIFT) * +- (s64)PPM_SCALE_INV, NTP_SCALE_SHIFT); ++ PPM_SCALE_INV, NTP_SCALE_SHIFT); + txc->maxerror = time_maxerror; + txc->esterror = time_esterror; + txc->status = time_status; +@@ -425,6 +526,7 @@ adj_done: + txc->calcnt = 0; + txc->errcnt = 0; + txc->stbcnt = 0; ++ + write_sequnlock_irq(&xtime_lock); + + txc->time.tv_sec = ts.tv_sec; +@@ -440,6 +542,8 @@ adj_done: + static int __init 
ntp_tick_adj_setup(char *str) + { + ntp_tick_adj = simple_strtol(str, NULL, 0); ++ ntp_tick_adj <<= NTP_SCALE_SHIFT; ++ + return 1; + } + +Index: linux-2.6-tip/kernel/time/timekeeping.c +=================================================================== +--- linux-2.6-tip.orig/kernel/time/timekeeping.c ++++ linux-2.6-tip/kernel/time/timekeeping.c +@@ -22,9 +22,9 @@ + + /* + * This read-write spinlock protects us from races in SMP while +- * playing with xtime and avenrun. ++ * playing with xtime. + */ +-__cacheline_aligned_in_smp DEFINE_SEQLOCK(xtime_lock); ++__cacheline_aligned_in_smp DEFINE_RAW_SEQLOCK(xtime_lock); + + + /* +Index: linux-2.6-tip/kernel/timer.c +=================================================================== +--- linux-2.6-tip.orig/kernel/timer.c ++++ linux-2.6-tip/kernel/timer.c +@@ -34,6 +34,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -69,6 +70,7 @@ struct tvec_root { + struct tvec_base { + spinlock_t lock; + struct timer_list *running_timer; ++ wait_queue_head_t wait_for_running_timer; + unsigned long timer_jiffies; + struct tvec_root tv1; + struct tvec tv2; +@@ -316,9 +318,7 @@ EXPORT_SYMBOL_GPL(round_jiffies_up_relat + static inline void set_running_timer(struct tvec_base *base, + struct timer_list *timer) + { +-#ifdef CONFIG_SMP + base->running_timer = timer; +-#endif + } + + static void internal_add_timer(struct tvec_base *base, struct timer_list *timer) +@@ -491,14 +491,18 @@ static inline void debug_timer_free(stru + debug_object_free(timer, &timer_debug_descr); + } + +-static void __init_timer(struct timer_list *timer); +- +-void init_timer_on_stack(struct timer_list *timer) ++static void __init_timer(struct timer_list *timer, ++ const char *name, ++ struct lock_class_key *key); ++ ++void init_timer_on_stack_key(struct timer_list *timer, ++ const char *name, ++ struct lock_class_key *key) + { + debug_object_init_on_stack(timer, &timer_debug_descr); +- __init_timer(timer); ++ __init_timer(timer, name, key); + } +-EXPORT_SYMBOL_GPL(init_timer_on_stack); ++EXPORT_SYMBOL_GPL(init_timer_on_stack_key); + + void destroy_timer_on_stack(struct timer_list *timer) + { +@@ -512,7 +516,9 @@ static inline void debug_timer_activate( + static inline void debug_timer_deactivate(struct timer_list *timer) { } + #endif + +-static void __init_timer(struct timer_list *timer) ++static void __init_timer(struct timer_list *timer, ++ const char *name, ++ struct lock_class_key *key) + { + timer->entry.next = NULL; + timer->base = __raw_get_cpu_var(tvec_bases); +@@ -521,6 +527,7 @@ static void __init_timer(struct timer_li + timer->start_pid = -1; + memset(timer->start_comm, 0, TASK_COMM_LEN); + #endif ++ lockdep_init_map(&timer->lockdep_map, name, key, 0); + } + + /** +@@ -530,19 +537,23 @@ static void __init_timer(struct timer_li + * init_timer() must be done to a timer prior calling *any* of the + * other timer functions. 
+ */ +-void init_timer(struct timer_list *timer) ++void init_timer_key(struct timer_list *timer, ++ const char *name, ++ struct lock_class_key *key) + { + debug_timer_init(timer); +- __init_timer(timer); ++ __init_timer(timer, name, key); + } +-EXPORT_SYMBOL(init_timer); ++EXPORT_SYMBOL(init_timer_key); + +-void init_timer_deferrable(struct timer_list *timer) ++void init_timer_deferrable_key(struct timer_list *timer, ++ const char *name, ++ struct lock_class_key *key) + { +- init_timer(timer); ++ init_timer_key(timer, name, key); + timer_set_deferrable(timer); + } +-EXPORT_SYMBOL(init_timer_deferrable); ++EXPORT_SYMBOL(init_timer_deferrable_key); + + static inline void detach_timer(struct timer_list *timer, + int clear_pending) +@@ -589,11 +600,12 @@ static struct tvec_base *lock_timer_base + } + } + +-int __mod_timer(struct timer_list *timer, unsigned long expires) ++static inline int ++__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) + { + struct tvec_base *base, *new_base; + unsigned long flags; +- int ret = 0; ++ int cpu, ret = 0; + + timer_stats_timer_set_start_info(timer); + BUG_ON(!timer->function); +@@ -603,11 +615,15 @@ int __mod_timer(struct timer_list *timer + if (timer_pending(timer)) { + detach_timer(timer, 0); + ret = 1; ++ } else { ++ if (pending_only) ++ goto out_unlock; + } + + debug_timer_activate(timer); + +- new_base = __get_cpu_var(tvec_bases); ++ cpu = raw_smp_processor_id(); ++ new_base = per_cpu(tvec_bases, cpu); + + if (base != new_base) { + /* +@@ -629,42 +645,28 @@ int __mod_timer(struct timer_list *timer + + timer->expires = expires; + internal_add_timer(base, timer); ++ ++out_unlock: + spin_unlock_irqrestore(&base->lock, flags); + + return ret; + } + +-EXPORT_SYMBOL(__mod_timer); +- + /** +- * add_timer_on - start a timer on a particular CPU +- * @timer: the timer to be added +- * @cpu: the CPU to start it on ++ * mod_timer_pending - modify a pending timer's timeout ++ * @timer: the pending timer to be modified ++ * @expires: new timeout in jiffies + * +- * This is not very scalable on SMP. Double adds are not possible. ++ * mod_timer_pending() is the same for pending timers as mod_timer(), ++ * but will not re-activate and modify already deleted timers. ++ * ++ * It is useful for unserialized use of timers. + */ +-void add_timer_on(struct timer_list *timer, int cpu) ++int mod_timer_pending(struct timer_list *timer, unsigned long expires) + { +- struct tvec_base *base = per_cpu(tvec_bases, cpu); +- unsigned long flags; +- +- timer_stats_timer_set_start_info(timer); +- BUG_ON(timer_pending(timer) || !timer->function); +- spin_lock_irqsave(&base->lock, flags); +- timer_set_base(timer, base); +- debug_timer_activate(timer); +- internal_add_timer(base, timer); +- /* +- * Check whether the other CPU is idle and needs to be +- * triggered to reevaluate the timer wheel when nohz is +- * active. We are protected against the other CPU fiddling +- * with the timer by holding the timer base lock. This also +- * makes sure that a CPU on the way to idle can not evaluate +- * the timer wheel. 
+- */ +- wake_up_idle_cpu(cpu); +- spin_unlock_irqrestore(&base->lock, flags); ++ return __mod_timer(timer, expires, true); + } ++EXPORT_SYMBOL(mod_timer_pending); + + /** + * mod_timer - modify a timer's timeout +@@ -688,9 +690,6 @@ void add_timer_on(struct timer_list *tim + */ + int mod_timer(struct timer_list *timer, unsigned long expires) + { +- BUG_ON(!timer->function); +- +- timer_stats_timer_set_start_info(timer); + /* + * This is a common optimization triggered by the + * networking code - if the timer is re-modified +@@ -699,12 +698,74 @@ int mod_timer(struct timer_list *timer, + if (timer->expires == expires && timer_pending(timer)) + return 1; + +- return __mod_timer(timer, expires); ++ return __mod_timer(timer, expires, false); + } +- + EXPORT_SYMBOL(mod_timer); + + /** ++ * add_timer - start a timer ++ * @timer: the timer to be added ++ * ++ * The kernel will do a ->function(->data) callback from the ++ * timer interrupt at the ->expires point in the future. The ++ * current time is 'jiffies'. ++ * ++ * The timer's ->expires, ->function (and if the handler uses it, ->data) ++ * fields must be set prior calling this function. ++ * ++ * Timers with an ->expires field in the past will be executed in the next ++ * timer tick. ++ */ ++void add_timer(struct timer_list *timer) ++{ ++ BUG_ON(timer_pending(timer)); ++ mod_timer(timer, timer->expires); ++} ++EXPORT_SYMBOL(add_timer); ++ ++/** ++ * add_timer_on - start a timer on a particular CPU ++ * @timer: the timer to be added ++ * @cpu: the CPU to start it on ++ * ++ * This is not very scalable on SMP. Double adds are not possible. ++ */ ++void add_timer_on(struct timer_list *timer, int cpu) ++{ ++ struct tvec_base *base = per_cpu(tvec_bases, cpu); ++ unsigned long flags; ++ ++ timer_stats_timer_set_start_info(timer); ++ BUG_ON(timer_pending(timer) || !timer->function); ++ spin_lock_irqsave(&base->lock, flags); ++ timer_set_base(timer, base); ++ debug_timer_activate(timer); ++ internal_add_timer(base, timer); ++ /* ++ * Check whether the other CPU is idle and needs to be ++ * triggered to reevaluate the timer wheel when nohz is ++ * active. We are protected against the other CPU fiddling ++ * with the timer by holding the timer base lock. This also ++ * makes sure that a CPU on the way to idle can not evaluate ++ * the timer wheel. ++ */ ++ wake_up_idle_cpu(cpu); ++ spin_unlock_irqrestore(&base->lock, flags); ++} ++ ++/* ++ * Wait for a running timer ++ */ ++void wait_for_running_timer(struct timer_list *timer) ++{ ++ struct tvec_base *base = timer->base; ++ ++ if (base->running_timer == timer) ++ wait_event(base->wait_for_running_timer, ++ base->running_timer != timer); ++} ++ ++/** + * del_timer - deactive a timer. + * @timer: the timer to be deactivated + * +@@ -733,10 +794,36 @@ int del_timer(struct timer_list *timer) + + return ret; + } +- + EXPORT_SYMBOL(del_timer); + +-#ifdef CONFIG_SMP ++#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_SOFTIRQS) ++/* ++ * This function checks whether a timer is active and not running on any ++ * CPU. Upon successful (ret >= 0) exit the timer is not queued and the ++ * handler is not running on any CPU. ++ * ++ * It must not be called from interrupt contexts. 
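For reference, a minimal sketch of how a driver typically uses the timer API being reworked here (2.6-era unsigned long callback signature; the demo_* names are illustrative only):

#include <linux/kernel.h>
#include <linux/timer.h>
#include <linux/jiffies.h>

static struct timer_list demo_timer;

static void demo_timer_fn(unsigned long data)
{
	pr_info("demo timer fired, data=%lu\n", data);
}

static void demo_start(void)
{
	setup_timer(&demo_timer, demo_timer_fn, 0UL);
	mod_timer(&demo_timer, jiffies + HZ);	/* fire in ~1 second */
}

static void demo_stop(void)
{
	/*
	 * Waits for a running handler to finish; with this patch the wait
	 * sleeps in wait_for_running_timer() instead of spinning.
	 */
	del_timer_sync(&demo_timer);
}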
++ */ ++int timer_pending_sync(struct timer_list *timer) ++{ ++ struct tvec_base *base; ++ unsigned long flags; ++ int ret = -1; ++ ++ base = lock_timer_base(timer, &flags); ++ ++ if (base->running_timer == timer) ++ goto out; ++ ++ ret = 0; ++ if (timer_pending(timer)) ++ ret = 1; ++out: ++ spin_unlock_irqrestore(&base->lock, flags); ++ ++ return ret; ++} ++ + /** + * try_to_del_timer_sync - Try to deactivate a timer + * @timer: timer do del +@@ -767,7 +854,6 @@ out: + + return ret; + } +- + EXPORT_SYMBOL(try_to_del_timer_sync); + + /** +@@ -789,14 +875,22 @@ EXPORT_SYMBOL(try_to_del_timer_sync); + */ + int del_timer_sync(struct timer_list *timer) + { ++#ifdef CONFIG_LOCKDEP ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ lock_map_acquire(&timer->lockdep_map); ++ lock_map_release(&timer->lockdep_map); ++ local_irq_restore(flags); ++#endif ++ + for (;;) { + int ret = try_to_del_timer_sync(timer); + if (ret >= 0) + return ret; +- cpu_relax(); ++ wait_for_running_timer(timer); + } + } +- + EXPORT_SYMBOL(del_timer_sync); + #endif + +@@ -839,6 +933,20 @@ static inline void __run_timers(struct t + struct list_head *head = &work_list; + int index = base->timer_jiffies & TVR_MASK; + ++ if (softirq_need_resched()) { ++ spin_unlock_irq(&base->lock); ++ wake_up(&base->wait_for_running_timer); ++ cond_resched_softirq_context(); ++ cpu_relax(); ++ spin_lock_irq(&base->lock); ++ /* ++ * We can simply continue after preemption, nobody ++ * else can touch timer_jiffies so 'index' is still ++ * valid. Any new jiffy will be taken care of in ++ * subsequent loops: ++ */ ++ } ++ + /* + * Cascade timers: + */ +@@ -861,23 +969,48 @@ static inline void __run_timers(struct t + + set_running_timer(base, timer); + detach_timer(timer, 1); ++ + spin_unlock_irq(&base->lock); + { + int preempt_count = preempt_count(); ++ ++#ifdef CONFIG_LOCKDEP ++ /* ++ * It is permissible to free the timer from ++ * inside the function that is called from ++ * it, this we need to take into account for ++ * lockdep too. To avoid bogus "held lock ++ * freed" warnings as well as problems when ++ * looking into timer->lockdep_map, make a ++ * copy and use that here. ++ */ ++ struct lockdep_map lockdep_map = ++ timer->lockdep_map; ++#endif ++ /* ++ * Couple the lock chain with the lock chain at ++ * del_timer_sync() by acquiring the lock_map ++ * around the fn() call here and in ++ * del_timer_sync(). ++ */ ++ lock_map_acquire(&lockdep_map); ++ + fn(data); ++ ++ lock_map_release(&lockdep_map); ++ + if (preempt_count != preempt_count()) { +- printk(KERN_ERR "huh, entered %p " +- "with preempt_count %08x, exited" +- " with %08x?\n", +- fn, preempt_count, +- preempt_count()); +- BUG(); ++ print_symbol("BUG: unbalanced timer-handler preempt count in %s!\n", (unsigned long) fn); ++ printk("entered with %08x, exited with %08x.\n", preempt_count, preempt_count()); ++ preempt_count() = preempt_count; + } + } ++ set_running_timer(base, NULL); ++ cond_resched_softirq_context(); + spin_lock_irq(&base->lock); + } + } +- set_running_timer(base, NULL); ++ wake_up(&base->wait_for_running_timer); + spin_unlock_irq(&base->lock); + } + +@@ -1007,9 +1140,22 @@ unsigned long get_next_timer_interrupt(u + struct tvec_base *base = __get_cpu_var(tvec_bases); + unsigned long expires; + ++#ifdef CONFIG_PREEMPT_RT ++ /* ++ * On PREEMPT_RT we cannot sleep here. 
If the trylock does not ++ * succeed then we return the worst-case 'expires in 1 tick' ++ * value: ++ */ ++ if (spin_trylock(&base->lock)) { ++ expires = __next_timer_interrupt(base); ++ spin_unlock(&base->lock); ++ } else ++ expires = now + 1; ++#else + spin_lock(&base->lock); + expires = __next_timer_interrupt(base); + spin_unlock(&base->lock); ++#endif + + if (time_before_eq(expires, now)) + return now; +@@ -1029,62 +1175,21 @@ void update_process_times(int user_tick) + + /* Note: this timer irq context must be accounted for as well. */ + account_process_tick(p, user_tick); ++ scheduler_tick(); + run_local_timers(); + if (rcu_pending(cpu)) + rcu_check_callbacks(cpu, user_tick); +- printk_tick(); +- scheduler_tick(); + run_posix_cpu_timers(p); + } + + /* +- * Nr of active tasks - counted in fixed-point numbers +- */ +-static unsigned long count_active_tasks(void) +-{ +- return nr_active() * FIXED_1; +-} +- +-/* +- * Hmm.. Changed this, as the GNU make sources (load.c) seems to +- * imply that avenrun[] is the standard name for this kind of thing. +- * Nothing else seems to be standardized: the fractional size etc +- * all seem to differ on different machines. +- * +- * Requires xtime_lock to access. +- */ +-unsigned long avenrun[3]; +- +-EXPORT_SYMBOL(avenrun); +- +-/* +- * calc_load - given tick count, update the avenrun load estimates. +- * This is called while holding a write_lock on xtime_lock. +- */ +-static inline void calc_load(unsigned long ticks) +-{ +- unsigned long active_tasks; /* fixed-point */ +- static int count = LOAD_FREQ; +- +- count -= ticks; +- if (unlikely(count < 0)) { +- active_tasks = count_active_tasks(); +- do { +- CALC_LOAD(avenrun[0], EXP_1, active_tasks); +- CALC_LOAD(avenrun[1], EXP_5, active_tasks); +- CALC_LOAD(avenrun[2], EXP_15, active_tasks); +- count += LOAD_FREQ; +- } while (count < 0); +- } +-} +- +-/* + * This function runs timers and the timer-tq in bottom half context. + */ + static void run_timer_softirq(struct softirq_action *h) + { +- struct tvec_base *base = __get_cpu_var(tvec_bases); ++ struct tvec_base *base = per_cpu(tvec_bases, raw_smp_processor_id()); + ++ printk_tick(); + hrtimer_run_pending(); + + if (time_after_eq(jiffies, base->timer_jiffies)) +@@ -1102,16 +1207,6 @@ void run_local_timers(void) + } + + /* +- * Called by the timer interrupt. xtime_lock must already be taken +- * by the timer IRQ! +- */ +-static inline void update_times(unsigned long ticks) +-{ +- update_wall_time(); +- calc_load(ticks); +-} +- +-/* + * The 64-bit jiffies value is not atomic - you MUST NOT read it + * without sampling the sequence number in xtime_lock. + * jiffies is defined in the linker script... +@@ -1120,7 +1215,8 @@ static inline void update_times(unsigned + void do_timer(unsigned long ticks) + { + jiffies_64 += ticks; +- update_times(ticks); ++ update_wall_time(); ++ calc_global_load(); + } + + #ifdef __ARCH_WANT_SYS_ALARM +@@ -1268,7 +1364,7 @@ signed long __sched schedule_timeout(sig + expire = timeout + jiffies; + + setup_timer_on_stack(&timer, process_timeout, (unsigned long)current); +- __mod_timer(&timer, expire); ++ __mod_timer(&timer, expire, false); + schedule(); + del_singleshot_timer_sync(&timer); + +@@ -1321,37 +1417,17 @@ int do_sysinfo(struct sysinfo *info) + { + unsigned long mem_total, sav_total; + unsigned int mem_unit, bitcount; +- unsigned long seq; ++ struct timespec tp; + + memset(info, 0, sizeof(struct sysinfo)); + +- do { +- struct timespec tp; +- seq = read_seqbegin(&xtime_lock); +- +- /* +- * This is annoying. 
The below is the same thing +- * posix_get_clock_monotonic() does, but it wants to +- * take the lock which we want to cover the loads stuff +- * too. +- */ +- +- getnstimeofday(&tp); +- tp.tv_sec += wall_to_monotonic.tv_sec; +- tp.tv_nsec += wall_to_monotonic.tv_nsec; +- monotonic_to_bootbased(&tp); +- if (tp.tv_nsec - NSEC_PER_SEC >= 0) { +- tp.tv_nsec = tp.tv_nsec - NSEC_PER_SEC; +- tp.tv_sec++; +- } +- info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); ++ ktime_get_ts(&tp); ++ monotonic_to_bootbased(&tp); ++ info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); + +- info->loads[0] = avenrun[0] << (SI_LOAD_SHIFT - FSHIFT); +- info->loads[1] = avenrun[1] << (SI_LOAD_SHIFT - FSHIFT); +- info->loads[2] = avenrun[2] << (SI_LOAD_SHIFT - FSHIFT); ++ get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); + +- info->procs = nr_threads; +- } while (read_seqretry(&xtime_lock, seq)); ++ info->procs = nr_threads; + + si_meminfo(info); + si_swapinfo(info); +@@ -1454,6 +1530,7 @@ static int __cpuinit init_timers_cpu(int + } + + spin_lock_init(&base->lock); ++ init_waitqueue_head(&base->wait_for_running_timer); + + for (j = 0; j < TVN_SIZE; j++) { + INIT_LIST_HEAD(base->tv5.vec + j); +@@ -1485,6 +1562,7 @@ static void __cpuinit migrate_timers(int + { + struct tvec_base *old_base; + struct tvec_base *new_base; ++ unsigned long flags; + int i; + + BUG_ON(cpu_online(cpu)); +@@ -1494,8 +1572,11 @@ static void __cpuinit migrate_timers(int + * The caller is globally serialized and nobody else + * takes two locks at once, deadlock is not possible. + */ +- spin_lock_irq(&new_base->lock); +- spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING); ++ local_irq_save(flags); ++ while (!spin_trylock(&new_base->lock)) ++ cpu_relax(); ++ while (!spin_trylock(&old_base->lock)) ++ cpu_relax(); + + BUG_ON(old_base->running_timer); + +@@ -1509,7 +1590,9 @@ static void __cpuinit migrate_timers(int + } + + spin_unlock(&old_base->lock); +- spin_unlock_irq(&new_base->lock); ++ spin_unlock(&new_base->lock); ++ local_irq_restore(flags); ++ + put_cpu_var(tvec_bases); + } + #endif /* CONFIG_HOTPLUG_CPU */ +Index: linux-2.6-tip/kernel/trace/Kconfig +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/Kconfig ++++ linux-2.6-tip/kernel/trace/Kconfig +@@ -9,6 +9,9 @@ config USER_STACKTRACE_SUPPORT + config NOP_TRACER + bool + ++config HAVE_FTRACE_NMI_ENTER ++ bool ++ + config HAVE_FUNCTION_TRACER + bool + +@@ -31,12 +34,20 @@ config HAVE_FTRACE_MCOUNT_RECORD + config HAVE_HW_BRANCH_TRACER + bool + ++config HAVE_FTRACE_SYSCALLS ++ bool ++ + config TRACER_MAX_TRACE + bool + + config RING_BUFFER + bool + ++config FTRACE_NMI_ENTER ++ bool ++ depends on HAVE_FTRACE_NMI_ENTER ++ default y ++ + config TRACING + bool + select DEBUG_FS +@@ -44,13 +55,29 @@ config TRACING + select STACKTRACE if STACKTRACE_SUPPORT + select TRACEPOINTS + select NOP_TRACER ++ select BINARY_PRINTF ++ ++# ++# Minimum requirements an architecture has to meet for us to ++# be able to offer generic tracing facilities: ++# ++config TRACING_SUPPORT ++ bool ++ # PPC32 has no irqflags tracing support, but it can use most of the ++ # tracers anyway, they were tested to build and work. Note that new ++ # exceptions to this list aren't welcomed, better implement the ++ # irqflags tracing for your architecture. 
++ depends on TRACE_IRQFLAGS_SUPPORT || PPC32 ++ depends on STACKTRACE_SUPPORT ++ default y ++ ++if TRACING_SUPPORT + + menu "Tracers" + + config FUNCTION_TRACER + bool "Kernel Function Tracer" + depends on HAVE_FUNCTION_TRACER +- depends on DEBUG_KERNEL + select FRAME_POINTER + select KALLSYMS + select TRACING +@@ -83,7 +110,6 @@ config IRQSOFF_TRACER + default n + depends on TRACE_IRQFLAGS_SUPPORT + depends on GENERIC_TIME +- depends on DEBUG_KERNEL + select TRACE_IRQFLAGS + select TRACING + select TRACER_MAX_TRACE +@@ -106,7 +132,6 @@ config PREEMPT_TRACER + default n + depends on GENERIC_TIME + depends on PREEMPT +- depends on DEBUG_KERNEL + select TRACING + select TRACER_MAX_TRACE + help +@@ -127,13 +152,13 @@ config SYSPROF_TRACER + bool "Sysprof Tracer" + depends on X86 + select TRACING ++ select CONTEXT_SWITCH_TRACER + help + This tracer provides the trace needed by the 'Sysprof' userspace + tool. + + config SCHED_TRACER + bool "Scheduling Latency Tracer" +- depends on DEBUG_KERNEL + select TRACING + select CONTEXT_SWITCH_TRACER + select TRACER_MAX_TRACE +@@ -143,16 +168,30 @@ config SCHED_TRACER + + config CONTEXT_SWITCH_TRACER + bool "Trace process context switches" +- depends on DEBUG_KERNEL + select TRACING + select MARKERS + help + This tracer gets called from the context switch and records + all switching of tasks. + ++config EVENT_TRACER ++ bool "Trace various events in the kernel" ++ select TRACING ++ help ++ This tracer hooks to various trace points in the kernel ++ allowing the user to pick and choose which trace point they ++ want to trace. ++ ++config FTRACE_SYSCALLS ++ bool "Trace syscalls" ++ depends on HAVE_FTRACE_SYSCALLS ++ select TRACING ++ select KALLSYMS ++ help ++ Basic tracer to catch the syscall entry and exit events. ++ + config BOOT_TRACER + bool "Trace boot initcalls" +- depends on DEBUG_KERNEL + select TRACING + select CONTEXT_SWITCH_TRACER + help +@@ -165,13 +204,11 @@ config BOOT_TRACER + representation of the delays during initcalls - but the raw + /debug/tracing/trace text output is readable too. + +- ( Note that tracing self tests can't be enabled if this tracer is +- selected, because the self-tests are an initcall as well and that +- would invalidate the boot trace. ) ++ You must pass in ftrace=initcall to the kernel command line ++ to enable this on bootup. + + config TRACE_BRANCH_PROFILING + bool "Trace likely/unlikely profiler" +- depends on DEBUG_KERNEL + select TRACING + help + This tracer profiles all the the likely and unlikely macros +@@ -224,7 +261,6 @@ config BRANCH_TRACER + + config POWER_TRACER + bool "Trace power consumption behavior" +- depends on DEBUG_KERNEL + depends on X86 + select TRACING + help +@@ -236,7 +272,6 @@ config POWER_TRACER + config STACK_TRACER + bool "Trace max stack" + depends on HAVE_FUNCTION_TRACER +- depends on DEBUG_KERNEL + select FUNCTION_TRACER + select STACKTRACE + select KALLSYMS +@@ -260,17 +295,73 @@ config STACK_TRACER + + config HW_BRANCH_TRACER + depends on HAVE_HW_BRANCH_TRACER ++ depends on !PREEMPT_RT + bool "Trace hw branches" + select TRACING + help + This tracer records all branches on the system in a circular + buffer giving access to the last N branches for each cpu. + ++config KMEMTRACE ++ bool "Trace SLAB allocations" ++ select TRACING ++ help ++ kmemtrace provides tracing for slab allocator functions, such as ++ kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. 
Collected ++ data is then fed to the userspace application in order to analyse ++ allocation hotspots, internal fragmentation and so on, making it ++ possible to see how well an allocator performs, as well as debug ++ and profile kernel code. ++ ++ This requires an userspace application to use. See ++ Documentation/vm/kmemtrace.txt for more information. ++ ++ Saying Y will make the kernel somewhat larger and slower. However, ++ if you disable kmemtrace at run-time or boot-time, the performance ++ impact is minimal (depending on the arch the kernel is built for). ++ ++ If unsure, say N. ++ ++config WORKQUEUE_TRACER ++ bool "Trace workqueues" if !PREEMPT_RT ++ select TRACING ++ help ++ The workqueue tracer provides some statistical informations ++ about each cpu workqueue thread such as the number of the ++ works inserted and executed since their creation. It can help ++ to evaluate the amount of work each of them have to perform. ++ For example it can help a developer to decide whether he should ++ choose a per cpu workqueue instead of a singlethreaded one. ++ ++config BLK_DEV_IO_TRACE ++ bool "Support for tracing block io actions" ++ depends on SYSFS ++ depends on BLOCK ++ select RELAY ++ select DEBUG_FS ++ select TRACEPOINTS ++ select TRACING ++ select STACKTRACE ++ help ++ Say Y here if you want to be able to trace the block layer actions ++ on a given queue. Tracing allows you to see any traffic happening ++ on a block device queue. For more information (and the userspace ++ support tools needed), fetch the blktrace tools from: ++ ++ git://git.kernel.dk/blktrace.git ++ ++ Tracing also is possible using the ftrace interface, e.g.: ++ ++ echo 1 > /sys/block/sda/sda1/trace/enable ++ echo blk > /sys/kernel/debug/tracing/current_tracer ++ cat /sys/kernel/debug/tracing/trace_pipe ++ ++ If unsure, say N. ++ + config DYNAMIC_FTRACE + bool "enable/disable ftrace tracepoints dynamically" + depends on FUNCTION_TRACER + depends on HAVE_DYNAMIC_FTRACE +- depends on DEBUG_KERNEL + default y + help + This option will modify all the calls to ftrace dynamically +@@ -286,6 +377,20 @@ config DYNAMIC_FTRACE + were made. If so, it runs stop_machine (stops all CPUS) + and modifies the code to jump over the call to ftrace. + ++config FUNCTION_PROFILER ++ bool "Kernel function profiler" ++ depends on FUNCTION_TRACER ++ default n ++ help ++ This option enables the kernel function profiler. A file is created ++ in debugfs called function_profile_enabled which defaults to zero. ++ When a 1 is echoed into this file profiling begins, and when a ++ zero is entered, profiling stops. A file in the trace_stats ++ directory called functions, that show the list of functions that ++ have been hit and their counters. ++ ++ If in doubt, say N ++ + config FTRACE_MCOUNT_RECORD + def_bool y + depends on DYNAMIC_FTRACE +@@ -296,7 +401,7 @@ config FTRACE_SELFTEST + + config FTRACE_STARTUP_TEST + bool "Perform a startup test on ftrace" +- depends on TRACING && DEBUG_KERNEL && !BOOT_TRACER ++ depends on TRACING + select FTRACE_SELFTEST + help + This option performs a series of startup tests on ftrace. On bootup +@@ -304,9 +409,23 @@ config FTRACE_STARTUP_TEST + functioning properly. It will do tests on all the configured + tracers of ftrace. + ++config INTERRUPT_OFF_HIST ++ bool "Interrupts off critical timings histogram" ++ depends on IRQSOFF_TRACER ++ help ++ This option uses the infrastructure of the critical ++ irqs off timings to create a histogram of latencies. 
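These latency-histogram options (this one and PREEMPT_OFF_HIST just below) conceptually account each measured latency into a per-bucket counter; a minimal sketch of the idea (not the trace_hist.c implementation):

#define HIST_MAX_US	10240

static unsigned long latency_hist[HIST_MAX_US + 1];

static void hist_account(unsigned long latency_us)
{
	if (latency_us > HIST_MAX_US)
		latency_us = HIST_MAX_US;	/* overflow bucket */
	latency_hist[latency_us]++;
}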
++ ++config PREEMPT_OFF_HIST ++ bool "Preempt off critical timings histogram" ++ depends on PREEMPT_TRACER ++ help ++ This option uses the infrastructure of the critical ++ preemption off timings to create a histogram of latencies. ++ + config MMIOTRACE + bool "Memory mapped IO tracing" +- depends on HAVE_MMIOTRACE_SUPPORT && DEBUG_KERNEL && PCI ++ depends on HAVE_MMIOTRACE_SUPPORT && PCI + select TRACING + help + Mmiotrace traces Memory Mapped I/O access and is meant for +@@ -328,3 +447,6 @@ config MMIOTRACE_TEST + Say N, unless you absolutely know what you are doing. + + endmenu ++ ++endif # TRACING_SUPPORT ++ +Index: linux-2.6-tip/kernel/trace/Makefile +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/Makefile ++++ linux-2.6-tip/kernel/trace/Makefile +@@ -19,6 +19,10 @@ obj-$(CONFIG_FUNCTION_TRACER) += libftra + obj-$(CONFIG_RING_BUFFER) += ring_buffer.o + + obj-$(CONFIG_TRACING) += trace.o ++obj-$(CONFIG_TRACING) += trace_clock.o ++obj-$(CONFIG_TRACING) += trace_output.o ++obj-$(CONFIG_TRACING) += trace_stat.o ++obj-$(CONFIG_TRACING) += trace_printk.o + obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o + obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o + obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o +@@ -33,5 +37,17 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += t + obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o + obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o + obj-$(CONFIG_POWER_TRACER) += trace_power.o ++obj-$(CONFIG_KMEMTRACE) += kmemtrace.o ++obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o ++obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o ++obj-$(CONFIG_EVENT_TRACER) += trace_events.o ++obj-$(CONFIG_EVENT_TRACER) += events.o ++obj-$(CONFIG_EVENT_TRACER) += trace_export.o ++obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o ++obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o ++obj-$(CONFIG_EVENT_TRACER) += trace_events_filter.o ++ ++obj-$(CONFIG_INTERRUPT_OFF_HIST) += trace_hist.o ++obj-$(CONFIG_PREEMPT_OFF_HIST) += trace_hist.o + + libftrace-y := ftrace.o +Index: linux-2.6-tip/kernel/trace/blktrace.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/blktrace.c +@@ -0,0 +1,1515 @@ ++/* ++ * Copyright (C) 2006 Jens Axboe ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ * ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "trace_output.h" ++ ++static unsigned int blktrace_seq __read_mostly = 1; ++ ++static struct trace_array *blk_tr; ++static bool blk_tracer_enabled __read_mostly; ++ ++/* Select an alternative, minimalistic output than the original one */ ++#define TRACE_BLK_OPT_CLASSIC 0x1 ++ ++static struct tracer_opt blk_tracer_opts[] = { ++ /* Default disable the minimalistic output */ ++ { TRACER_OPT(blk_classic, TRACE_BLK_OPT_CLASSIC) }, ++ { } ++}; ++ ++static struct tracer_flags blk_tracer_flags = { ++ .val = 0, ++ .opts = blk_tracer_opts, ++}; ++ ++/* Global reference count of probes */ ++static atomic_t blk_probes_ref = ATOMIC_INIT(0); ++ ++static void blk_register_tracepoints(void); ++static void blk_unregister_tracepoints(void); ++ ++/* ++ * Send out a notify message. ++ */ ++static void trace_note(struct blk_trace *bt, pid_t pid, int action, ++ const void *data, size_t len) ++{ ++ struct blk_io_trace *t; ++ ++ if (!bt->rchan) ++ return; ++ ++ t = relay_reserve(bt->rchan, sizeof(*t) + len); ++ if (t) { ++ const int cpu = smp_processor_id(); ++ ++ t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; ++ t->time = ktime_to_ns(ktime_get()); ++ t->device = bt->dev; ++ t->action = action; ++ t->pid = pid; ++ t->cpu = cpu; ++ t->pdu_len = len; ++ memcpy((void *) t + sizeof(*t), data, len); ++ } ++} ++ ++/* ++ * Send out a notify for this process, if we haven't done so since a trace ++ * started ++ */ ++static void trace_note_tsk(struct blk_trace *bt, struct task_struct *tsk) ++{ ++ tsk->btrace_seq = blktrace_seq; ++ trace_note(bt, tsk->pid, BLK_TN_PROCESS, tsk->comm, sizeof(tsk->comm)); ++} ++ ++static void trace_note_time(struct blk_trace *bt) ++{ ++ struct timespec now; ++ unsigned long flags; ++ u32 words[2]; ++ ++ getnstimeofday(&now); ++ words[0] = now.tv_sec; ++ words[1] = now.tv_nsec; ++ ++ local_irq_save(flags); ++ trace_note(bt, 0, BLK_TN_TIMESTAMP, words, sizeof(words)); ++ local_irq_restore(flags); ++} ++ ++void __trace_note_message(struct blk_trace *bt, const char *fmt, ...) 
++{ ++ int n; ++ va_list args; ++ unsigned long flags; ++ char *buf; ++ ++ if (blk_tracer_enabled) { ++ va_start(args, fmt); ++ ftrace_vprintk(fmt, args); ++ va_end(args); ++ return; ++ } ++ ++ if (!bt->msg_data) ++ return; ++ ++ local_irq_save(flags); ++ buf = per_cpu_ptr(bt->msg_data, smp_processor_id()); ++ va_start(args, fmt); ++ n = vscnprintf(buf, BLK_TN_MAX_MSG, fmt, args); ++ va_end(args); ++ ++ trace_note(bt, 0, BLK_TN_MESSAGE, buf, n); ++ local_irq_restore(flags); ++} ++EXPORT_SYMBOL_GPL(__trace_note_message); ++ ++static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, ++ pid_t pid) ++{ ++ if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) ++ return 1; ++ if (sector < bt->start_lba || sector > bt->end_lba) ++ return 1; ++ if (bt->pid && pid != bt->pid) ++ return 1; ++ ++ return 0; ++} ++ ++/* ++ * Data direction bit lookup ++ */ ++static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), ++ BLK_TC_ACT(BLK_TC_WRITE) }; ++ ++/* The ilog2() calls fall out because they're constant */ ++#define MASK_TC_BIT(rw, __name) ((rw & (1 << BIO_RW_ ## __name)) << \ ++ (ilog2(BLK_TC_ ## __name) + BLK_TC_SHIFT - BIO_RW_ ## __name)) ++ ++/* ++ * The worker for the various blk_add_trace*() types. Fills out a ++ * blk_io_trace structure and places it in a per-cpu subbuffer. ++ */ ++static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, ++ int rw, u32 what, int error, int pdu_len, void *pdu_data) ++{ ++ struct task_struct *tsk = current; ++ struct ring_buffer_event *event = NULL; ++ struct blk_io_trace *t; ++ unsigned long flags = 0; ++ unsigned long *sequence; ++ pid_t pid; ++ int cpu, pc = 0; ++ ++ if (unlikely(bt->trace_state != Blktrace_running && ++ !blk_tracer_enabled)) ++ return; ++ ++ what |= ddir_act[rw & WRITE]; ++ what |= MASK_TC_BIT(rw, BARRIER); ++ what |= MASK_TC_BIT(rw, SYNCIO); ++ what |= MASK_TC_BIT(rw, AHEAD); ++ what |= MASK_TC_BIT(rw, META); ++ what |= MASK_TC_BIT(rw, DISCARD); ++ ++ pid = tsk->pid; ++ if (unlikely(act_log_check(bt, what, sector, pid))) ++ return; ++ cpu = raw_smp_processor_id(); ++ ++ if (blk_tracer_enabled) { ++ tracing_record_cmdline(current); ++ ++ pc = preempt_count(); ++ event = trace_buffer_lock_reserve(blk_tr, TRACE_BLK, ++ sizeof(*t) + pdu_len, ++ 0, pc); ++ if (!event) ++ return; ++ t = ring_buffer_event_data(event); ++ goto record_it; ++ } ++ ++ /* ++ * A word about the locking here - we disable interrupts to reserve ++ * some space in the relay per-cpu buffer, to prevent an irq ++ * from coming in and stepping on our toes. ++ */ ++ local_irq_save(flags); ++ ++ if (unlikely(tsk->btrace_seq != blktrace_seq)) ++ trace_note_tsk(bt, tsk); ++ ++ t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len); ++ if (t) { ++ sequence = per_cpu_ptr(bt->sequence, cpu); ++ ++ t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION; ++ t->sequence = ++(*sequence); ++ t->time = ktime_to_ns(ktime_get()); ++record_it: ++ /* ++ * These two are not needed in ftrace as they are in the ++ * generic trace_entry, filled by tracing_generic_entry_update, ++ * but for the trace_event->bin() synthesizer benefit we do it ++ * here too. 
++ */ ++ t->cpu = cpu; ++ t->pid = pid; ++ ++ t->sector = sector; ++ t->bytes = bytes; ++ t->action = what; ++ t->device = bt->dev; ++ t->error = error; ++ t->pdu_len = pdu_len; ++ ++ if (pdu_len) ++ memcpy((void *) t + sizeof(*t), pdu_data, pdu_len); ++ ++ if (blk_tracer_enabled) { ++ trace_buffer_unlock_commit(blk_tr, event, 0, pc); ++ return; ++ } ++ } ++ ++ local_irq_restore(flags); ++} ++ ++static struct dentry *blk_tree_root; ++static DEFINE_MUTEX(blk_tree_mutex); ++ ++static void blk_trace_cleanup(struct blk_trace *bt) ++{ ++ debugfs_remove(bt->msg_file); ++ debugfs_remove(bt->dropped_file); ++ relay_close(bt->rchan); ++ free_percpu(bt->sequence); ++ free_percpu(bt->msg_data); ++ kfree(bt); ++ if (atomic_dec_and_test(&blk_probes_ref)) ++ blk_unregister_tracepoints(); ++} ++ ++int blk_trace_remove(struct request_queue *q) ++{ ++ struct blk_trace *bt; ++ ++ bt = xchg(&q->blk_trace, NULL); ++ if (!bt) ++ return -EINVAL; ++ ++ if (bt->trace_state != Blktrace_running) ++ blk_trace_cleanup(bt); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(blk_trace_remove); ++ ++static int blk_dropped_open(struct inode *inode, struct file *filp) ++{ ++ filp->private_data = inode->i_private; ++ ++ return 0; ++} ++ ++static ssize_t blk_dropped_read(struct file *filp, char __user *buffer, ++ size_t count, loff_t *ppos) ++{ ++ struct blk_trace *bt = filp->private_data; ++ char buf[16]; ++ ++ snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped)); ++ ++ return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf)); ++} ++ ++static const struct file_operations blk_dropped_fops = { ++ .owner = THIS_MODULE, ++ .open = blk_dropped_open, ++ .read = blk_dropped_read, ++}; ++ ++static int blk_msg_open(struct inode *inode, struct file *filp) ++{ ++ filp->private_data = inode->i_private; ++ ++ return 0; ++} ++ ++static ssize_t blk_msg_write(struct file *filp, const char __user *buffer, ++ size_t count, loff_t *ppos) ++{ ++ char *msg; ++ struct blk_trace *bt; ++ ++ if (count > BLK_TN_MAX_MSG) ++ return -EINVAL; ++ ++ msg = kmalloc(count, GFP_KERNEL); ++ if (msg == NULL) ++ return -ENOMEM; ++ ++ if (copy_from_user(msg, buffer, count)) { ++ kfree(msg); ++ return -EFAULT; ++ } ++ ++ bt = filp->private_data; ++ __trace_note_message(bt, "%s", msg); ++ kfree(msg); ++ ++ return count; ++} ++ ++static const struct file_operations blk_msg_fops = { ++ .owner = THIS_MODULE, ++ .open = blk_msg_open, ++ .write = blk_msg_write, ++}; ++ ++/* ++ * Keep track of how many times we encountered a full subbuffer, to aid ++ * the user space app in telling how many lost events there were. ++ */ ++static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, ++ void *prev_subbuf, size_t prev_padding) ++{ ++ struct blk_trace *bt; ++ ++ if (!relay_buf_full(buf)) ++ return 1; ++ ++ bt = buf->chan->private_data; ++ atomic_inc(&bt->dropped); ++ return 0; ++} ++ ++static int blk_remove_buf_file_callback(struct dentry *dentry) ++{ ++ struct dentry *parent = dentry->d_parent; ++ debugfs_remove(dentry); ++ ++ /* ++ * this will fail for all but the last file, but that is ok. what we ++ * care about is the top level buts->name directory going away, when ++ * the last trace file is gone. Then we don't have to rmdir() that ++ * manually on trace stop, so it nicely solves the issue with ++ * force killing of running traces. 
++ */ ++ ++ debugfs_remove(parent); ++ return 0; ++} ++ ++static struct dentry *blk_create_buf_file_callback(const char *filename, ++ struct dentry *parent, ++ int mode, ++ struct rchan_buf *buf, ++ int *is_global) ++{ ++ return debugfs_create_file(filename, mode, parent, buf, ++ &relay_file_operations); ++} ++ ++static struct rchan_callbacks blk_relay_callbacks = { ++ .subbuf_start = blk_subbuf_start_callback, ++ .create_buf_file = blk_create_buf_file_callback, ++ .remove_buf_file = blk_remove_buf_file_callback, ++}; ++ ++/* ++ * Setup everything required to start tracing ++ */ ++int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, ++ struct blk_user_trace_setup *buts) ++{ ++ struct blk_trace *old_bt, *bt = NULL; ++ struct dentry *dir = NULL; ++ int ret, i; ++ ++ if (!buts->buf_size || !buts->buf_nr) ++ return -EINVAL; ++ ++ strncpy(buts->name, name, BLKTRACE_BDEV_SIZE); ++ buts->name[BLKTRACE_BDEV_SIZE - 1] = '\0'; ++ ++ /* ++ * some device names have larger paths - convert the slashes ++ * to underscores for this to work as expected ++ */ ++ for (i = 0; i < strlen(buts->name); i++) ++ if (buts->name[i] == '/') ++ buts->name[i] = '_'; ++ ++ ret = -ENOMEM; ++ bt = kzalloc(sizeof(*bt), GFP_KERNEL); ++ if (!bt) ++ goto err; ++ ++ bt->sequence = alloc_percpu(unsigned long); ++ if (!bt->sequence) ++ goto err; ++ ++ bt->msg_data = __alloc_percpu(BLK_TN_MAX_MSG, __alignof__(char)); ++ if (!bt->msg_data) ++ goto err; ++ ++ ret = -ENOENT; ++ ++ mutex_lock(&blk_tree_mutex); ++ if (!blk_tree_root) { ++ blk_tree_root = debugfs_create_dir("block", NULL); ++ if (!blk_tree_root) { ++ mutex_unlock(&blk_tree_mutex); ++ goto err; ++ } ++ } ++ mutex_unlock(&blk_tree_mutex); ++ ++ dir = debugfs_create_dir(buts->name, blk_tree_root); ++ ++ if (!dir) ++ goto err; ++ ++ bt->dir = dir; ++ bt->dev = dev; ++ atomic_set(&bt->dropped, 0); ++ ++ ret = -EIO; ++ bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, ++ &blk_dropped_fops); ++ if (!bt->dropped_file) ++ goto err; ++ ++ bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops); ++ if (!bt->msg_file) ++ goto err; ++ ++ bt->rchan = relay_open("trace", dir, buts->buf_size, ++ buts->buf_nr, &blk_relay_callbacks, bt); ++ if (!bt->rchan) ++ goto err; ++ ++ bt->act_mask = buts->act_mask; ++ if (!bt->act_mask) ++ bt->act_mask = (u16) -1; ++ ++ bt->start_lba = buts->start_lba; ++ bt->end_lba = buts->end_lba; ++ if (!bt->end_lba) ++ bt->end_lba = -1ULL; ++ ++ bt->pid = buts->pid; ++ bt->trace_state = Blktrace_setup; ++ ++ ret = -EBUSY; ++ old_bt = xchg(&q->blk_trace, bt); ++ if (old_bt) { ++ (void) xchg(&q->blk_trace, old_bt); ++ goto err; ++ } ++ ++ if (atomic_add_return(1, &blk_probes_ref) == 1) ++ blk_register_tracepoints(); ++ ++ return 0; ++err: ++ if (bt) { ++ if (bt->msg_file) ++ debugfs_remove(bt->msg_file); ++ if (bt->dropped_file) ++ debugfs_remove(bt->dropped_file); ++ free_percpu(bt->sequence); ++ free_percpu(bt->msg_data); ++ if (bt->rchan) ++ relay_close(bt->rchan); ++ kfree(bt); ++ } ++ return ret; ++} ++ ++int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, ++ char __user *arg) ++{ ++ struct blk_user_trace_setup buts; ++ int ret; ++ ++ ret = copy_from_user(&buts, arg, sizeof(buts)); ++ if (ret) ++ return -EFAULT; ++ ++ ret = do_blk_trace_setup(q, name, dev, &buts); ++ if (ret) ++ return ret; ++ ++ if (copy_to_user(arg, &buts, sizeof(buts))) ++ return -EFAULT; ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(blk_trace_setup); ++ ++int blk_trace_startstop(struct request_queue *q, int start) ++{ ++ int 
ret; ++ struct blk_trace *bt = q->blk_trace; ++ ++ if (bt == NULL) ++ return -EINVAL; ++ ++ /* ++ * For starting a trace, we can transition from a setup or stopped ++ * trace. For stopping a trace, the state must be running ++ */ ++ ret = -EINVAL; ++ if (start) { ++ if (bt->trace_state == Blktrace_setup || ++ bt->trace_state == Blktrace_stopped) { ++ blktrace_seq++; ++ smp_mb(); ++ bt->trace_state = Blktrace_running; ++ ++ trace_note_time(bt); ++ ret = 0; ++ } ++ } else { ++ if (bt->trace_state == Blktrace_running) { ++ bt->trace_state = Blktrace_stopped; ++ relay_flush(bt->rchan); ++ ret = 0; ++ } ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL_GPL(blk_trace_startstop); ++ ++/** ++ * blk_trace_ioctl: - handle the ioctls associated with tracing ++ * @bdev: the block device ++ * @cmd: the ioctl cmd ++ * @arg: the argument data, if any ++ * ++ **/ ++int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg) ++{ ++ struct request_queue *q; ++ int ret, start = 0; ++ char b[BDEVNAME_SIZE]; ++ ++ q = bdev_get_queue(bdev); ++ if (!q) ++ return -ENXIO; ++ ++ mutex_lock(&bdev->bd_mutex); ++ ++ switch (cmd) { ++ case BLKTRACESETUP: ++ bdevname(bdev, b); ++ ret = blk_trace_setup(q, b, bdev->bd_dev, arg); ++ break; ++ case BLKTRACESTART: ++ start = 1; ++ case BLKTRACESTOP: ++ ret = blk_trace_startstop(q, start); ++ break; ++ case BLKTRACETEARDOWN: ++ ret = blk_trace_remove(q); ++ break; ++ default: ++ ret = -ENOTTY; ++ break; ++ } ++ ++ mutex_unlock(&bdev->bd_mutex); ++ return ret; ++} ++ ++/** ++ * blk_trace_shutdown: - stop and cleanup trace structures ++ * @q: the request queue associated with the device ++ * ++ **/ ++void blk_trace_shutdown(struct request_queue *q) ++{ ++ if (q->blk_trace) { ++ blk_trace_startstop(q, 0); ++ blk_trace_remove(q); ++ } ++} ++ ++/* ++ * blktrace probes ++ */ ++ ++/** ++ * blk_add_trace_rq - Add a trace for a request oriented action ++ * @q: queue the io is for ++ * @rq: the source request ++ * @what: the action ++ * ++ * Description: ++ * Records an action against a request. Will log the bio offset + size. 
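For reference, a user-space sketch of driving the ioctl interface handled by blk_trace_ioctl() above (this is roughly what blktrace(8) does; start_trace() is illustrative and error handling is trimmed):

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/blktrace_api.h>

static int start_trace(const char *dev)
{
	struct blk_user_trace_setup buts;
	int fd = open(dev, O_RDONLY);

	if (fd < 0)
		return -1;

	memset(&buts, 0, sizeof(buts));
	buts.buf_size = 512 * 1024;	/* relay sub-buffer size */
	buts.buf_nr   = 4;		/* number of sub-buffers */
	/* act_mask/start_lba/end_lba/pid left 0: defaults, trace everything */

	if (ioctl(fd, BLKTRACESETUP, &buts) < 0 ||
	    ioctl(fd, BLKTRACESTART) < 0) {
		close(fd);
		return -1;
	}
	return fd;	/* later: BLKTRACESTOP, BLKTRACETEARDOWN, close(fd) */
}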
++ * ++ **/ ++static void blk_add_trace_rq(struct request_queue *q, struct request *rq, ++ u32 what) ++{ ++ struct blk_trace *bt = q->blk_trace; ++ int rw = rq->cmd_flags & 0x03; ++ ++ if (likely(!bt)) ++ return; ++ ++ if (blk_discard_rq(rq)) ++ rw |= (1 << BIO_RW_DISCARD); ++ ++ if (blk_pc_request(rq)) { ++ what |= BLK_TC_ACT(BLK_TC_PC); ++ __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, ++ sizeof(rq->cmd), rq->cmd); ++ } else { ++ what |= BLK_TC_ACT(BLK_TC_FS); ++ __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, ++ rw, what, rq->errors, 0, NULL); ++ } ++} ++ ++static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq) ++{ ++ blk_add_trace_rq(q, rq, BLK_TA_ABORT); ++} ++ ++static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq) ++{ ++ blk_add_trace_rq(q, rq, BLK_TA_INSERT); ++} ++ ++static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq) ++{ ++ blk_add_trace_rq(q, rq, BLK_TA_ISSUE); ++} ++ ++static void blk_add_trace_rq_requeue(struct request_queue *q, ++ struct request *rq) ++{ ++ blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); ++} ++ ++static void blk_add_trace_rq_complete(struct request_queue *q, ++ struct request *rq) ++{ ++ blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); ++} ++ ++/** ++ * blk_add_trace_bio - Add a trace for a bio oriented action ++ * @q: queue the io is for ++ * @bio: the source bio ++ * @what: the action ++ * ++ * Description: ++ * Records an action against a bio. Will log the bio offset + size. ++ * ++ **/ ++static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, ++ u32 what) ++{ ++ struct blk_trace *bt = q->blk_trace; ++ ++ if (likely(!bt)) ++ return; ++ ++ __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, ++ !bio_flagged(bio, BIO_UPTODATE), 0, NULL); ++} ++ ++static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio) ++{ ++ blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); ++} ++ ++static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio) ++{ ++ blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); ++} ++ ++static void blk_add_trace_bio_backmerge(struct request_queue *q, ++ struct bio *bio) ++{ ++ blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); ++} ++ ++static void blk_add_trace_bio_frontmerge(struct request_queue *q, ++ struct bio *bio) ++{ ++ blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); ++} ++ ++static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio) ++{ ++ blk_add_trace_bio(q, bio, BLK_TA_QUEUE); ++} ++ ++static void blk_add_trace_getrq(struct request_queue *q, ++ struct bio *bio, int rw) ++{ ++ if (bio) ++ blk_add_trace_bio(q, bio, BLK_TA_GETRQ); ++ else { ++ struct blk_trace *bt = q->blk_trace; ++ ++ if (bt) ++ __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL); ++ } ++} ++ ++ ++static void blk_add_trace_sleeprq(struct request_queue *q, ++ struct bio *bio, int rw) ++{ ++ if (bio) ++ blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); ++ else { ++ struct blk_trace *bt = q->blk_trace; ++ ++ if (bt) ++ __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, ++ 0, 0, NULL); ++ } ++} ++ ++static void blk_add_trace_plug(struct request_queue *q) ++{ ++ struct blk_trace *bt = q->blk_trace; ++ ++ if (bt) ++ __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); ++} ++ ++static void blk_add_trace_unplug_io(struct request_queue *q) ++{ ++ struct blk_trace *bt = q->blk_trace; ++ ++ if (bt) { ++ unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; ++ __be64 rpdu = cpu_to_be64(pdu); ++ ++ __blk_add_trace(bt, 0, 0, 0, 
BLK_TA_UNPLUG_IO, 0, ++ sizeof(rpdu), &rpdu); ++ } ++} ++ ++static void blk_add_trace_unplug_timer(struct request_queue *q) ++{ ++ struct blk_trace *bt = q->blk_trace; ++ ++ if (bt) { ++ unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; ++ __be64 rpdu = cpu_to_be64(pdu); ++ ++ __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0, ++ sizeof(rpdu), &rpdu); ++ } ++} ++ ++static void blk_add_trace_split(struct request_queue *q, struct bio *bio, ++ unsigned int pdu) ++{ ++ struct blk_trace *bt = q->blk_trace; ++ ++ if (bt) { ++ __be64 rpdu = cpu_to_be64(pdu); ++ ++ __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, ++ BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE), ++ sizeof(rpdu), &rpdu); ++ } ++} ++ ++/** ++ * blk_add_trace_remap - Add a trace for a remap operation ++ * @q: queue the io is for ++ * @bio: the source bio ++ * @dev: target device ++ * @from: source sector ++ * @to: target sector ++ * ++ * Description: ++ * Device mapper or raid target sometimes need to split a bio because ++ * it spans a stripe (or similar). Add a trace for that action. ++ * ++ **/ ++static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, ++ dev_t dev, sector_t from, sector_t to) ++{ ++ struct blk_trace *bt = q->blk_trace; ++ struct blk_io_trace_remap r; ++ ++ if (likely(!bt)) ++ return; ++ ++ r.device = cpu_to_be32(dev); ++ r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); ++ r.sector = cpu_to_be64(to); ++ ++ __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, ++ !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); ++} ++ ++/** ++ * blk_add_driver_data - Add binary message with driver-specific data ++ * @q: queue the io is for ++ * @rq: io request ++ * @data: driver-specific data ++ * @len: length of driver-specific data ++ * ++ * Description: ++ * Some drivers might want to write driver-specific data per request. 
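For illustration, a hypothetical driver-side caller of this hook might look like the sketch below (mydrv_complete_rq() is made up; only the blk_add_driver_data() prototype comes from the code that follows):

#include <linux/blkdev.h>
#include <linux/blktrace_api.h>

static void mydrv_complete_rq(struct request_queue *q, struct request *rq,
			      u32 hw_status)
{
	__be32 status = cpu_to_be32(hw_status);

	/* attach a small per-request status blob to the trace stream */
	blk_add_driver_data(q, rq, &status, sizeof(status));
}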
++ * ++ **/ ++void blk_add_driver_data(struct request_queue *q, ++ struct request *rq, ++ void *data, size_t len) ++{ ++ struct blk_trace *bt = q->blk_trace; ++ ++ if (likely(!bt)) ++ return; ++ ++ if (blk_pc_request(rq)) ++ __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, ++ rq->errors, len, data); ++ else ++ __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, ++ 0, BLK_TA_DRV_DATA, rq->errors, len, data); ++} ++EXPORT_SYMBOL_GPL(blk_add_driver_data); ++ ++static void blk_register_tracepoints(void) ++{ ++ int ret; ++ ++ ret = register_trace_block_rq_abort(blk_add_trace_rq_abort); ++ WARN_ON(ret); ++ ret = register_trace_block_rq_insert(blk_add_trace_rq_insert); ++ WARN_ON(ret); ++ ret = register_trace_block_rq_issue(blk_add_trace_rq_issue); ++ WARN_ON(ret); ++ ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue); ++ WARN_ON(ret); ++ ret = register_trace_block_rq_complete(blk_add_trace_rq_complete); ++ WARN_ON(ret); ++ ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce); ++ WARN_ON(ret); ++ ret = register_trace_block_bio_complete(blk_add_trace_bio_complete); ++ WARN_ON(ret); ++ ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); ++ WARN_ON(ret); ++ ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); ++ WARN_ON(ret); ++ ret = register_trace_block_bio_queue(blk_add_trace_bio_queue); ++ WARN_ON(ret); ++ ret = register_trace_block_getrq(blk_add_trace_getrq); ++ WARN_ON(ret); ++ ret = register_trace_block_sleeprq(blk_add_trace_sleeprq); ++ WARN_ON(ret); ++ ret = register_trace_block_plug(blk_add_trace_plug); ++ WARN_ON(ret); ++ ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer); ++ WARN_ON(ret); ++ ret = register_trace_block_unplug_io(blk_add_trace_unplug_io); ++ WARN_ON(ret); ++ ret = register_trace_block_split(blk_add_trace_split); ++ WARN_ON(ret); ++ ret = register_trace_block_remap(blk_add_trace_remap); ++ WARN_ON(ret); ++} ++ ++static void blk_unregister_tracepoints(void) ++{ ++ unregister_trace_block_remap(blk_add_trace_remap); ++ unregister_trace_block_split(blk_add_trace_split); ++ unregister_trace_block_unplug_io(blk_add_trace_unplug_io); ++ unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer); ++ unregister_trace_block_plug(blk_add_trace_plug); ++ unregister_trace_block_sleeprq(blk_add_trace_sleeprq); ++ unregister_trace_block_getrq(blk_add_trace_getrq); ++ unregister_trace_block_bio_queue(blk_add_trace_bio_queue); ++ unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); ++ unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); ++ unregister_trace_block_bio_complete(blk_add_trace_bio_complete); ++ unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce); ++ unregister_trace_block_rq_complete(blk_add_trace_rq_complete); ++ unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue); ++ unregister_trace_block_rq_issue(blk_add_trace_rq_issue); ++ unregister_trace_block_rq_insert(blk_add_trace_rq_insert); ++ unregister_trace_block_rq_abort(blk_add_trace_rq_abort); ++ ++ tracepoint_synchronize_unregister(); ++} ++ ++/* ++ * struct blk_io_tracer formatting routines ++ */ ++ ++static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) ++{ ++ int i = 0; ++ int tc = t->action >> BLK_TC_SHIFT; ++ ++ if (tc & BLK_TC_DISCARD) ++ rwbs[i++] = 'D'; ++ else if (tc & BLK_TC_WRITE) ++ rwbs[i++] = 'W'; ++ else if (t->bytes) ++ rwbs[i++] = 'R'; ++ else ++ rwbs[i++] = 'N'; ++ ++ if (tc & BLK_TC_AHEAD) ++ rwbs[i++] = 'A'; ++ if (tc & BLK_TC_BARRIER) ++ rwbs[i++] = 
'B'; ++ if (tc & BLK_TC_SYNC) ++ rwbs[i++] = 'S'; ++ if (tc & BLK_TC_META) ++ rwbs[i++] = 'M'; ++ ++ rwbs[i] = '\0'; ++} ++ ++static inline ++const struct blk_io_trace *te_blk_io_trace(const struct trace_entry *ent) ++{ ++ return (const struct blk_io_trace *)ent; ++} ++ ++static inline const void *pdu_start(const struct trace_entry *ent) ++{ ++ return te_blk_io_trace(ent) + 1; ++} ++ ++static inline u32 t_sec(const struct trace_entry *ent) ++{ ++ return te_blk_io_trace(ent)->bytes >> 9; ++} ++ ++static inline unsigned long long t_sector(const struct trace_entry *ent) ++{ ++ return te_blk_io_trace(ent)->sector; ++} ++ ++static inline __u16 t_error(const struct trace_entry *ent) ++{ ++ return te_blk_io_trace(ent)->error; ++} ++ ++static __u64 get_pdu_int(const struct trace_entry *ent) ++{ ++ const __u64 *val = pdu_start(ent); ++ return be64_to_cpu(*val); ++} ++ ++static void get_pdu_remap(const struct trace_entry *ent, ++ struct blk_io_trace_remap *r) ++{ ++ const struct blk_io_trace_remap *__r = pdu_start(ent); ++ __u64 sector = __r->sector; ++ ++ r->device = be32_to_cpu(__r->device); ++ r->device_from = be32_to_cpu(__r->device_from); ++ r->sector = be64_to_cpu(sector); ++} ++ ++static int blk_log_action_iter(struct trace_iterator *iter, const char *act) ++{ ++ char rwbs[6]; ++ unsigned long long ts = ns2usecs(iter->ts); ++ unsigned long usec_rem = do_div(ts, USEC_PER_SEC); ++ unsigned secs = (unsigned long)ts; ++ const struct trace_entry *ent = iter->ent; ++ const struct blk_io_trace *t = (const struct blk_io_trace *)ent; ++ ++ fill_rwbs(rwbs, t); ++ ++ return trace_seq_printf(&iter->seq, ++ "%3d,%-3d %2d %5d.%06lu %5u %2s %3s ", ++ MAJOR(t->device), MINOR(t->device), iter->cpu, ++ secs, usec_rem, ent->pid, act, rwbs); ++} ++ ++static int blk_log_action_seq(struct trace_seq *s, const struct blk_io_trace *t, ++ const char *act) ++{ ++ char rwbs[6]; ++ fill_rwbs(rwbs, t); ++ return trace_seq_printf(s, "%3d,%-3d %2s %3s ", ++ MAJOR(t->device), MINOR(t->device), act, rwbs); ++} ++ ++static int blk_log_generic(struct trace_seq *s, const struct trace_entry *ent) ++{ ++ char cmd[TASK_COMM_LEN]; ++ ++ trace_find_cmdline(ent->pid, cmd); ++ ++ if (t_sec(ent)) ++ return trace_seq_printf(s, "%llu + %u [%s]\n", ++ t_sector(ent), t_sec(ent), cmd); ++ return trace_seq_printf(s, "[%s]\n", cmd); ++} ++ ++static int blk_log_with_error(struct trace_seq *s, ++ const struct trace_entry *ent) ++{ ++ if (t_sec(ent)) ++ return trace_seq_printf(s, "%llu + %u [%d]\n", t_sector(ent), ++ t_sec(ent), t_error(ent)); ++ return trace_seq_printf(s, "%llu [%d]\n", t_sector(ent), t_error(ent)); ++} ++ ++static int blk_log_remap(struct trace_seq *s, const struct trace_entry *ent) ++{ ++ struct blk_io_trace_remap r = { .device = 0, }; ++ ++ get_pdu_remap(ent, &r); ++ return trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n", ++ t_sector(ent), ++ t_sec(ent), MAJOR(r.device), MINOR(r.device), ++ (unsigned long long)r.sector); ++} ++ ++static int blk_log_plug(struct trace_seq *s, const struct trace_entry *ent) ++{ ++ char cmd[TASK_COMM_LEN]; ++ ++ trace_find_cmdline(ent->pid, cmd); ++ ++ return trace_seq_printf(s, "[%s]\n", cmd); ++} ++ ++static int blk_log_unplug(struct trace_seq *s, const struct trace_entry *ent) ++{ ++ char cmd[TASK_COMM_LEN]; ++ ++ trace_find_cmdline(ent->pid, cmd); ++ ++ return trace_seq_printf(s, "[%s] %llu\n", cmd, get_pdu_int(ent)); ++} ++ ++static int blk_log_split(struct trace_seq *s, const struct trace_entry *ent) ++{ ++ char cmd[TASK_COMM_LEN]; ++ ++ trace_find_cmdline(ent->pid, cmd); ++ ++ return 
trace_seq_printf(s, "%llu / %llu [%s]\n", t_sector(ent), ++ get_pdu_int(ent), cmd); ++} ++ ++/* ++ * struct tracer operations ++ */ ++ ++static void blk_tracer_print_header(struct seq_file *m) ++{ ++ if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) ++ return; ++ seq_puts(m, "# DEV CPU TIMESTAMP PID ACT FLG\n" ++ "# | | | | | |\n"); ++} ++ ++static void blk_tracer_start(struct trace_array *tr) ++{ ++ if (atomic_add_return(1, &blk_probes_ref) == 1) ++ blk_register_tracepoints(); ++ trace_flags &= ~TRACE_ITER_CONTEXT_INFO; ++} ++ ++static int blk_tracer_init(struct trace_array *tr) ++{ ++ blk_tr = tr; ++ blk_tracer_start(tr); ++ blk_tracer_enabled = true; ++ return 0; ++} ++ ++static void blk_tracer_stop(struct trace_array *tr) ++{ ++ trace_flags |= TRACE_ITER_CONTEXT_INFO; ++ if (atomic_dec_and_test(&blk_probes_ref)) ++ blk_unregister_tracepoints(); ++} ++ ++static void blk_tracer_reset(struct trace_array *tr) ++{ ++ if (!atomic_read(&blk_probes_ref)) ++ return; ++ ++ blk_tracer_enabled = false; ++ blk_tracer_stop(tr); ++} ++ ++static const struct { ++ const char *act[2]; ++ int (*print)(struct trace_seq *s, const struct trace_entry *ent); ++} what2act[] = { ++ [__BLK_TA_QUEUE] = {{ "Q", "queue" }, blk_log_generic }, ++ [__BLK_TA_BACKMERGE] = {{ "M", "backmerge" }, blk_log_generic }, ++ [__BLK_TA_FRONTMERGE] = {{ "F", "frontmerge" }, blk_log_generic }, ++ [__BLK_TA_GETRQ] = {{ "G", "getrq" }, blk_log_generic }, ++ [__BLK_TA_SLEEPRQ] = {{ "S", "sleeprq" }, blk_log_generic }, ++ [__BLK_TA_REQUEUE] = {{ "R", "requeue" }, blk_log_with_error }, ++ [__BLK_TA_ISSUE] = {{ "D", "issue" }, blk_log_generic }, ++ [__BLK_TA_COMPLETE] = {{ "C", "complete" }, blk_log_with_error }, ++ [__BLK_TA_PLUG] = {{ "P", "plug" }, blk_log_plug }, ++ [__BLK_TA_UNPLUG_IO] = {{ "U", "unplug_io" }, blk_log_unplug }, ++ [__BLK_TA_UNPLUG_TIMER] = {{ "UT", "unplug_timer" }, blk_log_unplug }, ++ [__BLK_TA_INSERT] = {{ "I", "insert" }, blk_log_generic }, ++ [__BLK_TA_SPLIT] = {{ "X", "split" }, blk_log_split }, ++ [__BLK_TA_BOUNCE] = {{ "B", "bounce" }, blk_log_generic }, ++ [__BLK_TA_REMAP] = {{ "A", "remap" }, blk_log_remap }, ++}; ++ ++static enum print_line_t blk_trace_event_print(struct trace_iterator *iter, ++ int flags) ++{ ++ struct trace_seq *s = &iter->seq; ++ const struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; ++ const u16 what = t->action & ((1 << BLK_TC_SHIFT) - 1); ++ int ret; ++ ++ if (!trace_print_context(iter)) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) ++ ret = trace_seq_printf(s, "Bad pc action %x\n", what); ++ else { ++ const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); ++ ret = blk_log_action_seq(s, t, what2act[what].act[long_act]); ++ if (ret) ++ ret = what2act[what].print(s, iter->ent); ++ } ++ ++ return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; ++} ++ ++static int blk_trace_synthesize_old_trace(struct trace_iterator *iter) ++{ ++ struct trace_seq *s = &iter->seq; ++ struct blk_io_trace *t = (struct blk_io_trace *)iter->ent; ++ const int offset = offsetof(struct blk_io_trace, sector); ++ struct blk_io_trace old = { ++ .magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION, ++ .time = iter->ts, ++ }; ++ ++ if (!trace_seq_putmem(s, &old, offset)) ++ return 0; ++ return trace_seq_putmem(s, &t->sector, ++ sizeof(old) - offset + t->pdu_len); ++} ++ ++static enum print_line_t ++blk_trace_event_print_binary(struct trace_iterator *iter, int flags) ++{ ++ return blk_trace_synthesize_old_trace(iter) ? 
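As an illustrative aside (not part of the patch): the what2act[] table above maps the low bits of the action code to a terse and a verbose name plus a print callback, and anything outside the table is reported as a bad action instead of being dereferenced. A self-contained model of that table-driven dispatch, with invented action codes and handlers:

/* Illustrative userspace sketch, not part of the patch. */
#include <stdio.h>

enum { TA_QUEUE = 1, TA_ISSUE, TA_COMPLETE };

static int print_generic(int code)
{
	return printf("generic action %d\n", code);
}

static int print_with_error(int code)
{
	return printf("action %d (may carry an error code)\n", code);
}

static const struct {
	const char *act[2];		/* [0] = terse, [1] = verbose */
	int (*print)(int code);
} handlers[] = {
	[TA_QUEUE]    = { { "Q", "queue" },    print_generic },
	[TA_ISSUE]    = { { "D", "issue" },    print_generic },
	[TA_COMPLETE] = { { "C", "complete" }, print_with_error },
};

static void show(unsigned int what, int verbose)
{
	if (what == 0 || what >= sizeof(handlers) / sizeof(handlers[0]) ||
	    !handlers[what].print) {
		printf("Bad action %x\n", what);
		return;
	}
	printf("%s ", handlers[what].act[!!verbose]);
	handlers[what].print(what);
}

int main(void)
{
	show(TA_COMPLETE, 0);
	show(TA_COMPLETE, 1);
	show(42, 1);			/* out of range: reported, not dereferenced */
	return 0;
}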
++ TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; ++} ++ ++static enum print_line_t blk_tracer_print_line(struct trace_iterator *iter) ++{ ++ const struct blk_io_trace *t; ++ u16 what; ++ int ret; ++ ++ if (!(blk_tracer_flags.val & TRACE_BLK_OPT_CLASSIC)) ++ return TRACE_TYPE_UNHANDLED; ++ ++ t = (const struct blk_io_trace *)iter->ent; ++ what = t->action & ((1 << BLK_TC_SHIFT) - 1); ++ ++ if (unlikely(what == 0 || what >= ARRAY_SIZE(what2act))) ++ ret = trace_seq_printf(&iter->seq, "Bad pc action %x\n", what); ++ else { ++ const bool long_act = !!(trace_flags & TRACE_ITER_VERBOSE); ++ ret = blk_log_action_iter(iter, what2act[what].act[long_act]); ++ if (ret) ++ ret = what2act[what].print(&iter->seq, iter->ent); ++ } ++ ++ return ret ? TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE; ++} ++ ++static struct tracer blk_tracer __read_mostly = { ++ .name = "blk", ++ .init = blk_tracer_init, ++ .reset = blk_tracer_reset, ++ .start = blk_tracer_start, ++ .stop = blk_tracer_stop, ++ .print_header = blk_tracer_print_header, ++ .print_line = blk_tracer_print_line, ++ .flags = &blk_tracer_flags, ++}; ++ ++static struct trace_event trace_blk_event = { ++ .type = TRACE_BLK, ++ .trace = blk_trace_event_print, ++ .binary = blk_trace_event_print_binary, ++}; ++ ++static int __init init_blk_tracer(void) ++{ ++ if (!register_ftrace_event(&trace_blk_event)) { ++ pr_warning("Warning: could not register block events\n"); ++ return 1; ++ } ++ ++ if (register_tracer(&blk_tracer) != 0) { ++ pr_warning("Warning: could not register the block tracer\n"); ++ unregister_ftrace_event(&trace_blk_event); ++ return 1; ++ } ++ ++ return 0; ++} ++ ++device_initcall(init_blk_tracer); ++ ++static int blk_trace_remove_queue(struct request_queue *q) ++{ ++ struct blk_trace *bt; ++ ++ bt = xchg(&q->blk_trace, NULL); ++ if (bt == NULL) ++ return -EINVAL; ++ ++ kfree(bt); ++ return 0; ++} ++ ++/* ++ * Setup everything required to start tracing ++ */ ++static int blk_trace_setup_queue(struct request_queue *q, dev_t dev) ++{ ++ struct blk_trace *old_bt, *bt = NULL; ++ ++ bt = kzalloc(sizeof(*bt), GFP_KERNEL); ++ if (!bt) ++ return -ENOMEM; ++ ++ bt->dev = dev; ++ bt->act_mask = (u16)-1; ++ bt->end_lba = -1ULL; ++ ++ old_bt = xchg(&q->blk_trace, bt); ++ if (old_bt != NULL) { ++ (void)xchg(&q->blk_trace, old_bt); ++ kfree(bt); ++ return -EBUSY; ++ } ++ ++ return 0; ++} ++ ++/* ++ * sysfs interface to enable and configure tracing ++ */ ++ ++static ssize_t sysfs_blk_trace_attr_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf); ++static ssize_t sysfs_blk_trace_attr_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count); ++#define BLK_TRACE_DEVICE_ATTR(_name) \ ++ DEVICE_ATTR(_name, S_IRUGO | S_IWUSR, \ ++ sysfs_blk_trace_attr_show, \ ++ sysfs_blk_trace_attr_store) ++ ++static BLK_TRACE_DEVICE_ATTR(enable); ++static BLK_TRACE_DEVICE_ATTR(act_mask); ++static BLK_TRACE_DEVICE_ATTR(pid); ++static BLK_TRACE_DEVICE_ATTR(start_lba); ++static BLK_TRACE_DEVICE_ATTR(end_lba); ++ ++static struct attribute *blk_trace_attrs[] = { ++ &dev_attr_enable.attr, ++ &dev_attr_act_mask.attr, ++ &dev_attr_pid.attr, ++ &dev_attr_start_lba.attr, ++ &dev_attr_end_lba.attr, ++ NULL ++}; ++ ++struct attribute_group blk_trace_attr_group = { ++ .name = "trace", ++ .attrs = blk_trace_attrs, ++}; ++ ++static const struct { ++ int mask; ++ const char *str; ++} mask_maps[] = { ++ { BLK_TC_READ, "read" }, ++ { BLK_TC_WRITE, "write" }, ++ { BLK_TC_BARRIER, "barrier" }, ++ { BLK_TC_SYNC, "sync" }, ++ { BLK_TC_QUEUE, 
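As an illustrative aside (not part of the patch): blk_trace_setup_queue() above installs its new blk_trace with xchg(); if another tracer was already attached, the old pointer is immediately swapped back and -EBUSY is returned, so a queue is only ever claimed once. A userspace model of that claim-or-fail install using C11 atomics (all names invented):

/* Illustrative userspace sketch, not part of the patch. */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct trace_state { int act_mask; };

static _Atomic(struct trace_state *) queue_trace;	/* plays the role of q->blk_trace */

static int setup_trace(void)
{
	struct trace_state *old, *bt;

	bt = calloc(1, sizeof(*bt));
	if (!bt)
		return -1;				/* like -ENOMEM */
	bt->act_mask = -1;

	old = atomic_exchange(&queue_trace, bt);
	if (old) {
		atomic_exchange(&queue_trace, old);	/* put the old tracer back */
		free(bt);
		return -2;				/* like -EBUSY */
	}
	return 0;
}

int main(void)
{
	printf("first setup:  %d\n", setup_trace());	/* succeeds */
	printf("second setup: %d\n", setup_trace());	/* already claimed */
	return 0;
}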
"queue" }, ++ { BLK_TC_REQUEUE, "requeue" }, ++ { BLK_TC_ISSUE, "issue" }, ++ { BLK_TC_COMPLETE, "complete" }, ++ { BLK_TC_FS, "fs" }, ++ { BLK_TC_PC, "pc" }, ++ { BLK_TC_AHEAD, "ahead" }, ++ { BLK_TC_META, "meta" }, ++ { BLK_TC_DISCARD, "discard" }, ++ { BLK_TC_DRV_DATA, "drv_data" }, ++}; ++ ++static int blk_trace_str2mask(const char *str) ++{ ++ int i; ++ int mask = 0; ++ char *s, *token; ++ ++ s = kstrdup(str, GFP_KERNEL); ++ if (s == NULL) ++ return -ENOMEM; ++ s = strstrip(s); ++ ++ while (1) { ++ token = strsep(&s, ","); ++ if (token == NULL) ++ break; ++ ++ if (*token == '\0') ++ continue; ++ ++ for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { ++ if (strcasecmp(token, mask_maps[i].str) == 0) { ++ mask |= mask_maps[i].mask; ++ break; ++ } ++ } ++ if (i == ARRAY_SIZE(mask_maps)) { ++ mask = -EINVAL; ++ break; ++ } ++ } ++ kfree(s); ++ ++ return mask; ++} ++ ++static ssize_t blk_trace_mask2str(char *buf, int mask) ++{ ++ int i; ++ char *p = buf; ++ ++ for (i = 0; i < ARRAY_SIZE(mask_maps); i++) { ++ if (mask & mask_maps[i].mask) { ++ p += sprintf(p, "%s%s", ++ (p == buf) ? "" : ",", mask_maps[i].str); ++ } ++ } ++ *p++ = '\n'; ++ ++ return p - buf; ++} ++ ++static struct request_queue *blk_trace_get_queue(struct block_device *bdev) ++{ ++ if (bdev->bd_disk == NULL) ++ return NULL; ++ ++ return bdev_get_queue(bdev); ++} ++ ++static ssize_t sysfs_blk_trace_attr_show(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ struct hd_struct *p = dev_to_part(dev); ++ struct request_queue *q; ++ struct block_device *bdev; ++ ssize_t ret = -ENXIO; ++ ++ lock_kernel(); ++ bdev = bdget(part_devt(p)); ++ if (bdev == NULL) ++ goto out_unlock_kernel; ++ ++ q = blk_trace_get_queue(bdev); ++ if (q == NULL) ++ goto out_bdput; ++ ++ mutex_lock(&bdev->bd_mutex); ++ ++ if (attr == &dev_attr_enable) { ++ ret = sprintf(buf, "%u\n", !!q->blk_trace); ++ goto out_unlock_bdev; ++ } ++ ++ if (q->blk_trace == NULL) ++ ret = sprintf(buf, "disabled\n"); ++ else if (attr == &dev_attr_act_mask) ++ ret = blk_trace_mask2str(buf, q->blk_trace->act_mask); ++ else if (attr == &dev_attr_pid) ++ ret = sprintf(buf, "%u\n", q->blk_trace->pid); ++ else if (attr == &dev_attr_start_lba) ++ ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba); ++ else if (attr == &dev_attr_end_lba) ++ ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba); ++ ++out_unlock_bdev: ++ mutex_unlock(&bdev->bd_mutex); ++out_bdput: ++ bdput(bdev); ++out_unlock_kernel: ++ unlock_kernel(); ++ return ret; ++} ++ ++static ssize_t sysfs_blk_trace_attr_store(struct device *dev, ++ struct device_attribute *attr, ++ const char *buf, size_t count) ++{ ++ struct block_device *bdev; ++ struct request_queue *q; ++ struct hd_struct *p; ++ u64 value; ++ ssize_t ret = -EINVAL; ++ ++ if (count == 0) ++ goto out; ++ ++ if (attr == &dev_attr_act_mask) { ++ if (sscanf(buf, "%llx", &value) != 1) { ++ /* Assume it is a list of trace category names */ ++ ret = blk_trace_str2mask(buf); ++ if (ret < 0) ++ goto out; ++ value = ret; ++ } ++ } else if (sscanf(buf, "%llu", &value) != 1) ++ goto out; ++ ++ ret = -ENXIO; ++ ++ lock_kernel(); ++ p = dev_to_part(dev); ++ bdev = bdget(part_devt(p)); ++ if (bdev == NULL) ++ goto out_unlock_kernel; ++ ++ q = blk_trace_get_queue(bdev); ++ if (q == NULL) ++ goto out_bdput; ++ ++ mutex_lock(&bdev->bd_mutex); ++ ++ if (attr == &dev_attr_enable) { ++ if (value) ++ ret = blk_trace_setup_queue(q, bdev->bd_dev); ++ else ++ ret = blk_trace_remove_queue(q); ++ goto out_unlock_bdev; ++ } ++ ++ ret = 0; ++ if (q->blk_trace == NULL) ++ 
ret = blk_trace_setup_queue(q, bdev->bd_dev); ++ ++ if (ret == 0) { ++ if (attr == &dev_attr_act_mask) ++ q->blk_trace->act_mask = value; ++ else if (attr == &dev_attr_pid) ++ q->blk_trace->pid = value; ++ else if (attr == &dev_attr_start_lba) ++ q->blk_trace->start_lba = value; ++ else if (attr == &dev_attr_end_lba) ++ q->blk_trace->end_lba = value; ++ } ++ ++out_unlock_bdev: ++ mutex_unlock(&bdev->bd_mutex); ++out_bdput: ++ bdput(bdev); ++out_unlock_kernel: ++ unlock_kernel(); ++out: ++ return ret ? ret : count; ++} ++ +Index: linux-2.6-tip/kernel/trace/events.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/events.c +@@ -0,0 +1,14 @@ ++/* ++ * This is the place to register all trace points as events. ++ */ ++ ++#include ++ ++#include ++ ++#include "trace_output.h" ++ ++#include "trace_events_stage_1.h" ++#include "trace_events_stage_2.h" ++#include "trace_events_stage_3.h" ++ +Index: linux-2.6-tip/kernel/trace/ftrace.c +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/ftrace.c ++++ linux-2.6-tip/kernel/trace/ftrace.c +@@ -27,10 +27,14 @@ + #include + #include + #include ++#include ++ ++#include + + #include + +-#include "trace.h" ++#include "trace_output.h" ++#include "trace_stat.h" + + #define FTRACE_WARN_ON(cond) \ + do { \ +@@ -44,14 +48,14 @@ + ftrace_kill(); \ + } while (0) + ++/* hash bits for specific function selection */ ++#define FTRACE_HASH_BITS 7 ++#define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS) ++ + /* ftrace_enabled is a method to turn ftrace on or off */ + int ftrace_enabled __read_mostly; + static int last_ftrace_enabled; + +-/* set when tracing only a pid */ +-struct pid *ftrace_pid_trace; +-static struct pid * const ftrace_swapper_pid = &init_struct_pid; +- + /* Quick disabling of function tracer. 
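As an illustrative aside (not part of the patch): blk_trace_str2mask() and blk_trace_mask2str() above translate between the act_mask bitmask and a comma-separated list of category names, and the store path falls back to name parsing when the input is not a hex number; one unknown name rejects the whole string. The userspace sketch below round-trips a mask the same way; the category names and bit values are invented.

/* Illustrative userspace sketch, not part of the patch. */
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

static const struct { int mask; const char *str; } maps[] = {
	{ 1 << 0, "read" }, { 1 << 1, "write" }, { 1 << 2, "sync" }, { 1 << 3, "meta" },
};
#define NMAPS (sizeof(maps) / sizeof(maps[0]))

static int str2mask(const char *str)
{
	char *dup = strdup(str), *s = dup, *tok;
	int mask = 0;
	size_t i;

	if (!dup)
		return -1;
	while ((tok = strsep(&s, ",")) != NULL) {
		if (*tok == '\0')
			continue;
		for (i = 0; i < NMAPS; i++)
			if (strcasecmp(tok, maps[i].str) == 0) {
				mask |= maps[i].mask;
				break;
			}
		if (i == NMAPS) {		/* unknown name: reject the whole string */
			mask = -1;
			break;
		}
	}
	free(dup);
	return mask;
}

static void mask2str(int mask, char *buf)
{
	char *p = buf;
	size_t i;

	for (i = 0; i < NMAPS; i++)
		if (mask & maps[i].mask)
			p += sprintf(p, "%s%s", p == buf ? "" : ",", maps[i].str);
	*p = '\0';
}

int main(void)
{
	char buf[64];
	int mask = str2mask("read,SYNC");

	mask2str(mask, buf);
	printf("mask %#x -> \"%s\"\n", (unsigned)mask, buf);	/* 0x5 -> "read,sync" */
	printf("unknown name -> %d\n", str2mask("read,bogus"));
	return 0;
}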
*/ + int function_trace_stop; + +@@ -61,9 +65,7 @@ int function_trace_stop; + */ + static int ftrace_disabled __read_mostly; + +-static DEFINE_SPINLOCK(ftrace_lock); +-static DEFINE_MUTEX(ftrace_sysctl_lock); +-static DEFINE_MUTEX(ftrace_start_lock); ++static DEFINE_MUTEX(ftrace_lock); + + static struct ftrace_ops ftrace_list_end __read_mostly = + { +@@ -134,9 +136,6 @@ static void ftrace_test_stop_func(unsign + + static int __register_ftrace_function(struct ftrace_ops *ops) + { +- /* should not be called from interrupt context */ +- spin_lock(&ftrace_lock); +- + ops->next = ftrace_list; + /* + * We are entering ops into the ftrace_list but another +@@ -172,18 +171,12 @@ static int __register_ftrace_function(st + #endif + } + +- spin_unlock(&ftrace_lock); +- + return 0; + } + + static int __unregister_ftrace_function(struct ftrace_ops *ops) + { + struct ftrace_ops **p; +- int ret = 0; +- +- /* should not be called from interrupt context */ +- spin_lock(&ftrace_lock); + + /* + * If we are removing the last function, then simply point +@@ -192,17 +185,15 @@ static int __unregister_ftrace_function( + if (ftrace_list == ops && ops->next == &ftrace_list_end) { + ftrace_trace_function = ftrace_stub; + ftrace_list = &ftrace_list_end; +- goto out; ++ return 0; + } + + for (p = &ftrace_list; *p != &ftrace_list_end; p = &(*p)->next) + if (*p == ops) + break; + +- if (*p != ops) { +- ret = -1; +- goto out; +- } ++ if (*p != ops) ++ return -1; + + *p = (*p)->next; + +@@ -223,21 +214,15 @@ static int __unregister_ftrace_function( + } + } + +- out: +- spin_unlock(&ftrace_lock); +- +- return ret; ++ return 0; + } + + static void ftrace_update_pid_func(void) + { + ftrace_func_t func; + +- /* should not be called from interrupt context */ +- spin_lock(&ftrace_lock); +- + if (ftrace_trace_function == ftrace_stub) +- goto out; ++ return; + + func = ftrace_trace_function; + +@@ -254,498 +239,1103 @@ static void ftrace_update_pid_func(void) + #else + __ftrace_trace_function = func; + #endif +- +- out: +- spin_unlock(&ftrace_lock); + } + +-#ifdef CONFIG_DYNAMIC_FTRACE +-#ifndef CONFIG_FTRACE_MCOUNT_RECORD +-# error Dynamic ftrace depends on MCOUNT_RECORD ++#ifdef CONFIG_FUNCTION_PROFILER ++struct ftrace_profile { ++ struct hlist_node node; ++ unsigned long ip; ++ unsigned long counter; ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ unsigned long long time; + #endif +- +-/* +- * Since MCOUNT_ADDR may point to mcount itself, we do not want +- * to get it confused by reading a reference in the code as we +- * are parsing on objcopy output of text. Use a variable for +- * it instead. 
+- */ +-static unsigned long mcount_addr = MCOUNT_ADDR; +- +-enum { +- FTRACE_ENABLE_CALLS = (1 << 0), +- FTRACE_DISABLE_CALLS = (1 << 1), +- FTRACE_UPDATE_TRACE_FUNC = (1 << 2), +- FTRACE_ENABLE_MCOUNT = (1 << 3), +- FTRACE_DISABLE_MCOUNT = (1 << 4), +- FTRACE_START_FUNC_RET = (1 << 5), +- FTRACE_STOP_FUNC_RET = (1 << 6), + }; + +-static int ftrace_filtered; +- +-static LIST_HEAD(ftrace_new_addrs); +- +-static DEFINE_MUTEX(ftrace_regex_lock); ++struct ftrace_profile_page { ++ struct ftrace_profile_page *next; ++ unsigned long index; ++ struct ftrace_profile records[]; ++}; + +-struct ftrace_page { +- struct ftrace_page *next; +- unsigned long index; +- struct dyn_ftrace records[]; ++struct ftrace_profile_stat { ++ atomic_t disabled; ++ struct hlist_head *hash; ++ struct ftrace_profile_page *pages; ++ struct ftrace_profile_page *start; ++ struct tracer_stat stat; + }; + +-#define ENTRIES_PER_PAGE \ +- ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) ++#define PROFILE_RECORDS_SIZE \ ++ (PAGE_SIZE - offsetof(struct ftrace_profile_page, records)) + +-/* estimate from running different kernels */ +-#define NR_TO_INIT 10000 ++#define PROFILES_PER_PAGE \ ++ (PROFILE_RECORDS_SIZE / sizeof(struct ftrace_profile)) + +-static struct ftrace_page *ftrace_pages_start; +-static struct ftrace_page *ftrace_pages; ++static int ftrace_profile_bits; ++static int ftrace_profile_enabled; ++static DEFINE_MUTEX(ftrace_profile_lock); + +-static struct dyn_ftrace *ftrace_free_records; ++static DEFINE_PER_CPU(struct ftrace_profile_stat, ftrace_profile_stats); + ++#define FTRACE_PROFILE_HASH_SIZE 1024 /* must be power of 2 */ + +-#ifdef CONFIG_KPROBES ++static void * ++function_stat_next(void *v, int idx) ++{ ++ struct ftrace_profile *rec = v; ++ struct ftrace_profile_page *pg; + +-static int frozen_record_count; ++ pg = (struct ftrace_profile_page *)((unsigned long)rec & PAGE_MASK); + +-static inline void freeze_record(struct dyn_ftrace *rec) +-{ +- if (!(rec->flags & FTRACE_FL_FROZEN)) { +- rec->flags |= FTRACE_FL_FROZEN; +- frozen_record_count++; ++ again: ++ rec++; ++ if ((void *)rec >= (void *)&pg->records[pg->index]) { ++ pg = pg->next; ++ if (!pg) ++ return NULL; ++ rec = &pg->records[0]; ++ if (!rec->counter) ++ goto again; + } ++ ++ return rec; + } + +-static inline void unfreeze_record(struct dyn_ftrace *rec) ++static void *function_stat_start(struct tracer_stat *trace) + { +- if (rec->flags & FTRACE_FL_FROZEN) { +- rec->flags &= ~FTRACE_FL_FROZEN; +- frozen_record_count--; +- } ++ struct ftrace_profile_stat *stat = ++ container_of(trace, struct ftrace_profile_stat, stat); ++ ++ if (!stat || !stat->start) ++ return NULL; ++ ++ return function_stat_next(&stat->start->records[0], 0); + } + +-static inline int record_frozen(struct dyn_ftrace *rec) ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++/* function graph compares on total time */ ++static int function_stat_cmp(void *p1, void *p2) + { +- return rec->flags & FTRACE_FL_FROZEN; ++ struct ftrace_profile *a = p1; ++ struct ftrace_profile *b = p2; ++ ++ if (a->time < b->time) ++ return -1; ++ if (a->time > b->time) ++ return 1; ++ else ++ return 0; + } + #else +-# define freeze_record(rec) ({ 0; }) +-# define unfreeze_record(rec) ({ 0; }) +-# define record_frozen(rec) ({ 0; }) +-#endif /* CONFIG_KPROBES */ ++/* not function graph compares against hits */ ++static int function_stat_cmp(void *p1, void *p2) ++{ ++ struct ftrace_profile *a = p1; ++ struct ftrace_profile *b = p2; + +-static void ftrace_free_rec(struct dyn_ftrace *rec) ++ if 
(a->counter < b->counter) ++ return -1; ++ if (a->counter > b->counter) ++ return 1; ++ else ++ return 0; ++} ++#endif ++ ++static int function_stat_headers(struct seq_file *m) + { +- rec->ip = (unsigned long)ftrace_free_records; +- ftrace_free_records = rec; +- rec->flags |= FTRACE_FL_FREE; ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ seq_printf(m, " Function " ++ "Hit Time Avg\n" ++ " -------- " ++ "--- ---- ---\n"); ++#else ++ seq_printf(m, " Function Hit\n" ++ " -------- ---\n"); ++#endif ++ return 0; + } + +-void ftrace_release(void *start, unsigned long size) ++static int function_stat_show(struct seq_file *m, void *v) + { +- struct dyn_ftrace *rec; +- struct ftrace_page *pg; +- unsigned long s = (unsigned long)start; +- unsigned long e = s + size; +- int i; ++ struct ftrace_profile *rec = v; ++ char str[KSYM_SYMBOL_LEN]; ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ static DEFINE_MUTEX(mutex); ++ static struct trace_seq s; ++ unsigned long long avg; ++#endif + +- if (ftrace_disabled || !start) +- return; ++ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); ++ seq_printf(m, " %-30.30s %10lu", str, rec->counter); + +- /* should not be called from interrupt context */ +- spin_lock(&ftrace_lock); ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ seq_printf(m, " "); ++ avg = rec->time; ++ if (rec->counter) ++ do_div(avg, rec->counter); ++ ++ mutex_lock(&mutex); ++ trace_seq_init(&s); ++ trace_print_graph_duration(rec->time, &s); ++ trace_seq_puts(&s, " "); ++ trace_print_graph_duration(avg, &s); ++ trace_print_seq(m, &s); ++ mutex_unlock(&mutex); ++#endif ++ seq_putc(m, '\n'); + +- for (pg = ftrace_pages_start; pg; pg = pg->next) { +- for (i = 0; i < pg->index; i++) { +- rec = &pg->records[i]; ++ return 0; ++} + +- if ((rec->ip >= s) && (rec->ip < e)) +- ftrace_free_rec(rec); +- } ++static void ftrace_profile_reset(struct ftrace_profile_stat *stat) ++{ ++ struct ftrace_profile_page *pg; ++ ++ pg = stat->pages = stat->start; ++ ++ while (pg) { ++ memset(pg->records, 0, PROFILE_RECORDS_SIZE); ++ pg->index = 0; ++ pg = pg->next; + } +- spin_unlock(&ftrace_lock); ++ ++ memset(stat->hash, 0, ++ FTRACE_PROFILE_HASH_SIZE * sizeof(struct hlist_head)); + } + +-static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) ++int ftrace_profile_pages_init(struct ftrace_profile_stat *stat) + { +- struct dyn_ftrace *rec; ++ struct ftrace_profile_page *pg; ++ int functions; ++ int pages; ++ int i; + +- /* First check for freed records */ +- if (ftrace_free_records) { +- rec = ftrace_free_records; ++ /* If we already allocated, do nothing */ ++ if (stat->pages) ++ return 0; + +- if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { +- FTRACE_WARN_ON_ONCE(1); +- ftrace_free_records = NULL; +- return NULL; +- } ++ stat->pages = (void *)get_zeroed_page(GFP_KERNEL); ++ if (!stat->pages) ++ return -ENOMEM; + +- ftrace_free_records = (void *)rec->ip; +- memset(rec, 0, sizeof(*rec)); +- return rec; ++#ifdef CONFIG_DYNAMIC_FTRACE ++ functions = ftrace_update_tot_cnt; ++#else ++ /* ++ * We do not know the number of functions that exist because ++ * dynamic tracing is what counts them. With past experience ++ * we have around 20K functions. That should be more than enough. ++ * It is highly unlikely we will execute every function in ++ * the kernel. 
++ */ ++ functions = 20000; ++#endif ++ ++ pg = stat->start = stat->pages; ++ ++ pages = DIV_ROUND_UP(functions, PROFILES_PER_PAGE); ++ ++ for (i = 0; i < pages; i++) { ++ pg->next = (void *)get_zeroed_page(GFP_KERNEL); ++ if (!pg->next) ++ goto out_free; ++ pg = pg->next; + } + +- if (ftrace_pages->index == ENTRIES_PER_PAGE) { +- if (!ftrace_pages->next) { +- /* allocate another page */ +- ftrace_pages->next = +- (void *)get_zeroed_page(GFP_KERNEL); +- if (!ftrace_pages->next) +- return NULL; +- } +- ftrace_pages = ftrace_pages->next; ++ return 0; ++ ++ out_free: ++ pg = stat->start; ++ while (pg) { ++ unsigned long tmp = (unsigned long)pg; ++ ++ pg = pg->next; ++ free_page(tmp); + } + +- return &ftrace_pages->records[ftrace_pages->index++]; ++ free_page((unsigned long)stat->pages); ++ stat->pages = NULL; ++ stat->start = NULL; ++ ++ return -ENOMEM; + } + +-static struct dyn_ftrace * +-ftrace_record_ip(unsigned long ip) ++static int ftrace_profile_init_cpu(int cpu) + { +- struct dyn_ftrace *rec; ++ struct ftrace_profile_stat *stat; ++ int size; + +- if (ftrace_disabled) +- return NULL; ++ stat = &per_cpu(ftrace_profile_stats, cpu); + +- rec = ftrace_alloc_dyn_node(ip); +- if (!rec) +- return NULL; ++ if (stat->hash) { ++ /* If the profile is already created, simply reset it */ ++ ftrace_profile_reset(stat); ++ return 0; ++ } + +- rec->ip = ip; ++ /* ++ * We are profiling all functions, but usually only a few thousand ++ * functions are hit. We'll make a hash of 1024 items. ++ */ ++ size = FTRACE_PROFILE_HASH_SIZE; + +- list_add(&rec->list, &ftrace_new_addrs); ++ stat->hash = kzalloc(sizeof(struct hlist_head) * size, GFP_KERNEL); + +- return rec; +-} ++ if (!stat->hash) ++ return -ENOMEM; + +-static void print_ip_ins(const char *fmt, unsigned char *p) +-{ +- int i; ++ if (!ftrace_profile_bits) { ++ size--; + +- printk(KERN_CONT "%s", fmt); ++ for (; size; size >>= 1) ++ ftrace_profile_bits++; ++ } + +- for (i = 0; i < MCOUNT_INSN_SIZE; i++) +- printk(KERN_CONT "%s%02x", i ? 
":" : "", p[i]); ++ /* Preallocate the function profiling pages */ ++ if (ftrace_profile_pages_init(stat) < 0) { ++ kfree(stat->hash); ++ stat->hash = NULL; ++ return -ENOMEM; ++ } ++ ++ return 0; + } + +-static void ftrace_bug(int failed, unsigned long ip) ++static int ftrace_profile_init(void) + { +- switch (failed) { +- case -EFAULT: +- FTRACE_WARN_ON_ONCE(1); +- pr_info("ftrace faulted on modifying "); +- print_ip_sym(ip); +- break; +- case -EINVAL: +- FTRACE_WARN_ON_ONCE(1); +- pr_info("ftrace failed to modify "); +- print_ip_sym(ip); +- print_ip_ins(" actual: ", (unsigned char *)ip); +- printk(KERN_CONT "\n"); +- break; +- case -EPERM: +- FTRACE_WARN_ON_ONCE(1); +- pr_info("ftrace faulted on writing "); +- print_ip_sym(ip); +- break; +- default: +- FTRACE_WARN_ON_ONCE(1); +- pr_info("ftrace faulted on unknown error "); +- print_ip_sym(ip); ++ int cpu; ++ int ret = 0; ++ ++ for_each_online_cpu(cpu) { ++ ret = ftrace_profile_init_cpu(cpu); ++ if (ret) ++ break; + } ++ ++ return ret; + } + ++/* interrupts must be disabled */ ++static struct ftrace_profile * ++ftrace_find_profiled_func(struct ftrace_profile_stat *stat, unsigned long ip) ++{ ++ struct ftrace_profile *rec; ++ struct hlist_head *hhd; ++ struct hlist_node *n; ++ unsigned long key; + +-static int +-__ftrace_replace_code(struct dyn_ftrace *rec, int enable) ++ key = hash_long(ip, ftrace_profile_bits); ++ hhd = &stat->hash[key]; ++ ++ if (hlist_empty(hhd)) ++ return NULL; ++ ++ hlist_for_each_entry_rcu(rec, n, hhd, node) { ++ if (rec->ip == ip) ++ return rec; ++ } ++ ++ return NULL; ++} ++ ++static void ftrace_add_profile(struct ftrace_profile_stat *stat, ++ struct ftrace_profile *rec) + { +- unsigned long ip, fl; +- unsigned long ftrace_addr; ++ unsigned long key; + +- ftrace_addr = (unsigned long)ftrace_caller; ++ key = hash_long(rec->ip, ftrace_profile_bits); ++ hlist_add_head_rcu(&rec->node, &stat->hash[key]); ++} + +- ip = rec->ip; ++/* ++ * The memory is already allocated, this simply finds a new record to use. ++ */ ++static struct ftrace_profile * ++ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip) ++{ ++ struct ftrace_profile *rec = NULL; ++ ++ /* prevent recursion (from NMIs) */ ++ if (atomic_inc_return(&stat->disabled) != 1) ++ goto out; + + /* +- * If this record is not to be traced and +- * it is not enabled then do nothing. +- * +- * If this record is not to be traced and +- * it is enabled then disabled it. 
+- * ++ * Try to find the function again since an NMI ++ * could have added it + */ +- if (rec->flags & FTRACE_FL_NOTRACE) { +- if (rec->flags & FTRACE_FL_ENABLED) +- rec->flags &= ~FTRACE_FL_ENABLED; +- else +- return 0; ++ rec = ftrace_find_profiled_func(stat, ip); ++ if (rec) ++ goto out; + +- } else if (ftrace_filtered && enable) { +- /* +- * Filtering is on: +- */ +- +- fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED); ++ if (stat->pages->index == PROFILES_PER_PAGE) { ++ if (!stat->pages->next) ++ goto out; ++ stat->pages = stat->pages->next; ++ } + +- /* Record is filtered and enabled, do nothing */ +- if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) +- return 0; ++ rec = &stat->pages->records[stat->pages->index++]; ++ rec->ip = ip; ++ ftrace_add_profile(stat, rec); + +- /* Record is not filtered and is not enabled do nothing */ +- if (!fl) +- return 0; ++ out: ++ atomic_dec(&stat->disabled); + +- /* Record is not filtered but enabled, disable it */ +- if (fl == FTRACE_FL_ENABLED) +- rec->flags &= ~FTRACE_FL_ENABLED; +- else +- /* Otherwise record is filtered but not enabled, enable it */ +- rec->flags |= FTRACE_FL_ENABLED; +- } else { +- /* Disable or not filtered */ ++ return rec; ++} + +- if (enable) { +- /* if record is enabled, do nothing */ +- if (rec->flags & FTRACE_FL_ENABLED) +- return 0; ++static void ++function_profile_call(unsigned long ip, unsigned long parent_ip) ++{ ++ struct ftrace_profile_stat *stat; ++ struct ftrace_profile *rec; ++ unsigned long flags; + +- rec->flags |= FTRACE_FL_ENABLED; ++ if (!ftrace_profile_enabled) ++ return; + +- } else { ++ local_irq_save(flags); + +- /* if record is not enabled do nothing */ +- if (!(rec->flags & FTRACE_FL_ENABLED)) +- return 0; ++ stat = &__get_cpu_var(ftrace_profile_stats); ++ if (!stat->hash || !ftrace_profile_enabled) ++ goto out; + +- rec->flags &= ~FTRACE_FL_ENABLED; +- } ++ rec = ftrace_find_profiled_func(stat, ip); ++ if (!rec) { ++ rec = ftrace_profile_alloc(stat, ip); ++ if (!rec) ++ goto out; + } + +- if (rec->flags & FTRACE_FL_ENABLED) +- return ftrace_make_call(rec, ftrace_addr); +- else +- return ftrace_make_nop(NULL, rec, ftrace_addr); ++ rec->counter++; ++ out: ++ local_irq_restore(flags); + } + +-static void ftrace_replace_code(int enable) ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++static int profile_graph_entry(struct ftrace_graph_ent *trace) + { +- int i, failed; +- struct dyn_ftrace *rec; +- struct ftrace_page *pg; +- +- for (pg = ftrace_pages_start; pg; pg = pg->next) { +- for (i = 0; i < pg->index; i++) { +- rec = &pg->records[i]; +- +- /* +- * Skip over free records and records that have +- * failed. 
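As an illustrative aside (not part of the patch): function_profile_call() above looks each instruction pointer up with ftrace_find_profiled_func(), which hashes the address and walks a single bucket, and ftrace_add_profile() inserts new records at the bucket head. A toy userspace version of that per-ip hash (the hash function and sizes are invented; the kernel uses hash_long() and hlists):

/* Illustrative userspace sketch, not part of the patch. */
#include <stdio.h>
#include <stdlib.h>

#define HASH_SIZE 1024				/* must stay a power of two */

struct profile {
	struct profile *next;
	unsigned long ip;
	unsigned long counter;
};

static struct profile *hash[HASH_SIZE];

static unsigned long hash_ip(unsigned long ip)
{
	return (ip >> 4) & (HASH_SIZE - 1);	/* toy hash; the kernel uses hash_long() */
}

static struct profile *find(unsigned long ip)
{
	struct profile *rec;

	for (rec = hash[hash_ip(ip)]; rec; rec = rec->next)
		if (rec->ip == ip)
			return rec;
	return NULL;
}

static struct profile *find_or_add(unsigned long ip)
{
	struct profile *rec = find(ip);

	if (!rec) {
		rec = calloc(1, sizeof(*rec));	/* allocation assumed to succeed here */
		rec->ip = ip;
		rec->next = hash[hash_ip(ip)];
		hash[hash_ip(ip)] = rec;
	}
	return rec;
}

int main(void)
{
	find_or_add(0xc0de)->counter++;
	find_or_add(0xc0de)->counter++;
	printf("0xc0de hit %lu times\n", find(0xc0de)->counter);
	return 0;
}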
+- */ +- if (rec->flags & FTRACE_FL_FREE || +- rec->flags & FTRACE_FL_FAILED) +- continue; +- +- /* ignore updates to this record's mcount site */ +- if (get_kprobe((void *)rec->ip)) { +- freeze_record(rec); +- continue; +- } else { +- unfreeze_record(rec); +- } +- +- failed = __ftrace_replace_code(rec, enable); +- if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { +- rec->flags |= FTRACE_FL_FAILED; +- if ((system_state == SYSTEM_BOOTING) || +- !core_kernel_text(rec->ip)) { +- ftrace_free_rec(rec); +- } else +- ftrace_bug(failed, rec->ip); +- } +- } +- } ++ function_profile_call(trace->func, 0); ++ return 1; + } + +-static int +-ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) ++static void profile_graph_return(struct ftrace_graph_ret *trace) + { +- unsigned long ip; +- int ret; ++ struct ftrace_profile_stat *stat; ++ unsigned long long calltime; ++ struct ftrace_profile *rec; ++ unsigned long flags; + +- ip = rec->ip; ++ local_irq_save(flags); ++ stat = &__get_cpu_var(ftrace_profile_stats); ++ if (!stat->hash || !ftrace_profile_enabled) ++ goto out; + +- ret = ftrace_make_nop(mod, rec, mcount_addr); +- if (ret) { +- ftrace_bug(ret, ip); +- rec->flags |= FTRACE_FL_FAILED; +- return 0; +- } +- return 1; +-} ++ calltime = trace->rettime - trace->calltime; + +-static int __ftrace_modify_code(void *data) +-{ +- int *command = data; ++ if (!(trace_flags & TRACE_ITER_GRAPH_TIME)) { ++ int index; + +- if (*command & FTRACE_ENABLE_CALLS) +- ftrace_replace_code(1); +- else if (*command & FTRACE_DISABLE_CALLS) +- ftrace_replace_code(0); ++ index = trace->depth; + +- if (*command & FTRACE_UPDATE_TRACE_FUNC) +- ftrace_update_ftrace_func(ftrace_trace_function); ++ /* Append this call time to the parent time to subtract */ ++ if (index) ++ current->ret_stack[index - 1].subtime += calltime; + +- if (*command & FTRACE_START_FUNC_RET) +- ftrace_enable_ftrace_graph_caller(); +- else if (*command & FTRACE_STOP_FUNC_RET) +- ftrace_disable_ftrace_graph_caller(); ++ if (current->ret_stack[index].subtime < calltime) ++ calltime -= current->ret_stack[index].subtime; ++ else ++ calltime = 0; ++ } + +- return 0; ++ rec = ftrace_find_profiled_func(stat, trace->func); ++ if (rec) ++ rec->time += calltime; ++ ++ out: ++ local_irq_restore(flags); + } + +-static void ftrace_run_update_code(int command) ++static int register_ftrace_profiler(void) + { +- stop_machine(__ftrace_modify_code, &command, NULL); ++ return register_ftrace_graph(&profile_graph_return, ++ &profile_graph_entry); + } + +-static ftrace_func_t saved_ftrace_func; +-static int ftrace_start_up; +- +-static void ftrace_startup_enable(int command) ++static void unregister_ftrace_profiler(void) + { +- if (saved_ftrace_func != ftrace_trace_function) { +- saved_ftrace_func = ftrace_trace_function; +- command |= FTRACE_UPDATE_TRACE_FUNC; +- } ++ unregister_ftrace_graph(); ++} ++#else ++static struct ftrace_ops ftrace_profile_ops __read_mostly = ++{ ++ .func = function_profile_call, ++}; + +- if (!command || !ftrace_enabled) +- return; ++static int register_ftrace_profiler(void) ++{ ++ return register_ftrace_function(&ftrace_profile_ops); ++} + +- ftrace_run_update_code(command); ++static void unregister_ftrace_profiler(void) ++{ ++ unregister_ftrace_function(&ftrace_profile_ops); + } ++#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +-static void ftrace_startup(int command) ++static ssize_t ++ftrace_profile_write(struct file *filp, const char __user *ubuf, ++ size_t cnt, loff_t *ppos) + { +- if (unlikely(ftrace_disabled)) +- return; ++ unsigned long 
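As an illustrative aside (not part of the patch): profile_graph_return() above charges each completed call's total time to the parent frame's subtime and subtracts its own accumulated subtime, so every function is credited with self time rather than the time spent in its children (this is the path taken when graph time is not folded in). A small model of that bookkeeping with invented timestamps:

/* Illustrative userspace sketch, not part of the patch. */
#include <stdio.h>

struct frame {
	unsigned long long calltime;
	unsigned long long subtime;
};

static struct frame stack[16];

/* called when the function at depth 'depth' returns at time 'now' */
static unsigned long long on_return(int depth, unsigned long long now)
{
	unsigned long long total = now - stack[depth].calltime;
	unsigned long long self = total;

	if (self > stack[depth].subtime)
		self -= stack[depth].subtime;		/* drop time spent in children */
	else
		self = 0;

	if (depth > 0)
		stack[depth - 1].subtime += total;	/* charge the whole call to the parent */

	stack[depth].subtime = 0;			/* the frame slot can be reused */
	return self;
}

int main(void)
{
	stack[0] = (struct frame){ .calltime = 0 };	/* parent enters at t=0  */
	stack[1] = (struct frame){ .calltime = 10 };	/* child enters at t=10 */

	printf("child self time:  %llu\n", on_return(1, 40));	/* 30 */
	printf("parent self time: %llu\n", on_return(0, 50));	/* 50 - 30 = 20 */
	return 0;
}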
val; ++ char buf[64]; ++ int ret; + +- mutex_lock(&ftrace_start_lock); +- ftrace_start_up++; +- command |= FTRACE_ENABLE_CALLS; ++ if (cnt >= sizeof(buf)) ++ return -EINVAL; + +- ftrace_startup_enable(command); ++ if (copy_from_user(&buf, ubuf, cnt)) ++ return -EFAULT; + +- mutex_unlock(&ftrace_start_lock); +-} ++ buf[cnt] = 0; + +-static void ftrace_shutdown(int command) +-{ +- if (unlikely(ftrace_disabled)) +- return; ++ ret = strict_strtoul(buf, 10, &val); ++ if (ret < 0) ++ return ret; + +- mutex_lock(&ftrace_start_lock); +- ftrace_start_up--; +- if (!ftrace_start_up) +- command |= FTRACE_DISABLE_CALLS; ++ val = !!val; + +- if (saved_ftrace_func != ftrace_trace_function) { +- saved_ftrace_func = ftrace_trace_function; +- command |= FTRACE_UPDATE_TRACE_FUNC; ++ mutex_lock(&ftrace_profile_lock); ++ if (ftrace_profile_enabled ^ val) { ++ if (val) { ++ ret = ftrace_profile_init(); ++ if (ret < 0) { ++ cnt = ret; ++ goto out; ++ } ++ ++ ret = register_ftrace_profiler(); ++ if (ret < 0) { ++ cnt = ret; ++ goto out; ++ } ++ ftrace_profile_enabled = 1; ++ } else { ++ ftrace_profile_enabled = 0; ++ /* ++ * unregister_ftrace_profiler calls stop_machine ++ * so this acts like an synchronize_sched. ++ */ ++ unregister_ftrace_profiler(); ++ } + } ++ out: ++ mutex_unlock(&ftrace_profile_lock); + +- if (!command || !ftrace_enabled) +- goto out; ++ filp->f_pos += cnt; + +- ftrace_run_update_code(command); +- out: +- mutex_unlock(&ftrace_start_lock); ++ return cnt; + } + +-static void ftrace_startup_sysctl(void) ++static ssize_t ++ftrace_profile_read(struct file *filp, char __user *ubuf, ++ size_t cnt, loff_t *ppos) + { +- int command = FTRACE_ENABLE_MCOUNT; ++ char buf[64]; ++ int r; + +- if (unlikely(ftrace_disabled)) +- return; ++ r = sprintf(buf, "%u\n", ftrace_profile_enabled); ++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); ++} + +- mutex_lock(&ftrace_start_lock); +- /* Force update next time */ +- saved_ftrace_func = NULL; +- /* ftrace_start_up is true if we want ftrace running */ +- if (ftrace_start_up) +- command |= FTRACE_ENABLE_CALLS; ++static const struct file_operations ftrace_profile_fops = { ++ .open = tracing_open_generic, ++ .read = ftrace_profile_read, ++ .write = ftrace_profile_write, ++}; + +- ftrace_run_update_code(command); +- mutex_unlock(&ftrace_start_lock); +-} ++/* used to initialize the real stat files */ ++static struct tracer_stat function_stats __initdata = { ++ .name = "functions", ++ .stat_start = function_stat_start, ++ .stat_next = function_stat_next, ++ .stat_cmp = function_stat_cmp, ++ .stat_headers = function_stat_headers, ++ .stat_show = function_stat_show ++}; + +-static void ftrace_shutdown_sysctl(void) ++static void ftrace_profile_debugfs(struct dentry *d_tracer) + { +- int command = FTRACE_DISABLE_MCOUNT; ++ struct ftrace_profile_stat *stat; ++ struct dentry *entry; ++ char *name; ++ int ret; ++ int cpu; + +- if (unlikely(ftrace_disabled)) +- return; ++ for_each_possible_cpu(cpu) { ++ stat = &per_cpu(ftrace_profile_stats, cpu); + +- mutex_lock(&ftrace_start_lock); +- /* ftrace_start_up is true if ftrace is running */ +- if (ftrace_start_up) +- command |= FTRACE_DISABLE_CALLS; ++ /* allocate enough for function name + cpu number */ ++ name = kmalloc(32, GFP_KERNEL); ++ if (!name) { ++ /* ++ * The files created are permanent, if something happens ++ * we still do not free memory. 
++ */ ++ kfree(stat); ++ WARN(1, ++ "Could not allocate stat file for cpu %d\n", ++ cpu); ++ return; ++ } ++ stat->stat = function_stats; ++ snprintf(name, 32, "function%d", cpu); ++ stat->stat.name = name; ++ ret = register_stat_tracer(&stat->stat); ++ if (ret) { ++ WARN(1, ++ "Could not register function stat for cpu %d\n", ++ cpu); ++ kfree(name); ++ return; ++ } ++ } + +- ftrace_run_update_code(command); +- mutex_unlock(&ftrace_start_lock); ++ entry = debugfs_create_file("function_profile_enabled", 0644, ++ d_tracer, NULL, &ftrace_profile_fops); ++ if (!entry) ++ pr_warning("Could not create debugfs " ++ "'function_profile_enabled' entry\n"); + } + +-static cycle_t ftrace_update_time; +-static unsigned long ftrace_update_cnt; +-unsigned long ftrace_update_tot_cnt; +- +-static int ftrace_update_code(struct module *mod) ++#else /* CONFIG_FUNCTION_PROFILER */ ++static void ftrace_profile_debugfs(struct dentry *d_tracer) + { +- struct dyn_ftrace *p, *t; +- cycle_t start, stop; ++} ++#endif /* CONFIG_FUNCTION_PROFILER */ + +- start = ftrace_now(raw_smp_processor_id()); +- ftrace_update_cnt = 0; ++/* set when tracing only a pid */ ++struct pid *ftrace_pid_trace; ++static struct pid * const ftrace_swapper_pid = &init_struct_pid; + +- list_for_each_entry_safe(p, t, &ftrace_new_addrs, list) { ++#ifdef CONFIG_DYNAMIC_FTRACE + +- /* If something went wrong, bail without enabling anything */ +- if (unlikely(ftrace_disabled)) +- return -1; ++#ifndef CONFIG_FTRACE_MCOUNT_RECORD ++# error Dynamic ftrace depends on MCOUNT_RECORD ++#endif + +- list_del_init(&p->list); ++static struct hlist_head ftrace_func_hash[FTRACE_FUNC_HASHSIZE] __read_mostly; + +- /* convert record (i.e, patch mcount-call with NOP) */ +- if (ftrace_code_disable(mod, p)) { +- p->flags |= FTRACE_FL_CONVERTED; +- ftrace_update_cnt++; +- } else +- ftrace_free_rec(p); +- } ++struct ftrace_func_probe { ++ struct hlist_node node; ++ struct ftrace_probe_ops *ops; ++ unsigned long flags; ++ unsigned long ip; ++ void *data; ++ struct rcu_head rcu; ++}; + +- stop = ftrace_now(raw_smp_processor_id()); +- ftrace_update_time = stop - start; +- ftrace_update_tot_cnt += ftrace_update_cnt; ++enum { ++ FTRACE_ENABLE_CALLS = (1 << 0), ++ FTRACE_DISABLE_CALLS = (1 << 1), ++ FTRACE_UPDATE_TRACE_FUNC = (1 << 2), ++ FTRACE_ENABLE_MCOUNT = (1 << 3), ++ FTRACE_DISABLE_MCOUNT = (1 << 4), ++ FTRACE_START_FUNC_RET = (1 << 5), ++ FTRACE_STOP_FUNC_RET = (1 << 6), ++}; + +- return 0; +-} ++static int ftrace_filtered; + +-static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) +-{ +- struct ftrace_page *pg; +- int cnt; +- int i; ++static struct dyn_ftrace *ftrace_new_addrs; + +- /* allocate a few pages */ +- ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); +- if (!ftrace_pages_start) +- return -1; ++static DEFINE_MUTEX(ftrace_regex_lock); + +- /* +- * Allocate a few more pages. +- * ++struct ftrace_page { ++ struct ftrace_page *next; ++ int index; ++ struct dyn_ftrace records[]; ++}; ++ ++#define ENTRIES_PER_PAGE \ ++ ((PAGE_SIZE - sizeof(struct ftrace_page)) / sizeof(struct dyn_ftrace)) ++ ++/* estimate from running different kernels */ ++#define NR_TO_INIT 10000 ++ ++static struct ftrace_page *ftrace_pages_start; ++static struct ftrace_page *ftrace_pages; ++ ++static struct dyn_ftrace *ftrace_free_records; ++ ++/* ++ * This is a double for. Do not use 'break' to break out of the loop, ++ * you must use a goto. 
++ */ ++#define do_for_each_ftrace_rec(pg, rec) \ ++ for (pg = ftrace_pages_start; pg; pg = pg->next) { \ ++ int _____i; \ ++ for (_____i = 0; _____i < pg->index; _____i++) { \ ++ rec = &pg->records[_____i]; ++ ++#define while_for_each_ftrace_rec() \ ++ } \ ++ } ++ ++#ifdef CONFIG_KPROBES ++ ++static int frozen_record_count; ++ ++static inline void freeze_record(struct dyn_ftrace *rec) ++{ ++ if (!(rec->flags & FTRACE_FL_FROZEN)) { ++ rec->flags |= FTRACE_FL_FROZEN; ++ frozen_record_count++; ++ } ++} ++ ++static inline void unfreeze_record(struct dyn_ftrace *rec) ++{ ++ if (rec->flags & FTRACE_FL_FROZEN) { ++ rec->flags &= ~FTRACE_FL_FROZEN; ++ frozen_record_count--; ++ } ++} ++ ++static inline int record_frozen(struct dyn_ftrace *rec) ++{ ++ return rec->flags & FTRACE_FL_FROZEN; ++} ++#else ++# define freeze_record(rec) ({ 0; }) ++# define unfreeze_record(rec) ({ 0; }) ++# define record_frozen(rec) ({ 0; }) ++#endif /* CONFIG_KPROBES */ ++ ++static void ftrace_free_rec(struct dyn_ftrace *rec) ++{ ++ rec->freelist = ftrace_free_records; ++ ftrace_free_records = rec; ++ rec->flags |= FTRACE_FL_FREE; ++} ++ ++void ftrace_release(void *start, unsigned long size) ++{ ++ struct dyn_ftrace *rec; ++ struct ftrace_page *pg; ++ unsigned long s = (unsigned long)start; ++ unsigned long e = s + size; ++ ++ if (ftrace_disabled || !start) ++ return; ++ ++ mutex_lock(&ftrace_lock); ++ do_for_each_ftrace_rec(pg, rec) { ++ if ((rec->ip >= s) && (rec->ip < e) && ++ !(rec->flags & FTRACE_FL_FREE)) ++ ftrace_free_rec(rec); ++ } while_for_each_ftrace_rec(); ++ mutex_unlock(&ftrace_lock); ++} ++ ++static struct dyn_ftrace *ftrace_alloc_dyn_node(unsigned long ip) ++{ ++ struct dyn_ftrace *rec; ++ ++ /* First check for freed records */ ++ if (ftrace_free_records) { ++ rec = ftrace_free_records; ++ ++ if (unlikely(!(rec->flags & FTRACE_FL_FREE))) { ++ FTRACE_WARN_ON_ONCE(1); ++ ftrace_free_records = NULL; ++ return NULL; ++ } ++ ++ ftrace_free_records = rec->freelist; ++ memset(rec, 0, sizeof(*rec)); ++ return rec; ++ } ++ ++ if (ftrace_pages->index == ENTRIES_PER_PAGE) { ++ if (!ftrace_pages->next) { ++ /* allocate another page */ ++ ftrace_pages->next = ++ (void *)get_zeroed_page(GFP_KERNEL); ++ if (!ftrace_pages->next) ++ return NULL; ++ } ++ ftrace_pages = ftrace_pages->next; ++ } ++ ++ return &ftrace_pages->records[ftrace_pages->index++]; ++} ++ ++static struct dyn_ftrace * ++ftrace_record_ip(unsigned long ip) ++{ ++ struct dyn_ftrace *rec; ++ ++ if (ftrace_disabled) ++ return NULL; ++ ++ rec = ftrace_alloc_dyn_node(ip); ++ if (!rec) ++ return NULL; ++ ++ rec->ip = ip; ++ rec->newlist = ftrace_new_addrs; ++ ftrace_new_addrs = rec; ++ ++ return rec; ++} ++ ++static void print_ip_ins(const char *fmt, unsigned char *p) ++{ ++ int i; ++ ++ printk(KERN_CONT "%s", fmt); ++ ++ for (i = 0; i < MCOUNT_INSN_SIZE; i++) ++ printk(KERN_CONT "%s%02x", i ? 
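As an illustrative aside (not part of the patch): do_for_each_ftrace_rec()/while_for_each_ftrace_rec() above hide a nested loop over record pages, which is exactly why the comment insists on goto rather than break. The userspace re-creation below shows the macro shape and the goto that actually ends the walk (all names and data invented):

/* Illustrative userspace sketch, not part of the patch. */
#include <stdio.h>

struct rec  { unsigned long ip; };
struct page { struct page *next; int index; struct rec records[4]; };

static struct page page2 = { NULL,   2, { { 30 }, { 40 } } };
static struct page page1 = { &page2, 2, { { 10 }, { 20 } } };
static struct page *pages = &page1;

/* a double for: 'break' only leaves the inner loop, use a goto to stop the walk */
#define do_for_each_rec(pg, rec)				\
	for (pg = pages; pg; pg = pg->next) {			\
		int _i;						\
		for (_i = 0; _i < pg->index; _i++) {		\
			rec = &pg->records[_i];

#define while_for_each_rec()					\
		}						\
	}

int main(void)
{
	struct page *pg;
	struct rec *rec;

	do_for_each_rec(pg, rec) {
		printf("ip %lu\n", rec->ip);
		if (rec->ip == 30)
			goto done;	/* a 'break' here would only leave the inner loop */
	} while_for_each_rec();
done:
	return 0;
}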
":" : "", p[i]); ++} ++ ++static void ftrace_bug(int failed, unsigned long ip) ++{ ++ switch (failed) { ++ case -EFAULT: ++ FTRACE_WARN_ON_ONCE(1); ++ pr_info("ftrace faulted on modifying "); ++ print_ip_sym(ip); ++ break; ++ case -EINVAL: ++ FTRACE_WARN_ON_ONCE(1); ++ pr_info("ftrace failed to modify "); ++ print_ip_sym(ip); ++ print_ip_ins(" actual: ", (unsigned char *)ip); ++ printk(KERN_CONT "\n"); ++ break; ++ case -EPERM: ++ FTRACE_WARN_ON_ONCE(1); ++ pr_info("ftrace faulted on writing "); ++ print_ip_sym(ip); ++ break; ++ default: ++ FTRACE_WARN_ON_ONCE(1); ++ pr_info("ftrace faulted on unknown error "); ++ print_ip_sym(ip); ++ } ++} ++ ++ ++static int ++__ftrace_replace_code(struct dyn_ftrace *rec, int enable) ++{ ++ unsigned long ftrace_addr; ++ unsigned long ip, fl; ++ ++ ftrace_addr = (unsigned long)FTRACE_ADDR; ++ ++ ip = rec->ip; ++ ++ /* ++ * If this record is not to be traced and ++ * it is not enabled then do nothing. ++ * ++ * If this record is not to be traced and ++ * it is enabled then disable it. ++ * ++ */ ++ if (rec->flags & FTRACE_FL_NOTRACE) { ++ if (rec->flags & FTRACE_FL_ENABLED) ++ rec->flags &= ~FTRACE_FL_ENABLED; ++ else ++ return 0; ++ ++ } else if (ftrace_filtered && enable) { ++ /* ++ * Filtering is on: ++ */ ++ ++ fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED); ++ ++ /* Record is filtered and enabled, do nothing */ ++ if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) ++ return 0; ++ ++ /* Record is not filtered or enabled, do nothing */ ++ if (!fl) ++ return 0; ++ ++ /* Record is not filtered but enabled, disable it */ ++ if (fl == FTRACE_FL_ENABLED) ++ rec->flags &= ~FTRACE_FL_ENABLED; ++ else ++ /* Otherwise record is filtered but not enabled, enable it */ ++ rec->flags |= FTRACE_FL_ENABLED; ++ } else { ++ /* Disable or not filtered */ ++ ++ if (enable) { ++ /* if record is enabled, do nothing */ ++ if (rec->flags & FTRACE_FL_ENABLED) ++ return 0; ++ ++ rec->flags |= FTRACE_FL_ENABLED; ++ ++ } else { ++ ++ /* if record is not enabled, do nothing */ ++ if (!(rec->flags & FTRACE_FL_ENABLED)) ++ return 0; ++ ++ rec->flags &= ~FTRACE_FL_ENABLED; ++ } ++ } ++ ++ if (rec->flags & FTRACE_FL_ENABLED) ++ return ftrace_make_call(rec, ftrace_addr); ++ else ++ return ftrace_make_nop(NULL, rec, ftrace_addr); ++} ++ ++static void ftrace_replace_code(int enable) ++{ ++ struct dyn_ftrace *rec; ++ struct ftrace_page *pg; ++ int failed; ++ ++ do_for_each_ftrace_rec(pg, rec) { ++ /* ++ * Skip over free records, records that have ++ * failed and not converted. 
++ */ ++ if (rec->flags & FTRACE_FL_FREE || ++ rec->flags & FTRACE_FL_FAILED || ++ !(rec->flags & FTRACE_FL_CONVERTED)) ++ continue; ++ ++ /* ignore updates to this record's mcount site */ ++ if (get_kprobe((void *)rec->ip)) { ++ freeze_record(rec); ++ continue; ++ } else { ++ unfreeze_record(rec); ++ } ++ ++ failed = __ftrace_replace_code(rec, enable); ++ if (failed) { ++ rec->flags |= FTRACE_FL_FAILED; ++ if ((system_state == SYSTEM_BOOTING) || ++ !core_kernel_text(rec->ip)) { ++ ftrace_free_rec(rec); ++ } else { ++ ftrace_bug(failed, rec->ip); ++ /* Stop processing */ ++ return; ++ } ++ } ++ } while_for_each_ftrace_rec(); ++} ++ ++static int ++ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) ++{ ++ unsigned long ip; ++ int ret; ++ ++ ip = rec->ip; ++ ++ ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); ++ if (ret) { ++ ftrace_bug(ret, ip); ++ rec->flags |= FTRACE_FL_FAILED; ++ return 0; ++ } ++ return 1; ++} ++ ++/* ++ * archs can override this function if they must do something ++ * before the modifying code is performed. ++ */ ++int __weak ftrace_arch_code_modify_prepare(void) ++{ ++ return 0; ++} ++ ++/* ++ * archs can override this function if they must do something ++ * after the modifying code is performed. ++ */ ++int __weak ftrace_arch_code_modify_post_process(void) ++{ ++ return 0; ++} ++ ++static int __ftrace_modify_code(void *data) ++{ ++ int *command = data; ++ ++ if (*command & FTRACE_ENABLE_CALLS) ++ ftrace_replace_code(1); ++ else if (*command & FTRACE_DISABLE_CALLS) ++ ftrace_replace_code(0); ++ ++ if (*command & FTRACE_UPDATE_TRACE_FUNC) ++ ftrace_update_ftrace_func(ftrace_trace_function); ++ ++ if (*command & FTRACE_START_FUNC_RET) ++ ftrace_enable_ftrace_graph_caller(); ++ else if (*command & FTRACE_STOP_FUNC_RET) ++ ftrace_disable_ftrace_graph_caller(); ++ ++ return 0; ++} ++ ++static void ftrace_run_update_code(int command) ++{ ++ int ret; ++ ++ ret = ftrace_arch_code_modify_prepare(); ++ FTRACE_WARN_ON(ret); ++ if (ret) ++ return; ++ ++ stop_machine(__ftrace_modify_code, &command, NULL); ++ ++ ret = ftrace_arch_code_modify_post_process(); ++ FTRACE_WARN_ON(ret); ++} ++ ++static ftrace_func_t saved_ftrace_func; ++static int ftrace_start_up; ++ ++static void ftrace_startup_enable(int command) ++{ ++ if (saved_ftrace_func != ftrace_trace_function) { ++ saved_ftrace_func = ftrace_trace_function; ++ command |= FTRACE_UPDATE_TRACE_FUNC; ++ } ++ ++ if (!command || !ftrace_enabled) ++ return; ++ ++ ftrace_run_update_code(command); ++} ++ ++static void ftrace_startup(int command) ++{ ++ if (unlikely(ftrace_disabled)) ++ return; ++ ++ ftrace_start_up++; ++ command |= FTRACE_ENABLE_CALLS; ++ ++ ftrace_startup_enable(command); ++} ++ ++static void ftrace_shutdown(int command) ++{ ++ if (unlikely(ftrace_disabled)) ++ return; ++ ++ ftrace_start_up--; ++ if (!ftrace_start_up) ++ command |= FTRACE_DISABLE_CALLS; ++ ++ if (saved_ftrace_func != ftrace_trace_function) { ++ saved_ftrace_func = ftrace_trace_function; ++ command |= FTRACE_UPDATE_TRACE_FUNC; ++ } ++ ++ if (!command || !ftrace_enabled) ++ return; ++ ++ ftrace_run_update_code(command); ++} ++ ++static void ftrace_startup_sysctl(void) ++{ ++ int command = FTRACE_ENABLE_MCOUNT; ++ ++ if (unlikely(ftrace_disabled)) ++ return; ++ ++ /* Force update next time */ ++ saved_ftrace_func = NULL; ++ /* ftrace_start_up is true if we want ftrace running */ ++ if (ftrace_start_up) ++ command |= FTRACE_ENABLE_CALLS; ++ ++ ftrace_run_update_code(command); ++} ++ ++static void ftrace_shutdown_sysctl(void) ++{ ++ int command = 
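As an illustrative aside (not part of the patch): the long if/else chain in __ftrace_replace_code() above reduces to a small decision, given the record's NOTRACE/FILTER/ENABLED flags and whether tracing is being switched on, either leave the call site alone, patch in the tracer call, or patch the site back to a nop when its state actually changes. A condensed userspace model of that decision (flag values invented):

/* Illustrative userspace sketch, not part of the patch. */
#include <stdio.h>

enum { FL_ENABLED = 1, FL_FILTER = 2, FL_NOTRACE = 4 };
enum { LEAVE_ALONE, MAKE_CALL, MAKE_NOP };

static int replace(unsigned int *flags, int enable, int filtering)
{
	unsigned int fl;

	if (*flags & FL_NOTRACE) {
		if (!(*flags & FL_ENABLED))
			return LEAVE_ALONE;
		*flags &= ~FL_ENABLED;			/* was enabled: turn it off */
	} else if (filtering && enable) {
		fl = *flags & (FL_FILTER | FL_ENABLED);
		if (fl == (FL_FILTER | FL_ENABLED) || fl == 0)
			return LEAVE_ALONE;		/* already in the right state */
		if (fl == FL_ENABLED)
			*flags &= ~FL_ENABLED;		/* enabled but not filtered: off */
		else
			*flags |= FL_ENABLED;		/* filtered but not enabled: on */
	} else {
		if (enable == !!(*flags & FL_ENABLED))
			return LEAVE_ALONE;		/* nothing to change */
		*flags ^= FL_ENABLED;
	}

	return (*flags & FL_ENABLED) ? MAKE_CALL : MAKE_NOP;
}

int main(void)
{
	unsigned int a = FL_FILTER, b = FL_ENABLED, c = FL_NOTRACE | FL_ENABLED;

	printf("%d %d %d\n",
	       replace(&a, 1, 1),	/* filtered, not yet on  -> MAKE_CALL */
	       replace(&b, 1, 1),	/* on but not filtered   -> MAKE_NOP  */
	       replace(&c, 1, 0));	/* notrace and still on  -> MAKE_NOP  */
	return 0;
}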
FTRACE_DISABLE_MCOUNT; ++ ++ if (unlikely(ftrace_disabled)) ++ return; ++ ++ /* ftrace_start_up is true if ftrace is running */ ++ if (ftrace_start_up) ++ command |= FTRACE_DISABLE_CALLS; ++ ++ ftrace_run_update_code(command); ++} ++ ++static cycle_t ftrace_update_time; ++static unsigned long ftrace_update_cnt; ++unsigned long ftrace_update_tot_cnt; ++ ++static int ftrace_update_code(struct module *mod) ++{ ++ struct dyn_ftrace *p; ++ cycle_t start, stop; ++ ++ start = ftrace_now(raw_smp_processor_id()); ++ ftrace_update_cnt = 0; ++ ++ while (ftrace_new_addrs) { ++ ++ /* If something went wrong, bail without enabling anything */ ++ if (unlikely(ftrace_disabled)) ++ return -1; ++ ++ p = ftrace_new_addrs; ++ ftrace_new_addrs = p->newlist; ++ p->flags = 0L; ++ ++ /* convert record (i.e, patch mcount-call with NOP) */ ++ if (ftrace_code_disable(mod, p)) { ++ p->flags |= FTRACE_FL_CONVERTED; ++ ftrace_update_cnt++; ++ } else ++ ftrace_free_rec(p); ++ } ++ ++ stop = ftrace_now(raw_smp_processor_id()); ++ ftrace_update_time = stop - start; ++ ftrace_update_tot_cnt += ftrace_update_cnt; ++ ++ return 0; ++} ++ ++static int __init ftrace_dyn_table_alloc(unsigned long num_to_init) ++{ ++ struct ftrace_page *pg; ++ int cnt; ++ int i; ++ ++ /* allocate a few pages */ ++ ftrace_pages_start = (void *)get_zeroed_page(GFP_KERNEL); ++ if (!ftrace_pages_start) ++ return -1; ++ ++ /* ++ * Allocate a few more pages. ++ * + * TODO: have some parser search vmlinux before + * final linking to find all calls to ftrace. + * Then we can: +@@ -759,365 +1349,904 @@ static int __init ftrace_dyn_table_alloc + + pg = ftrace_pages = ftrace_pages_start; + +- cnt = num_to_init / ENTRIES_PER_PAGE; +- pr_info("ftrace: allocating %ld entries in %d pages\n", +- num_to_init, cnt + 1); ++ cnt = num_to_init / ENTRIES_PER_PAGE; ++ pr_info("ftrace: allocating %ld entries in %d pages\n", ++ num_to_init, cnt + 1); ++ ++ for (i = 0; i < cnt; i++) { ++ pg->next = (void *)get_zeroed_page(GFP_KERNEL); ++ ++ /* If we fail, we'll try later anyway */ ++ if (!pg->next) ++ break; ++ ++ pg = pg->next; ++ } ++ ++ return 0; ++} ++ ++enum { ++ FTRACE_ITER_FILTER = (1 << 0), ++ FTRACE_ITER_CONT = (1 << 1), ++ FTRACE_ITER_NOTRACE = (1 << 2), ++ FTRACE_ITER_FAILURES = (1 << 3), ++ FTRACE_ITER_PRINTALL = (1 << 4), ++ FTRACE_ITER_HASH = (1 << 5), ++}; ++ ++#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ ++ ++struct ftrace_iterator { ++ struct ftrace_page *pg; ++ int hidx; ++ int idx; ++ unsigned flags; ++ unsigned char buffer[FTRACE_BUFF_MAX+1]; ++ unsigned buffer_idx; ++ unsigned filtered; ++}; ++ ++static void * ++t_hash_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ struct hlist_node *hnd = v; ++ struct hlist_head *hhd; ++ ++ WARN_ON(!(iter->flags & FTRACE_ITER_HASH)); ++ ++ (*pos)++; ++ ++ retry: ++ if (iter->hidx >= FTRACE_FUNC_HASHSIZE) ++ return NULL; ++ ++ hhd = &ftrace_func_hash[iter->hidx]; ++ ++ if (hlist_empty(hhd)) { ++ iter->hidx++; ++ hnd = NULL; ++ goto retry; ++ } ++ ++ if (!hnd) ++ hnd = hhd->first; ++ else { ++ hnd = hnd->next; ++ if (!hnd) { ++ iter->hidx++; ++ goto retry; ++ } ++ } ++ ++ return hnd; ++} ++ ++static void *t_hash_start(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ void *p = NULL; ++ ++ iter->flags |= FTRACE_ITER_HASH; ++ ++ return t_hash_next(m, p, pos); ++} ++ ++static int t_hash_show(struct seq_file *m, void *v) ++{ ++ struct ftrace_func_probe *rec; ++ struct hlist_node *hnd = v; ++ char 
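As an illustrative aside (not part of the patch): ftrace_update_code() above drains the ftrace_new_addrs list by popping records off the head through rec->newlist, converting each mcount call site and freeing only the records that could not be converted. A userspace model of that drain loop with invented record contents:

/* Illustrative userspace sketch, not part of the patch. */
#include <stdio.h>
#include <stdlib.h>

struct rec {
	struct rec *newlist;
	unsigned long ip;
};

static struct rec *new_addrs;			/* head of the pending list */

static void record_ip(unsigned long ip)		/* analogue of ftrace_record_ip() */
{
	struct rec *r = calloc(1, sizeof(*r));

	if (!r)
		return;
	r->ip = ip;
	r->newlist = new_addrs;
	new_addrs = r;
}

static int convert(struct rec *r)		/* pretend to patch the call site */
{
	return r->ip != 0;			/* fail on an obviously bogus address */
}

int main(void)
{
	unsigned long converted = 0;
	struct rec *p;

	record_ip(0x1000);
	record_ip(0);
	record_ip(0x2000);

	while (new_addrs) {			/* drain the pending list */
		p = new_addrs;
		new_addrs = p->newlist;
		if (convert(p))
			converted++;		/* kept; in the kernel it stays in the record pages */
		else
			free(p);		/* analogue of ftrace_free_rec() */
	}
	printf("converted %lu records\n", converted);
	return 0;
}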
str[KSYM_SYMBOL_LEN]; ++ ++ rec = hlist_entry(hnd, struct ftrace_func_probe, node); ++ ++ if (rec->ops->print) ++ return rec->ops->print(m, rec->ip, rec->ops, rec->data); ++ ++ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); ++ seq_printf(m, "%s:", str); ++ ++ kallsyms_lookup((unsigned long)rec->ops->func, NULL, NULL, NULL, str); ++ seq_printf(m, "%s", str); ++ ++ if (rec->data) ++ seq_printf(m, ":%p", rec->data); ++ seq_putc(m, '\n'); ++ ++ return 0; ++} ++ ++static void * ++t_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ struct dyn_ftrace *rec = NULL; ++ ++ if (iter->flags & FTRACE_ITER_HASH) ++ return t_hash_next(m, v, pos); ++ ++ (*pos)++; ++ ++ if (iter->flags & FTRACE_ITER_PRINTALL) ++ return NULL; ++ ++ retry: ++ if (iter->idx >= iter->pg->index) { ++ if (iter->pg->next) { ++ iter->pg = iter->pg->next; ++ iter->idx = 0; ++ goto retry; ++ } else { ++ iter->idx = -1; ++ } ++ } else { ++ rec = &iter->pg->records[iter->idx++]; ++ if ((rec->flags & FTRACE_FL_FREE) || ++ ++ (!(iter->flags & FTRACE_ITER_FAILURES) && ++ (rec->flags & FTRACE_FL_FAILED)) || ++ ++ ((iter->flags & FTRACE_ITER_FAILURES) && ++ !(rec->flags & FTRACE_FL_FAILED)) || ++ ++ ((iter->flags & FTRACE_ITER_FILTER) && ++ !(rec->flags & FTRACE_FL_FILTER)) || ++ ++ ((iter->flags & FTRACE_ITER_NOTRACE) && ++ !(rec->flags & FTRACE_FL_NOTRACE))) { ++ rec = NULL; ++ goto retry; ++ } ++ } ++ ++ return rec; ++} ++ ++static void *t_start(struct seq_file *m, loff_t *pos) ++{ ++ struct ftrace_iterator *iter = m->private; ++ void *p = NULL; ++ ++ mutex_lock(&ftrace_lock); ++ /* ++ * For set_ftrace_filter reading, if we have the filter ++ * off, we can short cut and just print out that all ++ * functions are enabled. ++ */ ++ if (iter->flags & FTRACE_ITER_FILTER && !ftrace_filtered) { ++ if (*pos > 0) ++ return t_hash_start(m, pos); ++ iter->flags |= FTRACE_ITER_PRINTALL; ++ (*pos)++; ++ return iter; ++ } ++ ++ if (iter->flags & FTRACE_ITER_HASH) ++ return t_hash_start(m, pos); ++ ++ if (*pos > 0) { ++ if (iter->idx < 0) ++ return p; ++ (*pos)--; ++ iter->idx--; ++ } ++ ++ p = t_next(m, p, pos); ++ ++ if (!p) ++ return t_hash_start(m, pos); ++ ++ return p; ++} ++ ++static void t_stop(struct seq_file *m, void *p) ++{ ++ mutex_unlock(&ftrace_lock); ++} ++ ++static int t_show(struct seq_file *m, void *v) ++{ ++ struct ftrace_iterator *iter = m->private; ++ struct dyn_ftrace *rec = v; ++ char str[KSYM_SYMBOL_LEN]; ++ ++ if (iter->flags & FTRACE_ITER_HASH) ++ return t_hash_show(m, v); ++ ++ if (iter->flags & FTRACE_ITER_PRINTALL) { ++ seq_printf(m, "#### all functions enabled ####\n"); ++ return 0; ++ } ++ ++ if (!rec) ++ return 0; ++ ++ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); ++ ++ seq_printf(m, "%s\n", str); ++ ++ return 0; ++} ++ ++static struct seq_operations show_ftrace_seq_ops = { ++ .start = t_start, ++ .next = t_next, ++ .stop = t_stop, ++ .show = t_show, ++}; ++ ++static int ++ftrace_avail_open(struct inode *inode, struct file *file) ++{ ++ struct ftrace_iterator *iter; ++ int ret; ++ ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; ++ ++ iter = kzalloc(sizeof(*iter), GFP_KERNEL); ++ if (!iter) ++ return -ENOMEM; ++ ++ iter->pg = ftrace_pages_start; ++ ++ ret = seq_open(file, &show_ftrace_seq_ops); ++ if (!ret) { ++ struct seq_file *m = file->private_data; ++ ++ m->private = iter; ++ } else { ++ kfree(iter); ++ } ++ ++ return ret; ++} ++ ++int ftrace_avail_release(struct inode *inode, struct file *file) ++{ ++ struct seq_file *m = (struct seq_file 
*)file->private_data; ++ struct ftrace_iterator *iter = m->private; ++ ++ seq_release(inode, file); ++ kfree(iter); + +- for (i = 0; i < cnt; i++) { +- pg->next = (void *)get_zeroed_page(GFP_KERNEL); ++ return 0; ++} + +- /* If we fail, we'll try later anyway */ +- if (!pg->next) +- break; ++static int ++ftrace_failures_open(struct inode *inode, struct file *file) ++{ ++ int ret; ++ struct seq_file *m; ++ struct ftrace_iterator *iter; + +- pg = pg->next; ++ ret = ftrace_avail_open(inode, file); ++ if (!ret) { ++ m = (struct seq_file *)file->private_data; ++ iter = (struct ftrace_iterator *)m->private; ++ iter->flags = FTRACE_ITER_FAILURES; + } + +- return 0; ++ return ret; + } + +-enum { +- FTRACE_ITER_FILTER = (1 << 0), +- FTRACE_ITER_CONT = (1 << 1), +- FTRACE_ITER_NOTRACE = (1 << 2), +- FTRACE_ITER_FAILURES = (1 << 3), +-}; + +-#define FTRACE_BUFF_MAX (KSYM_SYMBOL_LEN+4) /* room for wildcards */ ++static void ftrace_filter_reset(int enable) ++{ ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec; ++ unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; + +-struct ftrace_iterator { +- struct ftrace_page *pg; +- unsigned idx; +- unsigned flags; +- unsigned char buffer[FTRACE_BUFF_MAX+1]; +- unsigned buffer_idx; +- unsigned filtered; +-}; ++ mutex_lock(&ftrace_lock); ++ if (enable) ++ ftrace_filtered = 0; ++ do_for_each_ftrace_rec(pg, rec) { ++ if (rec->flags & FTRACE_FL_FAILED) ++ continue; ++ rec->flags &= ~type; ++ } while_for_each_ftrace_rec(); ++ mutex_unlock(&ftrace_lock); ++} + +-static void * +-t_next(struct seq_file *m, void *v, loff_t *pos) ++static int ++ftrace_regex_open(struct inode *inode, struct file *file, int enable) + { +- struct ftrace_iterator *iter = m->private; +- struct dyn_ftrace *rec = NULL; ++ struct ftrace_iterator *iter; ++ int ret = 0; + +- (*pos)++; ++ if (unlikely(ftrace_disabled)) ++ return -ENODEV; + +- /* should not be called from interrupt context */ +- spin_lock(&ftrace_lock); +- retry: +- if (iter->idx >= iter->pg->index) { +- if (iter->pg->next) { +- iter->pg = iter->pg->next; +- iter->idx = 0; +- goto retry; +- } else { +- iter->idx = -1; +- } +- } else { +- rec = &iter->pg->records[iter->idx++]; +- if ((rec->flags & FTRACE_FL_FREE) || ++ iter = kzalloc(sizeof(*iter), GFP_KERNEL); ++ if (!iter) ++ return -ENOMEM; + +- (!(iter->flags & FTRACE_ITER_FAILURES) && +- (rec->flags & FTRACE_FL_FAILED)) || ++ mutex_lock(&ftrace_regex_lock); ++ if ((file->f_mode & FMODE_WRITE) && ++ !(file->f_flags & O_APPEND)) ++ ftrace_filter_reset(enable); + +- ((iter->flags & FTRACE_ITER_FAILURES) && +- !(rec->flags & FTRACE_FL_FAILED)) || ++ if (file->f_mode & FMODE_READ) { ++ iter->pg = ftrace_pages_start; ++ iter->flags = enable ? 
FTRACE_ITER_FILTER : ++ FTRACE_ITER_NOTRACE; + +- ((iter->flags & FTRACE_ITER_FILTER) && +- !(rec->flags & FTRACE_FL_FILTER)) || ++ ret = seq_open(file, &show_ftrace_seq_ops); ++ if (!ret) { ++ struct seq_file *m = file->private_data; ++ m->private = iter; ++ } else ++ kfree(iter); ++ } else ++ file->private_data = iter; ++ mutex_unlock(&ftrace_regex_lock); + +- ((iter->flags & FTRACE_ITER_NOTRACE) && +- !(rec->flags & FTRACE_FL_NOTRACE))) { +- rec = NULL; +- goto retry; ++ return ret; ++} ++ ++static int ++ftrace_filter_open(struct inode *inode, struct file *file) ++{ ++ return ftrace_regex_open(inode, file, 1); ++} ++ ++static int ++ftrace_notrace_open(struct inode *inode, struct file *file) ++{ ++ return ftrace_regex_open(inode, file, 0); ++} ++ ++static loff_t ++ftrace_regex_lseek(struct file *file, loff_t offset, int origin) ++{ ++ loff_t ret; ++ ++ if (file->f_mode & FMODE_READ) ++ ret = seq_lseek(file, offset, origin); ++ else ++ file->f_pos = ret = 1; ++ ++ return ret; ++} ++ ++enum { ++ MATCH_FULL, ++ MATCH_FRONT_ONLY, ++ MATCH_MIDDLE_ONLY, ++ MATCH_END_ONLY, ++}; ++ ++/* ++ * (static function - no need for kernel doc) ++ * ++ * Pass in a buffer containing a glob and this function will ++ * set search to point to the search part of the buffer and ++ * return the type of search it is (see enum above). ++ * This does modify buff. ++ * ++ * Returns enum type. ++ * search returns the pointer to use for comparison. ++ * not returns 1 if buff started with a '!' ++ * 0 otherwise. ++ */ ++static int ++ftrace_setup_glob(char *buff, int len, char **search, int *not) ++{ ++ int type = MATCH_FULL; ++ int i; ++ ++ if (buff[0] == '!') { ++ *not = 1; ++ buff++; ++ len--; ++ } else ++ *not = 0; ++ ++ *search = buff; ++ ++ for (i = 0; i < len; i++) { ++ if (buff[i] == '*') { ++ if (!i) { ++ *search = buff + 1; ++ type = MATCH_END_ONLY; ++ } else { ++ if (type == MATCH_END_ONLY) ++ type = MATCH_MIDDLE_ONLY; ++ else ++ type = MATCH_FRONT_ONLY; ++ buff[i] = 0; ++ break; ++ } + } + } +- spin_unlock(&ftrace_lock); + +- return rec; ++ return type; + } + +-static void *t_start(struct seq_file *m, loff_t *pos) ++static int ftrace_match(char *str, char *regex, int len, int type) + { +- struct ftrace_iterator *iter = m->private; +- void *p = NULL; ++ int matched = 0; ++ char *ptr; + +- if (*pos > 0) { +- if (iter->idx < 0) +- return p; +- (*pos)--; +- iter->idx--; ++ switch (type) { ++ case MATCH_FULL: ++ if (strcmp(str, regex) == 0) ++ matched = 1; ++ break; ++ case MATCH_FRONT_ONLY: ++ if (strncmp(str, regex, len) == 0) ++ matched = 1; ++ break; ++ case MATCH_MIDDLE_ONLY: ++ if (strstr(str, regex)) ++ matched = 1; ++ break; ++ case MATCH_END_ONLY: ++ ptr = strstr(str, regex); ++ if (ptr && (ptr[len] == 0)) ++ matched = 1; ++ break; + } + +- p = t_next(m, p, pos); ++ return matched; ++} + +- return p; ++static int ++ftrace_match_record(struct dyn_ftrace *rec, char *regex, int len, int type) ++{ ++ char str[KSYM_SYMBOL_LEN]; ++ ++ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); ++ return ftrace_match(str, regex, len, type); + } + +-static void t_stop(struct seq_file *m, void *p) ++static void ftrace_match_records(char *buff, int len, int enable) + { ++ unsigned int search_len; ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec; ++ unsigned long flag; ++ char *search; ++ int type; ++ int not; ++ ++ flag = enable ? 
FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; ++ type = ftrace_setup_glob(buff, len, &search, ¬); ++ ++ search_len = strlen(search); ++ ++ mutex_lock(&ftrace_lock); ++ do_for_each_ftrace_rec(pg, rec) { ++ ++ if (rec->flags & FTRACE_FL_FAILED) ++ continue; ++ ++ if (ftrace_match_record(rec, search, search_len, type)) { ++ if (not) ++ rec->flags &= ~flag; ++ else ++ rec->flags |= flag; ++ } ++ /* ++ * Only enable filtering if we have a function that ++ * is filtered on. ++ */ ++ if (enable && (rec->flags & FTRACE_FL_FILTER)) ++ ftrace_filtered = 1; ++ } while_for_each_ftrace_rec(); ++ mutex_unlock(&ftrace_lock); + } + +-static int t_show(struct seq_file *m, void *v) ++static int ++ftrace_match_module_record(struct dyn_ftrace *rec, char *mod, ++ char *regex, int len, int type) + { +- struct dyn_ftrace *rec = v; + char str[KSYM_SYMBOL_LEN]; ++ char *modname; + +- if (!rec) ++ kallsyms_lookup(rec->ip, NULL, NULL, &modname, str); ++ ++ if (!modname || strcmp(modname, mod)) + return 0; + +- kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); ++ /* blank search means to match all funcs in the mod */ ++ if (len) ++ return ftrace_match(str, regex, len, type); ++ else ++ return 1; ++} + +- seq_printf(m, "%s\n", str); ++static void ftrace_match_module_records(char *buff, char *mod, int enable) ++{ ++ unsigned search_len = 0; ++ struct ftrace_page *pg; ++ struct dyn_ftrace *rec; ++ int type = MATCH_FULL; ++ char *search = buff; ++ unsigned long flag; ++ int not = 0; ++ ++ flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; ++ ++ /* blank or '*' mean the same */ ++ if (strcmp(buff, "*") == 0) ++ buff[0] = 0; ++ ++ /* handle the case of 'dont filter this module' */ ++ if (strcmp(buff, "!") == 0 || strcmp(buff, "!*") == 0) { ++ buff[0] = 0; ++ not = 1; ++ } ++ ++ if (strlen(buff)) { ++ type = ftrace_setup_glob(buff, strlen(buff), &search, ¬); ++ search_len = strlen(search); ++ } ++ ++ mutex_lock(&ftrace_lock); ++ do_for_each_ftrace_rec(pg, rec) { ++ ++ if (rec->flags & FTRACE_FL_FAILED) ++ continue; ++ ++ if (ftrace_match_module_record(rec, mod, ++ search, search_len, type)) { ++ if (not) ++ rec->flags &= ~flag; ++ else ++ rec->flags |= flag; ++ } ++ if (enable && (rec->flags & FTRACE_FL_FILTER)) ++ ftrace_filtered = 1; ++ ++ } while_for_each_ftrace_rec(); ++ mutex_unlock(&ftrace_lock); ++} ++ ++/* ++ * We register the module command as a template to show others how ++ * to register the a command as well. ++ */ ++ ++static int ++ftrace_mod_callback(char *func, char *cmd, char *param, int enable) ++{ ++ char *mod; ++ ++ /* ++ * cmd == 'mod' because we only registered this func ++ * for the 'mod' ftrace_func_command. ++ * But if you register one func with multiple commands, ++ * you can tell which command was used by the cmd ++ * parameter. 
++ */ ++ ++ /* we must have a module name */ ++ if (!param) ++ return -EINVAL; ++ ++ mod = strsep(¶m, ":"); ++ if (!strlen(mod)) ++ return -EINVAL; + ++ ftrace_match_module_records(func, mod, enable); + return 0; + } + +-static struct seq_operations show_ftrace_seq_ops = { +- .start = t_start, +- .next = t_next, +- .stop = t_stop, +- .show = t_show, ++static struct ftrace_func_command ftrace_mod_cmd = { ++ .name = "mod", ++ .func = ftrace_mod_callback, + }; + +-static int +-ftrace_avail_open(struct inode *inode, struct file *file) ++static int __init ftrace_mod_cmd_init(void) ++{ ++ return register_ftrace_command(&ftrace_mod_cmd); ++} ++device_initcall(ftrace_mod_cmd_init); ++ ++static void ++function_trace_probe_call(unsigned long ip, unsigned long parent_ip) ++{ ++ struct ftrace_func_probe *entry; ++ struct hlist_head *hhd; ++ struct hlist_node *n; ++ unsigned long key; ++ int resched; ++ ++ key = hash_long(ip, FTRACE_HASH_BITS); ++ ++ hhd = &ftrace_func_hash[key]; ++ ++ if (hlist_empty(hhd)) ++ return; ++ ++ /* ++ * Disable preemption for these calls to prevent a RCU grace ++ * period. This syncs the hash iteration and freeing of items ++ * on the hash. rcu_read_lock is too dangerous here. ++ */ ++ resched = ftrace_preempt_disable(); ++ hlist_for_each_entry_rcu(entry, n, hhd, node) { ++ if (entry->ip == ip) ++ entry->ops->func(ip, parent_ip, &entry->data); ++ } ++ ftrace_preempt_enable(resched); ++} ++ ++static struct ftrace_ops trace_probe_ops __read_mostly = + { +- struct ftrace_iterator *iter; +- int ret; +- +- if (unlikely(ftrace_disabled)) +- return -ENODEV; ++ .func = function_trace_probe_call, ++}; + +- iter = kzalloc(sizeof(*iter), GFP_KERNEL); +- if (!iter) +- return -ENOMEM; ++static int ftrace_probe_registered; + +- iter->pg = ftrace_pages_start; ++static void __enable_ftrace_function_probe(void) ++{ ++ int i; + +- ret = seq_open(file, &show_ftrace_seq_ops); +- if (!ret) { +- struct seq_file *m = file->private_data; ++ if (ftrace_probe_registered) ++ return; + +- m->private = iter; +- } else { +- kfree(iter); ++ for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { ++ struct hlist_head *hhd = &ftrace_func_hash[i]; ++ if (hhd->first) ++ break; + } ++ /* Nothing registered? 
*/ ++ if (i == FTRACE_FUNC_HASHSIZE) ++ return; + +- return ret; ++ __register_ftrace_function(&trace_probe_ops); ++ ftrace_startup(0); ++ ftrace_probe_registered = 1; + } + +-int ftrace_avail_release(struct inode *inode, struct file *file) ++static void __disable_ftrace_function_probe(void) + { +- struct seq_file *m = (struct seq_file *)file->private_data; +- struct ftrace_iterator *iter = m->private; ++ int i; + +- seq_release(inode, file); +- kfree(iter); ++ if (!ftrace_probe_registered) ++ return; + +- return 0; ++ for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { ++ struct hlist_head *hhd = &ftrace_func_hash[i]; ++ if (hhd->first) ++ return; ++ } ++ ++ /* no more funcs left */ ++ __unregister_ftrace_function(&trace_probe_ops); ++ ftrace_shutdown(0); ++ ftrace_probe_registered = 0; + } + +-static int +-ftrace_failures_open(struct inode *inode, struct file *file) +-{ +- int ret; +- struct seq_file *m; +- struct ftrace_iterator *iter; + +- ret = ftrace_avail_open(inode, file); +- if (!ret) { +- m = (struct seq_file *)file->private_data; +- iter = (struct ftrace_iterator *)m->private; +- iter->flags = FTRACE_ITER_FAILURES; +- } ++static void ftrace_free_entry_rcu(struct rcu_head *rhp) ++{ ++ struct ftrace_func_probe *entry = ++ container_of(rhp, struct ftrace_func_probe, rcu); + +- return ret; ++ if (entry->ops->free) ++ entry->ops->free(&entry->data); ++ kfree(entry); + } + + +-static void ftrace_filter_reset(int enable) ++int ++register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, ++ void *data) + { ++ struct ftrace_func_probe *entry; + struct ftrace_page *pg; + struct dyn_ftrace *rec; +- unsigned long type = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; +- unsigned i; ++ int type, len, not; ++ unsigned long key; ++ int count = 0; ++ char *search; + +- /* should not be called from interrupt context */ +- spin_lock(&ftrace_lock); +- if (enable) +- ftrace_filtered = 0; +- pg = ftrace_pages_start; +- while (pg) { +- for (i = 0; i < pg->index; i++) { +- rec = &pg->records[i]; +- if (rec->flags & FTRACE_FL_FAILED) ++ type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); ++ len = strlen(search); ++ ++ /* we do not support '!' for function probes */ ++ if (WARN_ON(not)) ++ return -EINVAL; ++ ++ mutex_lock(&ftrace_lock); ++ do_for_each_ftrace_rec(pg, rec) { ++ ++ if (rec->flags & FTRACE_FL_FAILED) ++ continue; ++ ++ if (!ftrace_match_record(rec, search, len, type)) ++ continue; ++ ++ entry = kmalloc(sizeof(*entry), GFP_KERNEL); ++ if (!entry) { ++ /* If we did not process any, then return error */ ++ if (!count) ++ count = -ENOMEM; ++ goto out_unlock; ++ } ++ ++ count++; ++ ++ entry->data = data; ++ ++ /* ++ * The caller might want to do something special ++ * for each function we find. We call the callback ++ * to give the caller an opportunity to do so. 
++ */ ++ if (ops->callback) { ++ if (ops->callback(rec->ip, &entry->data) < 0) { ++ /* caller does not like this func */ ++ kfree(entry); + continue; +- rec->flags &= ~type; ++ } + } +- pg = pg->next; +- } +- spin_unlock(&ftrace_lock); ++ ++ entry->ops = ops; ++ entry->ip = rec->ip; ++ ++ key = hash_long(entry->ip, FTRACE_HASH_BITS); ++ hlist_add_head_rcu(&entry->node, &ftrace_func_hash[key]); ++ ++ } while_for_each_ftrace_rec(); ++ __enable_ftrace_function_probe(); ++ ++ out_unlock: ++ mutex_unlock(&ftrace_lock); ++ ++ return count; + } + +-static int +-ftrace_regex_open(struct inode *inode, struct file *file, int enable) ++enum { ++ PROBE_TEST_FUNC = 1, ++ PROBE_TEST_DATA = 2 ++}; ++ ++static void ++__unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, ++ void *data, int flags) + { +- struct ftrace_iterator *iter; +- int ret = 0; ++ struct ftrace_func_probe *entry; ++ struct hlist_node *n, *tmp; ++ char str[KSYM_SYMBOL_LEN]; ++ int type = MATCH_FULL; ++ int i, len = 0; ++ char *search; + +- if (unlikely(ftrace_disabled)) +- return -ENODEV; ++ if (glob && (strcmp(glob, "*") || !strlen(glob))) ++ glob = NULL; ++ else { ++ int not; + +- iter = kzalloc(sizeof(*iter), GFP_KERNEL); +- if (!iter) +- return -ENOMEM; ++ type = ftrace_setup_glob(glob, strlen(glob), &search, ¬); ++ len = strlen(search); + +- mutex_lock(&ftrace_regex_lock); +- if ((file->f_mode & FMODE_WRITE) && +- !(file->f_flags & O_APPEND)) +- ftrace_filter_reset(enable); ++ /* we do not support '!' for function probes */ ++ if (WARN_ON(not)) ++ return; ++ } + +- if (file->f_mode & FMODE_READ) { +- iter->pg = ftrace_pages_start; +- iter->flags = enable ? FTRACE_ITER_FILTER : +- FTRACE_ITER_NOTRACE; ++ mutex_lock(&ftrace_lock); ++ for (i = 0; i < FTRACE_FUNC_HASHSIZE; i++) { ++ struct hlist_head *hhd = &ftrace_func_hash[i]; + +- ret = seq_open(file, &show_ftrace_seq_ops); +- if (!ret) { +- struct seq_file *m = file->private_data; +- m->private = iter; +- } else +- kfree(iter); +- } else +- file->private_data = iter; +- mutex_unlock(&ftrace_regex_lock); ++ hlist_for_each_entry_safe(entry, n, tmp, hhd, node) { + +- return ret; ++ /* break up if statements for readability */ ++ if ((flags & PROBE_TEST_FUNC) && entry->ops != ops) ++ continue; ++ ++ if ((flags & PROBE_TEST_DATA) && entry->data != data) ++ continue; ++ ++ /* do this last, since it is the most expensive */ ++ if (glob) { ++ kallsyms_lookup(entry->ip, NULL, NULL, ++ NULL, str); ++ if (!ftrace_match(str, glob, len, type)) ++ continue; ++ } ++ ++ hlist_del(&entry->node); ++ call_rcu(&entry->rcu, ftrace_free_entry_rcu); ++ } ++ } ++ __disable_ftrace_function_probe(); ++ mutex_unlock(&ftrace_lock); + } + +-static int +-ftrace_filter_open(struct inode *inode, struct file *file) ++void ++unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, ++ void *data) + { +- return ftrace_regex_open(inode, file, 1); ++ __unregister_ftrace_function_probe(glob, ops, data, ++ PROBE_TEST_FUNC | PROBE_TEST_DATA); + } + +-static int +-ftrace_notrace_open(struct inode *inode, struct file *file) ++void ++unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops) + { +- return ftrace_regex_open(inode, file, 0); ++ __unregister_ftrace_function_probe(glob, ops, NULL, PROBE_TEST_FUNC); + } + +-static ssize_t +-ftrace_regex_read(struct file *file, char __user *ubuf, +- size_t cnt, loff_t *ppos) ++void unregister_ftrace_function_probe_all(char *glob) + { +- if (file->f_mode & FMODE_READ) +- return seq_read(file, ubuf, cnt, ppos); +- else +- 
return -EPERM; ++ __unregister_ftrace_function_probe(glob, NULL, NULL, 0); + } + +-static loff_t +-ftrace_regex_lseek(struct file *file, loff_t offset, int origin) ++static LIST_HEAD(ftrace_commands); ++static DEFINE_MUTEX(ftrace_cmd_mutex); ++ ++int register_ftrace_command(struct ftrace_func_command *cmd) + { +- loff_t ret; ++ struct ftrace_func_command *p; ++ int ret = 0; + +- if (file->f_mode & FMODE_READ) +- ret = seq_lseek(file, offset, origin); +- else +- file->f_pos = ret = 1; ++ mutex_lock(&ftrace_cmd_mutex); ++ list_for_each_entry(p, &ftrace_commands, list) { ++ if (strcmp(cmd->name, p->name) == 0) { ++ ret = -EBUSY; ++ goto out_unlock; ++ } ++ } ++ list_add(&cmd->list, &ftrace_commands); ++ out_unlock: ++ mutex_unlock(&ftrace_cmd_mutex); + + return ret; + } + +-enum { +- MATCH_FULL, +- MATCH_FRONT_ONLY, +- MATCH_MIDDLE_ONLY, +- MATCH_END_ONLY, +-}; +- +-static void +-ftrace_match(unsigned char *buff, int len, int enable) ++int unregister_ftrace_command(struct ftrace_func_command *cmd) + { +- char str[KSYM_SYMBOL_LEN]; +- char *search = NULL; +- struct ftrace_page *pg; +- struct dyn_ftrace *rec; +- int type = MATCH_FULL; +- unsigned long flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; +- unsigned i, match = 0, search_len = 0; +- int not = 0; ++ struct ftrace_func_command *p, *n; ++ int ret = -ENODEV; + +- if (buff[0] == '!') { +- not = 1; +- buff++; +- len--; ++ mutex_lock(&ftrace_cmd_mutex); ++ list_for_each_entry_safe(p, n, &ftrace_commands, list) { ++ if (strcmp(cmd->name, p->name) == 0) { ++ ret = 0; ++ list_del_init(&p->list); ++ goto out_unlock; ++ } + } ++ out_unlock: ++ mutex_unlock(&ftrace_cmd_mutex); + +- for (i = 0; i < len; i++) { +- if (buff[i] == '*') { +- if (!i) { +- search = buff + i + 1; +- type = MATCH_END_ONLY; +- search_len = len - (i + 1); +- } else { +- if (type == MATCH_END_ONLY) { +- type = MATCH_MIDDLE_ONLY; +- } else { +- match = i; +- type = MATCH_FRONT_ONLY; +- } +- buff[i] = 0; +- break; +- } +- } ++ return ret; ++} ++ ++static int ftrace_process_regex(char *buff, int len, int enable) ++{ ++ char *func, *command, *next = buff; ++ struct ftrace_func_command *p; ++ int ret = -EINVAL; ++ ++ func = strsep(&next, ":"); ++ ++ if (!next) { ++ ftrace_match_records(func, len, enable); ++ return 0; + } + +- /* should not be called from interrupt context */ +- spin_lock(&ftrace_lock); +- if (enable) +- ftrace_filtered = 1; +- pg = ftrace_pages_start; +- while (pg) { +- for (i = 0; i < pg->index; i++) { +- int matched = 0; +- char *ptr; ++ /* command found */ + +- rec = &pg->records[i]; +- if (rec->flags & FTRACE_FL_FAILED) +- continue; +- kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); +- switch (type) { +- case MATCH_FULL: +- if (strcmp(str, buff) == 0) +- matched = 1; +- break; +- case MATCH_FRONT_ONLY: +- if (memcmp(str, buff, match) == 0) +- matched = 1; +- break; +- case MATCH_MIDDLE_ONLY: +- if (strstr(str, search)) +- matched = 1; +- break; +- case MATCH_END_ONLY: +- ptr = strstr(str, search); +- if (ptr && (ptr[search_len] == 0)) +- matched = 1; +- break; +- } +- if (matched) { +- if (not) +- rec->flags &= ~flag; +- else +- rec->flags |= flag; +- } ++ command = strsep(&next, ":"); ++ ++ mutex_lock(&ftrace_cmd_mutex); ++ list_for_each_entry(p, &ftrace_commands, list) { ++ if (strcmp(p->name, command) == 0) { ++ ret = p->func(func, command, next, enable); ++ goto out_unlock; + } +- pg = pg->next; + } +- spin_unlock(&ftrace_lock); ++ out_unlock: ++ mutex_unlock(&ftrace_cmd_mutex); ++ ++ return ret; + } + + static ssize_t +@@ -1187,7 +2316,10 @@ 
ftrace_regex_write(struct file *file, co + if (isspace(ch)) { + iter->filtered++; + iter->buffer[iter->buffer_idx] = 0; +- ftrace_match(iter->buffer, iter->buffer_idx, enable); ++ ret = ftrace_process_regex(iter->buffer, ++ iter->buffer_idx, enable); ++ if (ret) ++ goto out; + iter->buffer_idx = 0; + } else + iter->flags |= FTRACE_ITER_CONT; +@@ -1226,7 +2358,7 @@ ftrace_set_regex(unsigned char *buf, int + if (reset) + ftrace_filter_reset(enable); + if (buf) +- ftrace_match(buf, len, enable); ++ ftrace_match_records(buf, len, enable); + mutex_unlock(&ftrace_regex_lock); + } + +@@ -1276,15 +2408,13 @@ ftrace_regex_release(struct inode *inode + if (iter->buffer_idx) { + iter->filtered++; + iter->buffer[iter->buffer_idx] = 0; +- ftrace_match(iter->buffer, iter->buffer_idx, enable); ++ ftrace_match_records(iter->buffer, iter->buffer_idx, enable); + } + +- mutex_lock(&ftrace_sysctl_lock); +- mutex_lock(&ftrace_start_lock); ++ mutex_lock(&ftrace_lock); + if (ftrace_start_up && ftrace_enabled) + ftrace_run_update_code(FTRACE_ENABLE_CALLS); +- mutex_unlock(&ftrace_start_lock); +- mutex_unlock(&ftrace_sysctl_lock); ++ mutex_unlock(&ftrace_lock); + + kfree(iter); + mutex_unlock(&ftrace_regex_lock); +@@ -1303,31 +2433,31 @@ ftrace_notrace_release(struct inode *ino + return ftrace_regex_release(inode, file, 0); + } + +-static struct file_operations ftrace_avail_fops = { ++static const struct file_operations ftrace_avail_fops = { + .open = ftrace_avail_open, + .read = seq_read, + .llseek = seq_lseek, + .release = ftrace_avail_release, + }; + +-static struct file_operations ftrace_failures_fops = { ++static const struct file_operations ftrace_failures_fops = { + .open = ftrace_failures_open, + .read = seq_read, + .llseek = seq_lseek, + .release = ftrace_avail_release, + }; + +-static struct file_operations ftrace_filter_fops = { ++static const struct file_operations ftrace_filter_fops = { + .open = ftrace_filter_open, +- .read = ftrace_regex_read, ++ .read = seq_read, + .write = ftrace_filter_write, + .llseek = ftrace_regex_lseek, + .release = ftrace_filter_release, + }; + +-static struct file_operations ftrace_notrace_fops = { ++static const struct file_operations ftrace_notrace_fops = { + .open = ftrace_notrace_open, +- .read = ftrace_regex_read, ++ .read = seq_read, + .write = ftrace_notrace_write, + .llseek = ftrace_regex_lseek, + .release = ftrace_notrace_release, +@@ -1360,6 +2490,10 @@ static void *g_start(struct seq_file *m, + + mutex_lock(&graph_lock); + ++ /* Nothing, tell g_show to print all functions are enabled */ ++ if (!ftrace_graph_count && !*pos) ++ return (void *)1; ++ + p = g_next(m, p, pos); + + return p; +@@ -1378,6 +2512,11 @@ static int g_show(struct seq_file *m, vo + if (!ptr) + return 0; + ++ if (ptr == (unsigned long *)1) { ++ seq_printf(m, "#### all functions enabled ####\n"); ++ return 0; ++ } ++ + kallsyms_lookup(*ptr, NULL, NULL, NULL, str); + + seq_printf(m, "%s\n", str); +@@ -1420,53 +2559,53 @@ ftrace_graph_open(struct inode *inode, s + return ret; + } + +-static ssize_t +-ftrace_graph_read(struct file *file, char __user *ubuf, +- size_t cnt, loff_t *ppos) +-{ +- if (file->f_mode & FMODE_READ) +- return seq_read(file, ubuf, cnt, ppos); +- else +- return -EPERM; +-} +- + static int +-ftrace_set_func(unsigned long *array, int idx, char *buffer) ++ftrace_set_func(unsigned long *array, int *idx, char *buffer) + { +- char str[KSYM_SYMBOL_LEN]; + struct dyn_ftrace *rec; + struct ftrace_page *pg; ++ int search_len; + int found = 0; +- int i, j; ++ int type, not; ++ char 
*search; ++ bool exists; ++ int i; + + if (ftrace_disabled) + return -ENODEV; + +- /* should not be called from interrupt context */ +- spin_lock(&ftrace_lock); ++ /* decode regex */ ++ type = ftrace_setup_glob(buffer, strlen(buffer), &search, ¬); ++ if (not) ++ return -EINVAL; + +- for (pg = ftrace_pages_start; pg; pg = pg->next) { +- for (i = 0; i < pg->index; i++) { +- rec = &pg->records[i]; ++ search_len = strlen(search); + +- if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) +- continue; ++ mutex_lock(&ftrace_lock); ++ do_for_each_ftrace_rec(pg, rec) { ++ ++ if (*idx >= FTRACE_GRAPH_MAX_FUNCS) ++ break; + +- kallsyms_lookup(rec->ip, NULL, NULL, NULL, str); +- if (strcmp(str, buffer) == 0) { ++ if (rec->flags & (FTRACE_FL_FAILED | FTRACE_FL_FREE)) ++ continue; ++ ++ if (ftrace_match_record(rec, search, search_len, type)) { ++ /* ensure it is not already in the array */ ++ exists = false; ++ for (i = 0; i < *idx; i++) ++ if (array[i] == rec->ip) { ++ exists = true; ++ break; ++ } ++ if (!exists) { ++ array[(*idx)++] = rec->ip; + found = 1; +- for (j = 0; j < idx; j++) +- if (array[j] == rec->ip) { +- found = 0; +- break; +- } +- if (found) +- array[idx] = rec->ip; +- break; + } + } +- } +- spin_unlock(&ftrace_lock); ++ } while_for_each_ftrace_rec(); ++ ++ mutex_unlock(&ftrace_lock); + + return found ? 0 : -EINVAL; + } +@@ -1534,13 +2673,11 @@ ftrace_graph_write(struct file *file, co + } + buffer[index] = 0; + +- /* we allow only one at a time */ +- ret = ftrace_set_func(array, ftrace_graph_count, buffer); ++ /* we allow only one expression at a time */ ++ ret = ftrace_set_func(array, &ftrace_graph_count, buffer); + if (ret) + goto out; + +- ftrace_graph_count++; +- + file->f_pos += read; + + ret = read; +@@ -1552,7 +2689,7 @@ ftrace_graph_write(struct file *file, co + + static const struct file_operations ftrace_graph_fops = { + .open = ftrace_graph_open, +- .read = ftrace_graph_read, ++ .read = seq_read, + .write = ftrace_graph_write, + }; + #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +@@ -1604,7 +2741,7 @@ static int ftrace_convert_nops(struct mo + unsigned long addr; + unsigned long flags; + +- mutex_lock(&ftrace_start_lock); ++ mutex_lock(&ftrace_lock); + p = start; + while (p < end) { + addr = ftrace_call_adjust(*p++); +@@ -1623,7 +2760,7 @@ static int ftrace_convert_nops(struct mo + local_irq_save(flags); + ftrace_update_code(mod); + local_irq_restore(flags); +- mutex_unlock(&ftrace_start_lock); ++ mutex_unlock(&ftrace_lock); + + return 0; + } +@@ -1700,7 +2837,7 @@ ftrace_pid_read(struct file *file, char + if (ftrace_pid_trace == ftrace_swapper_pid) + r = sprintf(buf, "swapper tasks\n"); + else if (ftrace_pid_trace) +- r = sprintf(buf, "%u\n", pid_nr(ftrace_pid_trace)); ++ r = sprintf(buf, "%u\n", pid_vnr(ftrace_pid_trace)); + else + r = sprintf(buf, "no pid\n"); + +@@ -1796,7 +2933,7 @@ ftrace_pid_write(struct file *filp, cons + if (ret < 0) + return ret; + +- mutex_lock(&ftrace_start_lock); ++ mutex_lock(&ftrace_lock); + if (val < 0) { + /* disable pid tracing */ + if (!ftrace_pid_trace) +@@ -1835,12 +2972,12 @@ ftrace_pid_write(struct file *filp, cons + ftrace_startup_enable(0); + + out: +- mutex_unlock(&ftrace_start_lock); ++ mutex_unlock(&ftrace_lock); + + return cnt; + } + +-static struct file_operations ftrace_pid_fops = { ++static const struct file_operations ftrace_pid_fops = { + .read = ftrace_pid_read, + .write = ftrace_pid_write, + }; +@@ -1861,9 +2998,11 @@ static __init int ftrace_init_debugfs(vo + if (!entry) + pr_warning("Could not create debugfs " + 
"'set_ftrace_pid' entry\n"); ++ ++ ftrace_profile_debugfs(d_tracer); ++ + return 0; + } +- + fs_initcall(ftrace_init_debugfs); + + /** +@@ -1898,17 +3037,17 @@ int register_ftrace_function(struct ftra + if (unlikely(ftrace_disabled)) + return -1; + +- mutex_lock(&ftrace_sysctl_lock); ++ mutex_lock(&ftrace_lock); + + ret = __register_ftrace_function(ops); + ftrace_startup(0); + +- mutex_unlock(&ftrace_sysctl_lock); ++ mutex_unlock(&ftrace_lock); + return ret; + } + + /** +- * unregister_ftrace_function - unresgister a function for profiling. ++ * unregister_ftrace_function - unregister a function for profiling. + * @ops - ops structure that holds the function to unregister + * + * Unregister a function that was added to be called by ftrace profiling. +@@ -1917,10 +3056,10 @@ int unregister_ftrace_function(struct ft + { + int ret; + +- mutex_lock(&ftrace_sysctl_lock); ++ mutex_lock(&ftrace_lock); + ret = __unregister_ftrace_function(ops); + ftrace_shutdown(0); +- mutex_unlock(&ftrace_sysctl_lock); ++ mutex_unlock(&ftrace_lock); + + return ret; + } +@@ -1935,7 +3074,7 @@ ftrace_enable_sysctl(struct ctl_table *t + if (unlikely(ftrace_disabled)) + return -ENODEV; + +- mutex_lock(&ftrace_sysctl_lock); ++ mutex_lock(&ftrace_lock); + + ret = proc_dointvec(table, write, file, buffer, lenp, ppos); + +@@ -1964,7 +3103,7 @@ ftrace_enable_sysctl(struct ctl_table *t + } + + out: +- mutex_unlock(&ftrace_sysctl_lock); ++ mutex_unlock(&ftrace_lock); + return ret; + } + +@@ -2029,6 +3168,38 @@ free: + return ret; + } + ++static void ++ftrace_graph_probe_sched_switch(struct rq *__rq, struct task_struct *prev, ++ struct task_struct *next) ++{ ++ unsigned long long timestamp; ++ int index; ++ ++ /* ++ * Does the user want to count the time a function was asleep. ++ * If so, do not update the time stamps. ++ */ ++ if (trace_flags & TRACE_ITER_SLEEP_TIME) ++ return; ++ ++ timestamp = trace_clock_local(); ++ ++ prev->ftrace_timestamp = timestamp; ++ ++ /* only process tasks that we timestamped */ ++ if (!next->ftrace_timestamp) ++ return; ++ ++ /* ++ * Update all the counters in next to make up for the ++ * time next was sleeping. 
++ */ ++ timestamp -= next->ftrace_timestamp; ++ ++ for (index = next->curr_ret_stack; index >= 0; index--) ++ next->ret_stack[index].calltime += timestamp; ++} ++ + /* Allocate a return stack for each task */ + static int start_graph_tracing(void) + { +@@ -2050,6 +3221,13 @@ static int start_graph_tracing(void) + ret = alloc_retstack_tasklist(ret_stack_list); + } while (ret == -EAGAIN); + ++ if (!ret) { ++ ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch); ++ if (ret) ++ pr_info("ftrace_graph: Couldn't activate tracepoint" ++ " probe to kernel_sched_switch\n"); ++ } ++ + kfree(ret_stack_list); + return ret; + } +@@ -2080,7 +3258,13 @@ int register_ftrace_graph(trace_func_gra + { + int ret = 0; + +- mutex_lock(&ftrace_sysctl_lock); ++ mutex_lock(&ftrace_lock); ++ ++ /* we currently allow only one tracer registered at a time */ ++ if (atomic_read(&ftrace_graph_active)) { ++ ret = -EBUSY; ++ goto out; ++ } + + ftrace_suspend_notifier.notifier_call = ftrace_suspend_notifier_call; + register_pm_notifier(&ftrace_suspend_notifier); +@@ -2098,21 +3282,22 @@ int register_ftrace_graph(trace_func_gra + ftrace_startup(FTRACE_START_FUNC_RET); + + out: +- mutex_unlock(&ftrace_sysctl_lock); ++ mutex_unlock(&ftrace_lock); + return ret; + } + + void unregister_ftrace_graph(void) + { +- mutex_lock(&ftrace_sysctl_lock); ++ mutex_lock(&ftrace_lock); + + atomic_dec(&ftrace_graph_active); ++ unregister_trace_sched_switch(ftrace_graph_probe_sched_switch); + ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub; + ftrace_graph_entry = ftrace_graph_entry_stub; + ftrace_shutdown(FTRACE_STOP_FUNC_RET); + unregister_pm_notifier(&ftrace_suspend_notifier); + +- mutex_unlock(&ftrace_sysctl_lock); ++ mutex_unlock(&ftrace_lock); + } + + /* Allocate a return stack for newly created task */ +@@ -2127,6 +3312,7 @@ void ftrace_graph_init_task(struct task_ + t->curr_ret_stack = -1; + atomic_set(&t->tracing_graph_pause, 0); + atomic_set(&t->trace_overrun, 0); ++ t->ftrace_timestamp = 0; + } else + t->ret_stack = NULL; + } +Index: linux-2.6-tip/kernel/trace/kmemtrace.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/kmemtrace.c +@@ -0,0 +1,464 @@ ++/* ++ * Memory allocator tracing ++ * ++ * Copyright (C) 2008 Eduard - Gabriel Munteanu ++ * Copyright (C) 2008 Pekka Enberg ++ * Copyright (C) 2008 Frederic Weisbecker ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#include "trace_output.h" ++#include "trace.h" ++ ++/* Select an alternative, minimalistic output than the original one */ ++#define TRACE_KMEM_OPT_MINIMAL 0x1 ++ ++static struct tracer_opt kmem_opts[] = { ++ /* Default disable the minimalistic output */ ++ { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) }, ++ { } ++}; ++ ++static struct tracer_flags kmem_tracer_flags = { ++ .val = 0, ++ .opts = kmem_opts ++}; ++ ++static struct trace_array *kmemtrace_array; ++ ++/* Trace allocations */ ++static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id, ++ unsigned long call_site, ++ const void *ptr, ++ size_t bytes_req, ++ size_t bytes_alloc, ++ gfp_t gfp_flags, ++ int node) ++{ ++ struct trace_array *tr = kmemtrace_array; ++ struct kmemtrace_alloc_entry *entry; ++ struct ring_buffer_event *event; ++ ++ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); ++ if (!event) ++ return; ++ ++ entry = ring_buffer_event_data(event); ++ tracing_generic_entry_update(&entry->ent, 0, 0); ++ ++ entry->ent.type = TRACE_KMEM_ALLOC; ++ 
entry->type_id = type_id; ++ entry->call_site = call_site; ++ entry->ptr = ptr; ++ entry->bytes_req = bytes_req; ++ entry->bytes_alloc = bytes_alloc; ++ entry->gfp_flags = gfp_flags; ++ entry->node = node; ++ ++ ring_buffer_unlock_commit(tr->buffer, event); ++ ++ trace_wake_up(); ++} ++ ++static inline void kmemtrace_free(enum kmemtrace_type_id type_id, ++ unsigned long call_site, ++ const void *ptr) ++{ ++ struct trace_array *tr = kmemtrace_array; ++ struct kmemtrace_free_entry *entry; ++ struct ring_buffer_event *event; ++ ++ event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry)); ++ if (!event) ++ return; ++ entry = ring_buffer_event_data(event); ++ tracing_generic_entry_update(&entry->ent, 0, 0); ++ ++ entry->ent.type = TRACE_KMEM_FREE; ++ entry->type_id = type_id; ++ entry->call_site = call_site; ++ entry->ptr = ptr; ++ ++ ring_buffer_unlock_commit(tr->buffer, event); ++ ++ trace_wake_up(); ++} ++ ++static void kmemtrace_kmalloc(unsigned long call_site, ++ const void *ptr, ++ size_t bytes_req, ++ size_t bytes_alloc, ++ gfp_t gfp_flags) ++{ ++ kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr, ++ bytes_req, bytes_alloc, gfp_flags, -1); ++} ++ ++static void kmemtrace_kmem_cache_alloc(unsigned long call_site, ++ const void *ptr, ++ size_t bytes_req, ++ size_t bytes_alloc, ++ gfp_t gfp_flags) ++{ ++ kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr, ++ bytes_req, bytes_alloc, gfp_flags, -1); ++} ++ ++static void kmemtrace_kmalloc_node(unsigned long call_site, ++ const void *ptr, ++ size_t bytes_req, ++ size_t bytes_alloc, ++ gfp_t gfp_flags, ++ int node) ++{ ++ kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr, ++ bytes_req, bytes_alloc, gfp_flags, node); ++} ++ ++static void kmemtrace_kmem_cache_alloc_node(unsigned long call_site, ++ const void *ptr, ++ size_t bytes_req, ++ size_t bytes_alloc, ++ gfp_t gfp_flags, ++ int node) ++{ ++ kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr, ++ bytes_req, bytes_alloc, gfp_flags, node); ++} ++ ++static void kmemtrace_kfree(unsigned long call_site, const void *ptr) ++{ ++ kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr); ++} ++ ++static void kmemtrace_kmem_cache_free(unsigned long call_site, const void *ptr) ++{ ++ kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr); ++} ++ ++static int kmemtrace_start_probes(void) ++{ ++ int err; ++ ++ err = register_trace_kmalloc(kmemtrace_kmalloc); ++ if (err) ++ return err; ++ err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc); ++ if (err) ++ return err; ++ err = register_trace_kmalloc_node(kmemtrace_kmalloc_node); ++ if (err) ++ return err; ++ err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node); ++ if (err) ++ return err; ++ err = register_trace_kfree(kmemtrace_kfree); ++ if (err) ++ return err; ++ err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free); ++ ++ return err; ++} ++ ++static void kmemtrace_stop_probes(void) ++{ ++ unregister_trace_kmalloc(kmemtrace_kmalloc); ++ unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc); ++ unregister_trace_kmalloc_node(kmemtrace_kmalloc_node); ++ unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node); ++ unregister_trace_kfree(kmemtrace_kfree); ++ unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free); ++} ++ ++static int kmem_trace_init(struct trace_array *tr) ++{ ++ int cpu; ++ kmemtrace_array = tr; ++ ++ for_each_cpu_mask(cpu, cpu_possible_map) ++ tracing_reset(tr, cpu); ++ ++ kmemtrace_start_probes(); ++ ++ return 0; ++} ++ ++static void kmem_trace_reset(struct 
trace_array *tr) ++{ ++ kmemtrace_stop_probes(); ++} ++ ++static void kmemtrace_headers(struct seq_file *s) ++{ ++ /* Don't need headers for the original kmemtrace output */ ++ if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL)) ++ return; ++ ++ seq_printf(s, "#\n"); ++ seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS " ++ " POINTER NODE CALLER\n"); ++ seq_printf(s, "# FREE | | | | " ++ " | | | |\n"); ++ seq_printf(s, "# |\n\n"); ++} ++ ++/* ++ * The following functions give the original output from kmemtrace, ++ * plus the origin CPU, since reordering occurs in-kernel now. ++ */ ++ ++#define KMEMTRACE_USER_ALLOC 0 ++#define KMEMTRACE_USER_FREE 1 ++ ++struct kmemtrace_user_event { ++ u8 event_id; ++ u8 type_id; ++ u16 event_size; ++ u32 cpu; ++ u64 timestamp; ++ unsigned long call_site; ++ unsigned long ptr; ++}; ++ ++struct kmemtrace_user_event_alloc { ++ size_t bytes_req; ++ size_t bytes_alloc; ++ unsigned gfp_flags; ++ int node; ++}; ++ ++static enum print_line_t ++kmemtrace_print_alloc_user(struct trace_iterator *iter, ++ struct kmemtrace_alloc_entry *entry) ++{ ++ struct kmemtrace_user_event_alloc *ev_alloc; ++ struct trace_seq *s = &iter->seq; ++ struct kmemtrace_user_event *ev; ++ ++ ev = trace_seq_reserve(s, sizeof(*ev)); ++ if (!ev) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ ev->event_id = KMEMTRACE_USER_ALLOC; ++ ev->type_id = entry->type_id; ++ ev->event_size = sizeof(*ev) + sizeof(*ev_alloc); ++ ev->cpu = iter->cpu; ++ ev->timestamp = iter->ts; ++ ev->call_site = entry->call_site; ++ ev->ptr = (unsigned long)entry->ptr; ++ ++ ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc)); ++ if (!ev_alloc) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ ev_alloc->bytes_req = entry->bytes_req; ++ ev_alloc->bytes_alloc = entry->bytes_alloc; ++ ev_alloc->gfp_flags = entry->gfp_flags; ++ ev_alloc->node = entry->node; ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++static enum print_line_t ++kmemtrace_print_free_user(struct trace_iterator *iter, ++ struct kmemtrace_free_entry *entry) ++{ ++ struct trace_seq *s = &iter->seq; ++ struct kmemtrace_user_event *ev; ++ ++ ev = trace_seq_reserve(s, sizeof(*ev)); ++ if (!ev) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ ev->event_id = KMEMTRACE_USER_FREE; ++ ev->type_id = entry->type_id; ++ ev->event_size = sizeof(*ev); ++ ev->cpu = iter->cpu; ++ ev->timestamp = iter->ts; ++ ev->call_site = entry->call_site; ++ ev->ptr = (unsigned long)entry->ptr; ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++/* The two other following provide a more minimalistic output */ ++static enum print_line_t ++kmemtrace_print_alloc_compress(struct trace_iterator *iter, ++ struct kmemtrace_alloc_entry *entry) ++{ ++ struct trace_seq *s = &iter->seq; ++ int ret; ++ ++ /* Alloc entry */ ++ ret = trace_seq_printf(s, " + "); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ /* Type */ ++ switch (entry->type_id) { ++ case KMEMTRACE_TYPE_KMALLOC: ++ ret = trace_seq_printf(s, "K "); ++ break; ++ case KMEMTRACE_TYPE_CACHE: ++ ret = trace_seq_printf(s, "C "); ++ break; ++ case KMEMTRACE_TYPE_PAGES: ++ ret = trace_seq_printf(s, "P "); ++ break; ++ default: ++ ret = trace_seq_printf(s, "? 
"); ++ } ++ ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ /* Requested */ ++ ret = trace_seq_printf(s, "%4zu ", entry->bytes_req); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ /* Allocated */ ++ ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ /* Flags ++ * TODO: would be better to see the name of the GFP flag names ++ */ ++ ret = trace_seq_printf(s, "%08x ", entry->gfp_flags); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ /* Pointer to allocated */ ++ ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ /* Node */ ++ ret = trace_seq_printf(s, "%4d ", entry->node); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ /* Call site */ ++ ret = seq_print_ip_sym(s, entry->call_site, 0); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ if (!trace_seq_printf(s, "\n")) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++static enum print_line_t ++kmemtrace_print_free_compress(struct trace_iterator *iter, ++ struct kmemtrace_free_entry *entry) ++{ ++ struct trace_seq *s = &iter->seq; ++ int ret; ++ ++ /* Free entry */ ++ ret = trace_seq_printf(s, " - "); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ /* Type */ ++ switch (entry->type_id) { ++ case KMEMTRACE_TYPE_KMALLOC: ++ ret = trace_seq_printf(s, "K "); ++ break; ++ case KMEMTRACE_TYPE_CACHE: ++ ret = trace_seq_printf(s, "C "); ++ break; ++ case KMEMTRACE_TYPE_PAGES: ++ ret = trace_seq_printf(s, "P "); ++ break; ++ default: ++ ret = trace_seq_printf(s, "? "); ++ } ++ ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ /* Skip requested/allocated/flags */ ++ ret = trace_seq_printf(s, " "); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ /* Pointer to allocated */ ++ ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ /* Skip node */ ++ ret = trace_seq_printf(s, " "); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ /* Call site */ ++ ret = seq_print_ip_sym(s, entry->call_site, 0); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ if (!trace_seq_printf(s, "\n")) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter) ++{ ++ struct trace_entry *entry = iter->ent; ++ ++ switch (entry->type) { ++ case TRACE_KMEM_ALLOC: { ++ struct kmemtrace_alloc_entry *field; ++ ++ trace_assign_type(field, entry); ++ if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) ++ return kmemtrace_print_alloc_compress(iter, field); ++ else ++ return kmemtrace_print_alloc_user(iter, field); ++ } ++ ++ case TRACE_KMEM_FREE: { ++ struct kmemtrace_free_entry *field; ++ ++ trace_assign_type(field, entry); ++ if (kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL) ++ return kmemtrace_print_free_compress(iter, field); ++ else ++ return kmemtrace_print_free_user(iter, field); ++ } ++ ++ default: ++ return TRACE_TYPE_UNHANDLED; ++ } ++} ++ ++static struct tracer kmem_tracer __read_mostly = { ++ .name = "kmemtrace", ++ .init = kmem_trace_init, ++ .reset = kmem_trace_reset, ++ .print_line = kmemtrace_print_line, ++ .print_header = kmemtrace_headers, ++ .flags = &kmem_tracer_flags ++}; ++ ++void kmemtrace_init(void) ++{ ++ /* earliest opportunity to start kmem tracing */ ++} ++ ++static int __init init_kmem_tracer(void) ++{ ++ return register_tracer(&kmem_tracer); ++} ++device_initcall(init_kmem_tracer); +Index: 
linux-2.6-tip/kernel/trace/ring_buffer.c +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/ring_buffer.c ++++ linux-2.6-tip/kernel/trace/ring_buffer.c +@@ -4,21 +4,93 @@ + * Copyright (C) 2008 Steven Rostedt + */ + #include ++#include ++#include ++#include + #include + #include + #include ++#include + #include + #include + #include +-#include /* used for sched_clock() (for now) */ + #include + #include + #include ++#include + #include + + #include "trace.h" + + /* ++ * The ring buffer is made up of a list of pages. A separate list of pages is ++ * allocated for each CPU. A writer may only write to a buffer that is ++ * associated with the CPU it is currently executing on. A reader may read ++ * from any per cpu buffer. ++ * ++ * The reader is special. For each per cpu buffer, the reader has its own ++ * reader page. When a reader has read the entire reader page, this reader ++ * page is swapped with another page in the ring buffer. ++ * ++ * Now, as long as the writer is off the reader page, the reader can do what ++ * ever it wants with that page. The writer will never write to that page ++ * again (as long as it is out of the ring buffer). ++ * ++ * Here's some silly ASCII art. ++ * ++ * +------+ ++ * |reader| RING BUFFER ++ * |page | ++ * +------+ +---+ +---+ +---+ ++ * | |-->| |-->| | ++ * +---+ +---+ +---+ ++ * ^ | ++ * | | ++ * +---------------+ ++ * ++ * ++ * +------+ ++ * |reader| RING BUFFER ++ * |page |------------------v ++ * +------+ +---+ +---+ +---+ ++ * | |-->| |-->| | ++ * +---+ +---+ +---+ ++ * ^ | ++ * | | ++ * +---------------+ ++ * ++ * ++ * +------+ ++ * |reader| RING BUFFER ++ * |page |------------------v ++ * +------+ +---+ +---+ +---+ ++ * ^ | |-->| |-->| | ++ * | +---+ +---+ +---+ ++ * | | ++ * | | ++ * +------------------------------+ ++ * ++ * ++ * +------+ ++ * |buffer| RING BUFFER ++ * |page |------------------v ++ * +------+ +---+ +---+ +---+ ++ * ^ | | | |-->| | ++ * | New +---+ +---+ +---+ ++ * | Reader------^ | ++ * | page | ++ * +------------------------------+ ++ * ++ * ++ * After we make this swap, the reader can hand this page off to the splice ++ * code and be done with it. It can even allocate a new page if it needs to ++ * and swap that into the ring buffer. ++ * ++ * We will be using cmpxchg soon to make all this lockless. ++ * ++ */ ++ ++/* + * A fast way to enable or disable all ring buffers is to + * call tracing_on or tracing_off. Turning off the ring buffers + * prevents all ring buffers from being recorded to. +@@ -57,7 +129,9 @@ enum { + RB_BUFFERS_DISABLED = 1 << RB_BUFFERS_DISABLED_BIT, + }; + +-static long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; ++static unsigned long ring_buffer_flags __read_mostly = RB_BUFFERS_ON; ++ ++#define BUF_PAGE_HDR_SIZE offsetof(struct buffer_data_page, data) + + /** + * tracing_on - enable all tracing buffers +@@ -89,59 +163,92 @@ EXPORT_SYMBOL_GPL(tracing_off); + * tracing_off_permanent - permanently disable ring buffers + * + * This function, once called, will disable all ring buffers +- * permanenty. ++ * permanently. 
+ */ + void tracing_off_permanent(void) + { + set_bit(RB_BUFFERS_DISABLED_BIT, &ring_buffer_flags); + } + ++/** ++ * tracing_is_on - show state of ring buffers enabled ++ */ ++int tracing_is_on(void) ++{ ++ return ring_buffer_flags == RB_BUFFERS_ON; ++} ++EXPORT_SYMBOL_GPL(tracing_is_on); ++ + #include "trace.h" + +-/* Up this if you want to test the TIME_EXTENTS and normalization */ +-#define DEBUG_SHIFT 0 ++#define RB_EVNT_HDR_SIZE (offsetof(struct ring_buffer_event, array)) ++#define RB_ALIGNMENT 4U ++#define RB_MAX_SMALL_DATA 28 ++ ++enum { ++ RB_LEN_TIME_EXTEND = 8, ++ RB_LEN_TIME_STAMP = 16, ++}; + +-/* FIXME!!! */ +-u64 ring_buffer_time_stamp(int cpu) ++static inline int rb_null_event(struct ring_buffer_event *event) + { +- u64 time; ++ return event->type == RINGBUF_TYPE_PADDING && event->time_delta == 0; ++} + +- preempt_disable_notrace(); +- /* shift to debug/test normalization and TIME_EXTENTS */ +- time = sched_clock() << DEBUG_SHIFT; +- preempt_enable_no_resched_notrace(); ++static inline int rb_discarded_event(struct ring_buffer_event *event) ++{ ++ return event->type == RINGBUF_TYPE_PADDING && event->time_delta; ++} + +- return time; ++static void rb_event_set_padding(struct ring_buffer_event *event) ++{ ++ event->type = RINGBUF_TYPE_PADDING; ++ event->time_delta = 0; + } +-EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); + +-void ring_buffer_normalize_time_stamp(int cpu, u64 *ts) ++/** ++ * ring_buffer_event_discard - discard an event in the ring buffer ++ * @buffer: the ring buffer ++ * @event: the event to discard ++ * ++ * Sometimes a event that is in the ring buffer needs to be ignored. ++ * This function lets the user discard an event in the ring buffer ++ * and then that event will not be read later. ++ * ++ * Note, it is up to the user to be careful with this, and protect ++ * against races. If the user discards an event that has been consumed ++ * it is possible that it could corrupt the ring buffer. 
++ */ ++void ring_buffer_event_discard(struct ring_buffer_event *event) + { +- /* Just stupid testing the normalize function and deltas */ +- *ts >>= DEBUG_SHIFT; ++ event->type = RINGBUF_TYPE_PADDING; ++ /* time delta must be non zero */ ++ if (!event->time_delta) ++ event->time_delta = 1; + } +-EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); + +-#define RB_EVNT_HDR_SIZE (sizeof(struct ring_buffer_event)) +-#define RB_ALIGNMENT_SHIFT 2 +-#define RB_ALIGNMENT (1 << RB_ALIGNMENT_SHIFT) +-#define RB_MAX_SMALL_DATA 28 ++static unsigned ++rb_event_data_length(struct ring_buffer_event *event) ++{ ++ unsigned length; + +-enum { +- RB_LEN_TIME_EXTEND = 8, +- RB_LEN_TIME_STAMP = 16, +-}; ++ if (event->len) ++ length = event->len * RB_ALIGNMENT; ++ else ++ length = event->array[0]; ++ return length + RB_EVNT_HDR_SIZE; ++} + + /* inline for ring buffer fast paths */ +-static inline unsigned ++static unsigned + rb_event_length(struct ring_buffer_event *event) + { +- unsigned length; +- + switch (event->type) { + case RINGBUF_TYPE_PADDING: +- /* undefined */ +- return -1; ++ if (rb_null_event(event)) ++ /* undefined */ ++ return -1; ++ return rb_event_data_length(event); + + case RINGBUF_TYPE_TIME_EXTEND: + return RB_LEN_TIME_EXTEND; +@@ -150,11 +257,7 @@ rb_event_length(struct ring_buffer_event + return RB_LEN_TIME_STAMP; + + case RINGBUF_TYPE_DATA: +- if (event->len) +- length = event->len << RB_ALIGNMENT_SHIFT; +- else +- length = event->array[0]; +- return length + RB_EVNT_HDR_SIZE; ++ return rb_event_data_length(event); + default: + BUG(); + } +@@ -179,7 +282,7 @@ unsigned ring_buffer_event_length(struct + EXPORT_SYMBOL_GPL(ring_buffer_event_length); + + /* inline for ring buffer fast paths */ +-static inline void * ++static void * + rb_event_data(struct ring_buffer_event *event) + { + BUG_ON(event->type != RINGBUF_TYPE_DATA); +@@ -209,7 +312,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data + + struct buffer_data_page { + u64 time_stamp; /* page time stamp */ +- local_t commit; /* write commited index */ ++ local_t commit; /* write committed index */ + unsigned char data[]; /* data of buffer page */ + }; + +@@ -225,14 +328,25 @@ static void rb_init_page(struct buffer_d + local_set(&bpage->commit, 0); + } + ++/** ++ * ring_buffer_page_len - the size of data on the page. ++ * @page: The page to read ++ * ++ * Returns the amount of data on the page, including buffer page header. ++ */ ++size_t ring_buffer_page_len(void *page) ++{ ++ return local_read(&((struct buffer_data_page *)page)->commit) ++ + BUF_PAGE_HDR_SIZE; ++} ++ + /* + * Also stolen from mm/slob.c. Thanks to Mathieu Desnoyers for pointing + * this issue out. + */ +-static inline void free_buffer_page(struct buffer_page *bpage) ++static void free_buffer_page(struct buffer_page *bpage) + { +- if (bpage->page) +- free_page((unsigned long)bpage->page); ++ free_page((unsigned long)bpage->page); + kfree(bpage); + } + +@@ -246,7 +360,7 @@ static inline int test_time_stamp(u64 de + return 0; + } + +-#define BUF_PAGE_SIZE (PAGE_SIZE - offsetof(struct buffer_data_page, data)) ++#define BUF_PAGE_SIZE (PAGE_SIZE - BUF_PAGE_HDR_SIZE) + + /* + * head_page == tail_page && head == tail then buffer is empty. 
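/*
 * Editor's sketch (illustrative only, not part of the patch itself):
 * later hunks in this ring_buffer.c diff drop the saved-interrupt-flags
 * argument from ring_buffer_lock_reserve() and
 * ring_buffer_unlock_commit(), so a producer now reserves, fills and
 * commits an event as shown below. "my_payload" and "my_record" are
 * placeholder names; the call sequence mirrors the kmemtrace probes
 * added earlier in this patch.
 */
#include <linux/ring_buffer.h>

struct my_payload {
	unsigned long	value;
};

static void my_record(struct ring_buffer *buffer, unsigned long value)
{
	struct ring_buffer_event *event;
	struct my_payload *entry;

	/* reserve room for one payload; NULL means the buffer refused it */
	event = ring_buffer_lock_reserve(buffer, sizeof(*entry));
	if (!event)
		return;

	entry = ring_buffer_event_data(event);
	entry->value = value;

	/* publish the event; no flags value is handed back any more */
	ring_buffer_unlock_commit(buffer, event);
}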
+@@ -254,13 +368,13 @@ static inline int test_time_stamp(u64 de + struct ring_buffer_per_cpu { + int cpu; + struct ring_buffer *buffer; +- spinlock_t reader_lock; /* serialize readers */ +- raw_spinlock_t lock; ++ raw_spinlock_t reader_lock; /* serialize readers */ ++ __raw_spinlock_t lock; + struct lock_class_key lock_key; + struct list_head pages; + struct buffer_page *head_page; /* read from head */ + struct buffer_page *tail_page; /* write to tail */ +- struct buffer_page *commit_page; /* commited pages */ ++ struct buffer_page *commit_page; /* committed pages */ + struct buffer_page *reader_page; + unsigned long overrun; + unsigned long entries; +@@ -273,12 +387,17 @@ struct ring_buffer { + unsigned pages; + unsigned flags; + int cpus; +- cpumask_var_t cpumask; + atomic_t record_disabled; ++ cpumask_var_t cpumask; + + struct mutex mutex; + + struct ring_buffer_per_cpu **buffers; ++ ++#ifdef CONFIG_HOTPLUG_CPU ++ struct notifier_block cpu_notify; ++#endif ++ u64 (*clock)(void); + }; + + struct ring_buffer_iter { +@@ -299,11 +418,35 @@ struct ring_buffer_iter { + _____ret; \ + }) + ++/* Up this if you want to test the TIME_EXTENTS and normalization */ ++#define DEBUG_SHIFT 0 ++ ++u64 ring_buffer_time_stamp(struct ring_buffer *buffer, int cpu) ++{ ++ u64 time; ++ ++ preempt_disable_notrace(); ++ /* shift to debug/test normalization and TIME_EXTENTS */ ++ time = buffer->clock() << DEBUG_SHIFT; ++ preempt_enable_no_resched_notrace(); ++ ++ return time; ++} ++EXPORT_SYMBOL_GPL(ring_buffer_time_stamp); ++ ++void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, ++ int cpu, u64 *ts) ++{ ++ /* Just stupid testing the normalize function and deltas */ ++ *ts >>= DEBUG_SHIFT; ++} ++EXPORT_SYMBOL_GPL(ring_buffer_normalize_time_stamp); ++ + /** + * check_pages - integrity check of buffer pages + * @cpu_buffer: CPU buffer with pages to test + * +- * As a safty measure we check to make sure the data pages have not ++ * As a safety measure we check to make sure the data pages have not + * been corrupted. + */ + static int rb_check_pages(struct ring_buffer_per_cpu *cpu_buffer) +@@ -381,7 +524,7 @@ rb_allocate_cpu_buffer(struct ring_buffe + cpu_buffer->cpu = cpu; + cpu_buffer->buffer = buffer; + spin_lock_init(&cpu_buffer->reader_lock); +- cpu_buffer->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; ++ cpu_buffer->lock = (__raw_spinlock_t) __RAW_SPIN_LOCK_UNLOCKED; + INIT_LIST_HEAD(&cpu_buffer->pages); + + bpage = kzalloc_node(ALIGN(sizeof(*bpage), cache_line_size()), +@@ -437,6 +580,11 @@ static void rb_free_cpu_buffer(struct ri + */ + extern int ring_buffer_page_too_big(void); + ++#ifdef CONFIG_HOTPLUG_CPU ++static int rb_cpu_notify(struct notifier_block *self, ++ unsigned long action, void *hcpu); ++#endif ++ + /** + * ring_buffer_alloc - allocate a new ring_buffer + * @size: the size in bytes per cpu that is needed. +@@ -469,12 +617,23 @@ struct ring_buffer *ring_buffer_alloc(un + + buffer->pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); + buffer->flags = flags; ++ buffer->clock = trace_clock_local; + + /* need at least two pages */ + if (buffer->pages == 1) + buffer->pages++; + ++ /* ++ * In case of non-hotplug cpu, if the ring-buffer is allocated ++ * in early initcall, it will not be notified of secondary cpus. ++ * In that off case, we need to allocate for all possible cpus. 
++ */ ++#ifdef CONFIG_HOTPLUG_CPU ++ get_online_cpus(); ++ cpumask_copy(buffer->cpumask, cpu_online_mask); ++#else + cpumask_copy(buffer->cpumask, cpu_possible_mask); ++#endif + buffer->cpus = nr_cpu_ids; + + bsize = sizeof(void *) * nr_cpu_ids; +@@ -490,6 +649,13 @@ struct ring_buffer *ring_buffer_alloc(un + goto fail_free_buffers; + } + ++#ifdef CONFIG_HOTPLUG_CPU ++ buffer->cpu_notify.notifier_call = rb_cpu_notify; ++ buffer->cpu_notify.priority = 0; ++ register_cpu_notifier(&buffer->cpu_notify); ++#endif ++ ++ put_online_cpus(); + mutex_init(&buffer->mutex); + + return buffer; +@@ -503,6 +669,7 @@ struct ring_buffer *ring_buffer_alloc(un + + fail_free_cpumask: + free_cpumask_var(buffer->cpumask); ++ put_online_cpus(); + + fail_free_buffer: + kfree(buffer); +@@ -519,15 +686,29 @@ ring_buffer_free(struct ring_buffer *buf + { + int cpu; + ++ get_online_cpus(); ++ ++#ifdef CONFIG_HOTPLUG_CPU ++ unregister_cpu_notifier(&buffer->cpu_notify); ++#endif ++ + for_each_buffer_cpu(buffer, cpu) + rb_free_cpu_buffer(buffer->buffers[cpu]); + ++ put_online_cpus(); ++ + free_cpumask_var(buffer->cpumask); + + kfree(buffer); + } + EXPORT_SYMBOL_GPL(ring_buffer_free); + ++void ring_buffer_set_clock(struct ring_buffer *buffer, ++ u64 (*clock)(void)) ++{ ++ buffer->clock = clock; ++} ++ + static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); + + static void +@@ -627,16 +808,15 @@ int ring_buffer_resize(struct ring_buffe + return size; + + mutex_lock(&buffer->mutex); ++ get_online_cpus(); + + nr_pages = DIV_ROUND_UP(size, BUF_PAGE_SIZE); + + if (size < buffer_size) { + + /* easy case, just free pages */ +- if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) { +- mutex_unlock(&buffer->mutex); +- return -1; +- } ++ if (RB_WARN_ON(buffer, nr_pages >= buffer->pages)) ++ goto out_fail; + + rm_pages = buffer->pages - nr_pages; + +@@ -655,10 +835,8 @@ int ring_buffer_resize(struct ring_buffe + * add these pages to the cpu_buffers. Otherwise we just free + * them all and return -ENOMEM; + */ +- if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) { +- mutex_unlock(&buffer->mutex); +- return -1; +- } ++ if (RB_WARN_ON(buffer, nr_pages <= buffer->pages)) ++ goto out_fail; + + new_pages = nr_pages - buffer->pages; + +@@ -683,13 +861,12 @@ int ring_buffer_resize(struct ring_buffe + rb_insert_pages(cpu_buffer, &pages, new_pages); + } + +- if (RB_WARN_ON(buffer, !list_empty(&pages))) { +- mutex_unlock(&buffer->mutex); +- return -1; +- } ++ if (RB_WARN_ON(buffer, !list_empty(&pages))) ++ goto out_fail; + + out: + buffer->pages = nr_pages; ++ put_online_cpus(); + mutex_unlock(&buffer->mutex); + + return size; +@@ -699,15 +876,20 @@ int ring_buffer_resize(struct ring_buffe + list_del_init(&bpage->list); + free_buffer_page(bpage); + } ++ put_online_cpus(); + mutex_unlock(&buffer->mutex); + return -ENOMEM; +-} +-EXPORT_SYMBOL_GPL(ring_buffer_resize); + +-static inline int rb_null_event(struct ring_buffer_event *event) +-{ +- return event->type == RINGBUF_TYPE_PADDING; ++ /* ++ * Something went totally wrong, and we are too paranoid ++ * to even clean up the mess. 
++ */ ++ out_fail: ++ put_online_cpus(); ++ mutex_unlock(&buffer->mutex); ++ return -1; + } ++EXPORT_SYMBOL_GPL(ring_buffer_resize); + + static inline void * + __rb_data_page_index(struct buffer_data_page *bpage, unsigned index) +@@ -811,7 +993,7 @@ rb_event_index(struct ring_buffer_event + return (addr & ~PAGE_MASK) - (PAGE_SIZE - BUF_PAGE_SIZE); + } + +-static inline int ++static int + rb_is_commit(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event) + { +@@ -825,7 +1007,7 @@ rb_is_commit(struct ring_buffer_per_cpu + rb_commit_index(cpu_buffer) == index; + } + +-static inline void ++static void + rb_set_commit_event(struct ring_buffer_per_cpu *cpu_buffer, + struct ring_buffer_event *event) + { +@@ -850,7 +1032,7 @@ rb_set_commit_event(struct ring_buffer_p + local_set(&cpu_buffer->commit_page->page->commit, index); + } + +-static inline void ++static void + rb_set_commit_to_write(struct ring_buffer_per_cpu *cpu_buffer) + { + /* +@@ -896,7 +1078,7 @@ static void rb_reset_reader_page(struct + cpu_buffer->reader_page->read = 0; + } + +-static inline void rb_inc_iter(struct ring_buffer_iter *iter) ++static void rb_inc_iter(struct ring_buffer_iter *iter) + { + struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; + +@@ -926,7 +1108,7 @@ static inline void rb_inc_iter(struct ri + * and with this, we can determine what to place into the + * data field. + */ +-static inline void ++static void + rb_update_event(struct ring_buffer_event *event, + unsigned type, unsigned length) + { +@@ -938,15 +1120,11 @@ rb_update_event(struct ring_buffer_event + break; + + case RINGBUF_TYPE_TIME_EXTEND: +- event->len = +- (RB_LEN_TIME_EXTEND + (RB_ALIGNMENT-1)) +- >> RB_ALIGNMENT_SHIFT; ++ event->len = DIV_ROUND_UP(RB_LEN_TIME_EXTEND, RB_ALIGNMENT); + break; + + case RINGBUF_TYPE_TIME_STAMP: +- event->len = +- (RB_LEN_TIME_STAMP + (RB_ALIGNMENT-1)) +- >> RB_ALIGNMENT_SHIFT; ++ event->len = DIV_ROUND_UP(RB_LEN_TIME_STAMP, RB_ALIGNMENT); + break; + + case RINGBUF_TYPE_DATA: +@@ -955,16 +1133,14 @@ rb_update_event(struct ring_buffer_event + event->len = 0; + event->array[0] = length; + } else +- event->len = +- (length + (RB_ALIGNMENT-1)) +- >> RB_ALIGNMENT_SHIFT; ++ event->len = DIV_ROUND_UP(length, RB_ALIGNMENT); + break; + default: + BUG(); + } + } + +-static inline unsigned rb_calculate_event_length(unsigned length) ++static unsigned rb_calculate_event_length(unsigned length) + { + struct ring_buffer_event event; /* Used only for sizeof array */ + +@@ -990,6 +1166,7 @@ __rb_reserve_next(struct ring_buffer_per + struct ring_buffer *buffer = cpu_buffer->buffer; + struct ring_buffer_event *event; + unsigned long flags; ++ bool lock_taken = false; + + commit_page = cpu_buffer->commit_page; + /* we just need to protect against interrupts */ +@@ -1003,7 +1180,30 @@ __rb_reserve_next(struct ring_buffer_per + struct buffer_page *next_page = tail_page; + + local_irq_save(flags); +- __raw_spin_lock(&cpu_buffer->lock); ++ /* ++ * Since the write to the buffer is still not ++ * fully lockless, we must be careful with NMIs. ++ * The locks in the writers are taken when a write ++ * crosses to a new page. The locks protect against ++ * races with the readers (this will soon be fixed ++ * with a lockless solution). ++ * ++ * Because we can not protect against NMIs, and we ++ * want to keep traces reentrant, we need to manage ++ * what happens when we are in an NMI. ++ * ++ * NMIs can happen after we take the lock. ++ * If we are in an NMI, only take the lock ++ * if it is not already taken. 
Otherwise ++ * simply fail. ++ */ ++ if (unlikely(in_nmi())) { ++ if (!__raw_spin_trylock(&cpu_buffer->lock)) ++ goto out_reset; ++ } else ++ __raw_spin_lock(&cpu_buffer->lock); ++ ++ lock_taken = true; + + rb_inc_page(cpu_buffer, &next_page); + +@@ -1012,7 +1212,7 @@ __rb_reserve_next(struct ring_buffer_per + + /* we grabbed the lock before incrementing */ + if (RB_WARN_ON(cpu_buffer, next_page == reader_page)) +- goto out_unlock; ++ goto out_reset; + + /* + * If for some reason, we had an interrupt storm that made +@@ -1021,12 +1221,12 @@ __rb_reserve_next(struct ring_buffer_per + */ + if (unlikely(next_page == commit_page)) { + WARN_ON_ONCE(1); +- goto out_unlock; ++ goto out_reset; + } + + if (next_page == head_page) { + if (!(buffer->flags & RB_FL_OVERWRITE)) +- goto out_unlock; ++ goto out_reset; + + /* tail_page has not moved yet? */ + if (tail_page == cpu_buffer->tail_page) { +@@ -1050,7 +1250,7 @@ __rb_reserve_next(struct ring_buffer_per + cpu_buffer->tail_page = next_page; + + /* reread the time stamp */ +- *ts = ring_buffer_time_stamp(cpu_buffer->cpu); ++ *ts = ring_buffer_time_stamp(buffer, cpu_buffer->cpu); + cpu_buffer->tail_page->page->time_stamp = *ts; + } + +@@ -1060,7 +1260,8 @@ __rb_reserve_next(struct ring_buffer_per + if (tail < BUF_PAGE_SIZE) { + /* Mark the rest of the page with padding */ + event = __rb_page_index(tail_page, tail); +- event->type = RINGBUF_TYPE_PADDING; ++ kmemcheck_annotate_bitfield(event->bitfield); ++ rb_event_set_padding(event); + } + + if (tail <= BUF_PAGE_SIZE) +@@ -1089,6 +1290,7 @@ __rb_reserve_next(struct ring_buffer_per + return NULL; + + event = __rb_page_index(tail_page, tail); ++ kmemcheck_annotate_bitfield(event->bitfield); + rb_update_event(event, type, length); + + /* +@@ -1100,12 +1302,13 @@ __rb_reserve_next(struct ring_buffer_per + + return event; + +- out_unlock: ++ out_reset: + /* reset write */ + if (tail <= BUF_PAGE_SIZE) + local_set(&tail_page->write, tail); + +- __raw_spin_unlock(&cpu_buffer->lock); ++ if (likely(lock_taken)) ++ __raw_spin_unlock(&cpu_buffer->lock); + local_irq_restore(flags); + return NULL; + } +@@ -1192,7 +1395,7 @@ rb_reserve_next_event(struct ring_buffer + if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000)) + return NULL; + +- ts = ring_buffer_time_stamp(cpu_buffer->cpu); ++ ts = ring_buffer_time_stamp(cpu_buffer->buffer, cpu_buffer->cpu); + + /* + * Only the first commit can update the timestamp. +@@ -1265,7 +1468,6 @@ static DEFINE_PER_CPU(int, rb_need_resch + * ring_buffer_lock_reserve - reserve a part of the buffer + * @buffer: the ring buffer to reserve from + * @length: the length of the data to reserve (excluding event header) +- * @flags: a pointer to save the interrupt flags + * + * Returns a reseverd event on the ring buffer to copy directly to. + * The user of this interface will need to get the body to write into +@@ -1278,9 +1480,7 @@ static DEFINE_PER_CPU(int, rb_need_resch + * If NULL is returned, then nothing has been allocated or locked. + */ + struct ring_buffer_event * +-ring_buffer_lock_reserve(struct ring_buffer *buffer, +- unsigned long length, +- unsigned long *flags) ++ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length) + { + struct ring_buffer_per_cpu *cpu_buffer; + struct ring_buffer_event *event; +@@ -1347,15 +1547,13 @@ static void rb_commit(struct ring_buffer + * ring_buffer_unlock_commit - commit a reserved + * @buffer: The buffer to commit to + * @event: The event pointer to commit. +- * @flags: the interrupt flags received from ring_buffer_lock_reserve. 
+ * + * This commits the data to the ring buffer, and releases any locks held. + * + * Must be paired with ring_buffer_lock_reserve. + */ + int ring_buffer_unlock_commit(struct ring_buffer *buffer, +- struct ring_buffer_event *event, +- unsigned long flags) ++ struct ring_buffer_event *event) + { + struct ring_buffer_per_cpu *cpu_buffer; + int cpu = raw_smp_processor_id(); +@@ -1438,7 +1636,7 @@ int ring_buffer_write(struct ring_buffer + } + EXPORT_SYMBOL_GPL(ring_buffer_write); + +-static inline int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) ++static int rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) + { + struct buffer_page *reader = cpu_buffer->reader_page; + struct buffer_page *head = cpu_buffer->head_page; +@@ -1528,12 +1726,15 @@ EXPORT_SYMBOL_GPL(ring_buffer_record_ena + unsigned long ring_buffer_entries_cpu(struct ring_buffer *buffer, int cpu) + { + struct ring_buffer_per_cpu *cpu_buffer; ++ unsigned long ret; + + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return 0; + + cpu_buffer = buffer->buffers[cpu]; +- return cpu_buffer->entries; ++ ret = cpu_buffer->entries; ++ ++ return ret; + } + EXPORT_SYMBOL_GPL(ring_buffer_entries_cpu); + +@@ -1545,12 +1746,15 @@ EXPORT_SYMBOL_GPL(ring_buffer_entries_cp + unsigned long ring_buffer_overrun_cpu(struct ring_buffer *buffer, int cpu) + { + struct ring_buffer_per_cpu *cpu_buffer; ++ unsigned long ret; + + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return 0; + + cpu_buffer = buffer->buffers[cpu]; +- return cpu_buffer->overrun; ++ ret = cpu_buffer->overrun; ++ ++ return ret; + } + EXPORT_SYMBOL_GPL(ring_buffer_overrun_cpu); + +@@ -1627,9 +1831,14 @@ static void rb_iter_reset(struct ring_bu + */ + void ring_buffer_iter_reset(struct ring_buffer_iter *iter) + { +- struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; ++ struct ring_buffer_per_cpu *cpu_buffer; + unsigned long flags; + ++ if (!iter) ++ return; ++ ++ cpu_buffer = iter->cpu_buffer; ++ + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + rb_iter_reset(iter); + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); +@@ -1803,7 +2012,7 @@ static void rb_advance_reader(struct rin + + event = rb_reader_event(cpu_buffer); + +- if (event->type == RINGBUF_TYPE_DATA) ++ if (event->type == RINGBUF_TYPE_DATA || rb_discarded_event(event)) + cpu_buffer->entries--; + + rb_update_read_stamp(cpu_buffer, event); +@@ -1864,9 +2073,6 @@ rb_buffer_peek(struct ring_buffer *buffe + struct buffer_page *reader; + int nr_loops = 0; + +- if (!cpumask_test_cpu(cpu, buffer->cpumask)) +- return NULL; +- + cpu_buffer = buffer->buffers[cpu]; + + again: +@@ -1889,9 +2095,18 @@ rb_buffer_peek(struct ring_buffer *buffe + + switch (event->type) { + case RINGBUF_TYPE_PADDING: +- RB_WARN_ON(cpu_buffer, 1); ++ if (rb_null_event(event)) ++ RB_WARN_ON(cpu_buffer, 1); ++ /* ++ * Because the writer could be discarding every ++ * event it creates (which would probably be bad) ++ * if we were to go back to "again" then we may never ++ * catch up, and will trigger the warn on, or lock ++ * the box. Return the padding, and we will release ++ * the current locks, and try again. 
++ */ + rb_advance_reader(cpu_buffer); +- return NULL; ++ return event; + + case RINGBUF_TYPE_TIME_EXTEND: + /* Internal data, OK to advance */ +@@ -1906,7 +2121,8 @@ rb_buffer_peek(struct ring_buffer *buffe + case RINGBUF_TYPE_DATA: + if (ts) { + *ts = cpu_buffer->read_stamp + event->time_delta; +- ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); ++ ring_buffer_normalize_time_stamp(buffer, ++ cpu_buffer->cpu, ts); + } + return event; + +@@ -1951,8 +2167,12 @@ rb_iter_peek(struct ring_buffer_iter *it + + switch (event->type) { + case RINGBUF_TYPE_PADDING: +- rb_inc_iter(iter); +- goto again; ++ if (rb_null_event(event)) { ++ rb_inc_iter(iter); ++ goto again; ++ } ++ rb_advance_iter(iter); ++ return event; + + case RINGBUF_TYPE_TIME_EXTEND: + /* Internal data, OK to advance */ +@@ -1967,7 +2187,8 @@ rb_iter_peek(struct ring_buffer_iter *it + case RINGBUF_TYPE_DATA: + if (ts) { + *ts = iter->read_stamp + event->time_delta; +- ring_buffer_normalize_time_stamp(cpu_buffer->cpu, ts); ++ ring_buffer_normalize_time_stamp(buffer, ++ cpu_buffer->cpu, ts); + } + return event; + +@@ -1995,10 +2216,19 @@ ring_buffer_peek(struct ring_buffer *buf + struct ring_buffer_event *event; + unsigned long flags; + ++ if (!cpumask_test_cpu(cpu, buffer->cpumask)) ++ return NULL; ++ ++ again: + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + event = rb_buffer_peek(buffer, cpu, ts); + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + ++ if (event && event->type == RINGBUF_TYPE_PADDING) { ++ cpu_relax(); ++ goto again; ++ } ++ + return event; + } + +@@ -2017,10 +2247,16 @@ ring_buffer_iter_peek(struct ring_buffer + struct ring_buffer_event *event; + unsigned long flags; + ++ again: + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + event = rb_iter_peek(iter, ts); + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + ++ if (event && event->type == RINGBUF_TYPE_PADDING) { ++ cpu_relax(); ++ goto again; ++ } ++ + return event; + } + +@@ -2035,24 +2271,37 @@ ring_buffer_iter_peek(struct ring_buffer + struct ring_buffer_event * + ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts) + { +- struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; +- struct ring_buffer_event *event; ++ struct ring_buffer_per_cpu *cpu_buffer; ++ struct ring_buffer_event *event = NULL; + unsigned long flags; + ++ again: ++ /* might be called in atomic */ ++ preempt_disable(); ++ + if (!cpumask_test_cpu(cpu, buffer->cpumask)) +- return NULL; ++ goto out; + ++ cpu_buffer = buffer->buffers[cpu]; + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + + event = rb_buffer_peek(buffer, cpu, ts); + if (!event) +- goto out; ++ goto out_unlock; + + rb_advance_reader(cpu_buffer); + +- out: ++ out_unlock: + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + ++ out: ++ preempt_enable(); ++ ++ if (event && event->type == RINGBUF_TYPE_PADDING) { ++ cpu_relax(); ++ goto again; ++ } ++ + return event; + } + EXPORT_SYMBOL_GPL(ring_buffer_consume); +@@ -2131,6 +2380,7 @@ ring_buffer_read(struct ring_buffer_iter + struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer; + unsigned long flags; + ++ again: + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + event = rb_iter_peek(iter, ts); + if (!event) +@@ -2140,6 +2390,11 @@ ring_buffer_read(struct ring_buffer_iter + out: + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + ++ if (event && event->type == RINGBUF_TYPE_PADDING) { ++ cpu_relax(); ++ goto again; ++ } ++ + return event; + } + EXPORT_SYMBOL_GPL(ring_buffer_read); +@@ -2232,6 
+2487,7 @@ int ring_buffer_empty(struct ring_buffer + if (!rb_per_cpu_empty(cpu_buffer)) + return 0; + } ++ + return 1; + } + EXPORT_SYMBOL_GPL(ring_buffer_empty); +@@ -2244,12 +2500,16 @@ EXPORT_SYMBOL_GPL(ring_buffer_empty); + int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu) + { + struct ring_buffer_per_cpu *cpu_buffer; ++ int ret; + + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return 1; + + cpu_buffer = buffer->buffers[cpu]; +- return rb_per_cpu_empty(cpu_buffer); ++ ret = rb_per_cpu_empty(cpu_buffer); ++ ++ ++ return ret; + } + EXPORT_SYMBOL_GPL(ring_buffer_empty_cpu); + +@@ -2268,18 +2528,36 @@ int ring_buffer_swap_cpu(struct ring_buf + { + struct ring_buffer_per_cpu *cpu_buffer_a; + struct ring_buffer_per_cpu *cpu_buffer_b; ++ int ret = -EINVAL; + + if (!cpumask_test_cpu(cpu, buffer_a->cpumask) || + !cpumask_test_cpu(cpu, buffer_b->cpumask)) +- return -EINVAL; ++ goto out; + + /* At least make sure the two buffers are somewhat the same */ + if (buffer_a->pages != buffer_b->pages) +- return -EINVAL; ++ goto out; ++ ++ ret = -EAGAIN; ++ ++ if (ring_buffer_flags != RB_BUFFERS_ON) ++ goto out; ++ ++ if (atomic_read(&buffer_a->record_disabled)) ++ goto out; ++ ++ if (atomic_read(&buffer_b->record_disabled)) ++ goto out; + + cpu_buffer_a = buffer_a->buffers[cpu]; + cpu_buffer_b = buffer_b->buffers[cpu]; + ++ if (atomic_read(&cpu_buffer_a->record_disabled)) ++ goto out; ++ ++ if (atomic_read(&cpu_buffer_b->record_disabled)) ++ goto out; ++ + /* + * We can't do a synchronize_sched here because this + * function can be called in atomic context. +@@ -2298,18 +2576,21 @@ int ring_buffer_swap_cpu(struct ring_buf + atomic_dec(&cpu_buffer_a->record_disabled); + atomic_dec(&cpu_buffer_b->record_disabled); + +- return 0; ++ ret = 0; ++out: ++ return ret; + } + EXPORT_SYMBOL_GPL(ring_buffer_swap_cpu); + + static void rb_remove_entries(struct ring_buffer_per_cpu *cpu_buffer, +- struct buffer_data_page *bpage) ++ struct buffer_data_page *bpage, ++ unsigned int offset) + { + struct ring_buffer_event *event; + unsigned long head; + + __raw_spin_lock(&cpu_buffer->lock); +- for (head = 0; head < local_read(&bpage->commit); ++ for (head = offset; head < local_read(&bpage->commit); + head += rb_event_length(event)) { + + event = __rb_data_page_index(bpage, head); +@@ -2340,8 +2621,8 @@ static void rb_remove_entries(struct rin + */ + void *ring_buffer_alloc_read_page(struct ring_buffer *buffer) + { +- unsigned long addr; + struct buffer_data_page *bpage; ++ unsigned long addr; + + addr = __get_free_page(GFP_KERNEL); + if (!addr) +@@ -2349,6 +2630,8 @@ void *ring_buffer_alloc_read_page(struct + + bpage = (void *)addr; + ++ rb_init_page(bpage); ++ + return bpage; + } + +@@ -2368,6 +2651,7 @@ void ring_buffer_free_read_page(struct r + * ring_buffer_read_page - extract a page from the ring buffer + * @buffer: buffer to extract from + * @data_page: the page to use allocated from ring_buffer_alloc_read_page ++ * @len: amount to extract + * @cpu: the cpu of the buffer to extract + * @full: should the extraction only happen when the page is full. + * +@@ -2377,12 +2661,12 @@ void ring_buffer_free_read_page(struct r + * to swap with a page in the ring buffer. 
+ * + * for example: +- * rpage = ring_buffer_alloc_page(buffer); ++ * rpage = ring_buffer_alloc_read_page(buffer); + * if (!rpage) + * return error; +- * ret = ring_buffer_read_page(buffer, &rpage, cpu, 0); +- * if (ret) +- * process_page(rpage); ++ * ret = ring_buffer_read_page(buffer, &rpage, len, cpu, 0); ++ * if (ret >= 0) ++ * process_page(rpage, ret); + * + * When @full is set, the function will not return true unless + * the writer is off the reader page. +@@ -2393,72 +2677,118 @@ void ring_buffer_free_read_page(struct r + * responsible for that. + * + * Returns: +- * 1 if data has been transferred +- * 0 if no data has been transferred. ++ * >=0 if data has been transferred, returns the offset of consumed data. ++ * <0 if no data has been transferred. + */ + int ring_buffer_read_page(struct ring_buffer *buffer, +- void **data_page, int cpu, int full) ++ void **data_page, size_t len, int cpu, int full) + { + struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; + struct ring_buffer_event *event; + struct buffer_data_page *bpage; ++ struct buffer_page *reader; + unsigned long flags; +- int ret = 0; ++ unsigned int commit; ++ unsigned int read; ++ u64 save_timestamp; ++ int ret = -1; ++ ++ if (!cpumask_test_cpu(cpu, buffer->cpumask)) ++ goto out; ++ ++ /* ++ * If len is not big enough to hold the page header, then ++ * we can not copy anything. ++ */ ++ if (len <= BUF_PAGE_HDR_SIZE) ++ goto out; ++ ++ len -= BUF_PAGE_HDR_SIZE; + + if (!data_page) +- return 0; ++ goto out; + + bpage = *data_page; + if (!bpage) +- return 0; ++ goto out; + + spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + +- /* +- * rb_buffer_peek will get the next ring buffer if +- * the current reader page is empty. +- */ +- event = rb_buffer_peek(buffer, cpu, NULL); +- if (!event) +- goto out; ++ reader = rb_get_reader_page(cpu_buffer); ++ if (!reader) ++ goto out_unlock; ++ ++ event = rb_reader_event(cpu_buffer); ++ ++ read = reader->read; ++ commit = rb_page_commit(reader); + +- /* check for data */ +- if (!local_read(&cpu_buffer->reader_page->page->commit)) +- goto out; + /* +- * If the writer is already off of the read page, then simply +- * switch the read page with the given page. Otherwise +- * we need to copy the data from the reader to the writer. +- */ +- if (cpu_buffer->reader_page == cpu_buffer->commit_page) { +- unsigned int read = cpu_buffer->reader_page->read; ++ * If this page has been partially read or ++ * if len is not big enough to read the rest of the page or ++ * a writer is still on the page, then ++ * we must copy the data from the page to the buffer. ++ * Otherwise, we can simply swap the page with the one passed in. 
++ */ ++ if (read || (len < (commit - read)) || ++ cpu_buffer->reader_page == cpu_buffer->commit_page) { ++ struct buffer_data_page *rpage = cpu_buffer->reader_page->page; ++ unsigned int rpos = read; ++ unsigned int pos = 0; ++ unsigned int size; + + if (full) +- goto out; +- /* The writer is still on the reader page, we must copy */ +- bpage = cpu_buffer->reader_page->page; +- memcpy(bpage->data, +- cpu_buffer->reader_page->page->data + read, +- local_read(&bpage->commit) - read); ++ goto out_unlock; ++ ++ if (len > (commit - read)) ++ len = (commit - read); ++ ++ size = rb_event_length(event); ++ ++ if (len < size) ++ goto out_unlock; ++ ++ /* save the current timestamp, since the user will need it */ ++ save_timestamp = cpu_buffer->read_stamp; ++ ++ /* Need to copy one event at a time */ ++ do { ++ memcpy(bpage->data + pos, rpage->data + rpos, size); ++ ++ len -= size; + +- /* consume what was read */ +- cpu_buffer->reader_page += read; ++ rb_advance_reader(cpu_buffer); ++ rpos = reader->read; ++ pos += size; + ++ event = rb_reader_event(cpu_buffer); ++ size = rb_event_length(event); ++ } while (len > size); ++ ++ /* update bpage */ ++ local_set(&bpage->commit, pos); ++ bpage->time_stamp = save_timestamp; ++ ++ /* we copied everything to the beginning */ ++ read = 0; + } else { + /* swap the pages */ + rb_init_page(bpage); +- bpage = cpu_buffer->reader_page->page; +- cpu_buffer->reader_page->page = *data_page; +- cpu_buffer->reader_page->read = 0; ++ bpage = reader->page; ++ reader->page = *data_page; ++ local_set(&reader->write, 0); ++ reader->read = 0; + *data_page = bpage; ++ ++ /* update the entry counter */ ++ rb_remove_entries(cpu_buffer, bpage, read); + } +- ret = 1; ++ ret = read; + +- /* update the entry counter */ +- rb_remove_entries(cpu_buffer, bpage); +- out: ++ out_unlock: + spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + ++ out: + return ret; + } + +@@ -2466,7 +2796,7 @@ static ssize_t + rb_simple_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) + { +- long *p = filp->private_data; ++ unsigned long *p = filp->private_data; + char buf[64]; + int r; + +@@ -2482,9 +2812,9 @@ static ssize_t + rb_simple_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) + { +- long *p = filp->private_data; ++ unsigned long *p = filp->private_data; + char buf[64]; +- long val; ++ unsigned long val; + int ret; + + if (cnt >= sizeof(buf)) +@@ -2509,7 +2839,7 @@ rb_simple_write(struct file *filp, const + return cnt; + } + +-static struct file_operations rb_simple_fops = { ++static const struct file_operations rb_simple_fops = { + .open = tracing_open_generic, + .read = rb_simple_read, + .write = rb_simple_write, +@@ -2532,3 +2862,42 @@ static __init int rb_init_debugfs(void) + } + + fs_initcall(rb_init_debugfs); ++ ++#ifdef CONFIG_HOTPLUG_CPU ++static int rb_cpu_notify(struct notifier_block *self, ++ unsigned long action, void *hcpu) ++{ ++ struct ring_buffer *buffer = ++ container_of(self, struct ring_buffer, cpu_notify); ++ long cpu = (long)hcpu; ++ ++ switch (action) { ++ case CPU_UP_PREPARE: ++ case CPU_UP_PREPARE_FROZEN: ++ if (cpu_isset(cpu, *buffer->cpumask)) ++ return NOTIFY_OK; ++ ++ buffer->buffers[cpu] = ++ rb_allocate_cpu_buffer(buffer, cpu); ++ if (!buffer->buffers[cpu]) { ++ WARN(1, "failed to allocate ring buffer on CPU %ld\n", ++ cpu); ++ return NOTIFY_OK; ++ } ++ smp_wmb(); ++ cpu_set(cpu, *buffer->cpumask); ++ break; ++ case CPU_DOWN_PREPARE: ++ case CPU_DOWN_PREPARE_FROZEN: ++ /* ++ * Do nothing. 
++ * If we were to free the buffer, then the user would ++ * lose any trace that was in the buffer. ++ */ ++ break; ++ default: ++ break; ++ } ++ return NOTIFY_OK; ++} ++#endif +Index: linux-2.6-tip/kernel/trace/trace.c +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/trace.c ++++ linux-2.6-tip/kernel/trace/trace.c +@@ -11,32 +11,33 @@ + * Copyright (C) 2004-2006 Ingo Molnar + * Copyright (C) 2004 William Lee Irwin III + */ ++#include + #include ++#include ++#include + #include + #include + #include ++#include + #include + #include + #include + #include + #include ++#include + #include + #include + #include ++#include + #include + #include + #include + #include + #include + #include +-#include +-#include +- +-#include +-#include +-#include + + #include "trace.h" ++#include "trace_output.h" + + #define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE) + +@@ -44,14 +45,25 @@ unsigned long __read_mostly tracing_max_ + unsigned long __read_mostly tracing_thresh; + + /* ++ * On boot up, the ring buffer is set to the minimum size, so that ++ * we do not waste memory on systems that are not using tracing. ++ */ ++static int ring_buffer_expanded; ++ ++/* + * We need to change this state when a selftest is running. + * A selftest will lurk into the ring-buffer to count the + * entries inserted during the selftest although some concurrent +- * insertions into the ring-buffer such as ftrace_printk could occurred ++ * insertions into the ring-buffer such as trace_printk could occurred + * at the same time, giving false positive or negative results. + */ + static bool __read_mostly tracing_selftest_running; + ++/* ++ * If a tracer is running, we do not want to run SELFTEST. ++ */ ++static bool __read_mostly tracing_selftest_disabled; ++ + /* For tracers that don't implement custom flags */ + static struct tracer_opt dummy_tracer_opt[] = { + { } +@@ -73,7 +85,7 @@ static int dummy_set_flag(u32 old_flags, + * of the tracer is successful. But that is the only place that sets + * this back to zero. + */ +-int tracing_disabled = 1; ++static int tracing_disabled = 1; + + static DEFINE_PER_CPU(local_t, ftrace_cpu_disabled); + +@@ -91,6 +103,9 @@ static inline void ftrace_enable_cpu(voi + + static cpumask_var_t __read_mostly tracing_buffer_mask; + ++/* Define which cpu buffers are currently read in trace_pipe */ ++static cpumask_var_t tracing_reader_cpumask; ++ + #define for_each_tracing_cpu(cpu) \ + for_each_cpu(cpu, tracing_buffer_mask) + +@@ -109,14 +124,21 @@ static cpumask_var_t __read_mostly traci + */ + int ftrace_dump_on_oops; + +-static int tracing_set_tracer(char *buf); ++static int tracing_set_tracer(const char *buf); ++ ++#define BOOTUP_TRACER_SIZE 100 ++static char bootup_tracer_buf[BOOTUP_TRACER_SIZE] __initdata; ++static char *default_bootup_tracer; + + static int __init set_ftrace(char *str) + { +- tracing_set_tracer(str); ++ strncpy(bootup_tracer_buf, str, BOOTUP_TRACER_SIZE); ++ default_bootup_tracer = bootup_tracer_buf; ++ /* We are using ftrace early, expand it */ ++ ring_buffer_expanded = 1; + return 1; + } +-__setup("ftrace", set_ftrace); ++__setup("ftrace=", set_ftrace); + + static int __init set_ftrace_dump_on_oops(char *str) + { +@@ -133,13 +155,6 @@ ns2usecs(cycle_t nsec) + return nsec; + } + +-cycle_t ftrace_now(int cpu) +-{ +- u64 ts = ring_buffer_time_stamp(cpu); +- ring_buffer_normalize_time_stamp(cpu, &ts); +- return ts; +-} +- + /* + * The global_trace is the descriptor that holds the tracing + * buffers for the live tracing. 
For each CPU, it contains +@@ -156,6 +171,20 @@ static struct trace_array global_trace; + + static DEFINE_PER_CPU(struct trace_array_cpu, global_trace_cpu); + ++cycle_t ftrace_now(int cpu) ++{ ++ u64 ts; ++ ++ /* Early boot up does not have a buffer yet */ ++ if (!global_trace.buffer) ++ return trace_clock_local(); ++ ++ ts = ring_buffer_time_stamp(global_trace.buffer, cpu); ++ ring_buffer_normalize_time_stamp(global_trace.buffer, cpu, &ts); ++ ++ return ts; ++} ++ + /* + * The max_tr is used to snapshot the global_trace when a maximum + * latency is reached. Some tracers will use this to store a maximum +@@ -186,9 +215,6 @@ int tracing_is_enabled(void) + return tracer_enabled; + } + +-/* function tracing enabled */ +-int ftrace_function_enabled; +- + /* + * trace_buf_size is the size in bytes that is allocated + * for a buffer. Note, the number of bytes is always rounded +@@ -229,7 +255,8 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wai + + /* trace_flags holds trace_options default values */ + unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK | +- TRACE_ITER_ANNOTATE; ++ TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME | ++ TRACE_ITER_GRAPH_TIME; + + /** + * trace_wake_up - wake up tasks waiting for trace input +@@ -239,6 +266,10 @@ unsigned long trace_flags = TRACE_ITER_P + */ + void trace_wake_up(void) + { ++#ifdef CONFIG_PREEMPT_RT ++ if (in_atomic() || irqs_disabled()) ++ return; ++#endif + /* + * The runqueue_is_locked() can fail, but this is the best we + * have for now: +@@ -280,13 +311,18 @@ static const char *trace_options[] = { + "block", + "stacktrace", + "sched-tree", +- "ftrace_printk", ++ "trace_printk", + "ftrace_preempt", + "branch", + "annotate", + "userstacktrace", + "sym-userobj", + "printk-msg-only", ++ "context-info", ++ "latency-format", ++ "global-clock", ++ "sleep-time", ++ "graph-time", + NULL + }; + +@@ -299,8 +335,7 @@ static const char *trace_options[] = { + * This is defined as a raw_spinlock_t in order to help + * with performance when lockdep debugging is enabled. + */ +-static raw_spinlock_t ftrace_max_lock = +- (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; ++static __raw_spinlock_t ftrace_max_lock = __RAW_SPIN_LOCK_UNLOCKED; + + /* + * Copy the new maximum trace into the separate maximum-trace +@@ -326,146 +361,37 @@ __update_max_tr(struct trace_array *tr, + data->rt_priority = tsk->rt_priority; + + /* record this tasks comm */ +- tracing_record_cmdline(current); ++ tracing_record_cmdline(tsk); + } + +-/** +- * trace_seq_printf - sequence printing of trace information +- * @s: trace sequence descriptor +- * @fmt: printf format string +- * +- * The tracer may use either sequence operations or its own +- * copy to user routines. To simplify formating of a trace +- * trace_seq_printf is used to store strings into a special +- * buffer (@s). Then the output may be either used by +- * the sequencer or pulled into another buffer. +- */ +-int +-trace_seq_printf(struct trace_seq *s, const char *fmt, ...) 
++ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) + { +- int len = (PAGE_SIZE - 1) - s->len; +- va_list ap; ++ int len; + int ret; + +- if (!len) +- return 0; +- +- va_start(ap, fmt); +- ret = vsnprintf(s->buffer + s->len, len, fmt, ap); +- va_end(ap); +- +- /* If we can't write it all, don't bother writing anything */ +- if (ret >= len) +- return 0; +- +- s->len += ret; +- +- return len; +-} +- +-/** +- * trace_seq_puts - trace sequence printing of simple string +- * @s: trace sequence descriptor +- * @str: simple string to record +- * +- * The tracer may use either the sequence operations or its own +- * copy to user routines. This function records a simple string +- * into a special buffer (@s) for later retrieval by a sequencer +- * or other mechanism. +- */ +-static int +-trace_seq_puts(struct trace_seq *s, const char *str) +-{ +- int len = strlen(str); +- +- if (len > ((PAGE_SIZE - 1) - s->len)) +- return 0; +- +- memcpy(s->buffer + s->len, str, len); +- s->len += len; +- +- return len; +-} +- +-static int +-trace_seq_putc(struct trace_seq *s, unsigned char c) +-{ +- if (s->len >= (PAGE_SIZE - 1)) +- return 0; +- +- s->buffer[s->len++] = c; +- +- return 1; +-} +- +-static int +-trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) +-{ +- if (len > ((PAGE_SIZE - 1) - s->len)) ++ if (!cnt) + return 0; + +- memcpy(s->buffer + s->len, mem, len); +- s->len += len; +- +- return len; +-} +- +-#define MAX_MEMHEX_BYTES 8 +-#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) +- +-static int +-trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) +-{ +- unsigned char hex[HEX_CHARS]; +- unsigned char *data = mem; +- int i, j; +- +-#ifdef __BIG_ENDIAN +- for (i = 0, j = 0; i < len; i++) { +-#else +- for (i = len-1, j = 0; i >= 0; i--) { +-#endif +- hex[j++] = hex_asc_hi(data[i]); +- hex[j++] = hex_asc_lo(data[i]); +- } +- hex[j++] = ' '; +- +- return trace_seq_putmem(s, hex, j); +-} +- +-static int +-trace_seq_path(struct trace_seq *s, struct path *path) +-{ +- unsigned char *p; ++ if (s->len <= s->readpos) ++ return -EBUSY; + +- if (s->len >= (PAGE_SIZE - 1)) +- return 0; +- p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); +- if (!IS_ERR(p)) { +- p = mangle_path(s->buffer + s->len, p, "\n"); +- if (p) { +- s->len = p - s->buffer; +- return 1; +- } +- } else { +- s->buffer[s->len++] = '?'; +- return 1; +- } ++ len = s->len - s->readpos; ++ if (cnt > len) ++ cnt = len; ++ ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); ++ if (ret == cnt) ++ return -EFAULT; + +- return 0; +-} ++ cnt -= ret; + +-static void +-trace_seq_reset(struct trace_seq *s) +-{ +- s->len = 0; +- s->readpos = 0; ++ s->readpos += cnt; ++ return cnt; + } + +-ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, size_t cnt) ++static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt) + { + int len; +- int ret; ++ void *ret; + + if (s->len <= s->readpos) + return -EBUSY; +@@ -473,25 +399,14 @@ ssize_t trace_seq_to_user(struct trace_s + len = s->len - s->readpos; + if (cnt > len) + cnt = len; +- ret = copy_to_user(ubuf, s->buffer + s->readpos, cnt); +- if (ret) ++ ret = memcpy(buf, s->buffer + s->readpos, cnt); ++ if (!ret) + return -EFAULT; + +- s->readpos += len; ++ s->readpos += cnt; + return cnt; + } + +-static void +-trace_print_seq(struct seq_file *m, struct trace_seq *s) +-{ +- int len = s->len >= PAGE_SIZE ? 
PAGE_SIZE - 1 : s->len; +- +- s->buffer[len] = 0; +- seq_puts(m, s->buffer); +- +- trace_seq_reset(s); +-} +- + /** + * update_max_tr - snapshot all trace buffers from global_trace to max_tr + * @tr: tracer +@@ -543,7 +458,7 @@ update_max_tr_single(struct trace_array + + ftrace_enable_cpu(); + +- WARN_ON_ONCE(ret); ++ WARN_ON_ONCE(ret && ret != -EAGAIN); + + __update_max_tr(tr, tsk, cpu); + __raw_spin_unlock(&ftrace_max_lock); +@@ -556,6 +471,8 @@ update_max_tr_single(struct trace_array + * Register a new plugin tracer. + */ + int register_tracer(struct tracer *type) ++__releases(kernel_lock) ++__acquires(kernel_lock) + { + struct tracer *t; + int len; +@@ -594,9 +511,12 @@ int register_tracer(struct tracer *type) + else + if (!type->flags->opts) + type->flags->opts = dummy_tracer_opt; ++ if (!type->wait_pipe) ++ type->wait_pipe = default_wait_pipe; ++ + + #ifdef CONFIG_FTRACE_STARTUP_TEST +- if (type->selftest) { ++ if (type->selftest && !tracing_selftest_disabled) { + struct tracer *saved_tracer = current_trace; + struct trace_array *tr = &global_trace; + int i; +@@ -638,8 +558,26 @@ int register_tracer(struct tracer *type) + out: + tracing_selftest_running = false; + mutex_unlock(&trace_types_lock); +- lock_kernel(); + ++ if (ret || !default_bootup_tracer) ++ goto out_unlock; ++ ++ if (strncmp(default_bootup_tracer, type->name, BOOTUP_TRACER_SIZE)) ++ goto out_unlock; ++ ++ printk(KERN_INFO "Starting tracer '%s'\n", type->name); ++ /* Do we want this tracer to start on bootup? */ ++ tracing_set_tracer(type->name); ++ default_bootup_tracer = NULL; ++ /* disable other selftests, since this will break it. */ ++ tracing_selftest_disabled = 1; ++#ifdef CONFIG_FTRACE_STARTUP_TEST ++ printk(KERN_INFO "Disabling FTRACE selftests due to running tracer '%s'\n", ++ type->name); ++#endif ++ ++ out_unlock: ++ lock_kernel(); + return ret; + } + +@@ -658,6 +596,15 @@ void unregister_tracer(struct tracer *ty + + found: + *t = (*t)->next; ++ ++ if (type == current_trace && tracer_enabled) { ++ tracer_enabled = 0; ++ tracing_stop(); ++ if (current_trace->stop) ++ current_trace->stop(&global_trace); ++ current_trace = &nop_trace; ++ } ++ + if (strlen(type->name) != max_tracer_type_len) + goto out; + +@@ -689,24 +636,25 @@ void tracing_reset_online_cpus(struct tr + } + + #define SAVED_CMDLINES 128 ++#define NO_CMDLINE_MAP UINT_MAX + static unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1]; + static unsigned map_cmdline_to_pid[SAVED_CMDLINES]; + static char saved_cmdlines[SAVED_CMDLINES][TASK_COMM_LEN]; + static int cmdline_idx; +-static DEFINE_SPINLOCK(trace_cmdline_lock); ++static __raw_spinlock_t trace_cmdline_lock = __RAW_SPIN_LOCK_UNLOCKED; + + /* temporary disable recording */ +-atomic_t trace_record_cmdline_disabled __read_mostly; ++static atomic_t trace_record_cmdline_disabled __read_mostly; + + static void trace_init_cmdlines(void) + { +- memset(&map_pid_to_cmdline, -1, sizeof(map_pid_to_cmdline)); +- memset(&map_cmdline_to_pid, -1, sizeof(map_cmdline_to_pid)); ++ memset(&map_pid_to_cmdline, NO_CMDLINE_MAP, sizeof(map_pid_to_cmdline)); ++ memset(&map_cmdline_to_pid, NO_CMDLINE_MAP, sizeof(map_cmdline_to_pid)); + cmdline_idx = 0; + } + + static int trace_stop_count; +-static DEFINE_SPINLOCK(tracing_start_lock); ++static DEFINE_RAW_SPINLOCK(tracing_start_lock); + + /** + * ftrace_off_permanent - disable all ftrace code permanently +@@ -738,13 +686,12 @@ void tracing_start(void) + return; + + spin_lock_irqsave(&tracing_start_lock, flags); +- if (--trace_stop_count) +- goto out; +- +- if (trace_stop_count 
< 0) { +- /* Someone screwed up their debugging */ +- WARN_ON_ONCE(1); +- trace_stop_count = 0; ++ if (--trace_stop_count) { ++ if (trace_stop_count < 0) { ++ /* Someone screwed up their debugging */ ++ WARN_ON_ONCE(1); ++ trace_stop_count = 0; ++ } + goto out; + } + +@@ -794,8 +741,7 @@ void trace_stop_cmdline_recording(void); + + static void trace_save_cmdline(struct task_struct *tsk) + { +- unsigned map; +- unsigned idx; ++ unsigned pid, idx; + + if (!tsk->pid || unlikely(tsk->pid > PID_MAX_DEFAULT)) + return; +@@ -806,17 +752,24 @@ static void trace_save_cmdline(struct ta + * nor do we want to disable interrupts, + * so if we miss here, then better luck next time. + */ +- if (!spin_trylock(&trace_cmdline_lock)) ++ if (!__raw_spin_trylock(&trace_cmdline_lock)) + return; + + idx = map_pid_to_cmdline[tsk->pid]; +- if (idx >= SAVED_CMDLINES) { ++ if (idx == NO_CMDLINE_MAP) { + idx = (cmdline_idx + 1) % SAVED_CMDLINES; + +- map = map_cmdline_to_pid[idx]; +- if (map <= PID_MAX_DEFAULT) +- map_pid_to_cmdline[map] = (unsigned)-1; ++ /* ++ * Check whether the cmdline buffer at idx has a pid ++ * mapped. We are going to overwrite that entry so we ++ * need to clear the map_pid_to_cmdline. Otherwise we ++ * would read the new comm for the old pid. ++ */ ++ pid = map_cmdline_to_pid[idx]; ++ if (pid != NO_CMDLINE_MAP) ++ map_pid_to_cmdline[pid] = NO_CMDLINE_MAP; + ++ map_cmdline_to_pid[idx] = tsk->pid; + map_pid_to_cmdline[tsk->pid] = idx; + + cmdline_idx = idx; +@@ -824,33 +777,37 @@ static void trace_save_cmdline(struct ta + + memcpy(&saved_cmdlines[idx], tsk->comm, TASK_COMM_LEN); + +- spin_unlock(&trace_cmdline_lock); ++ __raw_spin_unlock(&trace_cmdline_lock); + } + +-char *trace_find_cmdline(int pid) ++void trace_find_cmdline(int pid, char comm[]) + { +- char *cmdline = "<...>"; + unsigned map; + +- if (!pid) +- return ""; ++ if (!pid) { ++ strcpy(comm, ""); ++ return; ++ } + +- if (pid > PID_MAX_DEFAULT) +- goto out; ++ if (pid > PID_MAX_DEFAULT) { ++ strcpy(comm, "<...>"); ++ return; ++ } + ++ __raw_spin_lock(&trace_cmdline_lock); + map = map_pid_to_cmdline[pid]; +- if (map >= SAVED_CMDLINES) +- goto out; +- +- cmdline = saved_cmdlines[map]; ++ if (map != NO_CMDLINE_MAP) ++ strcpy(comm, saved_cmdlines[map]); ++ else ++ strcpy(comm, "<...>"); + +- out: +- return cmdline; ++ __raw_spin_unlock(&trace_cmdline_lock); + } + + void tracing_record_cmdline(struct task_struct *tsk) + { +- if (atomic_read(&trace_record_cmdline_disabled)) ++ if (atomic_read(&trace_record_cmdline_disabled) || !tracer_enabled || ++ !tracing_is_on()) + return; + + trace_save_cmdline(tsk); +@@ -864,7 +821,7 @@ tracing_generic_entry_update(struct trac + + entry->preempt_count = pc & 0xff; + entry->pid = (tsk) ? tsk->pid : 0; +- entry->tgid = (tsk) ? tsk->tgid : 0; ++ entry->tgid = (tsk) ? tsk->tgid : 0; + entry->flags = + #ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT + (irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) | +@@ -876,78 +833,132 @@ tracing_generic_entry_update(struct trac + (need_resched() ? 
TRACE_FLAG_NEED_RESCHED : 0); + } + ++struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, ++ unsigned char type, ++ unsigned long len, ++ unsigned long flags, int pc) ++{ ++ struct ring_buffer_event *event; ++ ++ event = ring_buffer_lock_reserve(tr->buffer, len); ++ if (event != NULL) { ++ struct trace_entry *ent = ring_buffer_event_data(event); ++ ++ tracing_generic_entry_update(ent, flags, pc); ++ ent->type = type; ++ } ++ ++ return event; ++} ++static void ftrace_trace_stack(struct trace_array *tr, ++ unsigned long flags, int skip, int pc); ++static void ftrace_trace_userstack(struct trace_array *tr, ++ unsigned long flags, int pc); ++ ++static inline void __trace_buffer_unlock_commit(struct trace_array *tr, ++ struct ring_buffer_event *event, ++ unsigned long flags, int pc, ++ int wake) ++{ ++ ring_buffer_unlock_commit(tr->buffer, event); ++ ++ ftrace_trace_stack(tr, flags, 6, pc); ++ ftrace_trace_userstack(tr, flags, pc); ++ ++ if (wake) ++ trace_wake_up(); ++} ++ ++void trace_buffer_unlock_commit(struct trace_array *tr, ++ struct ring_buffer_event *event, ++ unsigned long flags, int pc) ++{ ++ __trace_buffer_unlock_commit(tr, event, flags, pc, 1); ++} ++ ++struct ring_buffer_event * ++trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, ++ unsigned long flags, int pc) ++{ ++ return trace_buffer_lock_reserve(&global_trace, ++ type, len, flags, pc); ++} ++ ++void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, ++ unsigned long flags, int pc) ++{ ++ return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 1); ++} ++ ++void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, ++ unsigned long flags, int pc) ++{ ++ return __trace_buffer_unlock_commit(&global_trace, event, flags, pc, 0); ++} ++ + void +-trace_function(struct trace_array *tr, struct trace_array_cpu *data, ++trace_function(struct trace_array *tr, + unsigned long ip, unsigned long parent_ip, unsigned long flags, + int pc) + { + struct ring_buffer_event *event; + struct ftrace_entry *entry; +- unsigned long irq_flags; + + /* If we are reading the ring buffer, don't trace */ + if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) + return; + +- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), +- &irq_flags); ++ event = trace_buffer_lock_reserve(tr, TRACE_FN, sizeof(*entry), ++ flags, pc); + if (!event) + return; + entry = ring_buffer_event_data(event); +- tracing_generic_entry_update(&entry->ent, flags, pc); +- entry->ent.type = TRACE_FN; + entry->ip = ip; + entry->parent_ip = parent_ip; +- ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ++ ring_buffer_unlock_commit(tr->buffer, event); + } + + #ifdef CONFIG_FUNCTION_GRAPH_TRACER +-static void __trace_graph_entry(struct trace_array *tr, +- struct trace_array_cpu *data, ++static int __trace_graph_entry(struct trace_array *tr, + struct ftrace_graph_ent *trace, + unsigned long flags, + int pc) + { + struct ring_buffer_event *event; + struct ftrace_graph_ent_entry *entry; +- unsigned long irq_flags; + + if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) +- return; ++ return 0; + +- event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), +- &irq_flags); ++ event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_ENT, ++ sizeof(*entry), flags, pc); + if (!event) +- return; ++ return 0; + entry = ring_buffer_event_data(event); +- tracing_generic_entry_update(&entry->ent, flags, pc); +- entry->ent.type = TRACE_GRAPH_ENT; + entry->graph_ent 
= *trace; +- ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); ++ ring_buffer_unlock_commit(global_trace.buffer, event); ++ ++ return 1; + } + + static void __trace_graph_return(struct trace_array *tr, +- struct trace_array_cpu *data, + struct ftrace_graph_ret *trace, + unsigned long flags, + int pc) + { + struct ring_buffer_event *event; + struct ftrace_graph_ret_entry *entry; +- unsigned long irq_flags; + + if (unlikely(local_read(&__get_cpu_var(ftrace_cpu_disabled)))) + return; + +- event = ring_buffer_lock_reserve(global_trace.buffer, sizeof(*entry), +- &irq_flags); ++ event = trace_buffer_lock_reserve(&global_trace, TRACE_GRAPH_RET, ++ sizeof(*entry), flags, pc); + if (!event) + return; + entry = ring_buffer_event_data(event); +- tracing_generic_entry_update(&entry->ent, flags, pc); +- entry->ent.type = TRACE_GRAPH_RET; + entry->ret = *trace; +- ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags); ++ ring_buffer_unlock_commit(global_trace.buffer, event); + } + #endif + +@@ -957,31 +968,23 @@ ftrace(struct trace_array *tr, struct tr + int pc) + { + if (likely(!atomic_read(&data->disabled))) +- trace_function(tr, data, ip, parent_ip, flags, pc); ++ trace_function(tr, ip, parent_ip, flags, pc); + } + +-static void ftrace_trace_stack(struct trace_array *tr, +- struct trace_array_cpu *data, +- unsigned long flags, +- int skip, int pc) ++static void __ftrace_trace_stack(struct trace_array *tr, ++ unsigned long flags, ++ int skip, int pc) + { + #ifdef CONFIG_STACKTRACE + struct ring_buffer_event *event; + struct stack_entry *entry; + struct stack_trace trace; +- unsigned long irq_flags; + +- if (!(trace_flags & TRACE_ITER_STACKTRACE)) +- return; +- +- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), +- &irq_flags); ++ event = trace_buffer_lock_reserve(tr, TRACE_STACK, ++ sizeof(*entry), flags, pc); + if (!event) + return; + entry = ring_buffer_event_data(event); +- tracing_generic_entry_update(&entry->ent, flags, pc); +- entry->ent.type = TRACE_STACK; +- + memset(&entry->caller, 0, sizeof(entry->caller)); + + trace.nr_entries = 0; +@@ -990,38 +993,43 @@ static void ftrace_trace_stack(struct tr + trace.entries = entry->caller; + + save_stack_trace(&trace); +- ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ++ ring_buffer_unlock_commit(tr->buffer, event); + #endif + } + ++static void ftrace_trace_stack(struct trace_array *tr, ++ unsigned long flags, ++ int skip, int pc) ++{ ++ if (!(trace_flags & TRACE_ITER_STACKTRACE)) ++ return; ++ ++ __ftrace_trace_stack(tr, flags, skip, pc); ++} ++ + void __trace_stack(struct trace_array *tr, +- struct trace_array_cpu *data, + unsigned long flags, +- int skip) ++ int skip, int pc) + { +- ftrace_trace_stack(tr, data, flags, skip, preempt_count()); ++ __ftrace_trace_stack(tr, flags, skip, pc); + } + + static void ftrace_trace_userstack(struct trace_array *tr, +- struct trace_array_cpu *data, +- unsigned long flags, int pc) ++ unsigned long flags, int pc) + { + #ifdef CONFIG_STACKTRACE + struct ring_buffer_event *event; + struct userstack_entry *entry; + struct stack_trace trace; +- unsigned long irq_flags; + + if (!(trace_flags & TRACE_ITER_USERSTACKTRACE)) + return; + +- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), +- &irq_flags); ++ event = trace_buffer_lock_reserve(tr, TRACE_USER_STACK, ++ sizeof(*entry), flags, pc); + if (!event) + return; + entry = ring_buffer_event_data(event); +- tracing_generic_entry_update(&entry->ent, flags, pc); +- entry->ent.type = TRACE_USER_STACK; + + 
memset(&entry->caller, 0, sizeof(entry->caller)); + +@@ -1031,70 +1039,58 @@ static void ftrace_trace_userstack(struc + trace.entries = entry->caller; + + save_stack_trace_user(&trace); +- ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ++ ring_buffer_unlock_commit(tr->buffer, event); + #endif + } + +-void __trace_userstack(struct trace_array *tr, +- struct trace_array_cpu *data, +- unsigned long flags) ++#ifdef UNUSED ++static void __trace_userstack(struct trace_array *tr, unsigned long flags) + { +- ftrace_trace_userstack(tr, data, flags, preempt_count()); ++ ftrace_trace_userstack(tr, flags, preempt_count()); + } ++#endif /* UNUSED */ + + static void +-ftrace_trace_special(void *__tr, void *__data, ++ftrace_trace_special(void *__tr, + unsigned long arg1, unsigned long arg2, unsigned long arg3, + int pc) + { + struct ring_buffer_event *event; +- struct trace_array_cpu *data = __data; + struct trace_array *tr = __tr; + struct special_entry *entry; +- unsigned long irq_flags; + +- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), +- &irq_flags); ++ event = trace_buffer_lock_reserve(tr, TRACE_SPECIAL, ++ sizeof(*entry), 0, pc); + if (!event) + return; + entry = ring_buffer_event_data(event); +- tracing_generic_entry_update(&entry->ent, 0, pc); +- entry->ent.type = TRACE_SPECIAL; + entry->arg1 = arg1; + entry->arg2 = arg2; + entry->arg3 = arg3; +- ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +- ftrace_trace_stack(tr, data, irq_flags, 4, pc); +- ftrace_trace_userstack(tr, data, irq_flags, pc); +- +- trace_wake_up(); ++ trace_buffer_unlock_commit(tr, event, 0, pc); + } + + void + __trace_special(void *__tr, void *__data, + unsigned long arg1, unsigned long arg2, unsigned long arg3) + { +- ftrace_trace_special(__tr, __data, arg1, arg2, arg3, preempt_count()); ++ ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count()); + } + + void + tracing_sched_switch_trace(struct trace_array *tr, +- struct trace_array_cpu *data, + struct task_struct *prev, + struct task_struct *next, + unsigned long flags, int pc) + { + struct ring_buffer_event *event; + struct ctx_switch_entry *entry; +- unsigned long irq_flags; + +- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), +- &irq_flags); ++ event = trace_buffer_lock_reserve(tr, TRACE_CTX, ++ sizeof(*entry), flags, pc); + if (!event) + return; + entry = ring_buffer_event_data(event); +- tracing_generic_entry_update(&entry->ent, flags, pc); +- entry->ent.type = TRACE_CTX; + entry->prev_pid = prev->pid; + entry->prev_prio = prev->prio; + entry->prev_state = prev->state; +@@ -1102,29 +1098,23 @@ tracing_sched_switch_trace(struct trace_ + entry->next_prio = next->prio; + entry->next_state = next->state; + entry->next_cpu = task_cpu(next); +- ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +- ftrace_trace_stack(tr, data, flags, 5, pc); +- ftrace_trace_userstack(tr, data, flags, pc); ++ trace_buffer_unlock_commit(tr, event, flags, pc); + } + + void + tracing_sched_wakeup_trace(struct trace_array *tr, +- struct trace_array_cpu *data, + struct task_struct *wakee, + struct task_struct *curr, + unsigned long flags, int pc) + { + struct ring_buffer_event *event; + struct ctx_switch_entry *entry; +- unsigned long irq_flags; + +- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), +- &irq_flags); ++ event = trace_buffer_lock_reserve(tr, TRACE_WAKE, ++ sizeof(*entry), flags, pc); + if (!event) + return; + entry = ring_buffer_event_data(event); +- tracing_generic_entry_update(&entry->ent, flags, pc); +- 
entry->ent.type = TRACE_WAKE; + entry->prev_pid = curr->pid; + entry->prev_prio = curr->prio; + entry->prev_state = curr->state; +@@ -1132,11 +1122,10 @@ tracing_sched_wakeup_trace(struct trace_ + entry->next_prio = wakee->prio; + entry->next_state = wakee->state; + entry->next_cpu = task_cpu(wakee); +- ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +- ftrace_trace_stack(tr, data, flags, 6, pc); +- ftrace_trace_userstack(tr, data, flags, pc); + +- trace_wake_up(); ++ ring_buffer_unlock_commit(tr->buffer, event); ++ ftrace_trace_stack(tr, flags, 6, pc); ++ ftrace_trace_userstack(tr, flags, pc); + } + + void +@@ -1157,66 +1146,7 @@ ftrace_special(unsigned long arg1, unsig + data = tr->data[cpu]; + + if (likely(atomic_inc_return(&data->disabled) == 1)) +- ftrace_trace_special(tr, data, arg1, arg2, arg3, pc); +- +- atomic_dec(&data->disabled); +- local_irq_restore(flags); +-} +- +-#ifdef CONFIG_FUNCTION_TRACER +-static void +-function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip) +-{ +- struct trace_array *tr = &global_trace; +- struct trace_array_cpu *data; +- unsigned long flags; +- long disabled; +- int cpu, resched; +- int pc; +- +- if (unlikely(!ftrace_function_enabled)) +- return; +- +- pc = preempt_count(); +- resched = ftrace_preempt_disable(); +- local_save_flags(flags); +- cpu = raw_smp_processor_id(); +- data = tr->data[cpu]; +- disabled = atomic_inc_return(&data->disabled); +- +- if (likely(disabled == 1)) +- trace_function(tr, data, ip, parent_ip, flags, pc); +- +- atomic_dec(&data->disabled); +- ftrace_preempt_enable(resched); +-} +- +-static void +-function_trace_call(unsigned long ip, unsigned long parent_ip) +-{ +- struct trace_array *tr = &global_trace; +- struct trace_array_cpu *data; +- unsigned long flags; +- long disabled; +- int cpu; +- int pc; +- +- if (unlikely(!ftrace_function_enabled)) +- return; +- +- /* +- * Need to use raw, since this must be called before the +- * recursive protection is performed. 
+- */ +- local_irq_save(flags); +- cpu = raw_smp_processor_id(); +- data = tr->data[cpu]; +- disabled = atomic_inc_return(&data->disabled); +- +- if (likely(disabled == 1)) { +- pc = preempt_count(); +- trace_function(tr, data, ip, parent_ip, flags, pc); +- } ++ ftrace_trace_special(tr, arg1, arg2, arg3, pc); + + atomic_dec(&data->disabled); + local_irq_restore(flags); +@@ -1229,6 +1159,7 @@ int trace_graph_entry(struct ftrace_grap + struct trace_array_cpu *data; + unsigned long flags; + long disabled; ++ int ret; + int cpu; + int pc; + +@@ -1244,15 +1175,18 @@ int trace_graph_entry(struct ftrace_grap + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { + pc = preempt_count(); +- __trace_graph_entry(tr, data, trace, flags, pc); ++ ret = __trace_graph_entry(tr, trace, flags, pc); ++ } else { ++ ret = 0; + } + /* Only do the atomic if it is not already set */ + if (!test_tsk_trace_graph(current)) + set_tsk_trace_graph(current); ++ + atomic_dec(&data->disabled); + local_irq_restore(flags); + +- return 1; ++ return ret; + } + + void trace_graph_return(struct ftrace_graph_ret *trace) +@@ -1270,7 +1204,7 @@ void trace_graph_return(struct ftrace_gr + disabled = atomic_inc_return(&data->disabled); + if (likely(disabled == 1)) { + pc = preempt_count(); +- __trace_graph_return(tr, data, trace, flags, pc); ++ __trace_graph_return(tr, trace, flags, pc); + } + if (!trace->depth) + clear_tsk_trace_graph(current); +@@ -1279,30 +1213,121 @@ void trace_graph_return(struct ftrace_gr + } + #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +-static struct ftrace_ops trace_ops __read_mostly = +-{ +- .func = function_trace_call, +-}; + +-void tracing_start_function_trace(void) ++/** ++ * trace_vbprintk - write binary msg to tracing buffer ++ * ++ */ ++int trace_vbprintk(unsigned long ip, const char *fmt, va_list args) + { +- ftrace_function_enabled = 0; ++ static __raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; ++ static u32 trace_buf[TRACE_BUF_SIZE]; + +- if (trace_flags & TRACE_ITER_PREEMPTONLY) +- trace_ops.func = function_trace_call_preempt_only; +- else +- trace_ops.func = function_trace_call; ++ struct ring_buffer_event *event; ++ struct trace_array *tr = &global_trace; ++ struct trace_array_cpu *data; ++ struct bprint_entry *entry; ++ unsigned long flags; ++ int resched; ++ int cpu, len = 0, size, pc; ++ ++ if (unlikely(tracing_selftest_running || tracing_disabled)) ++ return 0; ++ ++ /* Don't pollute graph traces with trace_vprintk internals */ ++ pause_graph_tracing(); ++ ++ pc = preempt_count(); ++ resched = ftrace_preempt_disable(); ++ cpu = raw_smp_processor_id(); ++ data = tr->data[cpu]; ++ ++ if (unlikely(atomic_read(&data->disabled))) ++ goto out; ++ ++ /* Lockdep uses trace_printk for lock tracing */ ++ local_irq_save(flags); ++ __raw_spin_lock(&trace_buf_lock); ++ len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args); ++ ++ if (len > TRACE_BUF_SIZE || len < 0) ++ goto out_unlock; ++ ++ size = sizeof(*entry) + sizeof(u32) * len; ++ event = trace_buffer_lock_reserve(tr, TRACE_BPRINT, size, flags, pc); ++ if (!event) ++ goto out_unlock; ++ entry = ring_buffer_event_data(event); ++ entry->ip = ip; ++ entry->fmt = fmt; ++ ++ memcpy(entry->buf, trace_buf, sizeof(u32) * len); ++ ring_buffer_unlock_commit(tr->buffer, event); ++ ++out_unlock: ++ __raw_spin_unlock(&trace_buf_lock); ++ local_irq_restore(flags); ++ ++out: ++ ftrace_preempt_enable(resched); ++ unpause_graph_tracing(); + +- register_ftrace_function(&trace_ops); +- ftrace_function_enabled = 1; ++ return len; 
+ } ++EXPORT_SYMBOL_GPL(trace_vbprintk); + +-void tracing_stop_function_trace(void) ++int trace_vprintk(unsigned long ip, const char *fmt, va_list args) + { +- ftrace_function_enabled = 0; +- unregister_ftrace_function(&trace_ops); ++ static __raw_spinlock_t trace_buf_lock = __RAW_SPIN_LOCK_UNLOCKED; ++ static char trace_buf[TRACE_BUF_SIZE]; ++ ++ struct ring_buffer_event *event; ++ struct trace_array *tr = &global_trace; ++ struct trace_array_cpu *data; ++ int cpu, len = 0, size, pc; ++ struct print_entry *entry; ++ unsigned long irq_flags; ++ ++ if (tracing_disabled || tracing_selftest_running) ++ return 0; ++ ++ pc = preempt_count(); ++ preempt_disable_notrace(); ++ cpu = raw_smp_processor_id(); ++ data = tr->data[cpu]; ++ ++ if (unlikely(atomic_read(&data->disabled))) ++ goto out; ++ ++ pause_graph_tracing(); ++ raw_local_irq_save(irq_flags); ++ __raw_spin_lock(&trace_buf_lock); ++ len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); ++ ++ len = min(len, TRACE_BUF_SIZE-1); ++ trace_buf[len] = 0; ++ ++ size = sizeof(*entry) + len + 1; ++ event = trace_buffer_lock_reserve(tr, TRACE_PRINT, size, irq_flags, pc); ++ if (!event) ++ goto out_unlock; ++ entry = ring_buffer_event_data(event); ++ entry->ip = ip; ++ ++ memcpy(&entry->buf, trace_buf, len); ++ entry->buf[len] = 0; ++ ring_buffer_unlock_commit(tr->buffer, event); ++ ++ out_unlock: ++ __raw_spin_unlock(&trace_buf_lock); ++ raw_local_irq_restore(irq_flags); ++ unpause_graph_tracing(); ++ out: ++ preempt_enable_notrace(); ++ ++ return len; + } +-#endif ++EXPORT_SYMBOL_GPL(trace_vprintk); + + enum trace_file_type { + TRACE_FILE_LAT_FMT = 1, +@@ -1345,10 +1370,25 @@ __find_next_entry(struct trace_iterator + { + struct ring_buffer *buffer = iter->tr->buffer; + struct trace_entry *ent, *next = NULL; ++ int cpu_file = iter->cpu_file; + u64 next_ts = 0, ts; + int next_cpu = -1; + int cpu; + ++ /* ++ * If we are in a per_cpu trace file, don't bother by iterating over ++ * all cpu and peek directly. ++ */ ++ if (cpu_file > TRACE_PIPE_ALL_CPU) { ++ if (ring_buffer_empty_cpu(buffer, cpu_file)) ++ return NULL; ++ ent = peek_next_entry(iter, cpu_file, ent_ts); ++ if (ent_cpu) ++ *ent_cpu = cpu_file; ++ ++ return ent; ++ } ++ + for_each_tracing_cpu(cpu) { + + if (ring_buffer_empty_cpu(buffer, cpu)) +@@ -1376,8 +1416,8 @@ __find_next_entry(struct trace_iterator + } + + /* Find the next real entry, without updating the iterator itself */ +-static struct trace_entry * +-find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts) ++struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, ++ int *ent_cpu, u64 *ent_ts) + { + return __find_next_entry(iter, ent_cpu, ent_ts); + } +@@ -1426,19 +1466,32 @@ static void *s_next(struct seq_file *m, + return ent; + } + ++/* ++ * No necessary locking here. The worst thing which can ++ * happen is loosing events consumed at the same time ++ * by a trace_pipe reader. ++ * Other than that, we don't risk to crash the ring buffer ++ * because it serializes the readers. ++ * ++ * The current tracer is copied to avoid a global locking ++ * all around. 
++ */ + static void *s_start(struct seq_file *m, loff_t *pos) + { + struct trace_iterator *iter = m->private; ++ static struct tracer *old_tracer; ++ int cpu_file = iter->cpu_file; + void *p = NULL; + loff_t l = 0; + int cpu; + ++ /* copy the tracer to avoid using a global lock all around */ + mutex_lock(&trace_types_lock); +- +- if (!current_trace || current_trace != iter->trace) { +- mutex_unlock(&trace_types_lock); +- return NULL; ++ if (unlikely(old_tracer != current_trace && current_trace)) { ++ old_tracer = current_trace; ++ *iter->trace = *current_trace; + } ++ mutex_unlock(&trace_types_lock); + + atomic_inc(&trace_record_cmdline_disabled); + +@@ -1449,9 +1502,12 @@ static void *s_start(struct seq_file *m, + + ftrace_disable_cpu(); + +- for_each_tracing_cpu(cpu) { +- ring_buffer_iter_reset(iter->buffer_iter[cpu]); +- } ++ if (cpu_file == TRACE_PIPE_ALL_CPU) { ++ for_each_tracing_cpu(cpu) ++ ring_buffer_iter_reset(iter->buffer_iter[cpu]); ++ } else ++ ring_buffer_iter_reset(iter->buffer_iter[cpu_file]); ++ + + ftrace_enable_cpu(); + +@@ -1469,155 +1525,6 @@ static void *s_start(struct seq_file *m, + static void s_stop(struct seq_file *m, void *p) + { + atomic_dec(&trace_record_cmdline_disabled); +- mutex_unlock(&trace_types_lock); +-} +- +-#ifdef CONFIG_KRETPROBES +-static inline const char *kretprobed(const char *name) +-{ +- static const char tramp_name[] = "kretprobe_trampoline"; +- int size = sizeof(tramp_name); +- +- if (strncmp(tramp_name, name, size) == 0) +- return "[unknown/kretprobe'd]"; +- return name; +-} +-#else +-static inline const char *kretprobed(const char *name) +-{ +- return name; +-} +-#endif /* CONFIG_KRETPROBES */ +- +-static int +-seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) +-{ +-#ifdef CONFIG_KALLSYMS +- char str[KSYM_SYMBOL_LEN]; +- const char *name; +- +- kallsyms_lookup(address, NULL, NULL, NULL, str); +- +- name = kretprobed(str); +- +- return trace_seq_printf(s, fmt, name); +-#endif +- return 1; +-} +- +-static int +-seq_print_sym_offset(struct trace_seq *s, const char *fmt, +- unsigned long address) +-{ +-#ifdef CONFIG_KALLSYMS +- char str[KSYM_SYMBOL_LEN]; +- const char *name; +- +- sprint_symbol(str, address); +- name = kretprobed(str); +- +- return trace_seq_printf(s, fmt, name); +-#endif +- return 1; +-} +- +-#ifndef CONFIG_64BIT +-# define IP_FMT "%08lx" +-#else +-# define IP_FMT "%016lx" +-#endif +- +-int +-seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) +-{ +- int ret; +- +- if (!ip) +- return trace_seq_printf(s, "0"); +- +- if (sym_flags & TRACE_ITER_SYM_OFFSET) +- ret = seq_print_sym_offset(s, "%s", ip); +- else +- ret = seq_print_sym_short(s, "%s", ip); +- +- if (!ret) +- return 0; +- +- if (sym_flags & TRACE_ITER_SYM_ADDR) +- ret = trace_seq_printf(s, " <" IP_FMT ">", ip); +- return ret; +-} +- +-static inline int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, +- unsigned long ip, unsigned long sym_flags) +-{ +- struct file *file = NULL; +- unsigned long vmstart = 0; +- int ret = 1; +- +- if (mm) { +- const struct vm_area_struct *vma; +- +- down_read(&mm->mmap_sem); +- vma = find_vma(mm, ip); +- if (vma) { +- file = vma->vm_file; +- vmstart = vma->vm_start; +- } +- if (file) { +- ret = trace_seq_path(s, &file->f_path); +- if (ret) +- ret = trace_seq_printf(s, "[+0x%lx]", ip - vmstart); +- } +- up_read(&mm->mmap_sem); +- } +- if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) +- ret = trace_seq_printf(s, " <" IP_FMT ">", ip); +- return ret; +-} +- 
+-static int +-seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, +- unsigned long sym_flags) +-{ +- struct mm_struct *mm = NULL; +- int ret = 1; +- unsigned int i; +- +- if (trace_flags & TRACE_ITER_SYM_USEROBJ) { +- struct task_struct *task; +- /* +- * we do the lookup on the thread group leader, +- * since individual threads might have already quit! +- */ +- rcu_read_lock(); +- task = find_task_by_vpid(entry->ent.tgid); +- if (task) +- mm = get_task_mm(task); +- rcu_read_unlock(); +- } +- +- for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { +- unsigned long ip = entry->caller[i]; +- +- if (ip == ULONG_MAX || !ret) +- break; +- if (i && ret) +- ret = trace_seq_puts(s, " <- "); +- if (!ip) { +- if (ret) +- ret = trace_seq_puts(s, "??"); +- continue; +- } +- if (!ret) +- break; +- if (ret) +- ret = seq_print_user_ip(s, mm, ip, sym_flags); +- } +- +- if (mm) +- mmput(mm); +- return ret; + } + + static void print_lat_help_header(struct seq_file *m) +@@ -1658,11 +1565,11 @@ print_trace_header(struct seq_file *m, s + total = entries + + ring_buffer_overruns(iter->tr->buffer); + +- seq_printf(m, "%s latency trace v1.1.5 on %s\n", ++ seq_printf(m, "# %s latency trace v1.1.5 on %s\n", + name, UTS_RELEASE); +- seq_puts(m, "-----------------------------------" ++ seq_puts(m, "# -----------------------------------" + "---------------------------------\n"); +- seq_printf(m, " latency: %lu us, #%lu/%lu, CPU#%d |" ++ seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |" + " (M:%s VP:%d, KP:%d, SP:%d HP:%d", + nsecs_to_usecs(data->saved_latency), + entries, +@@ -1684,121 +1591,24 @@ print_trace_header(struct seq_file *m, s + #else + seq_puts(m, ")\n"); + #endif +- seq_puts(m, " -----------------\n"); +- seq_printf(m, " | task: %.16s-%d " ++ seq_puts(m, "# -----------------\n"); ++ seq_printf(m, "# | task: %.16s-%d " + "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n", + data->comm, data->pid, data->uid, data->nice, + data->policy, data->rt_priority); +- seq_puts(m, " -----------------\n"); ++ seq_puts(m, "# -----------------\n"); + + if (data->critical_start) { +- seq_puts(m, " => started at: "); ++ seq_puts(m, "# => started at: "); + seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags); + trace_print_seq(m, &iter->seq); +- seq_puts(m, "\n => ended at: "); ++ seq_puts(m, "\n# => ended at: "); + seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags); + trace_print_seq(m, &iter->seq); +- seq_puts(m, "\n"); +- } +- +- seq_puts(m, "\n"); +-} +- +-static void +-lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) +-{ +- int hardirq, softirq; +- char *comm; +- +- comm = trace_find_cmdline(entry->pid); +- +- trace_seq_printf(s, "%8.8s-%-5d ", comm, entry->pid); +- trace_seq_printf(s, "%3d", cpu); +- trace_seq_printf(s, "%c%c", +- (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : +- (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? 'X' : '.', +- ((entry->flags & TRACE_FLAG_NEED_RESCHED) ? 
'N' : '.')); +- +- hardirq = entry->flags & TRACE_FLAG_HARDIRQ; +- softirq = entry->flags & TRACE_FLAG_SOFTIRQ; +- if (hardirq && softirq) { +- trace_seq_putc(s, 'H'); +- } else { +- if (hardirq) { +- trace_seq_putc(s, 'h'); +- } else { +- if (softirq) +- trace_seq_putc(s, 's'); +- else +- trace_seq_putc(s, '.'); +- } +- } +- +- if (entry->preempt_count) +- trace_seq_printf(s, "%x", entry->preempt_count); +- else +- trace_seq_puts(s, "."); +-} +- +-unsigned long preempt_mark_thresh = 100; +- +-static void +-lat_print_timestamp(struct trace_seq *s, u64 abs_usecs, +- unsigned long rel_usecs) +-{ +- trace_seq_printf(s, " %4lldus", abs_usecs); +- if (rel_usecs > preempt_mark_thresh) +- trace_seq_puts(s, "!: "); +- else if (rel_usecs > 1) +- trace_seq_puts(s, "+: "); +- else +- trace_seq_puts(s, " : "); +-} +- +-static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; +- +-static int task_state_char(unsigned long state) +-{ +- int bit = state ? __ffs(state) + 1 : 0; +- +- return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?'; +-} +- +-/* +- * The message is supposed to contain an ending newline. +- * If the printing stops prematurely, try to add a newline of our own. +- */ +-void trace_seq_print_cont(struct trace_seq *s, struct trace_iterator *iter) +-{ +- struct trace_entry *ent; +- struct trace_field_cont *cont; +- bool ok = true; +- +- ent = peek_next_entry(iter, iter->cpu, NULL); +- if (!ent || ent->type != TRACE_CONT) { +- trace_seq_putc(s, '\n'); +- return; ++ seq_puts(m, "#\n"); + } + +- do { +- cont = (struct trace_field_cont *)ent; +- if (ok) +- ok = (trace_seq_printf(s, "%s", cont->buf) > 0); +- +- ftrace_disable_cpu(); +- +- if (iter->buffer_iter[iter->cpu]) +- ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); +- else +- ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); +- +- ftrace_enable_cpu(); +- +- ent = peek_next_entry(iter, iter->cpu, NULL); +- } while (ent && ent->type == TRACE_CONT); +- +- if (!ok) +- trace_seq_putc(s, '\n'); ++ seq_puts(m, "#\n"); + } + + static void test_cpu_buff_start(struct trace_iterator *iter) +@@ -1818,533 +1628,128 @@ static void test_cpu_buff_start(struct t + trace_seq_printf(s, "##### CPU %u buffer started ####\n", iter->cpu); + } + +-static enum print_line_t +-print_lat_fmt(struct trace_iterator *iter, unsigned int trace_idx, int cpu) ++static enum print_line_t print_trace_fmt(struct trace_iterator *iter) + { + struct trace_seq *s = &iter->seq; + unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); +- struct trace_entry *next_entry; +- unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); +- struct trace_entry *entry = iter->ent; +- unsigned long abs_usecs; +- unsigned long rel_usecs; +- u64 next_ts; +- char *comm; +- int S, T; +- int i; ++ struct trace_entry *entry; ++ struct trace_event *event; + +- if (entry->type == TRACE_CONT) +- return TRACE_TYPE_HANDLED; ++ entry = iter->ent; + + test_cpu_buff_start(iter); + +- next_entry = find_next_entry(iter, NULL, &next_ts); +- if (!next_entry) +- next_ts = iter->ts; +- rel_usecs = ns2usecs(next_ts - iter->ts); +- abs_usecs = ns2usecs(iter->ts - iter->tr->time_start); +- +- if (verbose) { +- comm = trace_find_cmdline(entry->pid); +- trace_seq_printf(s, "%16s %5d %3d %d %08x %08x [%08lx]" +- " %ld.%03ldms (+%ld.%03ldms): ", +- comm, +- entry->pid, cpu, entry->flags, +- entry->preempt_count, trace_idx, +- ns2usecs(iter->ts), +- abs_usecs/1000, +- abs_usecs % 1000, rel_usecs/1000, +- rel_usecs % 1000); +- } else { +- lat_print_generic(s, entry, cpu); +- 
lat_print_timestamp(s, abs_usecs, rel_usecs); +- } +- switch (entry->type) { +- case TRACE_FN: { +- struct ftrace_entry *field; +- +- trace_assign_type(field, entry); +- +- seq_print_ip_sym(s, field->ip, sym_flags); +- trace_seq_puts(s, " ("); +- seq_print_ip_sym(s, field->parent_ip, sym_flags); +- trace_seq_puts(s, ")\n"); +- break; +- } +- case TRACE_CTX: +- case TRACE_WAKE: { +- struct ctx_switch_entry *field; +- +- trace_assign_type(field, entry); +- +- T = task_state_char(field->next_state); +- S = task_state_char(field->prev_state); +- comm = trace_find_cmdline(field->next_pid); +- trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c %s\n", +- field->prev_pid, +- field->prev_prio, +- S, entry->type == TRACE_CTX ? "==>" : " +", +- field->next_cpu, +- field->next_pid, +- field->next_prio, +- T, comm); +- break; +- } +- case TRACE_SPECIAL: { +- struct special_entry *field; +- +- trace_assign_type(field, entry); ++ event = ftrace_find_event(entry->type); + +- trace_seq_printf(s, "# %ld %ld %ld\n", +- field->arg1, +- field->arg2, +- field->arg3); +- break; ++ if (trace_flags & TRACE_ITER_CONTEXT_INFO) { ++ if (iter->iter_flags & TRACE_FILE_LAT_FMT) { ++ if (!trace_print_lat_context(iter)) ++ goto partial; ++ } else { ++ if (!trace_print_context(iter)) ++ goto partial; ++ } + } +- case TRACE_STACK: { +- struct stack_entry *field; + +- trace_assign_type(field, entry); ++ if (event) ++ return event->trace(iter, sym_flags); + +- for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { +- if (i) +- trace_seq_puts(s, " <= "); +- seq_print_ip_sym(s, field->caller[i], sym_flags); +- } +- trace_seq_puts(s, "\n"); +- break; +- } +- case TRACE_PRINT: { +- struct print_entry *field; ++ if (!trace_seq_printf(s, "Unknown type %d\n", entry->type)) ++ goto partial; + +- trace_assign_type(field, entry); ++ return TRACE_TYPE_HANDLED; ++partial: ++ return TRACE_TYPE_PARTIAL_LINE; ++} + +- seq_print_ip_sym(s, field->ip, sym_flags); +- trace_seq_printf(s, ": %s", field->buf); +- if (entry->flags & TRACE_FLAG_CONT) +- trace_seq_print_cont(s, iter); +- break; +- } +- case TRACE_BRANCH: { +- struct trace_branch *field; ++static enum print_line_t print_raw_fmt(struct trace_iterator *iter) ++{ ++ struct trace_seq *s = &iter->seq; ++ struct trace_entry *entry; ++ struct trace_event *event; + +- trace_assign_type(field, entry); ++ entry = iter->ent; + +- trace_seq_printf(s, "[%s] %s:%s:%d\n", +- field->correct ? 
" ok " : " MISS ", +- field->func, +- field->file, +- field->line); +- break; ++ if (trace_flags & TRACE_ITER_CONTEXT_INFO) { ++ if (!trace_seq_printf(s, "%d %d %llu ", ++ entry->pid, iter->cpu, iter->ts)) ++ goto partial; + } +- case TRACE_USER_STACK: { +- struct userstack_entry *field; + +- trace_assign_type(field, entry); ++ event = ftrace_find_event(entry->type); ++ if (event) ++ return event->raw(iter, 0); ++ ++ if (!trace_seq_printf(s, "%d ?\n", entry->type)) ++ goto partial; + +- seq_print_userip_objs(field, s, sym_flags); +- trace_seq_putc(s, '\n'); +- break; +- } +- default: +- trace_seq_printf(s, "Unknown type %d\n", entry->type); +- } + return TRACE_TYPE_HANDLED; ++partial: ++ return TRACE_TYPE_PARTIAL_LINE; + } + +-static enum print_line_t print_trace_fmt(struct trace_iterator *iter) ++static enum print_line_t print_hex_fmt(struct trace_iterator *iter) + { + struct trace_seq *s = &iter->seq; +- unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); ++ unsigned char newline = '\n'; + struct trace_entry *entry; +- unsigned long usec_rem; +- unsigned long long t; +- unsigned long secs; +- char *comm; +- int ret; +- int S, T; +- int i; ++ struct trace_event *event; + + entry = iter->ent; + +- if (entry->type == TRACE_CONT) +- return TRACE_TYPE_HANDLED; +- +- test_cpu_buff_start(iter); ++ if (trace_flags & TRACE_ITER_CONTEXT_INFO) { ++ SEQ_PUT_HEX_FIELD_RET(s, entry->pid); ++ SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); ++ SEQ_PUT_HEX_FIELD_RET(s, iter->ts); ++ } + +- comm = trace_find_cmdline(iter->ent->pid); ++ event = ftrace_find_event(entry->type); ++ if (event) { ++ enum print_line_t ret = event->hex(iter, 0); ++ if (ret != TRACE_TYPE_HANDLED) ++ return ret; ++ } + +- t = ns2usecs(iter->ts); +- usec_rem = do_div(t, 1000000ULL); +- secs = (unsigned long)t; ++ SEQ_PUT_FIELD_RET(s, newline); + +- ret = trace_seq_printf(s, "%16s-%-5d ", comm, entry->pid); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- ret = trace_seq_printf(s, "[%03d] ", iter->cpu); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- ret = trace_seq_printf(s, "%5lu.%06lu: ", secs, usec_rem); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; ++ return TRACE_TYPE_HANDLED; ++} + +- switch (entry->type) { +- case TRACE_FN: { +- struct ftrace_entry *field; +- +- trace_assign_type(field, entry); +- +- ret = seq_print_ip_sym(s, field->ip, sym_flags); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- if ((sym_flags & TRACE_ITER_PRINT_PARENT) && +- field->parent_ip) { +- ret = trace_seq_printf(s, " <-"); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- ret = seq_print_ip_sym(s, +- field->parent_ip, +- sym_flags); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- } +- ret = trace_seq_printf(s, "\n"); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- break; +- } +- case TRACE_CTX: +- case TRACE_WAKE: { +- struct ctx_switch_entry *field; +- +- trace_assign_type(field, entry); +- +- T = task_state_char(field->next_state); +- S = task_state_char(field->prev_state); +- ret = trace_seq_printf(s, " %5d:%3d:%c %s [%03d] %5d:%3d:%c\n", +- field->prev_pid, +- field->prev_prio, +- S, +- entry->type == TRACE_CTX ? 
"==>" : " +", +- field->next_cpu, +- field->next_pid, +- field->next_prio, +- T); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- break; +- } +- case TRACE_SPECIAL: { +- struct special_entry *field; ++static enum print_line_t print_bin_fmt(struct trace_iterator *iter) ++{ ++ struct trace_seq *s = &iter->seq; ++ struct trace_entry *entry; ++ struct trace_event *event; + +- trace_assign_type(field, entry); ++ entry = iter->ent; + +- ret = trace_seq_printf(s, "# %ld %ld %ld\n", +- field->arg1, +- field->arg2, +- field->arg3); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- break; ++ if (trace_flags & TRACE_ITER_CONTEXT_INFO) { ++ SEQ_PUT_FIELD_RET(s, entry->pid); ++ SEQ_PUT_FIELD_RET(s, iter->cpu); ++ SEQ_PUT_FIELD_RET(s, iter->ts); + } +- case TRACE_STACK: { +- struct stack_entry *field; + +- trace_assign_type(field, entry); +- +- for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { +- if (i) { +- ret = trace_seq_puts(s, " <= "); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- } +- ret = seq_print_ip_sym(s, field->caller[i], +- sym_flags); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- } +- ret = trace_seq_puts(s, "\n"); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- break; +- } +- case TRACE_PRINT: { +- struct print_entry *field; +- +- trace_assign_type(field, entry); +- +- seq_print_ip_sym(s, field->ip, sym_flags); +- trace_seq_printf(s, ": %s", field->buf); +- if (entry->flags & TRACE_FLAG_CONT) +- trace_seq_print_cont(s, iter); +- break; +- } +- case TRACE_GRAPH_RET: { +- return print_graph_function(iter); +- } +- case TRACE_GRAPH_ENT: { +- return print_graph_function(iter); +- } +- case TRACE_BRANCH: { +- struct trace_branch *field; +- +- trace_assign_type(field, entry); +- +- trace_seq_printf(s, "[%s] %s:%s:%d\n", +- field->correct ? " ok " : " MISS ", +- field->func, +- field->file, +- field->line); +- break; +- } +- case TRACE_USER_STACK: { +- struct userstack_entry *field; +- +- trace_assign_type(field, entry); +- +- ret = seq_print_userip_objs(field, s, sym_flags); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- ret = trace_seq_putc(s, '\n'); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- break; +- } +- } +- return TRACE_TYPE_HANDLED; +-} +- +-static enum print_line_t print_raw_fmt(struct trace_iterator *iter) +-{ +- struct trace_seq *s = &iter->seq; +- struct trace_entry *entry; +- int ret; +- int S, T; +- +- entry = iter->ent; +- +- if (entry->type == TRACE_CONT) +- return TRACE_TYPE_HANDLED; +- +- ret = trace_seq_printf(s, "%d %d %llu ", +- entry->pid, iter->cpu, iter->ts); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- +- switch (entry->type) { +- case TRACE_FN: { +- struct ftrace_entry *field; +- +- trace_assign_type(field, entry); +- +- ret = trace_seq_printf(s, "%x %x\n", +- field->ip, +- field->parent_ip); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- break; +- } +- case TRACE_CTX: +- case TRACE_WAKE: { +- struct ctx_switch_entry *field; +- +- trace_assign_type(field, entry); +- +- T = task_state_char(field->next_state); +- S = entry->type == TRACE_WAKE ? 
'+' : +- task_state_char(field->prev_state); +- ret = trace_seq_printf(s, "%d %d %c %d %d %d %c\n", +- field->prev_pid, +- field->prev_prio, +- S, +- field->next_cpu, +- field->next_pid, +- field->next_prio, +- T); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- break; +- } +- case TRACE_SPECIAL: +- case TRACE_USER_STACK: +- case TRACE_STACK: { +- struct special_entry *field; +- +- trace_assign_type(field, entry); +- +- ret = trace_seq_printf(s, "# %ld %ld %ld\n", +- field->arg1, +- field->arg2, +- field->arg3); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- break; +- } +- case TRACE_PRINT: { +- struct print_entry *field; +- +- trace_assign_type(field, entry); +- +- trace_seq_printf(s, "# %lx %s", field->ip, field->buf); +- if (entry->flags & TRACE_FLAG_CONT) +- trace_seq_print_cont(s, iter); +- break; +- } +- } +- return TRACE_TYPE_HANDLED; +-} +- +-#define SEQ_PUT_FIELD_RET(s, x) \ +-do { \ +- if (!trace_seq_putmem(s, &(x), sizeof(x))) \ +- return 0; \ +-} while (0) +- +-#define SEQ_PUT_HEX_FIELD_RET(s, x) \ +-do { \ +- BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \ +- if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \ +- return 0; \ +-} while (0) +- +-static enum print_line_t print_hex_fmt(struct trace_iterator *iter) +-{ +- struct trace_seq *s = &iter->seq; +- unsigned char newline = '\n'; +- struct trace_entry *entry; +- int S, T; +- +- entry = iter->ent; +- +- if (entry->type == TRACE_CONT) +- return TRACE_TYPE_HANDLED; +- +- SEQ_PUT_HEX_FIELD_RET(s, entry->pid); +- SEQ_PUT_HEX_FIELD_RET(s, iter->cpu); +- SEQ_PUT_HEX_FIELD_RET(s, iter->ts); +- +- switch (entry->type) { +- case TRACE_FN: { +- struct ftrace_entry *field; +- +- trace_assign_type(field, entry); +- +- SEQ_PUT_HEX_FIELD_RET(s, field->ip); +- SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip); +- break; +- } +- case TRACE_CTX: +- case TRACE_WAKE: { +- struct ctx_switch_entry *field; +- +- trace_assign_type(field, entry); +- +- T = task_state_char(field->next_state); +- S = entry->type == TRACE_WAKE ? 
'+' : +- task_state_char(field->prev_state); +- SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid); +- SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio); +- SEQ_PUT_HEX_FIELD_RET(s, S); +- SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu); +- SEQ_PUT_HEX_FIELD_RET(s, field->next_pid); +- SEQ_PUT_HEX_FIELD_RET(s, field->next_prio); +- SEQ_PUT_HEX_FIELD_RET(s, T); +- break; +- } +- case TRACE_SPECIAL: +- case TRACE_USER_STACK: +- case TRACE_STACK: { +- struct special_entry *field; +- +- trace_assign_type(field, entry); +- +- SEQ_PUT_HEX_FIELD_RET(s, field->arg1); +- SEQ_PUT_HEX_FIELD_RET(s, field->arg2); +- SEQ_PUT_HEX_FIELD_RET(s, field->arg3); +- break; +- } +- } +- SEQ_PUT_FIELD_RET(s, newline); +- +- return TRACE_TYPE_HANDLED; +-} +- +-static enum print_line_t print_printk_msg_only(struct trace_iterator *iter) +-{ +- struct trace_seq *s = &iter->seq; +- struct trace_entry *entry = iter->ent; +- struct print_entry *field; +- int ret; +- +- trace_assign_type(field, entry); +- +- ret = trace_seq_printf(s, field->buf); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- +- if (entry->flags & TRACE_FLAG_CONT) +- trace_seq_print_cont(s, iter); +- +- return TRACE_TYPE_HANDLED; +-} +- +-static enum print_line_t print_bin_fmt(struct trace_iterator *iter) +-{ +- struct trace_seq *s = &iter->seq; +- struct trace_entry *entry; +- +- entry = iter->ent; +- +- if (entry->type == TRACE_CONT) +- return TRACE_TYPE_HANDLED; +- +- SEQ_PUT_FIELD_RET(s, entry->pid); +- SEQ_PUT_FIELD_RET(s, entry->cpu); +- SEQ_PUT_FIELD_RET(s, iter->ts); +- +- switch (entry->type) { +- case TRACE_FN: { +- struct ftrace_entry *field; +- +- trace_assign_type(field, entry); +- +- SEQ_PUT_FIELD_RET(s, field->ip); +- SEQ_PUT_FIELD_RET(s, field->parent_ip); +- break; +- } +- case TRACE_CTX: { +- struct ctx_switch_entry *field; +- +- trace_assign_type(field, entry); +- +- SEQ_PUT_FIELD_RET(s, field->prev_pid); +- SEQ_PUT_FIELD_RET(s, field->prev_prio); +- SEQ_PUT_FIELD_RET(s, field->prev_state); +- SEQ_PUT_FIELD_RET(s, field->next_pid); +- SEQ_PUT_FIELD_RET(s, field->next_prio); +- SEQ_PUT_FIELD_RET(s, field->next_state); +- break; +- } +- case TRACE_SPECIAL: +- case TRACE_USER_STACK: +- case TRACE_STACK: { +- struct special_entry *field; +- +- trace_assign_type(field, entry); +- +- SEQ_PUT_FIELD_RET(s, field->arg1); +- SEQ_PUT_FIELD_RET(s, field->arg2); +- SEQ_PUT_FIELD_RET(s, field->arg3); +- break; +- } +- } +- return 1; +-} ++ event = ftrace_find_event(entry->type); ++ return event ? 
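/*
 * [Editor's note: illustrative sketch, not part of the patch.]
 * The rewritten print_*_fmt() helpers above no longer switch on the entry
 * type; they look up the handler registered for that type via
 * ftrace_find_event() and call its trace/raw/hex/binary callback.  A new
 * entry type therefore plugs in roughly like this (assuming the
 * struct trace_event and register_ftrace_event() interface this patch
 * adds in kernel/trace/trace_output.c; TRACE_MY_ENTRY is hypothetical):
 *
 *	static enum print_line_t my_entry_trace(struct trace_iterator *iter,
 *						int flags)
 *	{
 *		return trace_seq_printf(&iter->seq, "my entry\n") ?
 *			TRACE_TYPE_HANDLED : TRACE_TYPE_PARTIAL_LINE;
 *	}
 *
 *	static struct trace_event my_trace_event = {
 *		.type	= TRACE_MY_ENTRY,	// hypothetical entry type
 *		.trace	= my_entry_trace,	// used by print_trace_fmt()
 *	};
 *
 *	// at init time:  register_ftrace_event(&my_trace_event);
 */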
event->binary(iter, 0) : TRACE_TYPE_HANDLED; ++} + + static int trace_empty(struct trace_iterator *iter) + { + int cpu; + ++ /* If we are looking at one CPU buffer, only check that one */ ++ if (iter->cpu_file != TRACE_PIPE_ALL_CPU) { ++ cpu = iter->cpu_file; ++ if (iter->buffer_iter[cpu]) { ++ if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) ++ return 0; ++ } else { ++ if (!ring_buffer_empty_cpu(iter->tr->buffer, cpu)) ++ return 0; ++ } ++ return 1; ++ } ++ + for_each_tracing_cpu(cpu) { + if (iter->buffer_iter[cpu]) { + if (!ring_buffer_iter_empty(iter->buffer_iter[cpu])) +@@ -2368,10 +1773,15 @@ static enum print_line_t print_trace_lin + return ret; + } + ++ if (iter->ent->type == TRACE_BPRINT && ++ trace_flags & TRACE_ITER_PRINTK && ++ trace_flags & TRACE_ITER_PRINTK_MSGONLY) ++ return trace_print_bprintk_msg_only(iter); ++ + if (iter->ent->type == TRACE_PRINT && + trace_flags & TRACE_ITER_PRINTK && + trace_flags & TRACE_ITER_PRINTK_MSGONLY) +- return print_printk_msg_only(iter); ++ return trace_print_printk_msg_only(iter); + + if (trace_flags & TRACE_ITER_BIN) + return print_bin_fmt(iter); +@@ -2382,9 +1792,6 @@ static enum print_line_t print_trace_lin + if (trace_flags & TRACE_ITER_RAW) + return print_raw_fmt(iter); + +- if (iter->iter_flags & TRACE_FILE_LAT_FMT) +- return print_lat_fmt(iter, iter->idx, iter->cpu); +- + return print_trace_fmt(iter); + } + +@@ -2426,30 +1833,40 @@ static struct seq_operations tracer_seq_ + }; + + static struct trace_iterator * +-__tracing_open(struct inode *inode, struct file *file, int *ret) ++__tracing_open(struct inode *inode, struct file *file) + { ++ long cpu_file = (long) inode->i_private; ++ void *fail_ret = ERR_PTR(-ENOMEM); + struct trace_iterator *iter; + struct seq_file *m; +- int cpu; ++ int cpu, ret; + +- if (tracing_disabled) { +- *ret = -ENODEV; +- return NULL; +- } ++ if (tracing_disabled) ++ return ERR_PTR(-ENODEV); + + iter = kzalloc(sizeof(*iter), GFP_KERNEL); +- if (!iter) { +- *ret = -ENOMEM; +- goto out; +- } ++ if (!iter) ++ return ERR_PTR(-ENOMEM); + ++ /* ++ * We make a copy of the current tracer to avoid concurrent ++ * changes on it while we are reading. ++ */ + mutex_lock(&trace_types_lock); ++ iter->trace = kzalloc(sizeof(*iter->trace), GFP_KERNEL); ++ if (!iter->trace) ++ goto fail; ++ ++ if (current_trace) ++ *iter->trace = *current_trace; ++ + if (current_trace && current_trace->print_max) + iter->tr = &max_tr; + else +- iter->tr = inode->i_private; +- iter->trace = current_trace; ++ iter->tr = &global_trace; + iter->pos = -1; ++ mutex_init(&iter->mutex); ++ iter->cpu_file = cpu_file; + + /* Notify the tracer early; before we stop tracing. 
*/ + if (iter->trace && iter->trace->open) +@@ -2459,20 +1876,24 @@ __tracing_open(struct inode *inode, stru + if (ring_buffer_overruns(iter->tr->buffer)) + iter->iter_flags |= TRACE_FILE_ANNOTATE; + ++ if (iter->cpu_file == TRACE_PIPE_ALL_CPU) { ++ for_each_tracing_cpu(cpu) { + +- for_each_tracing_cpu(cpu) { +- ++ iter->buffer_iter[cpu] = ++ ring_buffer_read_start(iter->tr->buffer, cpu); ++ } ++ } else { ++ cpu = iter->cpu_file; + iter->buffer_iter[cpu] = +- ring_buffer_read_start(iter->tr->buffer, cpu); +- +- if (!iter->buffer_iter[cpu]) +- goto fail_buffer; ++ ring_buffer_read_start(iter->tr->buffer, cpu); + } + + /* TODO stop tracer */ +- *ret = seq_open(file, &tracer_seq_ops); +- if (*ret) ++ ret = seq_open(file, &tracer_seq_ops); ++ if (ret < 0) { ++ fail_ret = ERR_PTR(ret); + goto fail_buffer; ++ } + + m = file->private_data; + m->private = iter; +@@ -2482,7 +1903,6 @@ __tracing_open(struct inode *inode, stru + + mutex_unlock(&trace_types_lock); + +- out: + return iter; + + fail_buffer: +@@ -2490,10 +1910,12 @@ __tracing_open(struct inode *inode, stru + if (iter->buffer_iter[cpu]) + ring_buffer_read_finish(iter->buffer_iter[cpu]); + } ++ fail: + mutex_unlock(&trace_types_lock); ++ kfree(iter->trace); + kfree(iter); + +- return ERR_PTR(-ENOMEM); ++ return fail_ret; + } + + int tracing_open_generic(struct inode *inode, struct file *filp) +@@ -2505,12 +1927,17 @@ int tracing_open_generic(struct inode *i + return 0; + } + +-int tracing_release(struct inode *inode, struct file *file) ++static int tracing_release(struct inode *inode, struct file *file) + { + struct seq_file *m = (struct seq_file *)file->private_data; +- struct trace_iterator *iter = m->private; ++ struct trace_iterator *iter; + int cpu; + ++ if (!(file->f_mode & FMODE_READ)) ++ return 0; ++ ++ iter = m->private; ++ + mutex_lock(&trace_types_lock); + for_each_tracing_cpu(cpu) { + if (iter->buffer_iter[cpu]) +@@ -2525,33 +1952,38 @@ int tracing_release(struct inode *inode, + mutex_unlock(&trace_types_lock); + + seq_release(inode, file); ++ mutex_destroy(&iter->mutex); ++ kfree(iter->trace); + kfree(iter); + return 0; + } + + static int tracing_open(struct inode *inode, struct file *file) + { +- int ret; +- +- __tracing_open(inode, file, &ret); +- +- return ret; +-} +- +-static int tracing_lt_open(struct inode *inode, struct file *file) +-{ + struct trace_iterator *iter; +- int ret; ++ int ret = 0; + +- iter = __tracing_open(inode, file, &ret); ++ /* If this file was open for write, then erase contents */ ++ if ((file->f_mode & FMODE_WRITE) && ++ !(file->f_flags & O_APPEND)) { ++ long cpu = (long) inode->i_private; + +- if (!ret) +- iter->iter_flags |= TRACE_FILE_LAT_FMT; ++ if (cpu == TRACE_PIPE_ALL_CPU) ++ tracing_reset_online_cpus(&global_trace); ++ else ++ tracing_reset(&global_trace, cpu); ++ } + ++ if (file->f_mode & FMODE_READ) { ++ iter = __tracing_open(inode, file); ++ if (IS_ERR(iter)) ++ ret = PTR_ERR(iter); ++ else if (trace_flags & TRACE_ITER_LATENCY_FMT) ++ iter->iter_flags |= TRACE_FILE_LAT_FMT; ++ } + return ret; + } + +- + static void * + t_next(struct seq_file *m, void *v, loff_t *pos) + { +@@ -2623,21 +2055,22 @@ static int show_traces_open(struct inode + return ret; + } + +-static struct file_operations tracing_fops = { +- .open = tracing_open, +- .read = seq_read, +- .llseek = seq_lseek, +- .release = tracing_release, +-}; ++static ssize_t ++tracing_write_stub(struct file *filp, const char __user *ubuf, ++ size_t count, loff_t *ppos) ++{ ++ return count; ++} + +-static struct file_operations 
tracing_lt_fops = { +- .open = tracing_lt_open, ++static const struct file_operations tracing_fops = { ++ .open = tracing_open, + .read = seq_read, ++ .write = tracing_write_stub, + .llseek = seq_lseek, + .release = tracing_release, + }; + +-static struct file_operations show_traces_fops = { ++static const struct file_operations show_traces_fops = { + .open = show_traces_open, + .read = seq_read, + .release = seq_release, +@@ -2730,7 +2163,7 @@ err_unlock: + return err; + } + +-static struct file_operations tracing_cpumask_fops = { ++static const struct file_operations tracing_cpumask_fops = { + .open = tracing_open_generic, + .read = tracing_cpumask_read, + .write = tracing_cpumask_write, +@@ -2740,57 +2173,62 @@ static ssize_t + tracing_trace_options_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) + { +- int i; ++ struct tracer_opt *trace_opts; ++ u32 tracer_flags; ++ int len = 0; + char *buf; + int r = 0; +- int len = 0; +- u32 tracer_flags = current_trace->flags->val; +- struct tracer_opt *trace_opts = current_trace->flags->opts; ++ int i; + + +- /* calulate max size */ ++ /* calculate max size */ + for (i = 0; trace_options[i]; i++) { + len += strlen(trace_options[i]); +- len += 3; /* "no" and space */ ++ len += 3; /* "no" and newline */ + } + ++ mutex_lock(&trace_types_lock); ++ tracer_flags = current_trace->flags->val; ++ trace_opts = current_trace->flags->opts; ++ + /* + * Increase the size with names of options specific + * of the current tracer. + */ + for (i = 0; trace_opts[i].name; i++) { + len += strlen(trace_opts[i].name); +- len += 3; /* "no" and space */ ++ len += 3; /* "no" and newline */ + } + + /* +2 for \n and \0 */ + buf = kmalloc(len + 2, GFP_KERNEL); +- if (!buf) ++ if (!buf) { ++ mutex_unlock(&trace_types_lock); + return -ENOMEM; ++ } + + for (i = 0; trace_options[i]; i++) { + if (trace_flags & (1 << i)) +- r += sprintf(buf + r, "%s ", trace_options[i]); ++ r += sprintf(buf + r, "%s\n", trace_options[i]); + else +- r += sprintf(buf + r, "no%s ", trace_options[i]); ++ r += sprintf(buf + r, "no%s\n", trace_options[i]); + } + + for (i = 0; trace_opts[i].name; i++) { + if (tracer_flags & trace_opts[i].bit) +- r += sprintf(buf + r, "%s ", ++ r += sprintf(buf + r, "%s\n", + trace_opts[i].name); + else +- r += sprintf(buf + r, "no%s ", ++ r += sprintf(buf + r, "no%s\n", + trace_opts[i].name); + } ++ mutex_unlock(&trace_types_lock); + +- r += sprintf(buf + r, "\n"); + WARN_ON(r >= len + 2); + + r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); + + kfree(buf); +- + return r; + } + +@@ -2828,6 +2266,34 @@ static int set_tracer_option(struct trac + return 0; + } + ++static void set_tracer_flags(unsigned int mask, int enabled) ++{ ++ /* do nothing if flag is already set */ ++ if (!!(trace_flags & mask) == !!enabled) ++ return; ++ ++ if (enabled) ++ trace_flags |= mask; ++ else ++ trace_flags &= ~mask; ++ ++ if (mask == TRACE_ITER_GLOBAL_CLK) { ++ u64 (*func)(void); ++ ++ if (enabled) ++ func = trace_clock_global; ++ else ++ func = trace_clock_local; ++ ++ mutex_lock(&trace_types_lock); ++ ring_buffer_set_clock(global_trace.buffer, func); ++ ++ if (max_tr.buffer) ++ ring_buffer_set_clock(max_tr.buffer, func); ++ mutex_unlock(&trace_types_lock); ++ } ++} ++ + static ssize_t + tracing_trace_options_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +@@ -2855,17 +2321,16 @@ tracing_trace_options_write(struct file + int len = strlen(trace_options[i]); + + if (strncmp(cmp, trace_options[i], len) == 0) { +- if (neg) +- trace_flags 
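/*
 * [Editor's note: illustrative sketch, not part of the patch.]
 * The new set_tracer_flags() above is what a write to trace_options ends
 * up calling; toggling the option that corresponds to
 * TRACE_ITER_GLOBAL_CLK ("global-clk" in mainline) swaps the ring-buffer
 * clock between trace_clock_local() and trace_clock_global().  From user
 * space (assuming debugfs is mounted at /sys/kernel/debug) that looks
 * roughly like:
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static int set_trace_option(const char *opt)	// e.g. "global-clk"
 *	{
 *		int fd = open("/sys/kernel/debug/tracing/trace_options",
 *			      O_WRONLY);
 *		if (fd < 0)
 *			return -1;
 *		(void)write(fd, opt, strlen(opt));	// "noglobal-clk" clears it
 *		close(fd);
 *		return 0;
 *	}
 */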
&= ~(1 << i); +- else +- trace_flags |= (1 << i); ++ set_tracer_flags(1 << i, !neg); + break; + } + } + + /* If no option could be set, test the specific tracer options */ + if (!trace_options[i]) { ++ mutex_lock(&trace_types_lock); + ret = set_tracer_option(current_trace, cmp, neg); ++ mutex_unlock(&trace_types_lock); + if (ret) + return ret; + } +@@ -2875,7 +2340,7 @@ tracing_trace_options_write(struct file + return cnt; + } + +-static struct file_operations tracing_iter_fops = { ++static const struct file_operations tracing_iter_fops = { + .open = tracing_open_generic, + .read = tracing_trace_options_read, + .write = tracing_trace_options_write, +@@ -2908,7 +2373,7 @@ tracing_readme_read(struct file *filp, c + readme_msg, strlen(readme_msg)); + } + +-static struct file_operations tracing_readme_fops = { ++static const struct file_operations tracing_readme_fops = { + .open = tracing_open_generic, + .read = tracing_readme_read, + }; +@@ -2930,7 +2395,7 @@ tracing_ctrl_write(struct file *filp, co + { + struct trace_array *tr = filp->private_data; + char buf[64]; +- long val; ++ unsigned long val; + int ret; + + if (cnt >= sizeof(buf)) +@@ -2985,13 +2450,105 @@ tracing_set_trace_read(struct file *filp + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); + } + +-static int tracing_set_tracer(char *buf) ++int tracer_init(struct tracer *t, struct trace_array *tr) + { ++ tracing_reset_online_cpus(tr); ++ return t->init(tr); ++} ++ ++static int tracing_resize_ring_buffer(unsigned long size) ++{ ++ int ret; ++ ++ /* ++ * If kernel or user changes the size of the ring buffer ++ * we use the size that was given, and we can forget about ++ * expanding it later. ++ */ ++ ring_buffer_expanded = 1; ++ ++ ret = ring_buffer_resize(global_trace.buffer, size); ++ if (ret < 0) ++ return ret; ++ ++ ret = ring_buffer_resize(max_tr.buffer, size); ++ if (ret < 0) { ++ int r; ++ ++ r = ring_buffer_resize(global_trace.buffer, ++ global_trace.entries); ++ if (r < 0) { ++ /* ++ * AARGH! We are left with different ++ * size max buffer!!!! ++ * The max buffer is our "snapshot" buffer. ++ * When a tracer needs a snapshot (one of the ++ * latency tracers), it swaps the max buffer ++ * with the saved snap shot. We succeeded to ++ * update the size of the main buffer, but failed to ++ * update the size of the max buffer. But when we tried ++ * to reset the main buffer to the original size, we ++ * failed there too. This is very unlikely to ++ * happen, but if it does, warn and kill all ++ * tracing. ++ */ ++ WARN_ON(1); ++ tracing_disabled = 1; ++ } ++ return ret; ++ } ++ ++ global_trace.entries = size; ++ ++ return ret; ++} ++ ++/** ++ * tracing_update_buffers - used by tracing facility to expand ring buffers ++ * ++ * To save on memory when the tracing is never used on a system with it ++ * configured in. The ring buffers are set to a minimum size. But once ++ * a user starts to use the tracing facility, then they need to grow ++ * to their default size. ++ * ++ * This function is to be called when a tracer is about to be used. 
++ */ ++int tracing_update_buffers(void) ++{ ++ int ret = 0; ++ ++ mutex_lock(&trace_types_lock); ++ if (!ring_buffer_expanded) ++ ret = tracing_resize_ring_buffer(trace_buf_size); ++ mutex_unlock(&trace_types_lock); ++ ++ return ret; ++} ++ ++struct trace_option_dentry; ++ ++static struct trace_option_dentry * ++create_trace_option_files(struct tracer *tracer); ++ ++static void ++destroy_trace_option_files(struct trace_option_dentry *topts); ++ ++static int tracing_set_tracer(const char *buf) ++{ ++ static struct trace_option_dentry *topts; + struct trace_array *tr = &global_trace; + struct tracer *t; + int ret = 0; + + mutex_lock(&trace_types_lock); ++ ++ if (!ring_buffer_expanded) { ++ ret = tracing_resize_ring_buffer(trace_buf_size); ++ if (ret < 0) ++ goto out; ++ ret = 0; ++ } ++ + for (t = trace_types; t; t = t->next) { + if (strcmp(t->name, buf) == 0) + break; +@@ -3007,9 +2564,14 @@ static int tracing_set_tracer(char *buf) + if (current_trace && current_trace->reset) + current_trace->reset(tr); + ++ destroy_trace_option_files(topts); ++ + current_trace = t; ++ ++ topts = create_trace_option_files(current_trace); ++ + if (t->init) { +- ret = t->init(tr); ++ ret = tracer_init(t, tr); + if (ret) + goto out; + } +@@ -3072,9 +2634,9 @@ static ssize_t + tracing_max_lat_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) + { +- long *ptr = filp->private_data; ++ unsigned long *ptr = filp->private_data; + char buf[64]; +- long val; ++ unsigned long val; + int ret; + + if (cnt >= sizeof(buf)) +@@ -3094,54 +2656,96 @@ tracing_max_lat_write(struct file *filp, + return cnt; + } + +-static atomic_t tracing_reader; +- + static int tracing_open_pipe(struct inode *inode, struct file *filp) + { ++ long cpu_file = (long) inode->i_private; + struct trace_iterator *iter; ++ int ret = 0; + + if (tracing_disabled) + return -ENODEV; + +- /* We only allow for reader of the pipe */ +- if (atomic_inc_return(&tracing_reader) != 1) { +- atomic_dec(&tracing_reader); +- return -EBUSY; ++ mutex_lock(&trace_types_lock); ++ ++ /* We only allow one reader per cpu */ ++ if (cpu_file == TRACE_PIPE_ALL_CPU) { ++ if (!cpumask_empty(tracing_reader_cpumask)) { ++ ret = -EBUSY; ++ goto out; ++ } ++ cpumask_setall(tracing_reader_cpumask); ++ } else { ++ if (!cpumask_test_cpu(cpu_file, tracing_reader_cpumask)) ++ cpumask_set_cpu(cpu_file, tracing_reader_cpumask); ++ else { ++ ret = -EBUSY; ++ goto out; ++ } + } + + /* create a buffer to store the information to pass to userspace */ + iter = kzalloc(sizeof(*iter), GFP_KERNEL); +- if (!iter) +- return -ENOMEM; +- +- if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { +- kfree(iter); +- return -ENOMEM; ++ if (!iter) { ++ ret = -ENOMEM; ++ goto out; + } + +- mutex_lock(&trace_types_lock); ++ /* ++ * We make a copy of the current tracer to avoid concurrent ++ * changes on it while we are reading. 
++ */ ++ iter->trace = kmalloc(sizeof(*iter->trace), GFP_KERNEL); ++ if (!iter->trace) { ++ ret = -ENOMEM; ++ goto fail; ++ } ++ if (current_trace) ++ *iter->trace = *current_trace; ++ ++ if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) { ++ ret = -ENOMEM; ++ goto fail; ++ } + + /* trace pipe does not show start of buffer */ + cpumask_setall(iter->started); + ++ iter->cpu_file = cpu_file; + iter->tr = &global_trace; +- iter->trace = current_trace; ++ mutex_init(&iter->mutex); + filp->private_data = iter; + + if (iter->trace->pipe_open) + iter->trace->pipe_open(iter); ++ ++out: + mutex_unlock(&trace_types_lock); ++ return ret; + +- return 0; ++fail: ++ kfree(iter->trace); ++ kfree(iter); ++ mutex_unlock(&trace_types_lock); ++ return ret; + } + + static int tracing_release_pipe(struct inode *inode, struct file *file) + { + struct trace_iterator *iter = file->private_data; + ++ mutex_lock(&trace_types_lock); ++ ++ if (iter->cpu_file == TRACE_PIPE_ALL_CPU) ++ cpumask_clear(tracing_reader_cpumask); ++ else ++ cpumask_clear_cpu(iter->cpu_file, tracing_reader_cpumask); ++ ++ mutex_unlock(&trace_types_lock); ++ + free_cpumask_var(iter->started); ++ mutex_destroy(&iter->mutex); ++ kfree(iter->trace); + kfree(iter); +- atomic_dec(&tracing_reader); + + return 0; + } +@@ -3167,67 +2771,57 @@ tracing_poll_pipe(struct file *filp, pol + } + } + +-/* +- * Consumer reader. +- */ +-static ssize_t +-tracing_read_pipe(struct file *filp, char __user *ubuf, +- size_t cnt, loff_t *ppos) ++ ++void default_wait_pipe(struct trace_iterator *iter) + { +- struct trace_iterator *iter = filp->private_data; +- ssize_t sret; ++ DEFINE_WAIT(wait); + +- /* return any leftover data */ +- sret = trace_seq_to_user(&iter->seq, ubuf, cnt); +- if (sret != -EBUSY) +- return sret; ++ prepare_to_wait(&trace_wait, &wait, TASK_INTERRUPTIBLE); + +- trace_seq_reset(&iter->seq); ++ if (trace_empty(iter)) ++ schedule(); + +- mutex_lock(&trace_types_lock); +- if (iter->trace->read) { +- sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); +- if (sret) +- goto out; +- } ++ finish_wait(&trace_wait, &wait); ++} ++ ++/* ++ * This is a make-shift waitqueue. ++ * A tracer might use this callback on some rare cases: ++ * ++ * 1) the current tracer might hold the runqueue lock when it wakes up ++ * a reader, hence a deadlock (sched, function, and function graph tracers) ++ * 2) the function tracers, trace all functions, we don't want ++ * the overhead of calling wake_up and friends ++ * (and tracing them too) ++ * ++ * Anyway, this is really very primitive wakeup. ++ */ ++void poll_wait_pipe(struct trace_iterator *iter) ++{ ++ set_current_state(TASK_INTERRUPTIBLE); ++ /* sleep for 100 msecs, and try again. */ ++ schedule_timeout(HZ / 10); ++} ++ ++/* Must be called with trace_types_lock mutex held. */ ++static int tracing_wait_pipe(struct file *filp) ++{ ++ struct trace_iterator *iter = filp->private_data; + +-waitagain: +- sret = 0; + while (trace_empty(iter)) { + + if ((filp->f_flags & O_NONBLOCK)) { +- sret = -EAGAIN; +- goto out; ++ return -EAGAIN; + } + +- /* +- * This is a make-shift waitqueue. The reason we don't use +- * an actual wait queue is because: +- * 1) we only ever have one waiter +- * 2) the tracing, traces all functions, we don't want +- * the overhead of calling wake_up and friends +- * (and tracing them too) +- * Anyway, this is really very primitive wakeup. 
+- */ +- set_current_state(TASK_INTERRUPTIBLE); +- iter->tr->waiter = current; ++ mutex_unlock(&iter->mutex); + +- mutex_unlock(&trace_types_lock); ++ iter->trace->wait_pipe(iter); + +- /* sleep for 100 msecs, and try again. */ +- schedule_timeout(HZ/10); ++ mutex_lock(&iter->mutex); + +- mutex_lock(&trace_types_lock); +- +- iter->tr->waiter = NULL; +- +- if (signal_pending(current)) { +- sret = -EINTR; +- goto out; +- } +- +- if (iter->trace != current_trace) +- goto out; ++ if (signal_pending(current)) ++ return -EINTR; + + /* + * We block until we read something and tracing is disabled. +@@ -3240,13 +2834,59 @@ waitagain: + */ + if (!tracer_enabled && iter->pos) + break; ++ } ++ ++ return 1; ++} ++ ++/* ++ * Consumer reader. ++ */ ++static ssize_t ++tracing_read_pipe(struct file *filp, char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ struct trace_iterator *iter = filp->private_data; ++ static struct tracer *old_tracer; ++ ssize_t sret; + +- continue; ++ /* return any leftover data */ ++ sret = trace_seq_to_user(&iter->seq, ubuf, cnt); ++ if (sret != -EBUSY) ++ return sret; ++ ++ trace_seq_init(&iter->seq); ++ ++ /* copy the tracer to avoid using a global lock all around */ ++ mutex_lock(&trace_types_lock); ++ if (unlikely(old_tracer != current_trace && current_trace)) { ++ old_tracer = current_trace; ++ *iter->trace = *current_trace; + } ++ mutex_unlock(&trace_types_lock); ++ ++ /* ++ * Avoid more than one consumer on a single file descriptor ++ * This is just a matter of traces coherency, the ring buffer itself ++ * is protected. ++ */ ++ mutex_lock(&iter->mutex); ++ if (iter->trace->read) { ++ sret = iter->trace->read(iter, filp, ubuf, cnt, ppos); ++ if (sret) ++ goto out; ++ } ++ ++waitagain: ++ sret = tracing_wait_pipe(filp); ++ if (sret <= 0) ++ goto out; + + /* stop when tracing is finished */ +- if (trace_empty(iter)) ++ if (trace_empty(iter)) { ++ sret = 0; + goto out; ++ } + + if (cnt >= PAGE_SIZE) + cnt = PAGE_SIZE - 1; +@@ -3267,8 +2907,8 @@ waitagain: + iter->seq.len = len; + break; + } +- +- trace_consume(iter); ++ if (ret != TRACE_TYPE_NO_CONSUME) ++ trace_consume(iter); + + if (iter->seq.len >= cnt) + break; +@@ -3277,7 +2917,7 @@ waitagain: + /* Now copy what we have to the user */ + sret = trace_seq_to_user(&iter->seq, ubuf, cnt); + if (iter->seq.readpos >= iter->seq.len) +- trace_seq_reset(&iter->seq); ++ trace_seq_init(&iter->seq); + + /* + * If there was nothing to send to user, inspite of consuming trace +@@ -3287,20 +2927,165 @@ waitagain: + goto waitagain; + + out: +- mutex_unlock(&trace_types_lock); ++ mutex_unlock(&iter->mutex); + + return sret; + } + ++static void tracing_pipe_buf_release(struct pipe_inode_info *pipe, ++ struct pipe_buffer *buf) ++{ ++ __free_page(buf->page); ++} ++ ++static void tracing_spd_release_pipe(struct splice_pipe_desc *spd, ++ unsigned int idx) ++{ ++ __free_page(spd->pages[idx]); ++} ++ ++static struct pipe_buf_operations tracing_pipe_buf_ops = { ++ .can_merge = 0, ++ .map = generic_pipe_buf_map, ++ .unmap = generic_pipe_buf_unmap, ++ .confirm = generic_pipe_buf_confirm, ++ .release = tracing_pipe_buf_release, ++ .steal = generic_pipe_buf_steal, ++ .get = generic_pipe_buf_get, ++}; ++ ++static size_t ++tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter) ++{ ++ size_t count; ++ int ret; ++ ++ /* Seq buffer is page-sized, exactly what we need. 
*/ ++ for (;;) { ++ count = iter->seq.len; ++ ret = print_trace_line(iter); ++ count = iter->seq.len - count; ++ if (rem < count) { ++ rem = 0; ++ iter->seq.len -= count; ++ break; ++ } ++ if (ret == TRACE_TYPE_PARTIAL_LINE) { ++ iter->seq.len -= count; ++ break; ++ } ++ ++ trace_consume(iter); ++ rem -= count; ++ if (!find_next_entry_inc(iter)) { ++ rem = 0; ++ iter->ent = NULL; ++ break; ++ } ++ } ++ ++ return rem; ++} ++ ++static ssize_t tracing_splice_read_pipe(struct file *filp, ++ loff_t *ppos, ++ struct pipe_inode_info *pipe, ++ size_t len, ++ unsigned int flags) ++{ ++ struct page *pages[PIPE_BUFFERS]; ++ struct partial_page partial[PIPE_BUFFERS]; ++ struct trace_iterator *iter = filp->private_data; ++ struct splice_pipe_desc spd = { ++ .pages = pages, ++ .partial = partial, ++ .nr_pages = 0, /* This gets updated below. */ ++ .flags = flags, ++ .ops = &tracing_pipe_buf_ops, ++ .spd_release = tracing_spd_release_pipe, ++ }; ++ static struct tracer *old_tracer; ++ ssize_t ret; ++ size_t rem; ++ unsigned int i; ++ ++ /* copy the tracer to avoid using a global lock all around */ ++ mutex_lock(&trace_types_lock); ++ if (unlikely(old_tracer != current_trace && current_trace)) { ++ old_tracer = current_trace; ++ *iter->trace = *current_trace; ++ } ++ mutex_unlock(&trace_types_lock); ++ ++ mutex_lock(&iter->mutex); ++ ++ if (iter->trace->splice_read) { ++ ret = iter->trace->splice_read(iter, filp, ++ ppos, pipe, len, flags); ++ if (ret) ++ goto out_err; ++ } ++ ++ ret = tracing_wait_pipe(filp); ++ if (ret <= 0) ++ goto out_err; ++ ++ if (!iter->ent && !find_next_entry_inc(iter)) { ++ ret = -EFAULT; ++ goto out_err; ++ } ++ ++ /* Fill as many pages as possible. */ ++ for (i = 0, rem = len; i < PIPE_BUFFERS && rem; i++) { ++ pages[i] = alloc_page(GFP_KERNEL); ++ if (!pages[i]) ++ break; ++ ++ rem = tracing_fill_pipe_page(rem, iter); ++ ++ /* Copy the data into the page, so we can start over. */ ++ ret = trace_seq_to_buffer(&iter->seq, ++ page_address(pages[i]), ++ iter->seq.len); ++ if (ret < 0) { ++ __free_page(pages[i]); ++ break; ++ } ++ partial[i].offset = 0; ++ partial[i].len = iter->seq.len; ++ ++ trace_seq_init(&iter->seq); ++ } ++ ++ mutex_unlock(&iter->mutex); ++ ++ spd.nr_pages = i; ++ ++ return splice_to_pipe(pipe, &spd); ++ ++out_err: ++ mutex_unlock(&iter->mutex); ++ ++ return ret; ++} ++ + static ssize_t + tracing_entries_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) + { + struct trace_array *tr = filp->private_data; +- char buf[64]; ++ char buf[96]; + int r; + +- r = sprintf(buf, "%lu\n", tr->entries >> 10); ++ mutex_lock(&trace_types_lock); ++ if (!ring_buffer_expanded) ++ r = sprintf(buf, "%lu (expanded: %lu)\n", ++ tr->entries >> 10, ++ trace_buf_size >> 10); ++ else ++ r = sprintf(buf, "%lu\n", tr->entries >> 10); ++ mutex_unlock(&trace_types_lock); ++ + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); + } + +@@ -3344,197 +3129,764 @@ tracing_entries_write(struct file *filp, + val <<= 10; + + if (val != global_trace.entries) { +- ret = ring_buffer_resize(global_trace.buffer, val); ++ ret = tracing_resize_ring_buffer(val); + if (ret < 0) { + cnt = ret; + goto out; + } ++ } + +- ret = ring_buffer_resize(max_tr.buffer, val); +- if (ret < 0) { +- int r; +- cnt = ret; +- r = ring_buffer_resize(global_trace.buffer, +- global_trace.entries); +- if (r < 0) { +- /* AARGH! We are left with different +- * size max buffer!!!! 
*/ +- WARN_ON(1); +- tracing_disabled = 1; +- } +- goto out; ++ filp->f_pos += cnt; ++ ++ /* If check pages failed, return ENOMEM */ ++ if (tracing_disabled) ++ cnt = -ENOMEM; ++ out: ++ for_each_tracing_cpu(cpu) { ++ if (global_trace.data[cpu]) ++ atomic_dec(&global_trace.data[cpu]->disabled); ++ if (max_tr.data[cpu]) ++ atomic_dec(&max_tr.data[cpu]->disabled); ++ } ++ ++ tracing_start(); ++ max_tr.entries = global_trace.entries; ++ mutex_unlock(&trace_types_lock); ++ ++ return cnt; ++} ++ ++static int mark_printk(const char *fmt, ...) ++{ ++ int ret; ++ va_list args; ++ va_start(args, fmt); ++ ret = trace_vprintk(0, fmt, args); ++ va_end(args); ++ return ret; ++} ++ ++static ssize_t ++tracing_mark_write(struct file *filp, const char __user *ubuf, ++ size_t cnt, loff_t *fpos) ++{ ++ char *buf; ++ char *end; ++ ++ if (tracing_disabled) ++ return -EINVAL; ++ ++ if (cnt > TRACE_BUF_SIZE) ++ cnt = TRACE_BUF_SIZE; ++ ++ buf = kmalloc(cnt + 1, GFP_KERNEL); ++ if (buf == NULL) ++ return -ENOMEM; ++ ++ if (copy_from_user(buf, ubuf, cnt)) { ++ kfree(buf); ++ return -EFAULT; ++ } ++ ++ /* Cut from the first nil or newline. */ ++ buf[cnt] = '\0'; ++ end = strchr(buf, '\n'); ++ if (end) ++ *end = '\0'; ++ ++ cnt = mark_printk("%s\n", buf); ++ kfree(buf); ++ *fpos += cnt; ++ ++ return cnt; ++} ++ ++static const struct file_operations tracing_max_lat_fops = { ++ .open = tracing_open_generic, ++ .read = tracing_max_lat_read, ++ .write = tracing_max_lat_write, ++}; ++ ++static const struct file_operations tracing_ctrl_fops = { ++ .open = tracing_open_generic, ++ .read = tracing_ctrl_read, ++ .write = tracing_ctrl_write, ++}; ++ ++static const struct file_operations set_tracer_fops = { ++ .open = tracing_open_generic, ++ .read = tracing_set_trace_read, ++ .write = tracing_set_trace_write, ++}; ++ ++static const struct file_operations tracing_pipe_fops = { ++ .open = tracing_open_pipe, ++ .poll = tracing_poll_pipe, ++ .read = tracing_read_pipe, ++ .splice_read = tracing_splice_read_pipe, ++ .release = tracing_release_pipe, ++}; ++ ++static const struct file_operations tracing_entries_fops = { ++ .open = tracing_open_generic, ++ .read = tracing_entries_read, ++ .write = tracing_entries_write, ++}; ++ ++static const struct file_operations tracing_mark_fops = { ++ .open = tracing_open_generic, ++ .write = tracing_mark_write, ++}; ++ ++struct ftrace_buffer_info { ++ struct trace_array *tr; ++ void *spare; ++ int cpu; ++ unsigned int read; ++}; ++ ++static int tracing_buffers_open(struct inode *inode, struct file *filp) ++{ ++ int cpu = (int)(long)inode->i_private; ++ struct ftrace_buffer_info *info; ++ ++ if (tracing_disabled) ++ return -ENODEV; ++ ++ info = kzalloc(sizeof(*info), GFP_KERNEL); ++ if (!info) ++ return -ENOMEM; ++ ++ info->tr = &global_trace; ++ info->cpu = cpu; ++ info->spare = ring_buffer_alloc_read_page(info->tr->buffer); ++ /* Force reading ring buffer for first read */ ++ info->read = (unsigned int)-1; ++ if (!info->spare) ++ goto out; ++ ++ filp->private_data = info; ++ ++ return 0; ++ ++ out: ++ kfree(info); ++ return -ENOMEM; ++} ++ ++static ssize_t ++tracing_buffers_read(struct file *filp, char __user *ubuf, ++ size_t count, loff_t *ppos) ++{ ++ struct ftrace_buffer_info *info = filp->private_data; ++ unsigned int pos; ++ ssize_t ret; ++ size_t size; ++ ++ if (!count) ++ return 0; ++ ++ /* Do we have previous read data to read? 
*/ ++ if (info->read < PAGE_SIZE) ++ goto read; ++ ++ info->read = 0; ++ ++ ret = ring_buffer_read_page(info->tr->buffer, ++ &info->spare, ++ count, ++ info->cpu, 0); ++ if (ret < 0) ++ return 0; ++ ++ pos = ring_buffer_page_len(info->spare); ++ ++ if (pos < PAGE_SIZE) ++ memset(info->spare + pos, 0, PAGE_SIZE - pos); ++ ++read: ++ size = PAGE_SIZE - info->read; ++ if (size > count) ++ size = count; ++ ++ ret = copy_to_user(ubuf, info->spare + info->read, size); ++ if (ret == size) ++ return -EFAULT; ++ size -= ret; ++ ++ *ppos += size; ++ info->read += size; ++ ++ return size; ++} ++ ++static int tracing_buffers_release(struct inode *inode, struct file *file) ++{ ++ struct ftrace_buffer_info *info = file->private_data; ++ ++ ring_buffer_free_read_page(info->tr->buffer, info->spare); ++ kfree(info); ++ ++ return 0; ++} ++ ++struct buffer_ref { ++ struct ring_buffer *buffer; ++ void *page; ++ int ref; ++}; ++ ++static void buffer_pipe_buf_release(struct pipe_inode_info *pipe, ++ struct pipe_buffer *buf) ++{ ++ struct buffer_ref *ref = (struct buffer_ref *)buf->private; ++ ++ if (--ref->ref) ++ return; ++ ++ ring_buffer_free_read_page(ref->buffer, ref->page); ++ kfree(ref); ++ buf->private = 0; ++} ++ ++static int buffer_pipe_buf_steal(struct pipe_inode_info *pipe, ++ struct pipe_buffer *buf) ++{ ++ return 1; ++} ++ ++static void buffer_pipe_buf_get(struct pipe_inode_info *pipe, ++ struct pipe_buffer *buf) ++{ ++ struct buffer_ref *ref = (struct buffer_ref *)buf->private; ++ ++ ref->ref++; ++} ++ ++/* Pipe buffer operations for a buffer. */ ++static struct pipe_buf_operations buffer_pipe_buf_ops = { ++ .can_merge = 0, ++ .map = generic_pipe_buf_map, ++ .unmap = generic_pipe_buf_unmap, ++ .confirm = generic_pipe_buf_confirm, ++ .release = buffer_pipe_buf_release, ++ .steal = buffer_pipe_buf_steal, ++ .get = buffer_pipe_buf_get, ++}; ++ ++/* ++ * Callback from splice_to_pipe(), if we need to release some pages ++ * at the end of the spd in case we error'ed out in filling the pipe. ++ */ ++static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i) ++{ ++ struct buffer_ref *ref = ++ (struct buffer_ref *)spd->partial[i].private; ++ ++ if (--ref->ref) ++ return; ++ ++ ring_buffer_free_read_page(ref->buffer, ref->page); ++ kfree(ref); ++ spd->partial[i].private = 0; ++} ++ ++static ssize_t ++tracing_buffers_splice_read(struct file *file, loff_t *ppos, ++ struct pipe_inode_info *pipe, size_t len, ++ unsigned int flags) ++{ ++ struct ftrace_buffer_info *info = file->private_data; ++ struct partial_page partial[PIPE_BUFFERS]; ++ struct page *pages[PIPE_BUFFERS]; ++ struct splice_pipe_desc spd = { ++ .pages = pages, ++ .partial = partial, ++ .flags = flags, ++ .ops = &buffer_pipe_buf_ops, ++ .spd_release = buffer_spd_release, ++ }; ++ struct buffer_ref *ref; ++ int size, i; ++ size_t ret; ++ ++ /* ++ * We can't seek on a buffer input ++ */ ++ if (unlikely(*ppos)) ++ return -ESPIPE; ++ ++ ++ for (i = 0; i < PIPE_BUFFERS && len; i++, len -= size) { ++ struct page *page; ++ int r; ++ ++ ref = kzalloc(sizeof(*ref), GFP_KERNEL); ++ if (!ref) ++ break; ++ ++ ref->buffer = info->tr->buffer; ++ ref->page = ring_buffer_alloc_read_page(ref->buffer); ++ if (!ref->page) { ++ kfree(ref); ++ break; ++ } ++ ++ r = ring_buffer_read_page(ref->buffer, &ref->page, ++ len, info->cpu, 0); ++ if (r < 0) { ++ ring_buffer_free_read_page(ref->buffer, ++ ref->page); ++ kfree(ref); ++ break; + } + +- global_trace.entries = val; ++ /* ++ * zero out any left over data, this is going to ++ * user land. 
++ */ ++ size = ring_buffer_page_len(ref->page); ++ if (size < PAGE_SIZE) ++ memset(ref->page + size, 0, PAGE_SIZE - size); ++ ++ page = virt_to_page(ref->page); ++ ++ spd.pages[i] = page; ++ spd.partial[i].len = PAGE_SIZE; ++ spd.partial[i].offset = 0; ++ spd.partial[i].private = (unsigned long)ref; ++ spd.nr_pages++; + } + +- filp->f_pos += cnt; ++ spd.nr_pages = i; ++ ++ /* did we read anything? */ ++ if (!spd.nr_pages) { ++ if (flags & SPLICE_F_NONBLOCK) ++ ret = -EAGAIN; ++ else ++ ret = 0; ++ /* TODO: block */ ++ return ret; ++ } ++ ++ ret = splice_to_pipe(pipe, &spd); ++ ++ return ret; ++} ++ ++static const struct file_operations tracing_buffers_fops = { ++ .open = tracing_buffers_open, ++ .read = tracing_buffers_read, ++ .release = tracing_buffers_release, ++ .splice_read = tracing_buffers_splice_read, ++ .llseek = no_llseek, ++}; ++ ++#ifdef CONFIG_DYNAMIC_FTRACE ++ ++int __weak ftrace_arch_read_dyn_info(char *buf, int size) ++{ ++ return 0; ++} ++ ++static ssize_t ++tracing_read_dyn_info(struct file *filp, char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ static char ftrace_dyn_info_buffer[1024]; ++ static DEFINE_MUTEX(dyn_info_mutex); ++ unsigned long *p = filp->private_data; ++ char *buf = ftrace_dyn_info_buffer; ++ int size = ARRAY_SIZE(ftrace_dyn_info_buffer); ++ int r; ++ ++ mutex_lock(&dyn_info_mutex); ++ r = sprintf(buf, "%ld ", *p); ++ ++ r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r); ++ buf[r++] = '\n'; ++ ++ r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); ++ ++ mutex_unlock(&dyn_info_mutex); ++ ++ return r; ++} ++ ++static const struct file_operations tracing_dyn_info_fops = { ++ .open = tracing_open_generic, ++ .read = tracing_read_dyn_info, ++}; ++#endif ++ ++static struct dentry *d_tracer; ++ ++struct dentry *tracing_init_dentry(void) ++{ ++ static int once; ++ ++ if (d_tracer) ++ return d_tracer; ++ ++ if (!debugfs_initialized()) ++ return NULL; ++ ++ d_tracer = debugfs_create_dir("tracing", NULL); ++ ++ if (!d_tracer && !once) { ++ once = 1; ++ pr_warning("Could not create debugfs directory 'tracing'\n"); ++ return NULL; ++ } ++ ++ return d_tracer; ++} ++ ++static struct dentry *d_percpu; ++ ++struct dentry *tracing_dentry_percpu(void) ++{ ++ static int once; ++ struct dentry *d_tracer; ++ ++ if (d_percpu) ++ return d_percpu; ++ ++ d_tracer = tracing_init_dentry(); ++ ++ if (!d_tracer) ++ return NULL; ++ ++ d_percpu = debugfs_create_dir("per_cpu", d_tracer); ++ ++ if (!d_percpu && !once) { ++ once = 1; ++ pr_warning("Could not create debugfs directory 'per_cpu'\n"); ++ return NULL; ++ } ++ ++ return d_percpu; ++} ++ ++static void tracing_init_debugfs_percpu(long cpu) ++{ ++ struct dentry *d_percpu = tracing_dentry_percpu(); ++ struct dentry *entry, *d_cpu; ++ /* strlen(cpu) + MAX(log10(cpu)) + '\0' */ ++ char cpu_dir[7]; ++ ++ if (cpu > 999 || cpu < 0) ++ return; ++ ++ sprintf(cpu_dir, "cpu%ld", cpu); ++ d_cpu = debugfs_create_dir(cpu_dir, d_percpu); ++ if (!d_cpu) { ++ pr_warning("Could not create debugfs '%s' entry\n", cpu_dir); ++ return; ++ } ++ ++ /* per cpu trace_pipe */ ++ entry = debugfs_create_file("trace_pipe", 0444, d_cpu, ++ (void *) cpu, &tracing_pipe_fops); ++ if (!entry) ++ pr_warning("Could not create debugfs 'trace_pipe' entry\n"); ++ ++ /* per cpu trace */ ++ entry = debugfs_create_file("trace", 0644, d_cpu, ++ (void *) cpu, &tracing_fops); ++ if (!entry) ++ pr_warning("Could not create debugfs 'trace' entry\n"); ++ ++ entry = debugfs_create_file("trace_pipe_raw", 0444, d_cpu, ++ (void *) cpu, &tracing_buffers_fops); ++ if (!entry) 
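/*
 * [Editor's note: illustrative sketch, not part of the patch.]
 * tracing_init_debugfs_percpu() below gives every CPU its own trace,
 * trace_pipe and trace_pipe_raw file under per_cpu/cpuN/.  A consumer
 * interested in a single CPU can read just that buffer, e.g. (assuming
 * debugfs is mounted at /sys/kernel/debug):
 *
 *	#include <stdio.h>
 *
 *	int main(void)
 *	{
 *		char line[4096];
 *		FILE *f = fopen("/sys/kernel/debug/tracing/per_cpu/cpu0/trace_pipe", "r");
 *
 *		if (!f)
 *			return 1;
 *		while (fgets(line, sizeof(line), f))	// blocks until data arrives
 *			fputs(line, stdout);
 *		fclose(f);
 *		return 0;
 *	}
 */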
++ pr_warning("Could not create debugfs 'trace_pipe_raw' entry\n"); ++} ++ ++#ifdef CONFIG_FTRACE_SELFTEST ++/* Let selftest have access to static functions in this file */ ++#include "trace_selftest.c" ++#endif ++ ++struct trace_option_dentry { ++ struct tracer_opt *opt; ++ struct tracer_flags *flags; ++ struct dentry *entry; ++}; ++ ++static ssize_t ++trace_options_read(struct file *filp, char __user *ubuf, size_t cnt, ++ loff_t *ppos) ++{ ++ struct trace_option_dentry *topt = filp->private_data; ++ char *buf; ++ ++ if (topt->flags->val & topt->opt->bit) ++ buf = "1\n"; ++ else ++ buf = "0\n"; ++ ++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); ++} ++ ++static ssize_t ++trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt, ++ loff_t *ppos) ++{ ++ struct trace_option_dentry *topt = filp->private_data; ++ unsigned long val; ++ char buf[64]; ++ int ret; ++ ++ if (cnt >= sizeof(buf)) ++ return -EINVAL; ++ ++ if (copy_from_user(&buf, ubuf, cnt)) ++ return -EFAULT; ++ ++ buf[cnt] = 0; ++ ++ ret = strict_strtoul(buf, 10, &val); ++ if (ret < 0) ++ return ret; ++ ++ ret = 0; ++ switch (val) { ++ case 0: ++ /* do nothing if already cleared */ ++ if (!(topt->flags->val & topt->opt->bit)) ++ break; ++ ++ mutex_lock(&trace_types_lock); ++ if (current_trace->set_flag) ++ ret = current_trace->set_flag(topt->flags->val, ++ topt->opt->bit, 0); ++ mutex_unlock(&trace_types_lock); ++ if (ret) ++ return ret; ++ topt->flags->val &= ~topt->opt->bit; ++ break; ++ case 1: ++ /* do nothing if already set */ ++ if (topt->flags->val & topt->opt->bit) ++ break; ++ ++ mutex_lock(&trace_types_lock); ++ if (current_trace->set_flag) ++ ret = current_trace->set_flag(topt->flags->val, ++ topt->opt->bit, 1); ++ mutex_unlock(&trace_types_lock); ++ if (ret) ++ return ret; ++ topt->flags->val |= topt->opt->bit; ++ break; + +- /* If check pages failed, return ENOMEM */ +- if (tracing_disabled) +- cnt = -ENOMEM; +- out: +- for_each_tracing_cpu(cpu) { +- if (global_trace.data[cpu]) +- atomic_dec(&global_trace.data[cpu]->disabled); +- if (max_tr.data[cpu]) +- atomic_dec(&max_tr.data[cpu]->disabled); ++ default: ++ return -EINVAL; + } + +- tracing_start(); +- max_tr.entries = global_trace.entries; +- mutex_unlock(&trace_types_lock); ++ *ppos += cnt; + + return cnt; + } + +-static int mark_printk(const char *fmt, ...) 
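/*
 * [Editor's note: illustrative sketch, not part of the patch.]
 * create_trace_option_files() further down turns each tracer_opt of the
 * current tracer into a 0/1 file under options/; writing that file lands
 * in the tracer's ->set_flag() callback via trace_options_write() above.
 * A tracer exposes such an option roughly like this (assuming the
 * tracer_opt/tracer_flags definitions in kernel/trace/trace.h;
 * MY_OPT_VERBOSE is hypothetical):
 *
 *	#define MY_OPT_VERBOSE	0x1		// hypothetical option bit
 *
 *	static struct tracer_opt my_opts[] = {
 *		{ .name = "verbose", .bit = MY_OPT_VERBOSE },
 *		{ }				// terminator
 *	};
 *
 *	static struct tracer_flags my_flags = {
 *		.val  = 0,
 *		.opts = my_opts,
 *	};
 *
 *	static int my_set_flag(u32 old_flags, u32 bit, int set)
 *	{
 *		return 0;	// accept the change; non-zero rejects it
 *	}
 *
 *	// in the struct tracer:  .flags = &my_flags, .set_flag = my_set_flag,
 */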
++ ++static const struct file_operations trace_options_fops = { ++ .open = tracing_open_generic, ++ .read = trace_options_read, ++ .write = trace_options_write, ++}; ++ ++static ssize_t ++trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt, ++ loff_t *ppos) + { +- int ret; +- va_list args; +- va_start(args, fmt); +- ret = trace_vprintk(0, -1, fmt, args); +- va_end(args); +- return ret; ++ long index = (long)filp->private_data; ++ char *buf; ++ ++ if (trace_flags & (1 << index)) ++ buf = "1\n"; ++ else ++ buf = "0\n"; ++ ++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); + } + + static ssize_t +-tracing_mark_write(struct file *filp, const char __user *ubuf, +- size_t cnt, loff_t *fpos) ++trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt, ++ loff_t *ppos) + { +- char *buf; +- char *end; ++ long index = (long)filp->private_data; ++ char buf[64]; ++ unsigned long val; ++ int ret; + +- if (tracing_disabled) ++ if (cnt >= sizeof(buf)) + return -EINVAL; + +- if (cnt > TRACE_BUF_SIZE) +- cnt = TRACE_BUF_SIZE; ++ if (copy_from_user(&buf, ubuf, cnt)) ++ return -EFAULT; + +- buf = kmalloc(cnt + 1, GFP_KERNEL); +- if (buf == NULL) +- return -ENOMEM; ++ buf[cnt] = 0; + +- if (copy_from_user(buf, ubuf, cnt)) { +- kfree(buf); +- return -EFAULT; +- } ++ ret = strict_strtoul(buf, 10, &val); ++ if (ret < 0) ++ return ret; + +- /* Cut from the first nil or newline. */ +- buf[cnt] = '\0'; +- end = strchr(buf, '\n'); +- if (end) +- *end = '\0'; ++ switch (val) { ++ case 0: ++ trace_flags &= ~(1 << index); ++ break; ++ case 1: ++ trace_flags |= 1 << index; ++ break; + +- cnt = mark_printk("%s\n", buf); +- kfree(buf); +- *fpos += cnt; ++ default: ++ return -EINVAL; ++ } ++ ++ *ppos += cnt; + + return cnt; + } + +-static struct file_operations tracing_max_lat_fops = { +- .open = tracing_open_generic, +- .read = tracing_max_lat_read, +- .write = tracing_max_lat_write, +-}; +- +-static struct file_operations tracing_ctrl_fops = { +- .open = tracing_open_generic, +- .read = tracing_ctrl_read, +- .write = tracing_ctrl_write, ++static const struct file_operations trace_options_core_fops = { ++ .open = tracing_open_generic, ++ .read = trace_options_core_read, ++ .write = trace_options_core_write, + }; + +-static struct file_operations set_tracer_fops = { +- .open = tracing_open_generic, +- .read = tracing_set_trace_read, +- .write = tracing_set_trace_write, +-}; ++static struct dentry *trace_options_init_dentry(void) ++{ ++ struct dentry *d_tracer; ++ static struct dentry *t_options; + +-static struct file_operations tracing_pipe_fops = { +- .open = tracing_open_pipe, +- .poll = tracing_poll_pipe, +- .read = tracing_read_pipe, +- .release = tracing_release_pipe, +-}; ++ if (t_options) ++ return t_options; + +-static struct file_operations tracing_entries_fops = { +- .open = tracing_open_generic, +- .read = tracing_entries_read, +- .write = tracing_entries_write, +-}; ++ d_tracer = tracing_init_dentry(); ++ if (!d_tracer) ++ return NULL; + +-static struct file_operations tracing_mark_fops = { +- .open = tracing_open_generic, +- .write = tracing_mark_write, +-}; ++ t_options = debugfs_create_dir("options", d_tracer); ++ if (!t_options) { ++ pr_warning("Could not create debugfs directory 'options'\n"); ++ return NULL; ++ } + +-#ifdef CONFIG_DYNAMIC_FTRACE ++ return t_options; ++} + +-int __weak ftrace_arch_read_dyn_info(char *buf, int size) ++static void ++create_trace_option_file(struct trace_option_dentry *topt, ++ struct tracer_flags *flags, ++ struct 
tracer_opt *opt) + { +- return 0; ++ struct dentry *t_options; ++ struct dentry *entry; ++ ++ t_options = trace_options_init_dentry(); ++ if (!t_options) ++ return; ++ ++ topt->flags = flags; ++ topt->opt = opt; ++ ++ entry = debugfs_create_file(opt->name, 0644, t_options, topt, ++ &trace_options_fops); ++ ++ topt->entry = entry; ++ + } + +-static ssize_t +-tracing_read_dyn_info(struct file *filp, char __user *ubuf, +- size_t cnt, loff_t *ppos) ++static struct trace_option_dentry * ++create_trace_option_files(struct tracer *tracer) + { +- static char ftrace_dyn_info_buffer[1024]; +- static DEFINE_MUTEX(dyn_info_mutex); +- unsigned long *p = filp->private_data; +- char *buf = ftrace_dyn_info_buffer; +- int size = ARRAY_SIZE(ftrace_dyn_info_buffer); +- int r; ++ struct trace_option_dentry *topts; ++ struct tracer_flags *flags; ++ struct tracer_opt *opts; ++ int cnt; + +- mutex_lock(&dyn_info_mutex); +- r = sprintf(buf, "%ld ", *p); ++ if (!tracer) ++ return NULL; + +- r += ftrace_arch_read_dyn_info(buf+r, (size-1)-r); +- buf[r++] = '\n'; ++ flags = tracer->flags; + +- r = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); ++ if (!flags || !flags->opts) ++ return NULL; + +- mutex_unlock(&dyn_info_mutex); ++ opts = flags->opts; + +- return r; +-} ++ for (cnt = 0; opts[cnt].name; cnt++) ++ ; + +-static struct file_operations tracing_dyn_info_fops = { +- .open = tracing_open_generic, +- .read = tracing_read_dyn_info, +-}; +-#endif ++ topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL); ++ if (!topts) ++ return NULL; + +-static struct dentry *d_tracer; ++ for (cnt = 0; opts[cnt].name; cnt++) ++ create_trace_option_file(&topts[cnt], flags, ++ &opts[cnt]); + +-struct dentry *tracing_init_dentry(void) ++ return topts; ++} ++ ++static void ++destroy_trace_option_files(struct trace_option_dentry *topts) + { +- static int once; ++ int cnt; + +- if (d_tracer) +- return d_tracer; ++ if (!topts) ++ return; + +- d_tracer = debugfs_create_dir("tracing", NULL); ++ for (cnt = 0; topts[cnt].opt; cnt++) { ++ if (topts[cnt].entry) ++ debugfs_remove(topts[cnt].entry); ++ } + +- if (!d_tracer && !once) { +- once = 1; +- pr_warning("Could not create debugfs directory 'tracing'\n"); ++ kfree(topts); ++} ++ ++static struct dentry * ++create_trace_option_core_file(const char *option, long index) ++{ ++ struct dentry *t_options; ++ struct dentry *entry; ++ ++ t_options = trace_options_init_dentry(); ++ if (!t_options) + return NULL; +- } + +- return d_tracer; ++ entry = debugfs_create_file(option, 0644, t_options, (void *)index, ++ &trace_options_core_fops); ++ ++ return entry; + } + +-#ifdef CONFIG_FTRACE_SELFTEST +-/* Let selftest have access to static functions in this file */ +-#include "trace_selftest.c" +-#endif ++static __init void create_trace_options_dir(void) ++{ ++ struct dentry *t_options; ++ struct dentry *entry; ++ int i; ++ ++ t_options = trace_options_init_dentry(); ++ if (!t_options) ++ return; ++ ++ for (i = 0; trace_options[i]; i++) { ++ entry = create_trace_option_core_file(trace_options[i], i); ++ if (!entry) ++ pr_warning("Could not create debugfs %s entry\n", ++ trace_options[i]); ++ } ++} + + static __init int tracer_init_debugfs(void) + { + struct dentry *d_tracer; + struct dentry *entry; ++ int cpu; + + d_tracer = tracing_init_dentry(); + +@@ -3548,18 +3900,15 @@ static __init int tracer_init_debugfs(vo + if (!entry) + pr_warning("Could not create debugfs 'trace_options' entry\n"); + ++ create_trace_options_dir(); ++ + entry = debugfs_create_file("tracing_cpumask", 0644, d_tracer, + NULL, 
&tracing_cpumask_fops); + if (!entry) + pr_warning("Could not create debugfs 'tracing_cpumask' entry\n"); + +- entry = debugfs_create_file("latency_trace", 0444, d_tracer, +- &global_trace, &tracing_lt_fops); +- if (!entry) +- pr_warning("Could not create debugfs 'latency_trace' entry\n"); +- +- entry = debugfs_create_file("trace", 0444, d_tracer, +- &global_trace, &tracing_fops); ++ entry = debugfs_create_file("trace", 0644, d_tracer, ++ (void *) TRACE_PIPE_ALL_CPU, &tracing_fops); + if (!entry) + pr_warning("Could not create debugfs 'trace' entry\n"); + +@@ -3590,8 +3939,8 @@ static __init int tracer_init_debugfs(vo + if (!entry) + pr_warning("Could not create debugfs 'README' entry\n"); + +- entry = debugfs_create_file("trace_pipe", 0644, d_tracer, +- NULL, &tracing_pipe_fops); ++ entry = debugfs_create_file("trace_pipe", 0444, d_tracer, ++ (void *) TRACE_PIPE_ALL_CPU, &tracing_pipe_fops); + if (!entry) + pr_warning("Could not create debugfs " + "'trace_pipe' entry\n"); +@@ -3619,77 +3968,12 @@ static __init int tracer_init_debugfs(vo + #ifdef CONFIG_SYSPROF_TRACER + init_tracer_sysprof_debugfs(d_tracer); + #endif +- return 0; +-} +- +-int trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args) +-{ +- static DEFINE_SPINLOCK(trace_buf_lock); +- static char trace_buf[TRACE_BUF_SIZE]; +- +- struct ring_buffer_event *event; +- struct trace_array *tr = &global_trace; +- struct trace_array_cpu *data; +- int cpu, len = 0, size, pc; +- struct print_entry *entry; +- unsigned long irq_flags; +- +- if (tracing_disabled || tracing_selftest_running) +- return 0; +- +- pc = preempt_count(); +- preempt_disable_notrace(); +- cpu = raw_smp_processor_id(); +- data = tr->data[cpu]; +- +- if (unlikely(atomic_read(&data->disabled))) +- goto out; +- +- pause_graph_tracing(); +- spin_lock_irqsave(&trace_buf_lock, irq_flags); +- len = vsnprintf(trace_buf, TRACE_BUF_SIZE, fmt, args); +- +- len = min(len, TRACE_BUF_SIZE-1); +- trace_buf[len] = 0; +- +- size = sizeof(*entry) + len + 1; +- event = ring_buffer_lock_reserve(tr->buffer, size, &irq_flags); +- if (!event) +- goto out_unlock; +- entry = ring_buffer_event_data(event); +- tracing_generic_entry_update(&entry->ent, irq_flags, pc); +- entry->ent.type = TRACE_PRINT; +- entry->ip = ip; +- entry->depth = depth; + +- memcpy(&entry->buf, trace_buf, len); +- entry->buf[len] = 0; +- ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +- +- out_unlock: +- spin_unlock_irqrestore(&trace_buf_lock, irq_flags); +- unpause_graph_tracing(); +- out: +- preempt_enable_notrace(); +- +- return len; +-} +-EXPORT_SYMBOL_GPL(trace_vprintk); +- +-int __ftrace_printk(unsigned long ip, const char *fmt, ...) 
+-{ +- int ret; +- va_list ap; +- +- if (!(trace_flags & TRACE_ITER_PRINTK)) +- return 0; ++ for_each_tracing_cpu(cpu) ++ tracing_init_debugfs_percpu(cpu); + +- va_start(ap, fmt); +- ret = trace_vprintk(ip, task_curr_ret_stack(current), fmt, ap); +- va_end(ap); +- return ret; ++ return 0; + } +-EXPORT_SYMBOL_GPL(__ftrace_printk); + + static int trace_panic_handler(struct notifier_block *this, + unsigned long event, void *unused) +@@ -3750,14 +4034,15 @@ trace_printk_seq(struct trace_seq *s) + + printk(KERN_TRACE "%s", s->buffer); + +- trace_seq_reset(s); ++ trace_seq_init(s); + } + +-void ftrace_dump(void) ++static void __ftrace_dump(bool disable_tracing) + { +- static DEFINE_SPINLOCK(ftrace_dump_lock); ++ static DEFINE_RAW_SPINLOCK(ftrace_dump_lock); + /* use static because iter can be a bit big for the stack */ + static struct trace_iterator iter; ++ unsigned int old_userobj; + static int dump_ran; + unsigned long flags; + int cnt = 0, cpu; +@@ -3769,21 +4054,26 @@ void ftrace_dump(void) + + dump_ran = 1; + +- /* No turning back! */ + tracing_off(); +- ftrace_kill(); ++ ++ if (disable_tracing) ++ ftrace_kill(); + + for_each_tracing_cpu(cpu) { + atomic_inc(&global_trace.data[cpu]->disabled); + } + ++ old_userobj = trace_flags & TRACE_ITER_SYM_USEROBJ; ++ + /* don't look at user memory in panic mode */ + trace_flags &= ~TRACE_ITER_SYM_USEROBJ; + + printk(KERN_TRACE "Dumping ftrace buffer:\n"); + ++ /* Simulate the iterator */ + iter.tr = &global_trace; + iter.trace = current_trace; ++ iter.cpu_file = TRACE_PIPE_ALL_CPU; + + /* + * We need to stop all tracing on all CPUS to read the +@@ -3819,13 +4109,30 @@ void ftrace_dump(void) + else + printk(KERN_TRACE "---------------------------------\n"); + ++ /* Re-enable tracing if requested */ ++ if (!disable_tracing) { ++ trace_flags |= old_userobj; ++ ++ for_each_tracing_cpu(cpu) { ++ atomic_dec(&global_trace.data[cpu]->disabled); ++ } ++ tracing_on(); ++ } ++ + out: + spin_unlock_irqrestore(&ftrace_dump_lock, flags); + } + ++/* By default: disable tracing after the dump */ ++void ftrace_dump(void) ++{ ++ __ftrace_dump(true); ++} ++ + __init static int tracer_alloc_buffers(void) + { + struct trace_array_cpu *data; ++ int ring_buf_size; + int i; + int ret = -ENOMEM; + +@@ -3835,11 +4142,21 @@ __init static int tracer_alloc_buffers(v + if (!alloc_cpumask_var(&tracing_cpumask, GFP_KERNEL)) + goto out_free_buffer_mask; + ++ if (!alloc_cpumask_var(&tracing_reader_cpumask, GFP_KERNEL)) ++ goto out_free_tracing_cpumask; ++ ++ /* To save memory, keep the ring buffer size to its minimum */ ++ if (ring_buffer_expanded) ++ ring_buf_size = trace_buf_size; ++ else ++ ring_buf_size = 1; ++ + cpumask_copy(tracing_buffer_mask, cpu_possible_mask); + cpumask_copy(tracing_cpumask, cpu_all_mask); ++ cpumask_clear(tracing_reader_cpumask); + + /* TODO: make the number of buffers hot pluggable with CPUS */ +- global_trace.buffer = ring_buffer_alloc(trace_buf_size, ++ global_trace.buffer = ring_buffer_alloc(ring_buf_size, + TRACE_BUFFER_FLAGS); + if (!global_trace.buffer) { + printk(KERN_ERR "tracer: failed to allocate ring buffer!\n"); +@@ -3850,7 +4167,7 @@ __init static int tracer_alloc_buffers(v + + + #ifdef CONFIG_TRACER_MAX_TRACE +- max_tr.buffer = ring_buffer_alloc(trace_buf_size, ++ max_tr.buffer = ring_buffer_alloc(ring_buf_size, + TRACE_BUFFER_FLAGS); + if (!max_tr.buffer) { + printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n"); +@@ -3871,14 +4188,10 @@ __init static int tracer_alloc_buffers(v + trace_init_cmdlines(); + + 
register_tracer(&nop_trace); ++ current_trace = &nop_trace; + #ifdef CONFIG_BOOT_TRACER + register_tracer(&boot_tracer); +- current_trace = &boot_tracer; +- current_trace->init(&global_trace); +-#else +- current_trace = &nop_trace; + #endif +- + /* All seems OK, enable tracing */ + tracing_disabled = 0; + +@@ -3890,11 +4203,34 @@ __init static int tracer_alloc_buffers(v + return 0; + + out_free_cpumask: ++ free_cpumask_var(tracing_reader_cpumask); ++out_free_tracing_cpumask: + free_cpumask_var(tracing_cpumask); + out_free_buffer_mask: + free_cpumask_var(tracing_buffer_mask); + out: + return ret; + } ++ ++__init static int clear_boot_tracer(void) ++{ ++ /* ++ * The default tracer at boot buffer is an init section. ++ * This function is called in lateinit. If we did not ++ * find the boot tracer, then clear it out, to prevent ++ * later registration from accessing the buffer that is ++ * about to be freed. ++ */ ++ if (!default_bootup_tracer) ++ return 0; ++ ++ printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n", ++ default_bootup_tracer); ++ default_bootup_tracer = NULL; ++ ++ return 0; ++} ++ + early_initcall(tracer_alloc_buffers); + fs_initcall(tracer_init_debugfs); ++late_initcall(clear_boot_tracer); +Index: linux-2.6-tip/kernel/trace/trace.h +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/trace.h ++++ linux-2.6-tip/kernel/trace/trace.h +@@ -9,6 +9,8 @@ + #include + #include + #include ++#include ++#include + + enum trace_type { + __TRACE_FIRST_TYPE = 0, +@@ -16,9 +18,9 @@ enum trace_type { + TRACE_FN, + TRACE_CTX, + TRACE_WAKE, +- TRACE_CONT, + TRACE_STACK, + TRACE_PRINT, ++ TRACE_BPRINT, + TRACE_SPECIAL, + TRACE_MMIO_RW, + TRACE_MMIO_MAP, +@@ -29,9 +31,14 @@ enum trace_type { + TRACE_GRAPH_ENT, + TRACE_USER_STACK, + TRACE_HW_BRANCHES, ++ TRACE_SYSCALL_ENTER, ++ TRACE_SYSCALL_EXIT, ++ TRACE_KMEM_ALLOC, ++ TRACE_KMEM_FREE, + TRACE_POWER, ++ TRACE_BLK, + +- __TRACE_LAST_TYPE ++ __TRACE_LAST_TYPE, + }; + + /* +@@ -42,7 +49,6 @@ enum trace_type { + */ + struct trace_entry { + unsigned char type; +- unsigned char cpu; + unsigned char flags; + unsigned char preempt_count; + int pid; +@@ -60,13 +66,13 @@ struct ftrace_entry { + + /* Function call entry */ + struct ftrace_graph_ent_entry { +- struct trace_entry ent; ++ struct trace_entry ent; + struct ftrace_graph_ent graph_ent; + }; + + /* Function return entry */ + struct ftrace_graph_ret_entry { +- struct trace_entry ent; ++ struct trace_entry ent; + struct ftrace_graph_ret ret; + }; + extern struct tracer boot_tracer; +@@ -112,12 +118,18 @@ struct userstack_entry { + }; + + /* +- * ftrace_printk entry: ++ * trace_printk entry: + */ ++struct bprint_entry { ++ struct trace_entry ent; ++ unsigned long ip; ++ const char *fmt; ++ u32 buf[]; ++}; ++ + struct print_entry { + struct trace_entry ent; + unsigned long ip; +- int depth; + char buf[]; + }; + +@@ -170,15 +182,51 @@ struct trace_power { + struct power_trace state_data; + }; + ++enum kmemtrace_type_id { ++ KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */ ++ KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */ ++ KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. 
*/ ++}; ++ ++struct kmemtrace_alloc_entry { ++ struct trace_entry ent; ++ enum kmemtrace_type_id type_id; ++ unsigned long call_site; ++ const void *ptr; ++ size_t bytes_req; ++ size_t bytes_alloc; ++ gfp_t gfp_flags; ++ int node; ++}; ++ ++struct kmemtrace_free_entry { ++ struct trace_entry ent; ++ enum kmemtrace_type_id type_id; ++ unsigned long call_site; ++ const void *ptr; ++}; ++ ++struct syscall_trace_enter { ++ struct trace_entry ent; ++ int nr; ++ unsigned long args[]; ++}; ++ ++struct syscall_trace_exit { ++ struct trace_entry ent; ++ int nr; ++ unsigned long ret; ++}; ++ ++ + /* + * trace_flag_type is an enumeration that holds different + * states when a trace occurs. These are: + * IRQS_OFF - interrupts were disabled +- * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags ++ * IRQS_NOSUPPORT - arch does not support irqs_disabled_flags + * NEED_RESCED - reschedule is requested + * HARDIRQ - inside an interrupt handler + * SOFTIRQ - inside a softirq handler +- * CONT - multiple entries hold the trace item + */ + enum trace_flag_type { + TRACE_FLAG_IRQS_OFF = 0x01, +@@ -186,7 +234,6 @@ enum trace_flag_type { + TRACE_FLAG_NEED_RESCHED = 0x04, + TRACE_FLAG_HARDIRQ = 0x08, + TRACE_FLAG_SOFTIRQ = 0x10, +- TRACE_FLAG_CONT = 0x20, + }; + + #define TRACE_BUF_SIZE 1024 +@@ -198,6 +245,7 @@ enum trace_flag_type { + */ + struct trace_array_cpu { + atomic_t disabled; ++ void *buffer_page; /* ring buffer spare */ + + /* these fields get copied into max-trace: */ + unsigned long trace_idx; +@@ -262,10 +310,10 @@ extern void __ftrace_bad_type(void); + do { \ + IF_ASSIGN(var, ent, struct ftrace_entry, TRACE_FN); \ + IF_ASSIGN(var, ent, struct ctx_switch_entry, 0); \ +- IF_ASSIGN(var, ent, struct trace_field_cont, TRACE_CONT); \ + IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \ + IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\ + IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \ ++ IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \ + IF_ASSIGN(var, ent, struct special_entry, 0); \ + IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \ + TRACE_MMIO_RW); \ +@@ -279,7 +327,15 @@ extern void __ftrace_bad_type(void); + IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \ + TRACE_GRAPH_RET); \ + IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ +- IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ ++ IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ ++ IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ ++ TRACE_KMEM_ALLOC); \ ++ IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ ++ TRACE_KMEM_FREE); \ ++ IF_ASSIGN(var, ent, struct syscall_trace_enter, \ ++ TRACE_SYSCALL_ENTER); \ ++ IF_ASSIGN(var, ent, struct syscall_trace_exit, \ ++ TRACE_SYSCALL_EXIT); \ + __ftrace_bad_type(); \ + } while (0) + +@@ -287,7 +343,8 @@ extern void __ftrace_bad_type(void); + enum print_line_t { + TRACE_TYPE_PARTIAL_LINE = 0, /* Retry after flushing the seq */ + TRACE_TYPE_HANDLED = 1, +- TRACE_TYPE_UNHANDLED = 2 /* Relay to other output functions */ ++ TRACE_TYPE_UNHANDLED = 2, /* Relay to other output functions */ ++ TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ + }; + + +@@ -297,8 +354,8 @@ enum print_line_t { + * flags value in struct tracer_flags. 
+ */ + struct tracer_opt { +- const char *name; /* Will appear on the trace_options file */ +- u32 bit; /* Mask assigned in val field in tracer_flags */ ++ const char *name; /* Will appear on the trace_options file */ ++ u32 bit; /* Mask assigned in val field in tracer_flags */ + }; + + /* +@@ -307,28 +364,51 @@ struct tracer_opt { + */ + struct tracer_flags { + u32 val; +- struct tracer_opt *opts; ++ struct tracer_opt *opts; + }; + + /* Makes more easy to define a tracer opt */ + #define TRACER_OPT(s, b) .name = #s, .bit = b + +-/* +- * A specific tracer, represented by methods that operate on a trace array: ++ ++/** ++ * struct tracer - a specific tracer and its callbacks to interact with debugfs ++ * @name: the name chosen to select it on the available_tracers file ++ * @init: called when one switches to this tracer (echo name > current_tracer) ++ * @reset: called when one switches to another tracer ++ * @start: called when tracing is unpaused (echo 1 > tracing_enabled) ++ * @stop: called when tracing is paused (echo 0 > tracing_enabled) ++ * @open: called when the trace file is opened ++ * @pipe_open: called when the trace_pipe file is opened ++ * @wait_pipe: override how the user waits for traces on trace_pipe ++ * @close: called when the trace file is released ++ * @read: override the default read callback on trace_pipe ++ * @splice_read: override the default splice_read callback on trace_pipe ++ * @selftest: selftest to run on boot (see trace_selftest.c) ++ * @print_headers: override the first lines that describe your columns ++ * @print_line: callback that prints a trace ++ * @set_flag: signals one of your private flags changed (trace_options file) ++ * @flags: your private flags + */ + struct tracer { + const char *name; +- /* Your tracer should raise a warning if init fails */ + int (*init)(struct trace_array *tr); + void (*reset)(struct trace_array *tr); + void (*start)(struct trace_array *tr); + void (*stop)(struct trace_array *tr); + void (*open)(struct trace_iterator *iter); + void (*pipe_open)(struct trace_iterator *iter); ++ void (*wait_pipe)(struct trace_iterator *iter); + void (*close)(struct trace_iterator *iter); + ssize_t (*read)(struct trace_iterator *iter, + struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos); ++ ssize_t (*splice_read)(struct trace_iterator *iter, ++ struct file *filp, ++ loff_t *ppos, ++ struct pipe_inode_info *pipe, ++ size_t len, ++ unsigned int flags); + #ifdef CONFIG_FTRACE_STARTUP_TEST + int (*selftest)(struct tracer *trace, + struct trace_array *tr); +@@ -339,7 +419,8 @@ struct tracer { + int (*set_flag)(u32 old_flags, u32 bit, int set); + struct tracer *next; + int print_max; +- struct tracer_flags *flags; ++ struct tracer_flags *flags; ++ struct tracer_stat *stats; + }; + + struct trace_seq { +@@ -348,6 +429,16 @@ struct trace_seq { + unsigned int readpos; + }; + ++static inline void ++trace_seq_init(struct trace_seq *s) ++{ ++ s->len = 0; ++ s->readpos = 0; ++} ++ ++ ++#define TRACE_PIPE_ALL_CPU -1 ++ + /* + * Trace iterator - used by printout routines who present trace + * results to users and which routines might sleep, etc: +@@ -356,6 +447,8 @@ struct trace_iterator { + struct trace_array *tr; + struct tracer *trace; + void *private; ++ int cpu_file; ++ struct mutex mutex; + struct ring_buffer_iter *buffer_iter[NR_CPUS]; + + /* The below is zeroed out in pipe_read */ +@@ -371,6 +464,7 @@ struct trace_iterator { + cpumask_var_t started; + }; + ++int tracer_init(struct tracer *t, struct trace_array *tr); + int 
tracing_is_enabled(void); + void trace_wake_up(void); + void tracing_reset(struct trace_array *tr, int cpu); +@@ -379,26 +473,50 @@ int tracing_open_generic(struct inode *i + struct dentry *tracing_init_dentry(void); + void init_tracer_sysprof_debugfs(struct dentry *d_tracer); + ++struct ring_buffer_event; ++ ++struct ring_buffer_event *trace_buffer_lock_reserve(struct trace_array *tr, ++ unsigned char type, ++ unsigned long len, ++ unsigned long flags, ++ int pc); ++void trace_buffer_unlock_commit(struct trace_array *tr, ++ struct ring_buffer_event *event, ++ unsigned long flags, int pc); ++ ++struct ring_buffer_event * ++trace_current_buffer_lock_reserve(unsigned char type, unsigned long len, ++ unsigned long flags, int pc); ++void trace_current_buffer_unlock_commit(struct ring_buffer_event *event, ++ unsigned long flags, int pc); ++void trace_nowake_buffer_unlock_commit(struct ring_buffer_event *event, ++ unsigned long flags, int pc); ++ + struct trace_entry *tracing_get_trace_entry(struct trace_array *tr, + struct trace_array_cpu *data); ++ ++struct trace_entry *trace_find_next_entry(struct trace_iterator *iter, ++ int *ent_cpu, u64 *ent_ts); ++ + void tracing_generic_entry_update(struct trace_entry *entry, + unsigned long flags, + int pc); + ++void default_wait_pipe(struct trace_iterator *iter); ++void poll_wait_pipe(struct trace_iterator *iter); ++ + void ftrace(struct trace_array *tr, + struct trace_array_cpu *data, + unsigned long ip, + unsigned long parent_ip, + unsigned long flags, int pc); + void tracing_sched_switch_trace(struct trace_array *tr, +- struct trace_array_cpu *data, + struct task_struct *prev, + struct task_struct *next, + unsigned long flags, int pc); + void tracing_record_cmdline(struct task_struct *tsk); + + void tracing_sched_wakeup_trace(struct trace_array *tr, +- struct trace_array_cpu *data, + struct task_struct *wakee, + struct task_struct *cur, + unsigned long flags, int pc); +@@ -408,14 +526,12 @@ void trace_special(struct trace_array *t + unsigned long arg2, + unsigned long arg3, int pc); + void trace_function(struct trace_array *tr, +- struct trace_array_cpu *data, + unsigned long ip, + unsigned long parent_ip, + unsigned long flags, int pc); + + void trace_graph_return(struct ftrace_graph_ret *trace); + int trace_graph_entry(struct ftrace_graph_ent *trace); +-void trace_hw_branch(struct trace_array *tr, u64 from, u64 to); + + void tracing_start_cmdline_record(void); + void tracing_stop_cmdline_record(void); +@@ -434,15 +550,11 @@ void update_max_tr(struct trace_array *t + void update_max_tr_single(struct trace_array *tr, + struct task_struct *tsk, int cpu); + +-extern cycle_t ftrace_now(int cpu); ++void __trace_stack(struct trace_array *tr, ++ unsigned long flags, ++ int skip, int pc); + +-#ifdef CONFIG_FUNCTION_TRACER +-void tracing_start_function_trace(void); +-void tracing_stop_function_trace(void); +-#else +-# define tracing_start_function_trace() do { } while (0) +-# define tracing_stop_function_trace() do { } while (0) +-#endif ++extern cycle_t ftrace_now(int cpu); + + #ifdef CONFIG_CONTEXT_SWITCH_TRACER + typedef void +@@ -456,10 +568,10 @@ struct tracer_switch_ops { + void *private; + struct tracer_switch_ops *next; + }; +- +-char *trace_find_cmdline(int pid); + #endif /* CONFIG_CONTEXT_SWITCH_TRACER */ + ++extern void trace_find_cmdline(int pid, char comm[]); ++ + #ifdef CONFIG_DYNAMIC_FTRACE + extern unsigned long ftrace_update_tot_cnt; + #define DYN_FTRACE_TEST_NAME trace_selftest_dynamic_test_func +@@ -469,6 +581,8 @@ extern int 
DYN_FTRACE_TEST_NAME(void); + #ifdef CONFIG_FTRACE_STARTUP_TEST + extern int trace_selftest_startup_function(struct tracer *trace, + struct trace_array *tr); ++extern int trace_selftest_startup_function_graph(struct tracer *trace, ++ struct trace_array *tr); + extern int trace_selftest_startup_irqsoff(struct tracer *trace, + struct trace_array *tr); + extern int trace_selftest_startup_preemptoff(struct tracer *trace, +@@ -488,24 +602,19 @@ extern int trace_selftest_startup_branch + #endif /* CONFIG_FTRACE_STARTUP_TEST */ + + extern void *head_page(struct trace_array_cpu *data); +-extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...); +-extern void trace_seq_print_cont(struct trace_seq *s, +- struct trace_iterator *iter); +- +-extern int +-seq_print_ip_sym(struct trace_seq *s, unsigned long ip, +- unsigned long sym_flags); +-extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, +- size_t cnt); + extern long ns2usecs(cycle_t nsec); + extern int +-trace_vprintk(unsigned long ip, int depth, const char *fmt, va_list args); ++trace_vbprintk(unsigned long ip, const char *fmt, va_list args); ++extern int ++trace_vprintk(unsigned long ip, const char *fmt, va_list args); + + extern unsigned long trace_flags; + + /* Standard output formatting function used for function return traces */ + #ifdef CONFIG_FUNCTION_GRAPH_TRACER + extern enum print_line_t print_graph_function(struct trace_iterator *iter); ++extern enum print_line_t ++trace_print_graph_duration(unsigned long long duration, struct trace_seq *s); + + #ifdef CONFIG_DYNAMIC_FTRACE + /* TODO: make this variable */ +@@ -537,7 +646,6 @@ static inline int ftrace_graph_addr(unsi + return 1; + } + #endif /* CONFIG_DYNAMIC_FTRACE */ +- + #else /* CONFIG_FUNCTION_GRAPH_TRACER */ + static inline enum print_line_t + print_graph_function(struct trace_iterator *iter) +@@ -580,7 +688,12 @@ enum trace_iterator_flags { + TRACE_ITER_ANNOTATE = 0x2000, + TRACE_ITER_USERSTACKTRACE = 0x4000, + TRACE_ITER_SYM_USEROBJ = 0x8000, +- TRACE_ITER_PRINTK_MSGONLY = 0x10000 ++ TRACE_ITER_PRINTK_MSGONLY = 0x10000, ++ TRACE_ITER_CONTEXT_INFO = 0x20000, /* Print pid/cpu/time */ ++ TRACE_ITER_LATENCY_FMT = 0x40000, ++ TRACE_ITER_GLOBAL_CLK = 0x80000, ++ TRACE_ITER_SLEEP_TIME = 0x100000, ++ TRACE_ITER_GRAPH_TIME = 0x200000, + }; + + /* +@@ -601,12 +714,12 @@ extern struct tracer nop_trace; + * preempt_enable (after a disable), a schedule might take place + * causing an infinite recursion. + * +- * To prevent this, we read the need_recshed flag before ++ * To prevent this, we read the need_resched flag before + * disabling preemption. When we want to enable preemption we + * check the flag, if it is set, then we call preempt_enable_no_resched. + * Otherwise, we call preempt_enable. + * +- * The rational for doing the above is that if need resched is set ++ * The rational for doing the above is that if need_resched is set + * and we have yet to reschedule, we are either in an atomic location + * (where we do not need to check for scheduling) or we are inside + * the scheduler and do not want to resched. +@@ -627,7 +740,7 @@ static inline int ftrace_preempt_disable + * + * This is a scheduler safe way to enable preemption and not miss + * any preemption checks. The disabled saved the state of preemption. +- * If resched is set, then we were either inside an atomic or ++ * If resched is set, then we are either inside an atomic or + * are inside the scheduler (we would have already scheduled + * otherwise). 
In this case, we do not want to call normal + * preempt_enable, but preempt_enable_no_resched instead. +@@ -664,4 +777,118 @@ static inline void trace_branch_disable( + } + #endif /* CONFIG_BRANCH_TRACER */ + ++/* set ring buffers to default size if not already done so */ ++int tracing_update_buffers(void); ++ ++/* trace event type bit fields, not numeric */ ++enum { ++ TRACE_EVENT_TYPE_PRINTF = 1, ++ TRACE_EVENT_TYPE_RAW = 2, ++}; ++ ++struct ftrace_event_field { ++ struct list_head link; ++ char *name; ++ char *type; ++ int offset; ++ int size; ++}; ++ ++struct ftrace_event_call { ++ char *name; ++ char *system; ++ struct dentry *dir; ++ int enabled; ++ int (*regfunc)(void); ++ void (*unregfunc)(void); ++ int id; ++ int (*raw_init)(void); ++ int (*show_format)(struct trace_seq *s); ++ int (*define_fields)(void); ++ struct list_head fields; ++ struct filter_pred **preds; ++ ++#ifdef CONFIG_EVENT_PROFILE ++ atomic_t profile_count; ++ int (*profile_enable)(struct ftrace_event_call *); ++ void (*profile_disable)(struct ftrace_event_call *); ++#endif ++}; ++ ++struct event_subsystem { ++ struct list_head list; ++ const char *name; ++ struct dentry *entry; ++ struct filter_pred **preds; ++}; ++ ++#define events_for_each(event) \ ++ for (event = __start_ftrace_events; \ ++ (unsigned long)event < (unsigned long)__stop_ftrace_events; \ ++ event++) ++ ++#define MAX_FILTER_PRED 8 ++ ++struct filter_pred; ++ ++typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event); ++ ++struct filter_pred { ++ filter_pred_fn_t fn; ++ u64 val; ++ char *str_val; ++ int str_len; ++ char *field_name; ++ int offset; ++ int not; ++ int or; ++ int compound; ++ int clear; ++}; ++ ++int trace_define_field(struct ftrace_event_call *call, char *type, ++ char *name, int offset, int size); ++extern void filter_free_pred(struct filter_pred *pred); ++extern void filter_print_preds(struct filter_pred **preds, ++ struct trace_seq *s); ++extern int filter_parse(char **pbuf, struct filter_pred *pred); ++extern int filter_add_pred(struct ftrace_event_call *call, ++ struct filter_pred *pred); ++extern void filter_free_preds(struct ftrace_event_call *call); ++extern int filter_match_preds(struct ftrace_event_call *call, void *rec); ++extern void filter_free_subsystem_preds(struct event_subsystem *system); ++extern int filter_add_subsystem_pred(struct event_subsystem *system, ++ struct filter_pred *pred); ++ ++void event_trace_printk(unsigned long ip, const char *fmt, ...); ++extern struct ftrace_event_call __start_ftrace_events[]; ++extern struct ftrace_event_call __stop_ftrace_events[]; ++ ++#define for_each_event(event) \ ++ for (event = __start_ftrace_events; \ ++ (unsigned long)event < (unsigned long)__stop_ftrace_events; \ ++ event++) ++ ++extern const char *__start___trace_bprintk_fmt[]; ++extern const char *__stop___trace_bprintk_fmt[]; ++ ++/* ++ * The double __builtin_constant_p is because gcc will give us an error ++ * if we try to allocate the static variable to fmt if it is not a ++ * constant. Even with the outer if statement optimizing out. ++ */ ++#define event_trace_printk(ip, fmt, args...) \ ++do { \ ++ __trace_printk_check_format(fmt, ##args); \ ++ tracing_record_cmdline(current); \ ++ if (__builtin_constant_p(fmt)) { \ ++ static const char *trace_printk_fmt \ ++ __attribute__((section("__trace_printk_fmt"))) = \ ++ __builtin_constant_p(fmt) ? 
fmt : NULL; \ ++ \ ++ __trace_bprintk(ip, trace_printk_fmt, ##args); \ ++ } else \ ++ __trace_printk(ip, fmt, ##args); \ ++} while (0) ++ + #endif /* _LINUX_KERNEL_TRACE_H */ +Index: linux-2.6-tip/kernel/trace/trace_boot.c +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/trace_boot.c ++++ linux-2.6-tip/kernel/trace/trace_boot.c +@@ -11,6 +11,7 @@ + #include + + #include "trace.h" ++#include "trace_output.h" + + static struct trace_array *boot_trace; + static bool pre_initcalls_finished; +@@ -27,13 +28,13 @@ void start_boot_trace(void) + + void enable_boot_trace(void) + { +- if (pre_initcalls_finished) ++ if (boot_trace && pre_initcalls_finished) + tracing_start_sched_switch_record(); + } + + void disable_boot_trace(void) + { +- if (pre_initcalls_finished) ++ if (boot_trace && pre_initcalls_finished) + tracing_stop_sched_switch_record(); + } + +@@ -42,6 +43,9 @@ static int boot_trace_init(struct trace_ + int cpu; + boot_trace = tr; + ++ if (!tr) ++ return 0; ++ + for_each_cpu(cpu, cpu_possible_mask) + tracing_reset(tr, cpu); + +@@ -128,10 +132,9 @@ void trace_boot_call(struct boot_trace_c + { + struct ring_buffer_event *event; + struct trace_boot_call *entry; +- unsigned long irq_flags; + struct trace_array *tr = boot_trace; + +- if (!pre_initcalls_finished) ++ if (!tr || !pre_initcalls_finished) + return; + + /* Get its name now since this function could +@@ -140,18 +143,13 @@ void trace_boot_call(struct boot_trace_c + sprint_symbol(bt->func, (unsigned long)fn); + preempt_disable(); + +- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), +- &irq_flags); ++ event = trace_buffer_lock_reserve(tr, TRACE_BOOT_CALL, ++ sizeof(*entry), 0, 0); + if (!event) + goto out; + entry = ring_buffer_event_data(event); +- tracing_generic_entry_update(&entry->ent, 0, 0); +- entry->ent.type = TRACE_BOOT_CALL; + entry->boot_call = *bt; +- ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +- +- trace_wake_up(); +- ++ trace_buffer_unlock_commit(tr, event, 0, 0); + out: + preempt_enable(); + } +@@ -160,27 +158,21 @@ void trace_boot_ret(struct boot_trace_re + { + struct ring_buffer_event *event; + struct trace_boot_ret *entry; +- unsigned long irq_flags; + struct trace_array *tr = boot_trace; + +- if (!pre_initcalls_finished) ++ if (!tr || !pre_initcalls_finished) + return; + + sprint_symbol(bt->func, (unsigned long)fn); + preempt_disable(); + +- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), +- &irq_flags); ++ event = trace_buffer_lock_reserve(tr, TRACE_BOOT_RET, ++ sizeof(*entry), 0, 0); + if (!event) + goto out; + entry = ring_buffer_event_data(event); +- tracing_generic_entry_update(&entry->ent, 0, 0); +- entry->ent.type = TRACE_BOOT_RET; + entry->boot_ret = *bt; +- ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +- +- trace_wake_up(); +- ++ trace_buffer_unlock_commit(tr, event, 0, 0); + out: + preempt_enable(); + } +Index: linux-2.6-tip/kernel/trace/trace_branch.c +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/trace_branch.c ++++ linux-2.6-tip/kernel/trace/trace_branch.c +@@ -14,12 +14,17 @@ + #include + #include + #include ++ + #include "trace.h" ++#include "trace_stat.h" ++#include "trace_output.h" + + #ifdef CONFIG_BRANCH_TRACER + ++static struct tracer branch_trace; + static int branch_tracing_enabled __read_mostly; + static DEFINE_MUTEX(branch_tracing_mutex); ++ + static struct trace_array *branch_tracer; + + static void +@@ -28,7 +33,7 @@ 
probe_likely_condition(struct ftrace_bra + struct trace_array *tr = branch_tracer; + struct ring_buffer_event *event; + struct trace_branch *entry; +- unsigned long flags, irq_flags; ++ unsigned long flags; + int cpu, pc; + const char *p; + +@@ -47,15 +52,13 @@ probe_likely_condition(struct ftrace_bra + if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) + goto out; + +- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), +- &irq_flags); ++ pc = preempt_count(); ++ event = trace_buffer_lock_reserve(tr, TRACE_BRANCH, ++ sizeof(*entry), flags, pc); + if (!event) + goto out; + +- pc = preempt_count(); + entry = ring_buffer_event_data(event); +- tracing_generic_entry_update(&entry->ent, flags, pc); +- entry->ent.type = TRACE_BRANCH; + + /* Strip off the path, only save the file */ + p = f->file + strlen(f->file); +@@ -70,7 +73,7 @@ probe_likely_condition(struct ftrace_bra + entry->line = f->line; + entry->correct = val == expect; + +- ring_buffer_unlock_commit(tr->buffer, event, irq_flags); ++ ring_buffer_unlock_commit(tr->buffer, event); + + out: + atomic_dec(&tr->data[cpu]->disabled); +@@ -88,8 +91,6 @@ void trace_likely_condition(struct ftrac + + int enable_branch_tracing(struct trace_array *tr) + { +- int ret = 0; +- + mutex_lock(&branch_tracing_mutex); + branch_tracer = tr; + /* +@@ -100,7 +101,7 @@ int enable_branch_tracing(struct trace_a + branch_tracing_enabled++; + mutex_unlock(&branch_tracing_mutex); + +- return ret; ++ return 0; + } + + void disable_branch_tracing(void) +@@ -128,11 +129,6 @@ static void stop_branch_trace(struct tra + + static int branch_trace_init(struct trace_array *tr) + { +- int cpu; +- +- for_each_online_cpu(cpu) +- tracing_reset(tr, cpu); +- + start_branch_trace(tr); + return 0; + } +@@ -142,22 +138,53 @@ static void branch_trace_reset(struct tr + stop_branch_trace(tr); + } + +-struct tracer branch_trace __read_mostly = ++static enum print_line_t trace_branch_print(struct trace_iterator *iter, ++ int flags) ++{ ++ struct trace_branch *field; ++ ++ trace_assign_type(field, iter->ent); ++ ++ if (trace_seq_printf(&iter->seq, "[%s] %s:%s:%d\n", ++ field->correct ? 
" ok " : " MISS ", ++ field->func, ++ field->file, ++ field->line)) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++ ++static struct trace_event trace_branch_event = { ++ .type = TRACE_BRANCH, ++ .trace = trace_branch_print, ++}; ++ ++static struct tracer branch_trace __read_mostly = + { + .name = "branch", + .init = branch_trace_init, + .reset = branch_trace_reset, + #ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_branch, +-#endif ++#endif /* CONFIG_FTRACE_SELFTEST */ + }; + +-__init static int init_branch_trace(void) ++__init static int init_branch_tracer(void) + { ++ int ret; ++ ++ ret = register_ftrace_event(&trace_branch_event); ++ if (!ret) { ++ printk(KERN_WARNING "Warning: could not register " ++ "branch events\n"); ++ return 1; ++ } + return register_tracer(&branch_trace); + } ++device_initcall(init_branch_tracer); + +-device_initcall(init_branch_trace); + #else + static inline + void trace_likely_condition(struct ftrace_branch_data *f, int val, int expect) +@@ -183,66 +210,39 @@ void ftrace_likely_update(struct ftrace_ + } + EXPORT_SYMBOL(ftrace_likely_update); + +-struct ftrace_pointer { +- void *start; +- void *stop; +- int hit; +-}; ++extern unsigned long __start_annotated_branch_profile[]; ++extern unsigned long __stop_annotated_branch_profile[]; + +-static void * +-t_next(struct seq_file *m, void *v, loff_t *pos) ++static int annotated_branch_stat_headers(struct seq_file *m) + { +- const struct ftrace_pointer *f = m->private; +- struct ftrace_branch_data *p = v; +- +- (*pos)++; +- +- if (v == (void *)1) +- return f->start; +- +- ++p; +- +- if ((void *)p >= (void *)f->stop) +- return NULL; +- +- return p; ++ seq_printf(m, " correct incorrect %% "); ++ seq_printf(m, " Function " ++ " File Line\n" ++ " ------- --------- - " ++ " -------- " ++ " ---- ----\n"); ++ return 0; + } + +-static void *t_start(struct seq_file *m, loff_t *pos) ++static inline long get_incorrect_percent(struct ftrace_branch_data *p) + { +- void *t = (void *)1; +- loff_t l = 0; +- +- for (; t && l < *pos; t = t_next(m, t, &l)) +- ; ++ long percent; + +- return t; +-} ++ if (p->correct) { ++ percent = p->incorrect * 100; ++ percent /= p->correct + p->incorrect; ++ } else ++ percent = p->incorrect ? 100 : -1; + +-static void t_stop(struct seq_file *m, void *p) +-{ ++ return percent; + } + +-static int t_show(struct seq_file *m, void *v) ++static int branch_stat_show(struct seq_file *m, void *v) + { +- const struct ftrace_pointer *fp = m->private; + struct ftrace_branch_data *p = v; + const char *f; + long percent; + +- if (v == (void *)1) { +- if (fp->hit) +- seq_printf(m, " miss hit %% "); +- else +- seq_printf(m, " correct incorrect %% "); +- seq_printf(m, " Function " +- " File Line\n" +- " ------- --------- - " +- " -------- " +- " ---- ----\n"); +- return 0; +- } +- + /* Only print the file, not the path */ + f = p->file + strlen(p->file); + while (f >= p->file && *f != '/') +@@ -252,11 +252,7 @@ static int t_show(struct seq_file *m, vo + /* + * The miss is overlayed on correct, and hit on incorrect. + */ +- if (p->correct) { +- percent = p->incorrect * 100; +- percent /= p->correct + p->incorrect; +- } else +- percent = p->incorrect ? 
100 : -1; ++ percent = get_incorrect_percent(p); + + seq_printf(m, "%8lu %8lu ", p->correct, p->incorrect); + if (percent < 0) +@@ -267,76 +263,118 @@ static int t_show(struct seq_file *m, vo + return 0; + } + +-static struct seq_operations tracing_likely_seq_ops = { +- .start = t_start, +- .next = t_next, +- .stop = t_stop, +- .show = t_show, ++static void *annotated_branch_stat_start(void) ++{ ++ return __start_annotated_branch_profile; ++} ++ ++static void * ++annotated_branch_stat_next(void *v, int idx) ++{ ++ struct ftrace_branch_data *p = v; ++ ++ ++p; ++ ++ if ((void *)p >= (void *)__stop_annotated_branch_profile) ++ return NULL; ++ ++ return p; ++} ++ ++static int annotated_branch_stat_cmp(void *p1, void *p2) ++{ ++ struct ftrace_branch_data *a = p1; ++ struct ftrace_branch_data *b = p2; ++ ++ long percent_a, percent_b; ++ ++ percent_a = get_incorrect_percent(a); ++ percent_b = get_incorrect_percent(b); ++ ++ if (percent_a < percent_b) ++ return -1; ++ if (percent_a > percent_b) ++ return 1; ++ else ++ return 0; ++} ++ ++static struct tracer_stat annotated_branch_stats = { ++ .name = "branch_annotated", ++ .stat_start = annotated_branch_stat_start, ++ .stat_next = annotated_branch_stat_next, ++ .stat_cmp = annotated_branch_stat_cmp, ++ .stat_headers = annotated_branch_stat_headers, ++ .stat_show = branch_stat_show + }; + +-static int tracing_branch_open(struct inode *inode, struct file *file) ++__init static int init_annotated_branch_stats(void) + { + int ret; + +- ret = seq_open(file, &tracing_likely_seq_ops); ++ ret = register_stat_tracer(&annotated_branch_stats); + if (!ret) { +- struct seq_file *m = file->private_data; +- m->private = (void *)inode->i_private; ++ printk(KERN_WARNING "Warning: could not register " ++ "annotated branches stats\n"); ++ return 1; + } +- +- return ret; ++ return 0; + } +- +-static const struct file_operations tracing_branch_fops = { +- .open = tracing_branch_open, +- .read = seq_read, +- .llseek = seq_lseek, +-}; ++fs_initcall(init_annotated_branch_stats); + + #ifdef CONFIG_PROFILE_ALL_BRANCHES ++ + extern unsigned long __start_branch_profile[]; + extern unsigned long __stop_branch_profile[]; + +-static const struct ftrace_pointer ftrace_branch_pos = { +- .start = __start_branch_profile, +- .stop = __stop_branch_profile, +- .hit = 1, +-}; ++static int all_branch_stat_headers(struct seq_file *m) ++{ ++ seq_printf(m, " miss hit %% "); ++ seq_printf(m, " Function " ++ " File Line\n" ++ " ------- --------- - " ++ " -------- " ++ " ---- ----\n"); ++ return 0; ++} + +-#endif /* CONFIG_PROFILE_ALL_BRANCHES */ ++static void *all_branch_stat_start(void) ++{ ++ return __start_branch_profile; ++} + +-extern unsigned long __start_annotated_branch_profile[]; +-extern unsigned long __stop_annotated_branch_profile[]; ++static void * ++all_branch_stat_next(void *v, int idx) ++{ ++ struct ftrace_branch_data *p = v; + +-static const struct ftrace_pointer ftrace_annotated_branch_pos = { +- .start = __start_annotated_branch_profile, +- .stop = __stop_annotated_branch_profile, +-}; ++ ++p; + +-static __init int ftrace_branch_init(void) +-{ +- struct dentry *d_tracer; +- struct dentry *entry; ++ if ((void *)p >= (void *)__stop_branch_profile) ++ return NULL; + +- d_tracer = tracing_init_dentry(); ++ return p; ++} + +- entry = debugfs_create_file("profile_annotated_branch", 0444, d_tracer, +- (void *)&ftrace_annotated_branch_pos, +- &tracing_branch_fops); +- if (!entry) +- pr_warning("Could not create debugfs " +- "'profile_annotatet_branch' entry\n"); ++static struct 
tracer_stat all_branch_stats = { ++ .name = "branch_all", ++ .stat_start = all_branch_stat_start, ++ .stat_next = all_branch_stat_next, ++ .stat_headers = all_branch_stat_headers, ++ .stat_show = branch_stat_show ++}; + +-#ifdef CONFIG_PROFILE_ALL_BRANCHES +- entry = debugfs_create_file("profile_branch", 0444, d_tracer, +- (void *)&ftrace_branch_pos, +- &tracing_branch_fops); +- if (!entry) +- pr_warning("Could not create debugfs" +- " 'profile_branch' entry\n"); +-#endif ++__init static int all_annotated_branch_stats(void) ++{ ++ int ret; + ++ ret = register_stat_tracer(&all_branch_stats); ++ if (!ret) { ++ printk(KERN_WARNING "Warning: could not register " ++ "all branches stats\n"); ++ return 1; ++ } + return 0; + } +- +-device_initcall(ftrace_branch_init); ++fs_initcall(all_annotated_branch_stats); ++#endif /* CONFIG_PROFILE_ALL_BRANCHES */ +Index: linux-2.6-tip/kernel/trace/trace_clock.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/trace_clock.c +@@ -0,0 +1,109 @@ ++/* ++ * tracing clocks ++ * ++ * Copyright (C) 2009 Red Hat, Inc., Ingo Molnar ++ * ++ * Implements 3 trace clock variants, with differing scalability/precision ++ * tradeoffs: ++ * ++ * - local: CPU-local trace clock ++ * - medium: scalable global clock with some jitter ++ * - global: globally monotonic, serialized clock ++ * ++ * Tracer plugins will chose a default from these clocks. ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * trace_clock_local(): the simplest and least coherent tracing clock. ++ * ++ * Useful for tracing that does not cross to other CPUs nor ++ * does it go through idle events. ++ */ ++u64 notrace trace_clock_local(void) ++{ ++ unsigned long flags; ++ u64 clock; ++ ++ /* ++ * sched_clock() is an architecture implemented, fast, scalable, ++ * lockless clock. It is not guaranteed to be coherent across ++ * CPUs, nor across CPU idle events. ++ */ ++ raw_local_irq_save(flags); ++ clock = sched_clock(); ++ raw_local_irq_restore(flags); ++ ++ return clock; ++} ++ ++/* ++ * trace_clock(): 'inbetween' trace clock. Not completely serialized, ++ * but not completely incorrect when crossing CPUs either. ++ * ++ * This is based on cpu_clock(), which will allow at most ~1 jiffy of ++ * jitter between CPUs. So it's a pretty scalable clock, but there ++ * can be offsets in the trace data. ++ */ ++u64 notrace trace_clock(void) ++{ ++ return cpu_clock(raw_smp_processor_id()); ++} ++ ++ ++/* ++ * trace_clock_global(): special globally coherent trace clock ++ * ++ * It has higher overhead than the other trace clocks but is still ++ * an order of magnitude faster than GTOD derived hardware clocks. ++ * ++ * Used by plugins that need globally coherent timestamps. ++ */ ++ ++static u64 prev_trace_clock_time; ++ ++static __raw_spinlock_t trace_clock_lock ____cacheline_aligned_in_smp = ++ __RAW_SPIN_LOCK_UNLOCKED; ++ ++u64 notrace trace_clock_global(void) ++{ ++ unsigned long flags; ++ int this_cpu; ++ u64 now; ++ ++ raw_local_irq_save(flags); ++ ++ this_cpu = raw_smp_processor_id(); ++ now = cpu_clock(this_cpu); ++ /* ++ * If in an NMI context then dont risk lockups and return the ++ * cpu_clock() time: ++ */ ++ if (unlikely(in_nmi())) ++ goto out; ++ ++ __raw_spin_lock(&trace_clock_lock); ++ ++ /* ++ * TODO: if this happens often then maybe we should reset ++ * my_scd->clock to prev_trace_clock_time+1, to make sure ++ * we start ticking with the local clock from now on? 
++ */ ++ if ((s64)(now - prev_trace_clock_time) < 0) ++ now = prev_trace_clock_time + 1; ++ ++ prev_trace_clock_time = now; ++ ++ __raw_spin_unlock(&trace_clock_lock); ++ ++ out: ++ raw_local_irq_restore(flags); ++ ++ return now; ++} +Index: linux-2.6-tip/kernel/trace/trace_event_profile.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/trace_event_profile.c +@@ -0,0 +1,31 @@ ++/* ++ * trace event based perf counter profiling ++ * ++ * Copyright (C) 2009 Red Hat Inc, Peter Zijlstra ++ * ++ */ ++ ++#include "trace.h" ++ ++int ftrace_profile_enable(int event_id) ++{ ++ struct ftrace_event_call *event; ++ ++ for_each_event(event) { ++ if (event->id == event_id) ++ return event->profile_enable(event); ++ } ++ ++ return -EINVAL; ++} ++ ++void ftrace_profile_disable(int event_id) ++{ ++ struct ftrace_event_call *event; ++ ++ for_each_event(event) { ++ if (event->id == event_id) ++ return event->profile_disable(event); ++ } ++} ++ +Index: linux-2.6-tip/kernel/trace/trace_event_types.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/trace_event_types.h +@@ -0,0 +1,173 @@ ++#undef TRACE_SYSTEM ++#define TRACE_SYSTEM ftrace ++ ++/* ++ * We cheat and use the proto type field as the ID ++ * and args as the entry type (minus 'struct') ++ */ ++TRACE_EVENT_FORMAT(function, TRACE_FN, ftrace_entry, ignore, ++ TRACE_STRUCT( ++ TRACE_FIELD(unsigned long, ip, ip) ++ TRACE_FIELD(unsigned long, parent_ip, parent_ip) ++ ), ++ TP_RAW_FMT(" %lx <-- %lx") ++); ++ ++TRACE_EVENT_FORMAT(funcgraph_entry, TRACE_GRAPH_ENT, ++ ftrace_graph_ent_entry, ignore, ++ TRACE_STRUCT( ++ TRACE_FIELD(unsigned long, graph_ent.func, func) ++ TRACE_FIELD(int, graph_ent.depth, depth) ++ ), ++ TP_RAW_FMT("--> %lx (%d)") ++); ++ ++TRACE_EVENT_FORMAT(funcgraph_exit, TRACE_GRAPH_RET, ++ ftrace_graph_ret_entry, ignore, ++ TRACE_STRUCT( ++ TRACE_FIELD(unsigned long, ret.func, func) ++ TRACE_FIELD(int, ret.depth, depth) ++ ), ++ TP_RAW_FMT("<-- %lx (%d)") ++); ++ ++TRACE_EVENT_FORMAT(wakeup, TRACE_WAKE, ctx_switch_entry, ignore, ++ TRACE_STRUCT( ++ TRACE_FIELD(unsigned int, prev_pid, prev_pid) ++ TRACE_FIELD(unsigned char, prev_prio, prev_prio) ++ TRACE_FIELD(unsigned char, prev_state, prev_state) ++ TRACE_FIELD(unsigned int, next_pid, next_pid) ++ TRACE_FIELD(unsigned char, next_prio, next_prio) ++ TRACE_FIELD(unsigned char, next_state, next_state) ++ TRACE_FIELD(unsigned int, next_cpu, next_cpu) ++ ), ++ TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") ++); ++ ++TRACE_EVENT_FORMAT(context_switch, TRACE_CTX, ctx_switch_entry, ignore, ++ TRACE_STRUCT( ++ TRACE_FIELD(unsigned int, prev_pid, prev_pid) ++ TRACE_FIELD(unsigned char, prev_prio, prev_prio) ++ TRACE_FIELD(unsigned char, prev_state, prev_state) ++ TRACE_FIELD(unsigned int, next_pid, next_pid) ++ TRACE_FIELD(unsigned char, next_prio, next_prio) ++ TRACE_FIELD(unsigned char, next_state, next_state) ++ TRACE_FIELD(unsigned int, next_cpu, next_cpu) ++ ), ++ TP_RAW_FMT("%u:%u:%u ==+ %u:%u:%u [%03u]") ++); ++ ++TRACE_EVENT_FORMAT(special, TRACE_SPECIAL, special_entry, ignore, ++ TRACE_STRUCT( ++ TRACE_FIELD(unsigned long, arg1, arg1) ++ TRACE_FIELD(unsigned long, arg2, arg2) ++ TRACE_FIELD(unsigned long, arg3, arg3) ++ ), ++ TP_RAW_FMT("(%08lx) (%08lx) (%08lx)") ++); ++ ++/* ++ * Stack-trace entry: ++ */ ++ ++/* #define FTRACE_STACK_ENTRIES 8 */ ++ ++TRACE_EVENT_FORMAT(kernel_stack, TRACE_STACK, stack_entry, ignore, ++ TRACE_STRUCT( ++ TRACE_FIELD(unsigned long, 
caller[0], stack0) ++ TRACE_FIELD(unsigned long, caller[1], stack1) ++ TRACE_FIELD(unsigned long, caller[2], stack2) ++ TRACE_FIELD(unsigned long, caller[3], stack3) ++ TRACE_FIELD(unsigned long, caller[4], stack4) ++ TRACE_FIELD(unsigned long, caller[5], stack5) ++ TRACE_FIELD(unsigned long, caller[6], stack6) ++ TRACE_FIELD(unsigned long, caller[7], stack7) ++ ), ++ TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" ++ "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") ++); ++ ++TRACE_EVENT_FORMAT(user_stack, TRACE_USER_STACK, userstack_entry, ignore, ++ TRACE_STRUCT( ++ TRACE_FIELD(unsigned long, caller[0], stack0) ++ TRACE_FIELD(unsigned long, caller[1], stack1) ++ TRACE_FIELD(unsigned long, caller[2], stack2) ++ TRACE_FIELD(unsigned long, caller[3], stack3) ++ TRACE_FIELD(unsigned long, caller[4], stack4) ++ TRACE_FIELD(unsigned long, caller[5], stack5) ++ TRACE_FIELD(unsigned long, caller[6], stack6) ++ TRACE_FIELD(unsigned long, caller[7], stack7) ++ ), ++ TP_RAW_FMT("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n" ++ "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n") ++); ++ ++TRACE_EVENT_FORMAT(bprint, TRACE_BPRINT, bprint_entry, ignore, ++ TRACE_STRUCT( ++ TRACE_FIELD(unsigned long, ip, ip) ++ TRACE_FIELD(char *, fmt, fmt) ++ TRACE_FIELD_ZERO_CHAR(buf) ++ ), ++ TP_RAW_FMT("%08lx (%d) fmt:%p %s") ++); ++ ++TRACE_EVENT_FORMAT(print, TRACE_PRINT, print_entry, ignore, ++ TRACE_STRUCT( ++ TRACE_FIELD(unsigned long, ip, ip) ++ TRACE_FIELD_ZERO_CHAR(buf) ++ ), ++ TP_RAW_FMT("%08lx (%d) fmt:%p %s") ++); ++ ++TRACE_EVENT_FORMAT(branch, TRACE_BRANCH, trace_branch, ignore, ++ TRACE_STRUCT( ++ TRACE_FIELD(unsigned int, line, line) ++ TRACE_FIELD_SPECIAL(char func[TRACE_FUNC_SIZE+1], func, func) ++ TRACE_FIELD_SPECIAL(char file[TRACE_FUNC_SIZE+1], file, file) ++ TRACE_FIELD(char, correct, correct) ++ ), ++ TP_RAW_FMT("%u:%s:%s (%u)") ++); ++ ++TRACE_EVENT_FORMAT(hw_branch, TRACE_HW_BRANCHES, hw_branch_entry, ignore, ++ TRACE_STRUCT( ++ TRACE_FIELD(u64, from, from) ++ TRACE_FIELD(u64, to, to) ++ ), ++ TP_RAW_FMT("from: %llx to: %llx") ++); ++ ++TRACE_EVENT_FORMAT(power, TRACE_POWER, trace_power, ignore, ++ TRACE_STRUCT( ++ TRACE_FIELD(ktime_t, state_data.stamp, stamp) ++ TRACE_FIELD(ktime_t, state_data.end, end) ++ TRACE_FIELD(int, state_data.type, type) ++ TRACE_FIELD(int, state_data.state, state) ++ ), ++ TP_RAW_FMT("%llx->%llx type:%u state:%u") ++); ++ ++TRACE_EVENT_FORMAT(kmem_alloc, TRACE_KMEM_ALLOC, kmemtrace_alloc_entry, ignore, ++ TRACE_STRUCT( ++ TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id) ++ TRACE_FIELD(unsigned long, call_site, call_site) ++ TRACE_FIELD(const void *, ptr, ptr) ++ TRACE_FIELD(size_t, bytes_req, bytes_req) ++ TRACE_FIELD(size_t, bytes_alloc, bytes_alloc) ++ TRACE_FIELD(gfp_t, gfp_flags, gfp_flags) ++ TRACE_FIELD(int, node, node) ++ ), ++ TP_RAW_FMT("type:%u call_site:%lx ptr:%p req:%lu alloc:%lu" ++ " flags:%x node:%d") ++); ++ ++TRACE_EVENT_FORMAT(kmem_free, TRACE_KMEM_FREE, kmemtrace_free_entry, ignore, ++ TRACE_STRUCT( ++ TRACE_FIELD(enum kmemtrace_type_id, type_id, type_id) ++ TRACE_FIELD(unsigned long, call_site, call_site) ++ TRACE_FIELD(const void *, ptr, ptr) ++ ), ++ TP_RAW_FMT("type:%u call_site:%lx ptr:%p") ++); ++ ++#undef TRACE_SYSTEM +Index: linux-2.6-tip/kernel/trace/trace_events.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/trace_events.c +@@ -0,0 +1,832 @@ ++/* ++ * event tracer ++ * ++ * Copyright (C) 2008 Red Hat Inc, 
Steven Rostedt ++ * ++ * - Added format output of fields of the trace point. ++ * This was based off of work by Tom Zanussi . ++ * ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include "trace_output.h" ++ ++#define TRACE_SYSTEM "TRACE_SYSTEM" ++ ++static DEFINE_MUTEX(event_mutex); ++ ++int trace_define_field(struct ftrace_event_call *call, char *type, ++ char *name, int offset, int size) ++{ ++ struct ftrace_event_field *field; ++ ++ field = kzalloc(sizeof(*field), GFP_KERNEL); ++ if (!field) ++ goto err; ++ ++ field->name = kstrdup(name, GFP_KERNEL); ++ if (!field->name) ++ goto err; ++ ++ field->type = kstrdup(type, GFP_KERNEL); ++ if (!field->type) ++ goto err; ++ ++ field->offset = offset; ++ field->size = size; ++ list_add(&field->link, &call->fields); ++ ++ return 0; ++ ++err: ++ if (field) { ++ kfree(field->name); ++ kfree(field->type); ++ } ++ kfree(field); ++ ++ return -ENOMEM; ++} ++ ++static void ftrace_clear_events(void) ++{ ++ struct ftrace_event_call *call = (void *)__start_ftrace_events; ++ ++ ++ while ((unsigned long)call < (unsigned long)__stop_ftrace_events) { ++ ++ if (call->enabled) { ++ call->enabled = 0; ++ call->unregfunc(); ++ } ++ call++; ++ } ++} ++ ++static void ftrace_event_enable_disable(struct ftrace_event_call *call, ++ int enable) ++{ ++ ++ switch (enable) { ++ case 0: ++ if (call->enabled) { ++ call->enabled = 0; ++ call->unregfunc(); ++ } ++ break; ++ case 1: ++ if (!call->enabled) { ++ call->enabled = 1; ++ call->regfunc(); ++ } ++ break; ++ } ++} ++ ++static int ftrace_set_clr_event(char *buf, int set) ++{ ++ struct ftrace_event_call *call = __start_ftrace_events; ++ char *event = NULL, *sub = NULL, *match; ++ int ret = -EINVAL; ++ ++ /* ++ * The buf format can be : ++ * *: means any event by that name. ++ * : is the same. ++ * ++ * :* means all events in that subsystem ++ * : means the same. ++ * ++ * (no ':') means all events in a subsystem with ++ * the name or any event that matches ++ */ ++ ++ match = strsep(&buf, ":"); ++ if (buf) { ++ sub = match; ++ event = buf; ++ match = NULL; ++ ++ if (!strlen(sub) || strcmp(sub, "*") == 0) ++ sub = NULL; ++ if (!strlen(event) || strcmp(event, "*") == 0) ++ event = NULL; ++ } ++ ++ mutex_lock(&event_mutex); ++ for_each_event(call) { ++ ++ if (!call->name || !call->regfunc) ++ continue; ++ ++ if (match && ++ strcmp(match, call->name) != 0 && ++ strcmp(match, call->system) != 0) ++ continue; ++ ++ if (sub && strcmp(sub, call->system) != 0) ++ continue; ++ ++ if (event && strcmp(event, call->name) != 0) ++ continue; ++ ++ ftrace_event_enable_disable(call, set); ++ ++ ret = 0; ++ } ++ mutex_unlock(&event_mutex); ++ ++ return ret; ++} ++ ++/* 128 should be much more than enough */ ++#define EVENT_BUF_SIZE 127 ++ ++static ssize_t ++ftrace_event_write(struct file *file, const char __user *ubuf, ++ size_t cnt, loff_t *ppos) ++{ ++ size_t read = 0; ++ int i, set = 1; ++ ssize_t ret; ++ char *buf; ++ char ch; ++ ++ if (!cnt || cnt < 0) ++ return 0; ++ ++ ret = tracing_update_buffers(); ++ if (ret < 0) ++ return ret; ++ ++ ret = get_user(ch, ubuf++); ++ if (ret) ++ return ret; ++ read++; ++ cnt--; ++ ++ /* skip white space */ ++ while (cnt && isspace(ch)) { ++ ret = get_user(ch, ubuf++); ++ if (ret) ++ return ret; ++ read++; ++ cnt--; ++ } ++ ++ /* Only white space found? 
*/ ++ if (isspace(ch)) { ++ file->f_pos += read; ++ ret = read; ++ return ret; ++ } ++ ++ buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL); ++ if (!buf) ++ return -ENOMEM; ++ ++ if (cnt > EVENT_BUF_SIZE) ++ cnt = EVENT_BUF_SIZE; ++ ++ i = 0; ++ while (cnt && !isspace(ch)) { ++ if (!i && ch == '!') ++ set = 0; ++ else ++ buf[i++] = ch; ++ ++ ret = get_user(ch, ubuf++); ++ if (ret) ++ goto out_free; ++ read++; ++ cnt--; ++ } ++ buf[i] = 0; ++ ++ file->f_pos += read; ++ ++ ret = ftrace_set_clr_event(buf, set); ++ if (ret) ++ goto out_free; ++ ++ ret = read; ++ ++ out_free: ++ kfree(buf); ++ ++ return ret; ++} ++ ++static void * ++t_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct ftrace_event_call *call = m->private; ++ struct ftrace_event_call *next = call; ++ ++ (*pos)++; ++ ++ for (;;) { ++ if ((unsigned long)call >= (unsigned long)__stop_ftrace_events) ++ return NULL; ++ ++ /* ++ * The ftrace subsystem is for showing formats only. ++ * They can not be enabled or disabled via the event files. ++ */ ++ if (call->regfunc) ++ break; ++ ++ call++; ++ next = call; ++ } ++ ++ m->private = ++next; ++ ++ return call; ++} ++ ++static void *t_start(struct seq_file *m, loff_t *pos) ++{ ++ return t_next(m, NULL, pos); ++} ++ ++static void * ++s_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ struct ftrace_event_call *call = m->private; ++ struct ftrace_event_call *next; ++ ++ (*pos)++; ++ ++ retry: ++ if ((unsigned long)call >= (unsigned long)__stop_ftrace_events) ++ return NULL; ++ ++ if (!call->enabled) { ++ call++; ++ goto retry; ++ } ++ ++ next = call; ++ m->private = ++next; ++ ++ return call; ++} ++ ++static void *s_start(struct seq_file *m, loff_t *pos) ++{ ++ return s_next(m, NULL, pos); ++} ++ ++static int t_show(struct seq_file *m, void *v) ++{ ++ struct ftrace_event_call *call = v; ++ ++ if (strcmp(call->system, TRACE_SYSTEM) != 0) ++ seq_printf(m, "%s:", call->system); ++ seq_printf(m, "%s\n", call->name); ++ ++ return 0; ++} ++ ++static void t_stop(struct seq_file *m, void *p) ++{ ++} ++ ++static int ++ftrace_event_seq_open(struct inode *inode, struct file *file) ++{ ++ int ret; ++ const struct seq_operations *seq_ops; ++ ++ if ((file->f_mode & FMODE_WRITE) && ++ !(file->f_flags & O_APPEND)) ++ ftrace_clear_events(); ++ ++ seq_ops = inode->i_private; ++ ret = seq_open(file, seq_ops); ++ if (!ret) { ++ struct seq_file *m = file->private_data; ++ ++ m->private = __start_ftrace_events; ++ } ++ return ret; ++} ++ ++static ssize_t ++event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, ++ loff_t *ppos) ++{ ++ struct ftrace_event_call *call = filp->private_data; ++ char *buf; ++ ++ if (call->enabled) ++ buf = "1\n"; ++ else ++ buf = "0\n"; ++ ++ return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2); ++} ++ ++static ssize_t ++event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, ++ loff_t *ppos) ++{ ++ struct ftrace_event_call *call = filp->private_data; ++ char buf[64]; ++ unsigned long val; ++ int ret; ++ ++ if (cnt >= sizeof(buf)) ++ return -EINVAL; ++ ++ if (copy_from_user(&buf, ubuf, cnt)) ++ return -EFAULT; ++ ++ buf[cnt] = 0; ++ ++ ret = strict_strtoul(buf, 10, &val); ++ if (ret < 0) ++ return ret; ++ ++ ret = tracing_update_buffers(); ++ if (ret < 0) ++ return ret; ++ ++ switch (val) { ++ case 0: ++ case 1: ++ mutex_lock(&event_mutex); ++ ftrace_event_enable_disable(call, val); ++ mutex_unlock(&event_mutex); ++ break; ++ ++ default: ++ return -EINVAL; ++ } ++ ++ *ppos += cnt; ++ ++ return cnt; ++} ++ ++#undef FIELD ++#define FIELD(type, 
name) \ ++ #type, "common_" #name, offsetof(typeof(field), name), \ ++ sizeof(field.name) ++ ++static int trace_write_header(struct trace_seq *s) ++{ ++ struct trace_entry field; ++ ++ /* struct trace_entry */ ++ return trace_seq_printf(s, ++ "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" ++ "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" ++ "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" ++ "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" ++ "\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n" ++ "\n", ++ FIELD(unsigned char, type), ++ FIELD(unsigned char, flags), ++ FIELD(unsigned char, preempt_count), ++ FIELD(int, pid), ++ FIELD(int, tgid)); ++} ++ ++static ssize_t ++event_format_read(struct file *filp, char __user *ubuf, size_t cnt, ++ loff_t *ppos) ++{ ++ struct ftrace_event_call *call = filp->private_data; ++ struct trace_seq *s; ++ char *buf; ++ int r; ++ ++ if (*ppos) ++ return 0; ++ ++ s = kmalloc(sizeof(*s), GFP_KERNEL); ++ if (!s) ++ return -ENOMEM; ++ ++ trace_seq_init(s); ++ ++ /* If any of the first writes fail, so will the show_format. */ ++ ++ trace_seq_printf(s, "name: %s\n", call->name); ++ trace_seq_printf(s, "ID: %d\n", call->id); ++ trace_seq_printf(s, "format:\n"); ++ trace_write_header(s); ++ ++ r = call->show_format(s); ++ if (!r) { ++ /* ++ * ug! The format output is bigger than a PAGE!! ++ */ ++ buf = "FORMAT TOO BIG\n"; ++ r = simple_read_from_buffer(ubuf, cnt, ppos, ++ buf, strlen(buf)); ++ goto out; ++ } ++ ++ r = simple_read_from_buffer(ubuf, cnt, ppos, ++ s->buffer, s->len); ++ out: ++ kfree(s); ++ return r; ++} ++ ++static ssize_t ++event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos) ++{ ++ struct ftrace_event_call *call = filp->private_data; ++ struct trace_seq *s; ++ int r; ++ ++ if (*ppos) ++ return 0; ++ ++ s = kmalloc(sizeof(*s), GFP_KERNEL); ++ if (!s) ++ return -ENOMEM; ++ ++ trace_seq_init(s); ++ trace_seq_printf(s, "%d\n", call->id); ++ ++ r = simple_read_from_buffer(ubuf, cnt, ppos, ++ s->buffer, s->len); ++ kfree(s); ++ return r; ++} ++ ++static ssize_t ++event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, ++ loff_t *ppos) ++{ ++ struct ftrace_event_call *call = filp->private_data; ++ struct trace_seq *s; ++ int r; ++ ++ if (*ppos) ++ return 0; ++ ++ s = kmalloc(sizeof(*s), GFP_KERNEL); ++ if (!s) ++ return -ENOMEM; ++ ++ trace_seq_init(s); ++ ++ filter_print_preds(call->preds, s); ++ r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); ++ ++ kfree(s); ++ ++ return r; ++} ++ ++static ssize_t ++event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, ++ loff_t *ppos) ++{ ++ struct ftrace_event_call *call = filp->private_data; ++ char buf[64], *pbuf = buf; ++ struct filter_pred *pred; ++ int err; ++ ++ if (cnt >= sizeof(buf)) ++ return -EINVAL; ++ ++ if (copy_from_user(&buf, ubuf, cnt)) ++ return -EFAULT; ++ ++ pred = kzalloc(sizeof(*pred), GFP_KERNEL); ++ if (!pred) ++ return -ENOMEM; ++ ++ err = filter_parse(&pbuf, pred); ++ if (err < 0) { ++ filter_free_pred(pred); ++ return err; ++ } ++ ++ if (pred->clear) { ++ filter_free_preds(call); ++ filter_free_pred(pred); ++ return cnt; ++ } ++ ++ if (filter_add_pred(call, pred)) { ++ filter_free_pred(pred); ++ return -EINVAL; ++ } ++ ++ *ppos += cnt; ++ ++ return cnt; ++} ++ ++static ssize_t ++subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt, ++ loff_t *ppos) ++{ ++ struct event_subsystem *system = filp->private_data; ++ struct trace_seq *s; ++ int r; ++ ++ if (*ppos) ++ return 0; ++ ++ s = kmalloc(sizeof(*s), GFP_KERNEL); ++ if (!s) ++ 
return -ENOMEM; ++ ++ trace_seq_init(s); ++ ++ filter_print_preds(system->preds, s); ++ r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len); ++ ++ kfree(s); ++ ++ return r; ++} ++ ++static ssize_t ++subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, ++ loff_t *ppos) ++{ ++ struct event_subsystem *system = filp->private_data; ++ char buf[64], *pbuf = buf; ++ struct filter_pred *pred; ++ int err; ++ ++ if (cnt >= sizeof(buf)) ++ return -EINVAL; ++ ++ if (copy_from_user(&buf, ubuf, cnt)) ++ return -EFAULT; ++ ++ pred = kzalloc(sizeof(*pred), GFP_KERNEL); ++ if (!pred) ++ return -ENOMEM; ++ ++ err = filter_parse(&pbuf, pred); ++ if (err < 0) { ++ filter_free_pred(pred); ++ return err; ++ } ++ ++ if (pred->clear) { ++ filter_free_subsystem_preds(system); ++ filter_free_pred(pred); ++ return cnt; ++ } ++ ++ if (filter_add_subsystem_pred(system, pred)) { ++ filter_free_subsystem_preds(system); ++ filter_free_pred(pred); ++ return -EINVAL; ++ } ++ ++ *ppos += cnt; ++ ++ return cnt; ++} ++ ++static const struct seq_operations show_event_seq_ops = { ++ .start = t_start, ++ .next = t_next, ++ .show = t_show, ++ .stop = t_stop, ++}; ++ ++static const struct seq_operations show_set_event_seq_ops = { ++ .start = s_start, ++ .next = s_next, ++ .show = t_show, ++ .stop = t_stop, ++}; ++ ++static const struct file_operations ftrace_avail_fops = { ++ .open = ftrace_event_seq_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++static const struct file_operations ftrace_set_event_fops = { ++ .open = ftrace_event_seq_open, ++ .read = seq_read, ++ .write = ftrace_event_write, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++static const struct file_operations ftrace_enable_fops = { ++ .open = tracing_open_generic, ++ .read = event_enable_read, ++ .write = event_enable_write, ++}; ++ ++static const struct file_operations ftrace_event_format_fops = { ++ .open = tracing_open_generic, ++ .read = event_format_read, ++}; ++ ++static const struct file_operations ftrace_event_id_fops = { ++ .open = tracing_open_generic, ++ .read = event_id_read, ++}; ++ ++static const struct file_operations ftrace_event_filter_fops = { ++ .open = tracing_open_generic, ++ .read = event_filter_read, ++ .write = event_filter_write, ++}; ++ ++static const struct file_operations ftrace_subsystem_filter_fops = { ++ .open = tracing_open_generic, ++ .read = subsystem_filter_read, ++ .write = subsystem_filter_write, ++}; ++ ++static struct dentry *event_trace_events_dir(void) ++{ ++ static struct dentry *d_tracer; ++ static struct dentry *d_events; ++ ++ if (d_events) ++ return d_events; ++ ++ d_tracer = tracing_init_dentry(); ++ if (!d_tracer) ++ return NULL; ++ ++ d_events = debugfs_create_dir("events", d_tracer); ++ if (!d_events) ++ pr_warning("Could not create debugfs " ++ "'events' directory\n"); ++ ++ return d_events; ++} ++ ++static LIST_HEAD(event_subsystems); ++ ++static struct dentry * ++event_subsystem_dir(const char *name, struct dentry *d_events) ++{ ++ struct event_subsystem *system; ++ struct dentry *entry; ++ ++ /* First see if we did not already create this dir */ ++ list_for_each_entry(system, &event_subsystems, list) { ++ if (strcmp(system->name, name) == 0) ++ return system->entry; ++ } ++ ++ /* need to create new entry */ ++ system = kmalloc(sizeof(*system), GFP_KERNEL); ++ if (!system) { ++ pr_warning("No memory to create event subsystem %s\n", ++ name); ++ return d_events; ++ } ++ ++ system->entry = debugfs_create_dir(name, d_events); 
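
The per-event and per-subsystem "filter" files wired up in this hunk accept the small grammar that filter_parse() implements: "field == value" or "field != value", optionally chained with "&&" / "||", and a bare "0" to clear every predicate; the sibling "enable" file switches the event on and off. The following is only a rough userspace sketch of driving that interface, not part of the patch itself: the debugfs mount point and the sched/sched_switch event with its next_pid field are assumed examples.

/*
 * Userspace sketch only: write a predicate to an event's "filter" file
 * and turn the event on via "enable".  The debugfs path and the
 * sched:sched_switch / next_pid names are assumptions for illustration.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_str(const char *path, const char *val)
{
        int fd = open(path, O_WRONLY);
        ssize_t n;

        if (fd < 0) {
                perror(path);
                return -1;
        }
        n = write(fd, val, strlen(val));
        close(fd);
        return n < 0 ? -1 : 0;
}

int main(void)
{
        const char *ev = "/sys/kernel/debug/tracing/events/sched/sched_switch";
        char path[256];

        /* filter_parse() grammar: "field ==/!= value", chained with && or || */
        snprintf(path, sizeof(path), "%s/filter", ev);
        if (write_str(path, "next_pid == 1\n"))
                return 1;

        /* writing "0" instead would set pred->clear and drop all predicates */

        snprintf(path, sizeof(path), "%s/enable", ev);
        return write_str(path, "1\n") ? 1 : 0;
}
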
++ if (!system->entry) { ++ pr_warning("Could not create event subsystem %s\n", ++ name); ++ kfree(system); ++ return d_events; ++ } ++ ++ system->name = name; ++ list_add(&system->list, &event_subsystems); ++ ++ system->preds = NULL; ++ ++ entry = debugfs_create_file("filter", 0644, system->entry, system, ++ &ftrace_subsystem_filter_fops); ++ if (!entry) ++ pr_warning("Could not create debugfs " ++ "'%s/filter' entry\n", name); ++ ++ return system->entry; ++} ++ ++static int ++event_create_dir(struct ftrace_event_call *call, struct dentry *d_events) ++{ ++ struct dentry *entry; ++ int ret; ++ ++ /* ++ * If the trace point header did not define TRACE_SYSTEM ++ * then the system would be called "TRACE_SYSTEM". ++ */ ++ if (strcmp(call->system, "TRACE_SYSTEM") != 0) ++ d_events = event_subsystem_dir(call->system, d_events); ++ ++ if (call->raw_init) { ++ ret = call->raw_init(); ++ if (ret < 0) { ++ pr_warning("Could not initialize trace point" ++ " events/%s\n", call->name); ++ return ret; ++ } ++ } ++ ++ call->dir = debugfs_create_dir(call->name, d_events); ++ if (!call->dir) { ++ pr_warning("Could not create debugfs " ++ "'%s' directory\n", call->name); ++ return -1; ++ } ++ ++ if (call->regfunc) { ++ entry = debugfs_create_file("enable", 0644, call->dir, call, ++ &ftrace_enable_fops); ++ if (!entry) ++ pr_warning("Could not create debugfs " ++ "'%s/enable' entry\n", call->name); ++ } ++ ++ if (call->id) { ++ entry = debugfs_create_file("id", 0444, call->dir, call, ++ &ftrace_event_id_fops); ++ if (!entry) ++ pr_warning("Could not create debugfs '%s/id' entry\n", ++ call->name); ++ } ++ ++ if (call->define_fields) { ++ ret = call->define_fields(); ++ if (ret < 0) { ++ pr_warning("Could not initialize trace point" ++ " events/%s\n", call->name); ++ return ret; ++ } ++ } ++ ++ entry = debugfs_create_file("filter", 0644, call->dir, call, ++ &ftrace_event_filter_fops); ++ if (!entry) ++ pr_warning("Could not create debugfs " ++ "'%s/filter' entry\n", call->name); ++ ++ /* A trace may not want to export its format */ ++ if (!call->show_format) ++ return 0; ++ ++ entry = debugfs_create_file("format", 0444, call->dir, call, ++ &ftrace_event_format_fops); ++ if (!entry) ++ pr_warning("Could not create debugfs " ++ "'%s/format' entry\n", call->name); ++ ++ return 0; ++} ++ ++static __init int event_trace_init(void) ++{ ++ struct ftrace_event_call *call = __start_ftrace_events; ++ struct dentry *d_tracer; ++ struct dentry *entry; ++ struct dentry *d_events; ++ ++ d_tracer = tracing_init_dentry(); ++ if (!d_tracer) ++ return 0; ++ ++ entry = debugfs_create_file("available_events", 0444, d_tracer, ++ (void *)&show_event_seq_ops, ++ &ftrace_avail_fops); ++ if (!entry) ++ pr_warning("Could not create debugfs " ++ "'available_events' entry\n"); ++ ++ entry = debugfs_create_file("set_event", 0644, d_tracer, ++ (void *)&show_set_event_seq_ops, ++ &ftrace_set_event_fops); ++ if (!entry) ++ pr_warning("Could not create debugfs " ++ "'set_event' entry\n"); ++ ++ d_events = event_trace_events_dir(); ++ if (!d_events) ++ return 0; ++ ++ for_each_event(call) { ++ /* The linker may leave blanks */ ++ if (!call->name) ++ continue; ++ event_create_dir(call, d_events); ++ } ++ ++ return 0; ++} ++fs_initcall(event_trace_init); +Index: linux-2.6-tip/kernel/trace/trace_events_filter.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/trace_events_filter.c +@@ -0,0 +1,427 @@ ++/* ++ * trace_events_filter - generic event filtering ++ * ++ * This program is 
free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ++ * ++ * Copyright (C) 2009 Tom Zanussi ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include "trace.h" ++#include "trace_output.h" ++ ++static int filter_pred_64(struct filter_pred *pred, void *event) ++{ ++ u64 *addr = (u64 *)(event + pred->offset); ++ u64 val = (u64)pred->val; ++ int match; ++ ++ match = (val == *addr) ^ pred->not; ++ ++ return match; ++} ++ ++static int filter_pred_32(struct filter_pred *pred, void *event) ++{ ++ u32 *addr = (u32 *)(event + pred->offset); ++ u32 val = (u32)pred->val; ++ int match; ++ ++ match = (val == *addr) ^ pred->not; ++ ++ return match; ++} ++ ++static int filter_pred_16(struct filter_pred *pred, void *event) ++{ ++ u16 *addr = (u16 *)(event + pred->offset); ++ u16 val = (u16)pred->val; ++ int match; ++ ++ match = (val == *addr) ^ pred->not; ++ ++ return match; ++} ++ ++static int filter_pred_8(struct filter_pred *pred, void *event) ++{ ++ u8 *addr = (u8 *)(event + pred->offset); ++ u8 val = (u8)pred->val; ++ int match; ++ ++ match = (val == *addr) ^ pred->not; ++ ++ return match; ++} ++ ++static int filter_pred_string(struct filter_pred *pred, void *event) ++{ ++ char *addr = (char *)(event + pred->offset); ++ int cmp, match; ++ ++ cmp = strncmp(addr, pred->str_val, pred->str_len); ++ ++ match = (!cmp) ^ pred->not; ++ ++ return match; ++} ++ ++/* return 1 if event matches, 0 otherwise (discard) */ ++int filter_match_preds(struct ftrace_event_call *call, void *rec) ++{ ++ int i, matched, and_failed = 0; ++ struct filter_pred *pred; ++ ++ for (i = 0; i < MAX_FILTER_PRED; i++) { ++ if (call->preds[i]) { ++ pred = call->preds[i]; ++ if (and_failed && !pred->or) ++ continue; ++ matched = pred->fn(pred, rec); ++ if (!matched && !pred->or) { ++ and_failed = 1; ++ continue; ++ } else if (matched && pred->or) ++ return 1; ++ } else ++ break; ++ } ++ ++ if (and_failed) ++ return 0; ++ ++ return 1; ++} ++ ++void filter_print_preds(struct filter_pred **preds, struct trace_seq *s) ++{ ++ char *field_name; ++ struct filter_pred *pred; ++ int i; ++ ++ if (!preds) { ++ trace_seq_printf(s, "none\n"); ++ return; ++ } ++ ++ for (i = 0; i < MAX_FILTER_PRED; i++) { ++ if (preds[i]) { ++ pred = preds[i]; ++ field_name = pred->field_name; ++ if (i) ++ trace_seq_printf(s, pred->or ? "|| " : "&& "); ++ trace_seq_printf(s, "%s ", field_name); ++ trace_seq_printf(s, pred->not ? 
"!= " : "== "); ++ if (pred->str_val) ++ trace_seq_printf(s, "%s\n", pred->str_val); ++ else ++ trace_seq_printf(s, "%llu\n", pred->val); ++ } else ++ break; ++ } ++} ++ ++static struct ftrace_event_field * ++find_event_field(struct ftrace_event_call *call, char *name) ++{ ++ struct ftrace_event_field *field; ++ ++ list_for_each_entry(field, &call->fields, link) { ++ if (!strcmp(field->name, name)) ++ return field; ++ } ++ ++ return NULL; ++} ++ ++void filter_free_pred(struct filter_pred *pred) ++{ ++ if (!pred) ++ return; ++ ++ kfree(pred->field_name); ++ kfree(pred->str_val); ++ kfree(pred); ++} ++ ++void filter_free_preds(struct ftrace_event_call *call) ++{ ++ int i; ++ ++ if (call->preds) { ++ for (i = 0; i < MAX_FILTER_PRED; i++) ++ filter_free_pred(call->preds[i]); ++ kfree(call->preds); ++ call->preds = NULL; ++ } ++} ++ ++void filter_free_subsystem_preds(struct event_subsystem *system) ++{ ++ struct ftrace_event_call *call = __start_ftrace_events; ++ int i; ++ ++ if (system->preds) { ++ for (i = 0; i < MAX_FILTER_PRED; i++) ++ filter_free_pred(system->preds[i]); ++ kfree(system->preds); ++ system->preds = NULL; ++ } ++ ++ events_for_each(call) { ++ if (!call->name || !call->regfunc) ++ continue; ++ ++ if (!strcmp(call->system, system->name)) ++ filter_free_preds(call); ++ } ++} ++ ++static int __filter_add_pred(struct ftrace_event_call *call, ++ struct filter_pred *pred) ++{ ++ int i; ++ ++ if (call->preds && !pred->compound) ++ filter_free_preds(call); ++ ++ if (!call->preds) { ++ call->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), ++ GFP_KERNEL); ++ if (!call->preds) ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < MAX_FILTER_PRED; i++) { ++ if (!call->preds[i]) { ++ call->preds[i] = pred; ++ return 0; ++ } ++ } ++ ++ return -ENOMEM; ++} ++ ++static int is_string_field(const char *type) ++{ ++ if (strchr(type, '[') && strstr(type, "char")) ++ return 1; ++ ++ return 0; ++} ++ ++int filter_add_pred(struct ftrace_event_call *call, struct filter_pred *pred) ++{ ++ struct ftrace_event_field *field; ++ ++ field = find_event_field(call, pred->field_name); ++ if (!field) ++ return -EINVAL; ++ ++ pred->offset = field->offset; ++ ++ if (is_string_field(field->type)) { ++ if (!pred->str_val) ++ return -EINVAL; ++ pred->fn = filter_pred_string; ++ pred->str_len = field->size; ++ return __filter_add_pred(call, pred); ++ } else { ++ if (pred->str_val) ++ return -EINVAL; ++ } ++ ++ switch (field->size) { ++ case 8: ++ pred->fn = filter_pred_64; ++ break; ++ case 4: ++ pred->fn = filter_pred_32; ++ break; ++ case 2: ++ pred->fn = filter_pred_16; ++ break; ++ case 1: ++ pred->fn = filter_pred_8; ++ break; ++ default: ++ return -EINVAL; ++ } ++ ++ return __filter_add_pred(call, pred); ++} ++ ++static struct filter_pred *copy_pred(struct filter_pred *pred) ++{ ++ struct filter_pred *new_pred = kmalloc(sizeof(*pred), GFP_KERNEL); ++ if (!new_pred) ++ return NULL; ++ ++ memcpy(new_pred, pred, sizeof(*pred)); ++ ++ if (pred->field_name) { ++ new_pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); ++ if (!new_pred->field_name) { ++ kfree(new_pred); ++ return NULL; ++ } ++ } ++ ++ if (pred->str_val) { ++ new_pred->str_val = kstrdup(pred->str_val, GFP_KERNEL); ++ if (!new_pred->str_val) { ++ filter_free_pred(new_pred); ++ return NULL; ++ } ++ } ++ ++ return new_pred; ++} ++ ++int filter_add_subsystem_pred(struct event_subsystem *system, ++ struct filter_pred *pred) ++{ ++ struct ftrace_event_call *call = __start_ftrace_events; ++ struct filter_pred *event_pred; ++ int i; ++ ++ if (system->preds && 
!pred->compound) ++ filter_free_subsystem_preds(system); ++ ++ if (!system->preds) { ++ system->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), ++ GFP_KERNEL); ++ if (!system->preds) ++ return -ENOMEM; ++ } ++ ++ for (i = 0; i < MAX_FILTER_PRED; i++) { ++ if (!system->preds[i]) { ++ system->preds[i] = pred; ++ break; ++ } ++ } ++ ++ if (i == MAX_FILTER_PRED) ++ return -EINVAL; ++ ++ events_for_each(call) { ++ int err; ++ ++ if (!call->name || !call->regfunc) ++ continue; ++ ++ if (strcmp(call->system, system->name)) ++ continue; ++ ++ if (!find_event_field(call, pred->field_name)) ++ continue; ++ ++ event_pred = copy_pred(pred); ++ if (!event_pred) ++ goto oom; ++ ++ err = filter_add_pred(call, event_pred); ++ if (err) ++ filter_free_pred(event_pred); ++ if (err == -ENOMEM) ++ goto oom; ++ } ++ ++ return 0; ++ ++oom: ++ system->preds[i] = NULL; ++ return -ENOMEM; ++} ++ ++int filter_parse(char **pbuf, struct filter_pred *pred) ++{ ++ char *tmp, *tok, *val_str = NULL; ++ int tok_n = 0; ++ ++ /* field ==/!= number, or/and field ==/!= number, number */ ++ while ((tok = strsep(pbuf, " \n"))) { ++ if (tok_n == 0) { ++ if (!strcmp(tok, "0")) { ++ pred->clear = 1; ++ return 0; ++ } else if (!strcmp(tok, "&&")) { ++ pred->or = 0; ++ pred->compound = 1; ++ } else if (!strcmp(tok, "||")) { ++ pred->or = 1; ++ pred->compound = 1; ++ } else ++ pred->field_name = tok; ++ tok_n = 1; ++ continue; ++ } ++ if (tok_n == 1) { ++ if (!pred->field_name) ++ pred->field_name = tok; ++ else if (!strcmp(tok, "!=")) ++ pred->not = 1; ++ else if (!strcmp(tok, "==")) ++ pred->not = 0; ++ else { ++ pred->field_name = NULL; ++ return -EINVAL; ++ } ++ tok_n = 2; ++ continue; ++ } ++ if (tok_n == 2) { ++ if (pred->compound) { ++ if (!strcmp(tok, "!=")) ++ pred->not = 1; ++ else if (!strcmp(tok, "==")) ++ pred->not = 0; ++ else { ++ pred->field_name = NULL; ++ return -EINVAL; ++ } ++ } else { ++ val_str = tok; ++ break; /* done */ ++ } ++ tok_n = 3; ++ continue; ++ } ++ if (tok_n == 3) { ++ val_str = tok; ++ break; /* done */ ++ } ++ } ++ ++ pred->field_name = kstrdup(pred->field_name, GFP_KERNEL); ++ if (!pred->field_name) ++ return -ENOMEM; ++ ++ pred->val = simple_strtoull(val_str, &tmp, 10); ++ if (tmp == val_str) { ++ pred->str_val = kstrdup(val_str, GFP_KERNEL); ++ if (!pred->str_val) ++ return -ENOMEM; ++ } ++ ++ return 0; ++} ++ ++ +Index: linux-2.6-tip/kernel/trace/trace_events_stage_1.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/trace_events_stage_1.h +@@ -0,0 +1,39 @@ ++/* ++ * Stage 1 of the trace events. ++ * ++ * Override the macros in to include the following: ++ * ++ * struct ftrace_raw_ { ++ * struct trace_entry ent; ++ * ; ++ * []; ++ * [...] ++ * }; ++ * ++ * The is created by the __field(type, item) macro or ++ * the __array(type2, item2, len) macro. ++ * We simply do "type item;", and that will create the fields ++ * in the structure. ++ */ ++ ++#undef TRACE_FORMAT ++#define TRACE_FORMAT(call, proto, args, fmt) ++ ++#undef __array ++#define __array(type, item, len) type item[len]; ++ ++#undef __field ++#define __field(type, item) type item; ++ ++#undef TP_STRUCT__entry ++#define TP_STRUCT__entry(args...) 
args ++ ++#undef TRACE_EVENT ++#define TRACE_EVENT(name, proto, args, tstruct, assign, print) \ ++ struct ftrace_raw_##name { \ ++ struct trace_entry ent; \ ++ tstruct \ ++ }; \ ++ static struct ftrace_event_call event_##name ++ ++#include +Index: linux-2.6-tip/kernel/trace/trace_events_stage_2.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/trace_events_stage_2.h +@@ -0,0 +1,176 @@ ++/* ++ * Stage 2 of the trace events. ++ * ++ * Override the macros in to include the following: ++ * ++ * enum print_line_t ++ * ftrace_raw_output_(struct trace_iterator *iter, int flags) ++ * { ++ * struct trace_seq *s = &iter->seq; ++ * struct ftrace_raw_ *field; <-- defined in stage 1 ++ * struct trace_entry *entry; ++ * int ret; ++ * ++ * entry = iter->ent; ++ * ++ * if (entry->type != event_.id) { ++ * WARN_ON_ONCE(1); ++ * return TRACE_TYPE_UNHANDLED; ++ * } ++ * ++ * field = (typeof(field))entry; ++ * ++ * ret = trace_seq_printf(s, "\n"); ++ * if (!ret) ++ * return TRACE_TYPE_PARTIAL_LINE; ++ * ++ * return TRACE_TYPE_HANDLED; ++ * } ++ * ++ * This is the method used to print the raw event to the trace ++ * output format. Note, this is not needed if the data is read ++ * in binary. ++ */ ++ ++#undef __entry ++#define __entry field ++ ++#undef TP_printk ++#define TP_printk(fmt, args...) fmt "\n", args ++ ++#undef TRACE_EVENT ++#define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ ++enum print_line_t \ ++ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ ++{ \ ++ struct trace_seq *s = &iter->seq; \ ++ struct ftrace_raw_##call *field; \ ++ struct trace_entry *entry; \ ++ int ret; \ ++ \ ++ entry = iter->ent; \ ++ \ ++ if (entry->type != event_##call.id) { \ ++ WARN_ON_ONCE(1); \ ++ return TRACE_TYPE_UNHANDLED; \ ++ } \ ++ \ ++ field = (typeof(field))entry; \ ++ \ ++ ret = trace_seq_printf(s, #call ": " print); \ ++ if (!ret) \ ++ return TRACE_TYPE_PARTIAL_LINE; \ ++ \ ++ return TRACE_TYPE_HANDLED; \ ++} ++ ++#include ++ ++/* ++ * Setup the showing format of trace point. ++ * ++ * int ++ * ftrace_format_##call(struct trace_seq *s) ++ * { ++ * struct ftrace_raw_##call field; ++ * int ret; ++ * ++ * ret = trace_seq_printf(s, #type " " #item ";" ++ * " offset:%u; size:%u;\n", ++ * offsetof(struct ftrace_raw_##call, item), ++ * sizeof(field.type)); ++ * ++ * } ++ */ ++ ++#undef TP_STRUCT__entry ++#define TP_STRUCT__entry(args...) args ++ ++#undef __field ++#define __field(type, item) \ ++ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ ++ "offset:%u;\tsize:%u;\n", \ ++ (unsigned int)offsetof(typeof(field), item), \ ++ (unsigned int)sizeof(field.item)); \ ++ if (!ret) \ ++ return 0; ++ ++#undef __array ++#define __array(type, item, len) \ ++ ret = trace_seq_printf(s, "\tfield:" #type " " #item "[" #len "];\t" \ ++ "offset:%u;\tsize:%u;\n", \ ++ (unsigned int)offsetof(typeof(field), item), \ ++ (unsigned int)sizeof(field.item)); \ ++ if (!ret) \ ++ return 0; ++ ++#undef __entry ++#define __entry "REC" ++ ++#undef TP_printk ++#define TP_printk(fmt, args...) "%s, %s\n", #fmt, #args ++ ++#undef TP_fast_assign ++#define TP_fast_assign(args...) 
args ++ ++#undef TRACE_EVENT ++#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ ++static int \ ++ftrace_format_##call(struct trace_seq *s) \ ++{ \ ++ struct ftrace_raw_##call field; \ ++ int ret; \ ++ \ ++ tstruct; \ ++ \ ++ trace_seq_printf(s, "\nprint fmt: " print); \ ++ \ ++ return ret; \ ++} ++ ++#include ++ ++#undef __field ++#define __field(type, item) \ ++ ret = trace_define_field(event_call, #type, #item, \ ++ offsetof(typeof(field), item), \ ++ sizeof(field.item)); \ ++ if (ret) \ ++ return ret; ++ ++#undef __array ++#define __array(type, item, len) \ ++ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ ++ offsetof(typeof(field), item), \ ++ sizeof(field.item)); \ ++ if (ret) \ ++ return ret; ++ ++#define __common_field(type, item) \ ++ ret = trace_define_field(event_call, #type, "common_" #item, \ ++ offsetof(typeof(field.ent), item), \ ++ sizeof(field.ent.item)); \ ++ if (ret) \ ++ return ret; ++ ++#undef TRACE_EVENT ++#define TRACE_EVENT(call, proto, args, tstruct, func, print) \ ++int \ ++ftrace_define_fields_##call(void) \ ++{ \ ++ struct ftrace_raw_##call field; \ ++ struct ftrace_event_call *event_call = &event_##call; \ ++ int ret; \ ++ \ ++ __common_field(unsigned char, type); \ ++ __common_field(unsigned char, flags); \ ++ __common_field(unsigned char, preempt_count); \ ++ __common_field(int, pid); \ ++ __common_field(int, tgid); \ ++ \ ++ tstruct; \ ++ \ ++ return ret; \ ++} ++ ++#include +Index: linux-2.6-tip/kernel/trace/trace_events_stage_3.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/trace_events_stage_3.h +@@ -0,0 +1,281 @@ ++/* ++ * Stage 3 of the trace events. ++ * ++ * Override the macros in to include the following: ++ * ++ * static void ftrace_event_(proto) ++ * { ++ * event_trace_printk(_RET_IP_, ": " ); ++ * } ++ * ++ * static int ftrace_reg_event_(void) ++ * { ++ * int ret; ++ * ++ * ret = register_trace_(ftrace_event_); ++ * if (!ret) ++ * pr_info("event trace: Could not activate trace point " ++ * "probe to "); ++ * return ret; ++ * } ++ * ++ * static void ftrace_unreg_event_(void) ++ * { ++ * unregister_trace_(ftrace_event_); ++ * } ++ * ++ * For those macros defined with TRACE_FORMAT: ++ * ++ * static struct ftrace_event_call __used ++ * __attribute__((__aligned__(4))) ++ * __attribute__((section("_ftrace_events"))) event_ = { ++ * .name = "", ++ * .regfunc = ftrace_reg_event_, ++ * .unregfunc = ftrace_unreg_event_, ++ * } ++ * ++ * ++ * For those macros defined with TRACE_EVENT: ++ * ++ * static struct ftrace_event_call event_; ++ * ++ * static void ftrace_raw_event_(proto) ++ * { ++ * struct ring_buffer_event *event; ++ * struct ftrace_raw_ *entry; <-- defined in stage 1 ++ * unsigned long irq_flags; ++ * int pc; ++ * ++ * local_save_flags(irq_flags); ++ * pc = preempt_count(); ++ * ++ * event = trace_current_buffer_lock_reserve(event_.id, ++ * sizeof(struct ftrace_raw_), ++ * irq_flags, pc); ++ * if (!event) ++ * return; ++ * entry = ring_buffer_event_data(event); ++ * ++ * ; <-- Here we assign the entries by the __field and ++ * __array macros. 
++ * ++ * trace_current_buffer_unlock_commit(event, irq_flags, pc); ++ * } ++ * ++ * static int ftrace_raw_reg_event_(void) ++ * { ++ * int ret; ++ * ++ * ret = register_trace_(ftrace_raw_event_); ++ * if (!ret) ++ * pr_info("event trace: Could not activate trace point " ++ * "probe to "); ++ * return ret; ++ * } ++ * ++ * static void ftrace_unreg_event_(void) ++ * { ++ * unregister_trace_(ftrace_raw_event_); ++ * } ++ * ++ * static struct trace_event ftrace_event_type_ = { ++ * .trace = ftrace_raw_output_, <-- stage 2 ++ * }; ++ * ++ * static int ftrace_raw_init_event_(void) ++ * { ++ * int id; ++ * ++ * id = register_ftrace_event(&ftrace_event_type_); ++ * if (!id) ++ * return -ENODEV; ++ * event_.id = id; ++ * return 0; ++ * } ++ * ++ * static struct ftrace_event_call __used ++ * __attribute__((__aligned__(4))) ++ * __attribute__((section("_ftrace_events"))) event_ = { ++ * .name = "", ++ * .system = "", ++ * .raw_init = ftrace_raw_init_event_, ++ * .regfunc = ftrace_reg_event_, ++ * .unregfunc = ftrace_unreg_event_, ++ * .show_format = ftrace_format_, ++ * } ++ * ++ */ ++ ++#undef TP_FMT ++#define TP_FMT(fmt, args...) fmt "\n", ##args ++ ++#ifdef CONFIG_EVENT_PROFILE ++#define _TRACE_PROFILE(call, proto, args) \ ++static void ftrace_profile_##call(proto) \ ++{ \ ++ extern void perf_tpcounter_event(int); \ ++ perf_tpcounter_event(event_##call.id); \ ++} \ ++ \ ++static int ftrace_profile_enable_##call(struct ftrace_event_call *call) \ ++{ \ ++ int ret = 0; \ ++ \ ++ if (!atomic_inc_return(&call->profile_count)) \ ++ ret = register_trace_##call(ftrace_profile_##call); \ ++ \ ++ return ret; \ ++} \ ++ \ ++static void ftrace_profile_disable_##call(struct ftrace_event_call *call) \ ++{ \ ++ if (atomic_add_negative(-1, &call->profile_count)) \ ++ unregister_trace_##call(ftrace_profile_##call); \ ++} ++ ++#define _TRACE_PROFILE_INIT(call) \ ++ .profile_count = ATOMIC_INIT(-1), \ ++ .profile_enable = ftrace_profile_enable_##call, \ ++ .profile_disable = ftrace_profile_disable_##call, ++ ++#else ++#define _TRACE_PROFILE(call, proto, args) ++#define _TRACE_PROFILE_INIT(call) ++#endif ++ ++#define _TRACE_FORMAT(call, proto, args, fmt) \ ++static void ftrace_event_##call(proto) \ ++{ \ ++ event_trace_printk(_RET_IP_, #call ": " fmt); \ ++} \ ++ \ ++static int ftrace_reg_event_##call(void) \ ++{ \ ++ int ret; \ ++ \ ++ ret = register_trace_##call(ftrace_event_##call); \ ++ if (ret) \ ++ pr_info("event trace: Could not activate trace point " \ ++ "probe to " #call "\n"); \ ++ return ret; \ ++} \ ++ \ ++static void ftrace_unreg_event_##call(void) \ ++{ \ ++ unregister_trace_##call(ftrace_event_##call); \ ++} \ ++ \ ++static struct ftrace_event_call event_##call; \ ++ \ ++static int ftrace_init_event_##call(void) \ ++{ \ ++ int id; \ ++ \ ++ id = register_ftrace_event(NULL); \ ++ if (!id) \ ++ return -ENODEV; \ ++ event_##call.id = id; \ ++ return 0; \ ++} ++ ++#undef TRACE_FORMAT ++#define TRACE_FORMAT(call, proto, args, fmt) \ ++_TRACE_FORMAT(call, PARAMS(proto), PARAMS(args), PARAMS(fmt)) \ ++_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ ++static struct ftrace_event_call __used \ ++__attribute__((__aligned__(4))) \ ++__attribute__((section("_ftrace_events"))) event_##call = { \ ++ .name = #call, \ ++ .system = __stringify(TRACE_SYSTEM), \ ++ .raw_init = ftrace_init_event_##call, \ ++ .regfunc = ftrace_reg_event_##call, \ ++ .unregfunc = ftrace_unreg_event_##call, \ ++ _TRACE_PROFILE_INIT(call) \ ++} ++ ++#undef __entry ++#define __entry entry ++ ++#undef TRACE_EVENT ++#define 
TRACE_EVENT(call, proto, args, tstruct, assign, print) \ ++_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \ ++ \ ++static struct ftrace_event_call event_##call; \ ++ \ ++static void ftrace_raw_event_##call(proto) \ ++{ \ ++ struct ftrace_event_call *call = &event_##call; \ ++ struct ring_buffer_event *event; \ ++ struct ftrace_raw_##call *entry; \ ++ unsigned long irq_flags; \ ++ int pc; \ ++ \ ++ local_save_flags(irq_flags); \ ++ pc = preempt_count(); \ ++ \ ++ event = trace_current_buffer_lock_reserve(event_##call.id, \ ++ sizeof(struct ftrace_raw_##call), \ ++ irq_flags, pc); \ ++ if (!event) \ ++ return; \ ++ entry = ring_buffer_event_data(event); \ ++ \ ++ assign; \ ++ \ ++ if (call->preds && !filter_match_preds(call, entry)) \ ++ ring_buffer_event_discard(event); \ ++ \ ++ trace_nowake_buffer_unlock_commit(event, irq_flags, pc); \ ++ \ ++} \ ++ \ ++static int ftrace_raw_reg_event_##call(void) \ ++{ \ ++ int ret; \ ++ \ ++ ret = register_trace_##call(ftrace_raw_event_##call); \ ++ if (ret) \ ++ pr_info("event trace: Could not activate trace point " \ ++ "probe to " #call "\n"); \ ++ return ret; \ ++} \ ++ \ ++static void ftrace_raw_unreg_event_##call(void) \ ++{ \ ++ unregister_trace_##call(ftrace_raw_event_##call); \ ++} \ ++ \ ++static struct trace_event ftrace_event_type_##call = { \ ++ .trace = ftrace_raw_output_##call, \ ++}; \ ++ \ ++static int ftrace_raw_init_event_##call(void) \ ++{ \ ++ int id; \ ++ \ ++ id = register_ftrace_event(&ftrace_event_type_##call); \ ++ if (!id) \ ++ return -ENODEV; \ ++ event_##call.id = id; \ ++ INIT_LIST_HEAD(&event_##call.fields); \ ++ return 0; \ ++} \ ++ \ ++static struct ftrace_event_call __used \ ++__attribute__((__aligned__(4))) \ ++__attribute__((section("_ftrace_events"))) event_##call = { \ ++ .name = #call, \ ++ .system = __stringify(TRACE_SYSTEM), \ ++ .raw_init = ftrace_raw_init_event_##call, \ ++ .regfunc = ftrace_raw_reg_event_##call, \ ++ .unregfunc = ftrace_raw_unreg_event_##call, \ ++ .show_format = ftrace_format_##call, \ ++ .define_fields = ftrace_define_fields_##call, \ ++ _TRACE_PROFILE_INIT(call) \ ++} ++ ++#include ++ ++#undef _TRACE_PROFILE ++#undef _TRACE_PROFILE_INIT ++ +Index: linux-2.6-tip/kernel/trace/trace_export.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/trace_export.c +@@ -0,0 +1,102 @@ ++/* ++ * trace_export.c - export basic ftrace utilities to user space ++ * ++ * Copyright (C) 2009 Steven Rostedt ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "trace_output.h" ++ ++ ++#undef TRACE_STRUCT ++#define TRACE_STRUCT(args...) 
args ++ ++#undef TRACE_FIELD ++#define TRACE_FIELD(type, item, assign) \ ++ ret = trace_seq_printf(s, "\tfield:" #type " " #item ";\t" \ ++ "offset:%u;\tsize:%u;\n", \ ++ (unsigned int)offsetof(typeof(field), item), \ ++ (unsigned int)sizeof(field.item)); \ ++ if (!ret) \ ++ return 0; ++ ++ ++#undef TRACE_FIELD_SPECIAL ++#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ ++ ret = trace_seq_printf(s, "\tfield special:" #type_item ";\t" \ ++ "offset:%u;\tsize:%u;\n", \ ++ (unsigned int)offsetof(typeof(field), item), \ ++ (unsigned int)sizeof(field.item)); \ ++ if (!ret) \ ++ return 0; ++ ++#undef TRACE_FIELD_ZERO_CHAR ++#define TRACE_FIELD_ZERO_CHAR(item) \ ++ ret = trace_seq_printf(s, "\tfield: char " #item ";\t" \ ++ "offset:%u;\tsize:0;\n", \ ++ (unsigned int)offsetof(typeof(field), item)); \ ++ if (!ret) \ ++ return 0; ++ ++ ++#undef TP_RAW_FMT ++#define TP_RAW_FMT(args...) args ++ ++#undef TRACE_EVENT_FORMAT ++#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ ++static int \ ++ftrace_format_##call(struct trace_seq *s) \ ++{ \ ++ struct args field; \ ++ int ret; \ ++ \ ++ tstruct; \ ++ \ ++ trace_seq_printf(s, "\nprint fmt: \"%s\"\n", tpfmt); \ ++ \ ++ return ret; \ ++} ++ ++#include "trace_event_types.h" ++ ++#undef TRACE_ZERO_CHAR ++#define TRACE_ZERO_CHAR(arg) ++ ++#undef TRACE_FIELD ++#define TRACE_FIELD(type, item, assign)\ ++ entry->item = assign; ++ ++#undef TRACE_FIELD ++#define TRACE_FIELD(type, item, assign)\ ++ entry->item = assign; ++ ++#undef TP_CMD ++#define TP_CMD(cmd...) cmd ++ ++#undef TRACE_ENTRY ++#define TRACE_ENTRY entry ++ ++#undef TRACE_FIELD_SPECIAL ++#define TRACE_FIELD_SPECIAL(type_item, item, cmd) \ ++ cmd; ++ ++#undef TRACE_EVENT_FORMAT ++#define TRACE_EVENT_FORMAT(call, proto, args, fmt, tstruct, tpfmt) \ ++ \ ++static struct ftrace_event_call __used \ ++__attribute__((__aligned__(4))) \ ++__attribute__((section("_ftrace_events"))) event_##call = { \ ++ .name = #call, \ ++ .id = proto, \ ++ .system = __stringify(TRACE_SYSTEM), \ ++ .show_format = ftrace_format_##call, \ ++} ++#include "trace_event_types.h" +Index: linux-2.6-tip/kernel/trace/trace_functions.c +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/trace_functions.c ++++ linux-2.6-tip/kernel/trace/trace_functions.c +@@ -9,6 +9,7 @@ + * Copyright (C) 2004-2006 Ingo Molnar + * Copyright (C) 2004 William Lee Irwin III + */ ++#include + #include + #include + #include +@@ -16,52 +17,388 @@ + + #include "trace.h" + +-static void start_function_trace(struct trace_array *tr) ++/* function tracing enabled */ ++static int ftrace_function_enabled; ++ ++static struct trace_array *func_trace; ++ ++static void tracing_start_function_trace(void); ++static void tracing_stop_function_trace(void); ++ ++static int function_trace_init(struct trace_array *tr) + { ++ func_trace = tr; + tr->cpu = get_cpu(); +- tracing_reset_online_cpus(tr); + put_cpu(); + + tracing_start_cmdline_record(); + tracing_start_function_trace(); ++ return 0; + } + +-static void stop_function_trace(struct trace_array *tr) ++static void function_trace_reset(struct trace_array *tr) + { + tracing_stop_function_trace(); + tracing_stop_cmdline_record(); + } + +-static int function_trace_init(struct trace_array *tr) ++static void function_trace_start(struct trace_array *tr) + { +- start_function_trace(tr); +- return 0; ++ tracing_reset_online_cpus(tr); + } + +-static void function_trace_reset(struct trace_array *tr) ++static void ++function_trace_call_preempt_only(unsigned long 
ip, unsigned long parent_ip) ++{ ++ struct trace_array *tr = func_trace; ++ struct trace_array_cpu *data; ++ unsigned long flags; ++ long disabled; ++ int cpu, resched; ++ int pc; ++ ++ if (unlikely(!ftrace_function_enabled)) ++ return; ++ ++ pc = preempt_count(); ++ resched = ftrace_preempt_disable(); ++ local_save_flags(flags); ++ cpu = raw_smp_processor_id(); ++ data = tr->data[cpu]; ++ disabled = atomic_inc_return(&data->disabled); ++ ++ if (likely(disabled == 1)) ++ trace_function(tr, ip, parent_ip, flags, pc); ++ ++ atomic_dec(&data->disabled); ++ ftrace_preempt_enable(resched); ++} ++ ++static void ++function_trace_call(unsigned long ip, unsigned long parent_ip) + { +- stop_function_trace(tr); ++ struct trace_array *tr = func_trace; ++ struct trace_array_cpu *data; ++ unsigned long flags; ++ long disabled; ++ int cpu; ++ int pc; ++ ++ if (unlikely(!ftrace_function_enabled)) ++ return; ++ ++ /* ++ * Need to use raw, since this must be called before the ++ * recursive protection is performed. ++ */ ++ local_irq_save(flags); ++ cpu = raw_smp_processor_id(); ++ data = tr->data[cpu]; ++ disabled = atomic_inc_return(&data->disabled); ++ ++ if (likely(disabled == 1)) { ++ pc = preempt_count(); ++ trace_function(tr, ip, parent_ip, flags, pc); ++ } ++ ++ atomic_dec(&data->disabled); ++ local_irq_restore(flags); + } + +-static void function_trace_start(struct trace_array *tr) ++static void ++function_stack_trace_call(unsigned long ip, unsigned long parent_ip) + { +- tracing_reset_online_cpus(tr); ++ struct trace_array *tr = func_trace; ++ struct trace_array_cpu *data; ++ unsigned long flags; ++ long disabled; ++ int cpu; ++ int pc; ++ ++ if (unlikely(!ftrace_function_enabled)) ++ return; ++ ++ /* ++ * Need to use raw, since this must be called before the ++ * recursive protection is performed. 
++ */ ++ local_irq_save(flags); ++ cpu = raw_smp_processor_id(); ++ data = tr->data[cpu]; ++ disabled = atomic_inc_return(&data->disabled); ++ ++ if (likely(disabled == 1)) { ++ pc = preempt_count(); ++ trace_function(tr, ip, parent_ip, flags, pc); ++ /* ++ * skip over 5 funcs: ++ * __ftrace_trace_stack, ++ * __trace_stack, ++ * function_stack_trace_call ++ * ftrace_list_func ++ * ftrace_call ++ */ ++ __trace_stack(tr, flags, 5, pc); ++ } ++ ++ atomic_dec(&data->disabled); ++ local_irq_restore(flags); ++} ++ ++ ++static struct ftrace_ops trace_ops __read_mostly = ++{ ++ .func = function_trace_call, ++}; ++ ++static struct ftrace_ops trace_stack_ops __read_mostly = ++{ ++ .func = function_stack_trace_call, ++}; ++ ++/* Our two options */ ++enum { ++ TRACE_FUNC_OPT_STACK = 0x1, ++}; ++ ++static struct tracer_opt func_opts[] = { ++#ifdef CONFIG_STACKTRACE ++ { TRACER_OPT(func_stack_trace, TRACE_FUNC_OPT_STACK) }, ++#endif ++ { } /* Always set a last empty entry */ ++}; ++ ++static struct tracer_flags func_flags = { ++ .val = 0, /* By default: all flags disabled */ ++ .opts = func_opts ++}; ++ ++static void tracing_start_function_trace(void) ++{ ++ ftrace_function_enabled = 0; ++ ++ if (trace_flags & TRACE_ITER_PREEMPTONLY) ++ trace_ops.func = function_trace_call_preempt_only; ++ else ++ trace_ops.func = function_trace_call; ++ ++ if (func_flags.val & TRACE_FUNC_OPT_STACK) ++ register_ftrace_function(&trace_stack_ops); ++ else ++ register_ftrace_function(&trace_ops); ++ ++ ftrace_function_enabled = 1; ++} ++ ++static void tracing_stop_function_trace(void) ++{ ++ ftrace_function_enabled = 0; ++ /* OK if they are not registered */ ++ unregister_ftrace_function(&trace_stack_ops); ++ unregister_ftrace_function(&trace_ops); ++} ++ ++static int func_set_flag(u32 old_flags, u32 bit, int set) ++{ ++ if (bit == TRACE_FUNC_OPT_STACK) { ++ /* do nothing if already set */ ++ if (!!set == !!(func_flags.val & TRACE_FUNC_OPT_STACK)) ++ return 0; ++ ++ if (set) { ++ unregister_ftrace_function(&trace_ops); ++ register_ftrace_function(&trace_stack_ops); ++ } else { ++ unregister_ftrace_function(&trace_stack_ops); ++ register_ftrace_function(&trace_ops); ++ } ++ ++ return 0; ++ } ++ ++ return -EINVAL; + } + + static struct tracer function_trace __read_mostly = + { +- .name = "function", +- .init = function_trace_init, +- .reset = function_trace_reset, +- .start = function_trace_start, ++ .name = "function", ++ .init = function_trace_init, ++ .reset = function_trace_reset, ++ .start = function_trace_start, ++ .wait_pipe = poll_wait_pipe, ++ .flags = &func_flags, ++ .set_flag = func_set_flag, + #ifdef CONFIG_FTRACE_SELFTEST +- .selftest = trace_selftest_startup_function, ++ .selftest = trace_selftest_startup_function, + #endif + }; + ++#ifdef CONFIG_DYNAMIC_FTRACE ++static void ++ftrace_traceon(unsigned long ip, unsigned long parent_ip, void **data) ++{ ++ long *count = (long *)data; ++ ++ if (tracing_is_on()) ++ return; ++ ++ if (!*count) ++ return; ++ ++ if (*count != -1) ++ (*count)--; ++ ++ tracing_on(); ++} ++ ++static void ++ftrace_traceoff(unsigned long ip, unsigned long parent_ip, void **data) ++{ ++ long *count = (long *)data; ++ ++ if (!tracing_is_on()) ++ return; ++ ++ if (!*count) ++ return; ++ ++ if (*count != -1) ++ (*count)--; ++ ++ tracing_off(); ++} ++ ++static int ++ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, ++ struct ftrace_probe_ops *ops, void *data); ++ ++static struct ftrace_probe_ops traceon_probe_ops = { ++ .func = ftrace_traceon, ++ .print = ftrace_trace_onoff_print, 
++}; ++ ++static struct ftrace_probe_ops traceoff_probe_ops = { ++ .func = ftrace_traceoff, ++ .print = ftrace_trace_onoff_print, ++}; ++ ++static int ++ftrace_trace_onoff_print(struct seq_file *m, unsigned long ip, ++ struct ftrace_probe_ops *ops, void *data) ++{ ++ char str[KSYM_SYMBOL_LEN]; ++ long count = (long)data; ++ ++ kallsyms_lookup(ip, NULL, NULL, NULL, str); ++ seq_printf(m, "%s:", str); ++ ++ if (ops == &traceon_probe_ops) ++ seq_printf(m, "traceon"); ++ else ++ seq_printf(m, "traceoff"); ++ ++ if (count == -1) ++ seq_printf(m, ":unlimited\n"); ++ else ++ seq_printf(m, ":count=%ld", count); ++ seq_putc(m, '\n'); ++ ++ return 0; ++} ++ ++static int ++ftrace_trace_onoff_unreg(char *glob, char *cmd, char *param) ++{ ++ struct ftrace_probe_ops *ops; ++ ++ /* we register both traceon and traceoff to this callback */ ++ if (strcmp(cmd, "traceon") == 0) ++ ops = &traceon_probe_ops; ++ else ++ ops = &traceoff_probe_ops; ++ ++ unregister_ftrace_function_probe_func(glob, ops); ++ ++ return 0; ++} ++ ++static int ++ftrace_trace_onoff_callback(char *glob, char *cmd, char *param, int enable) ++{ ++ struct ftrace_probe_ops *ops; ++ void *count = (void *)-1; ++ char *number; ++ int ret; ++ ++ /* hash funcs only work with set_ftrace_filter */ ++ if (!enable) ++ return -EINVAL; ++ ++ if (glob[0] == '!') ++ return ftrace_trace_onoff_unreg(glob+1, cmd, param); ++ ++ /* we register both traceon and traceoff to this callback */ ++ if (strcmp(cmd, "traceon") == 0) ++ ops = &traceon_probe_ops; ++ else ++ ops = &traceoff_probe_ops; ++ ++ if (!param) ++ goto out_reg; ++ ++ number = strsep(¶m, ":"); ++ ++ if (!strlen(number)) ++ goto out_reg; ++ ++ /* ++ * We use the callback data field (which is a pointer) ++ * as our counter. ++ */ ++ ret = strict_strtoul(number, 0, (unsigned long *)&count); ++ if (ret) ++ return ret; ++ ++ out_reg: ++ ret = register_ftrace_function_probe(glob, ops, count); ++ ++ return ret; ++} ++ ++static struct ftrace_func_command ftrace_traceon_cmd = { ++ .name = "traceon", ++ .func = ftrace_trace_onoff_callback, ++}; ++ ++static struct ftrace_func_command ftrace_traceoff_cmd = { ++ .name = "traceoff", ++ .func = ftrace_trace_onoff_callback, ++}; ++ ++static int __init init_func_cmd_traceon(void) ++{ ++ int ret; ++ ++ ret = register_ftrace_command(&ftrace_traceoff_cmd); ++ if (ret) ++ return ret; ++ ++ ret = register_ftrace_command(&ftrace_traceon_cmd); ++ if (ret) ++ unregister_ftrace_command(&ftrace_traceoff_cmd); ++ return ret; ++} ++#else ++static inline int init_func_cmd_traceon(void) ++{ ++ return 0; ++} ++#endif /* CONFIG_DYNAMIC_FTRACE */ ++ + static __init int init_function_trace(void) + { ++ init_func_cmd_traceon(); + return register_tracer(&function_trace); + } +- + device_initcall(init_function_trace); ++ +Index: linux-2.6-tip/kernel/trace/trace_functions_graph.c +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/trace_functions_graph.c ++++ linux-2.6-tip/kernel/trace/trace_functions_graph.c +@@ -1,7 +1,7 @@ + /* + * + * Function graph tracer. 
+- * Copyright (c) 2008 Frederic Weisbecker ++ * Copyright (c) 2008-2009 Frederic Weisbecker + * Mostly borrowed from function tracer which + * is Copyright (c) Steven Rostedt + * +@@ -12,6 +12,12 @@ + #include + + #include "trace.h" ++#include "trace_output.h" ++ ++struct fgraph_data { ++ pid_t last_pid; ++ int depth; ++}; + + #define TRACE_GRAPH_INDENT 2 + +@@ -20,9 +26,11 @@ + #define TRACE_GRAPH_PRINT_CPU 0x2 + #define TRACE_GRAPH_PRINT_OVERHEAD 0x4 + #define TRACE_GRAPH_PRINT_PROC 0x8 ++#define TRACE_GRAPH_PRINT_DURATION 0x10 ++#define TRACE_GRAPH_PRINT_ABS_TIME 0X20 + + static struct tracer_opt trace_opts[] = { +- /* Display overruns ? */ ++ /* Display overruns? (for self-debug purpose) */ + { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) }, + /* Display CPU ? */ + { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) }, +@@ -30,26 +38,103 @@ static struct tracer_opt trace_opts[] = + { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) }, + /* Display proc name/pid */ + { TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) }, ++ /* Display duration of execution */ ++ { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, ++ /* Display absolute time of an entry */ ++ { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, + { } /* Empty entry */ + }; + + static struct tracer_flags tracer_flags = { + /* Don't display overruns and proc by default */ +- .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD, ++ .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD | ++ TRACE_GRAPH_PRINT_DURATION, + .opts = trace_opts + }; + + /* pid on the last trace processed */ +-static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 }; + +-static int graph_trace_init(struct trace_array *tr) ++ ++/* Add a function return address to the trace stack on thread info.*/ ++int ++ftrace_push_return_trace(unsigned long ret, unsigned long func, int *depth) ++{ ++ unsigned long long calltime; ++ int index; ++ ++ if (!current->ret_stack) ++ return -EBUSY; ++ ++ /* The return trace stack is full */ ++ if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { ++ atomic_inc(¤t->trace_overrun); ++ return -EBUSY; ++ } ++ ++ calltime = trace_clock_local(); ++ ++ index = ++current->curr_ret_stack; ++ barrier(); ++ current->ret_stack[index].ret = ret; ++ current->ret_stack[index].func = func; ++ current->ret_stack[index].calltime = calltime; ++ current->ret_stack[index].subtime = 0; ++ *depth = index; ++ ++ return 0; ++} ++ ++/* Retrieve a function return address to the trace stack on thread info.*/ ++static void ++ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) ++{ ++ int index; ++ ++ index = current->curr_ret_stack; ++ ++ if (unlikely(index < 0)) { ++ ftrace_graph_stop(); ++ WARN_ON(1); ++ /* Might as well panic, otherwise we have no where to go */ ++ *ret = (unsigned long)panic; ++ return; ++ } ++ ++ *ret = current->ret_stack[index].ret; ++ trace->func = current->ret_stack[index].func; ++ trace->calltime = current->ret_stack[index].calltime; ++ trace->overrun = atomic_read(¤t->trace_overrun); ++ trace->depth = index; ++} ++ ++/* ++ * Send the trace to the ring-buffer. ++ * @return the original return address. 
++ */ ++unsigned long ftrace_return_to_handler(void) + { +- int cpu, ret; ++ struct ftrace_graph_ret trace; ++ unsigned long ret; ++ ++ ftrace_pop_return_trace(&trace, &ret); ++ trace.rettime = trace_clock_local(); ++ ftrace_graph_return(&trace); ++ barrier(); ++ current->curr_ret_stack--; ++ ++ if (unlikely(!ret)) { ++ ftrace_graph_stop(); ++ WARN_ON(1); ++ /* Might as well panic. What else to do? */ ++ ret = (unsigned long)panic; ++ } + +- for_each_online_cpu(cpu) +- tracing_reset(tr, cpu); ++ return ret; ++} + +- ret = register_ftrace_graph(&trace_graph_return, ++static int graph_trace_init(struct trace_array *tr) ++{ ++ int ret = register_ftrace_graph(&trace_graph_return, + &trace_graph_entry); + if (ret) + return ret; +@@ -112,15 +197,15 @@ print_graph_cpu(struct trace_seq *s, int + static enum print_line_t + print_graph_proc(struct trace_seq *s, pid_t pid) + { +- int i; +- int ret; +- int len; +- char comm[8]; +- int spaces = 0; ++ char comm[TASK_COMM_LEN]; + /* sign + log10(MAX_INT) + '\0' */ + char pid_str[11]; ++ int spaces = 0; ++ int ret; ++ int len; ++ int i; + +- strncpy(comm, trace_find_cmdline(pid), 7); ++ trace_find_cmdline(pid, comm); + comm[7] = '\0'; + sprintf(pid_str, "%d", pid); + +@@ -153,17 +238,25 @@ print_graph_proc(struct trace_seq *s, pi + + /* If the pid changed since the last trace, output this event */ + static enum print_line_t +-verif_pid(struct trace_seq *s, pid_t pid, int cpu) ++verif_pid(struct trace_seq *s, pid_t pid, int cpu, struct fgraph_data *data) + { + pid_t prev_pid; ++ pid_t *last_pid; + int ret; + +- if (last_pid[cpu] != -1 && last_pid[cpu] == pid) ++ if (!data) ++ return TRACE_TYPE_HANDLED; ++ ++ last_pid = &(per_cpu_ptr(data, cpu)->last_pid); ++ ++ if (*last_pid == pid) + return TRACE_TYPE_HANDLED; + +- prev_pid = last_pid[cpu]; +- last_pid[cpu] = pid; ++ prev_pid = *last_pid; ++ *last_pid = pid; + ++ if (prev_pid == -1) ++ return TRACE_TYPE_HANDLED; + /* + * Context-switch trace line: + +@@ -175,34 +268,34 @@ verif_pid(struct trace_seq *s, pid_t pid + ret = trace_seq_printf(s, + " ------------------------------------------\n"); + if (!ret) +- TRACE_TYPE_PARTIAL_LINE; ++ return TRACE_TYPE_PARTIAL_LINE; + + ret = print_graph_cpu(s, cpu); + if (ret == TRACE_TYPE_PARTIAL_LINE) +- TRACE_TYPE_PARTIAL_LINE; ++ return TRACE_TYPE_PARTIAL_LINE; + + ret = print_graph_proc(s, prev_pid); + if (ret == TRACE_TYPE_PARTIAL_LINE) +- TRACE_TYPE_PARTIAL_LINE; ++ return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, " => "); + if (!ret) +- TRACE_TYPE_PARTIAL_LINE; ++ return TRACE_TYPE_PARTIAL_LINE; + + ret = print_graph_proc(s, pid); + if (ret == TRACE_TYPE_PARTIAL_LINE) +- TRACE_TYPE_PARTIAL_LINE; ++ return TRACE_TYPE_PARTIAL_LINE; + + ret = trace_seq_printf(s, + "\n ------------------------------------------\n\n"); + if (!ret) +- TRACE_TYPE_PARTIAL_LINE; ++ return TRACE_TYPE_PARTIAL_LINE; + +- return ret; ++ return TRACE_TYPE_HANDLED; + } + +-static bool +-trace_branch_is_leaf(struct trace_iterator *iter, ++static struct ftrace_graph_ret_entry * ++get_return_for_leaf(struct trace_iterator *iter, + struct ftrace_graph_ent_entry *curr) + { + struct ring_buffer_iter *ring_iter; +@@ -211,72 +304,130 @@ trace_branch_is_leaf(struct trace_iterat + + ring_iter = iter->buffer_iter[iter->cpu]; + +- if (!ring_iter) +- return false; +- +- event = ring_buffer_iter_peek(ring_iter, NULL); ++ /* First peek to compare current entry and the next one */ ++ if (ring_iter) ++ event = ring_buffer_iter_peek(ring_iter, NULL); ++ else { ++ /* We need to consume the current 
entry to see the next one */ ++ ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL); ++ event = ring_buffer_peek(iter->tr->buffer, iter->cpu, ++ NULL); ++ } + + if (!event) +- return false; ++ return NULL; + + next = ring_buffer_event_data(event); + + if (next->ent.type != TRACE_GRAPH_RET) +- return false; ++ return NULL; + + if (curr->ent.pid != next->ent.pid || + curr->graph_ent.func != next->ret.func) +- return false; ++ return NULL; ++ ++ /* this is a leaf, now advance the iterator */ ++ if (ring_iter) ++ ring_buffer_read(ring_iter, NULL); ++ ++ return next; ++} ++ ++/* Signal a overhead of time execution to the output */ ++static int ++print_graph_overhead(unsigned long long duration, struct trace_seq *s) ++{ ++ /* If duration disappear, we don't need anything */ ++ if (!(tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)) ++ return 1; ++ ++ /* Non nested entry or return */ ++ if (duration == -1) ++ return trace_seq_printf(s, " "); ++ ++ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { ++ /* Duration exceeded 100 msecs */ ++ if (duration > 100000ULL) ++ return trace_seq_printf(s, "! "); ++ ++ /* Duration exceeded 10 msecs */ ++ if (duration > 10000ULL) ++ return trace_seq_printf(s, "+ "); ++ } ++ ++ return trace_seq_printf(s, " "); ++} ++ ++static int print_graph_abs_time(u64 t, struct trace_seq *s) ++{ ++ unsigned long usecs_rem; ++ ++ usecs_rem = do_div(t, NSEC_PER_SEC); ++ usecs_rem /= 1000; + +- return true; ++ return trace_seq_printf(s, "%5lu.%06lu | ", ++ (unsigned long)t, usecs_rem); + } + + static enum print_line_t +-print_graph_irq(struct trace_seq *s, unsigned long addr, +- enum trace_type type, int cpu, pid_t pid) ++print_graph_irq(struct trace_iterator *iter, unsigned long addr, ++ enum trace_type type, int cpu, pid_t pid) + { + int ret; ++ struct trace_seq *s = &iter->seq; + + if (addr < (unsigned long)__irqentry_text_start || + addr >= (unsigned long)__irqentry_text_end) + return TRACE_TYPE_UNHANDLED; + +- if (type == TRACE_GRAPH_ENT) { +- ret = trace_seq_printf(s, "==========> | "); +- } else { +- /* Cpu */ +- if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { +- ret = print_graph_cpu(s, cpu); +- if (ret == TRACE_TYPE_PARTIAL_LINE) +- return TRACE_TYPE_PARTIAL_LINE; +- } +- /* Proc */ +- if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { +- ret = print_graph_proc(s, pid); +- if (ret == TRACE_TYPE_PARTIAL_LINE) +- return TRACE_TYPE_PARTIAL_LINE; ++ /* Absolute time */ ++ if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { ++ ret = print_graph_abs_time(iter->ts, s); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ } + +- ret = trace_seq_printf(s, " | "); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- } ++ /* Cpu */ ++ if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { ++ ret = print_graph_cpu(s, cpu); ++ if (ret == TRACE_TYPE_PARTIAL_LINE) ++ return TRACE_TYPE_PARTIAL_LINE; ++ } ++ /* Proc */ ++ if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { ++ ret = print_graph_proc(s, pid); ++ if (ret == TRACE_TYPE_PARTIAL_LINE) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ret = trace_seq_printf(s, " | "); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ } + +- /* No overhead */ +- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { +- ret = trace_seq_printf(s, " "); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- } ++ /* No overhead */ ++ ret = print_graph_overhead(-1, s); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ if (type == TRACE_GRAPH_ENT) ++ ret = trace_seq_printf(s, "==========>"); ++ else ++ ret = trace_seq_printf(s, "<=========="); ++ ++ if (!ret) ++ return 
TRACE_TYPE_PARTIAL_LINE; ++ ++ /* Don't close the duration column if haven't one */ ++ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) ++ trace_seq_printf(s, " |"); ++ ret = trace_seq_printf(s, "\n"); + +- ret = trace_seq_printf(s, "<========== |\n"); +- } + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + return TRACE_TYPE_HANDLED; + } + +-static enum print_line_t +-print_graph_duration(unsigned long long duration, struct trace_seq *s) ++enum print_line_t ++trace_print_graph_duration(unsigned long long duration, struct trace_seq *s) + { + unsigned long nsecs_rem = do_div(duration, 1000); + /* log10(ULONG_MAX) + '\0' */ +@@ -285,10 +436,10 @@ print_graph_duration(unsigned long long + int ret, len; + int i; + +- sprintf(msecs_str, "%lu", (unsigned long) duration); ++ snprintf(msecs_str, sizeof(msecs_str), "%lu", (unsigned long) duration); + + /* Print msecs */ +- ret = trace_seq_printf(s, msecs_str); ++ ret = trace_seq_printf(s, "%s", msecs_str); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + +@@ -313,60 +464,66 @@ print_graph_duration(unsigned long long + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } +- +- ret = trace_seq_printf(s, "| "); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; + return TRACE_TYPE_HANDLED; +- + } + +-/* Signal a overhead of time execution to the output */ +-static int +-print_graph_overhead(unsigned long long duration, struct trace_seq *s) ++static enum print_line_t ++print_graph_duration(unsigned long long duration, struct trace_seq *s) + { +- /* Duration exceeded 100 msecs */ +- if (duration > 100000ULL) +- return trace_seq_printf(s, "! "); +- +- /* Duration exceeded 10 msecs */ +- if (duration > 10000ULL) +- return trace_seq_printf(s, "+ "); ++ int ret; + +- return trace_seq_printf(s, " "); ++ ret = trace_print_graph_duration(duration, s); ++ if (ret != TRACE_TYPE_HANDLED) ++ return ret; ++ ++ ret = trace_seq_printf(s, "| "); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ return TRACE_TYPE_HANDLED; + } + + /* Case of a leaf function on its call entry */ + static enum print_line_t + print_graph_entry_leaf(struct trace_iterator *iter, +- struct ftrace_graph_ent_entry *entry, struct trace_seq *s) ++ struct ftrace_graph_ent_entry *entry, ++ struct ftrace_graph_ret_entry *ret_entry, struct trace_seq *s) + { +- struct ftrace_graph_ret_entry *ret_entry; ++ struct fgraph_data *data = iter->private; + struct ftrace_graph_ret *graph_ret; +- struct ring_buffer_event *event; + struct ftrace_graph_ent *call; + unsigned long long duration; + int ret; + int i; + +- event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL); +- ret_entry = ring_buffer_event_data(event); + graph_ret = &ret_entry->ret; + call = &entry->graph_ent; + duration = graph_ret->rettime - graph_ret->calltime; + +- /* Overhead */ +- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { +- ret = print_graph_overhead(duration, s); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; ++ if (data) { ++ int cpu = iter->cpu; ++ int *depth = &(per_cpu_ptr(data, cpu)->depth); ++ ++ /* ++ * Comments display at + 1 to depth. Since ++ * this is a leaf function, keep the comments ++ * equal to this depth. 
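++		 * The per-cpu depth recorded here is what print_graph_comment()
++		 * reads back when it computes the indentation of comment lines.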
++ */ ++ *depth = call->depth - 1; + } + +- /* Duration */ +- ret = print_graph_duration(duration, s); +- if (ret == TRACE_TYPE_PARTIAL_LINE) ++ /* Overhead */ ++ ret = print_graph_overhead(duration, s); ++ if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + ++ /* Duration */ ++ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { ++ ret = print_graph_duration(duration, s); ++ if (ret == TRACE_TYPE_PARTIAL_LINE) ++ return TRACE_TYPE_PARTIAL_LINE; ++ } ++ + /* Function */ + for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { + ret = trace_seq_printf(s, " "); +@@ -386,33 +543,34 @@ print_graph_entry_leaf(struct trace_iter + } + + static enum print_line_t +-print_graph_entry_nested(struct ftrace_graph_ent_entry *entry, +- struct trace_seq *s, pid_t pid, int cpu) ++print_graph_entry_nested(struct trace_iterator *iter, ++ struct ftrace_graph_ent_entry *entry, ++ struct trace_seq *s, int cpu) + { +- int i; +- int ret; + struct ftrace_graph_ent *call = &entry->graph_ent; ++ struct fgraph_data *data = iter->private; ++ int ret; ++ int i; + +- /* No overhead */ +- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { +- ret = trace_seq_printf(s, " "); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; ++ if (data) { ++ int cpu = iter->cpu; ++ int *depth = &(per_cpu_ptr(data, cpu)->depth); ++ ++ *depth = call->depth; + } + +- /* Interrupt */ +- ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, pid); +- if (ret == TRACE_TYPE_UNHANDLED) { +- /* No time */ ++ /* No overhead */ ++ ret = print_graph_overhead(-1, s); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ /* No time */ ++ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { + ret = trace_seq_printf(s, " | "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; +- } else { +- if (ret == TRACE_TYPE_PARTIAL_LINE) +- return TRACE_TYPE_PARTIAL_LINE; + } + +- + /* Function */ + for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) { + ret = trace_seq_printf(s, " "); +@@ -428,20 +586,40 @@ print_graph_entry_nested(struct ftrace_g + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + +- return TRACE_TYPE_HANDLED; ++ /* ++ * we already consumed the current entry to check the next one ++ * and see if this is a leaf. 
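++	 * Hence TRACE_TYPE_NO_CONSUME is returned below instead of
++	 * TRACE_TYPE_HANDLED.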
++ */ ++ return TRACE_TYPE_NO_CONSUME; + } + + static enum print_line_t +-print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, +- struct trace_iterator *iter, int cpu) ++print_graph_prologue(struct trace_iterator *iter, struct trace_seq *s, ++ int type, unsigned long addr) + { +- int ret; ++ struct fgraph_data *data = iter->private; + struct trace_entry *ent = iter->ent; ++ int cpu = iter->cpu; ++ int ret; + + /* Pid */ +- if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) ++ if (verif_pid(s, ent->pid, cpu, data) == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + ++ if (type) { ++ /* Interrupt */ ++ ret = print_graph_irq(iter, addr, type, cpu, ent->pid); ++ if (ret == TRACE_TYPE_PARTIAL_LINE) ++ return TRACE_TYPE_PARTIAL_LINE; ++ } ++ ++ /* Absolute time */ ++ if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) { ++ ret = print_graph_abs_time(iter->ts, s); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ } ++ + /* Cpu */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { + ret = print_graph_cpu(s, cpu); +@@ -460,54 +638,65 @@ print_graph_entry(struct ftrace_graph_en + return TRACE_TYPE_PARTIAL_LINE; + } + +- if (trace_branch_is_leaf(iter, field)) +- return print_graph_entry_leaf(iter, field, s); ++ return 0; ++} ++ ++static enum print_line_t ++print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, ++ struct trace_iterator *iter) ++{ ++ int cpu = iter->cpu; ++ struct ftrace_graph_ent *call = &field->graph_ent; ++ struct ftrace_graph_ret_entry *leaf_ret; ++ ++ if (print_graph_prologue(iter, s, TRACE_GRAPH_ENT, call->func)) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ leaf_ret = get_return_for_leaf(iter, field); ++ if (leaf_ret) ++ return print_graph_entry_leaf(iter, field, leaf_ret, s); + else +- return print_graph_entry_nested(field, s, iter->ent->pid, cpu); ++ return print_graph_entry_nested(iter, field, s, cpu); + + } + + static enum print_line_t + print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, +- struct trace_entry *ent, int cpu) ++ struct trace_entry *ent, struct trace_iterator *iter) + { +- int i; +- int ret; + unsigned long long duration = trace->rettime - trace->calltime; ++ struct fgraph_data *data = iter->private; ++ pid_t pid = ent->pid; ++ int cpu = iter->cpu; ++ int ret; ++ int i; + +- /* Pid */ +- if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE) +- return TRACE_TYPE_PARTIAL_LINE; +- +- /* Cpu */ +- if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { +- ret = print_graph_cpu(s, cpu); +- if (ret == TRACE_TYPE_PARTIAL_LINE) +- return TRACE_TYPE_PARTIAL_LINE; ++ if (data) { ++ int cpu = iter->cpu; ++ int *depth = &(per_cpu_ptr(data, cpu)->depth); ++ ++ /* ++ * Comments display at + 1 to depth. This is the ++ * return from a function, we now want the comments ++ * to display at the same level of the bracket. 
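++		 * Hence the per-cpu depth is set to trace->depth - 1 below.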
++ */ ++ *depth = trace->depth - 1; + } + +- /* Proc */ +- if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { +- ret = print_graph_proc(s, ent->pid); +- if (ret == TRACE_TYPE_PARTIAL_LINE) +- return TRACE_TYPE_PARTIAL_LINE; +- +- ret = trace_seq_printf(s, " | "); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- } ++ if (print_graph_prologue(iter, s, 0, 0)) ++ return TRACE_TYPE_PARTIAL_LINE; + + /* Overhead */ +- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { +- ret = print_graph_overhead(duration, s); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- } ++ ret = print_graph_overhead(duration, s); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; + + /* Duration */ +- ret = print_graph_duration(duration, s); +- if (ret == TRACE_TYPE_PARTIAL_LINE) +- return TRACE_TYPE_PARTIAL_LINE; ++ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { ++ ret = print_graph_duration(duration, s); ++ if (ret == TRACE_TYPE_PARTIAL_LINE) ++ return TRACE_TYPE_PARTIAL_LINE; ++ } + + /* Closing brace */ + for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) { +@@ -528,7 +717,7 @@ print_graph_return(struct ftrace_graph_r + return TRACE_TYPE_PARTIAL_LINE; + } + +- ret = print_graph_irq(s, trace->func, TRACE_GRAPH_RET, cpu, ent->pid); ++ ret = print_graph_irq(iter, trace->func, TRACE_GRAPH_RET, cpu, pid); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + +@@ -536,61 +725,73 @@ print_graph_return(struct ftrace_graph_r + } + + static enum print_line_t +-print_graph_comment(struct print_entry *trace, struct trace_seq *s, +- struct trace_entry *ent, struct trace_iterator *iter) ++print_graph_comment(struct trace_seq *s, struct trace_entry *ent, ++ struct trace_iterator *iter) + { +- int i; ++ unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK); ++ struct fgraph_data *data = iter->private; ++ struct trace_event *event; ++ int depth = 0; + int ret; ++ int i; + +- /* Pid */ +- if (verif_pid(s, ent->pid, iter->cpu) == TRACE_TYPE_PARTIAL_LINE) +- return TRACE_TYPE_PARTIAL_LINE; +- +- /* Cpu */ +- if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) { +- ret = print_graph_cpu(s, iter->cpu); +- if (ret == TRACE_TYPE_PARTIAL_LINE) +- return TRACE_TYPE_PARTIAL_LINE; +- } +- +- /* Proc */ +- if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { +- ret = print_graph_proc(s, ent->pid); +- if (ret == TRACE_TYPE_PARTIAL_LINE) +- return TRACE_TYPE_PARTIAL_LINE; ++ if (data) ++ depth = per_cpu_ptr(data, iter->cpu)->depth; + +- ret = trace_seq_printf(s, " | "); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- } ++ if (print_graph_prologue(iter, s, 0, 0)) ++ return TRACE_TYPE_PARTIAL_LINE; + + /* No overhead */ +- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { +- ret = trace_seq_printf(s, " "); ++ ret = print_graph_overhead(-1, s); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ /* No time */ ++ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) { ++ ret = trace_seq_printf(s, " | "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + +- /* No time */ +- ret = trace_seq_printf(s, " | "); +- if (!ret) +- return TRACE_TYPE_PARTIAL_LINE; +- + /* Indentation */ +- if (trace->depth > 0) +- for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) { ++ if (depth > 0) ++ for (i = 0; i < (depth + 1) * TRACE_GRAPH_INDENT; i++) { + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + } + + /* The comment */ +- ret = trace_seq_printf(s, "/* %s", trace->buf); ++ ret = trace_seq_printf(s, "/* "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + +- if (ent->flags & 
TRACE_FLAG_CONT) +- trace_seq_print_cont(s, iter); ++ switch (iter->ent->type) { ++ case TRACE_BPRINT: ++ ret = trace_print_bprintk_msg_only(iter); ++ if (ret != TRACE_TYPE_HANDLED) ++ return ret; ++ break; ++ case TRACE_PRINT: ++ ret = trace_print_printk_msg_only(iter); ++ if (ret != TRACE_TYPE_HANDLED) ++ return ret; ++ break; ++ default: ++ event = ftrace_find_event(ent->type); ++ if (!event) ++ return TRACE_TYPE_UNHANDLED; ++ ++ ret = event->trace(iter, sym_flags); ++ if (ret != TRACE_TYPE_HANDLED) ++ return ret; ++ } ++ ++ /* Strip ending newline */ ++ if (s->buffer[s->len - 1] == '\n') { ++ s->buffer[s->len - 1] = '\0'; ++ s->len--; ++ } + + ret = trace_seq_printf(s, " */\n"); + if (!ret) +@@ -603,62 +804,91 @@ print_graph_comment(struct print_entry * + enum print_line_t + print_graph_function(struct trace_iterator *iter) + { +- struct trace_seq *s = &iter->seq; + struct trace_entry *entry = iter->ent; ++ struct trace_seq *s = &iter->seq; + + switch (entry->type) { + case TRACE_GRAPH_ENT: { + struct ftrace_graph_ent_entry *field; + trace_assign_type(field, entry); +- return print_graph_entry(field, s, iter, +- iter->cpu); ++ return print_graph_entry(field, s, iter); + } + case TRACE_GRAPH_RET: { + struct ftrace_graph_ret_entry *field; + trace_assign_type(field, entry); +- return print_graph_return(&field->ret, s, entry, iter->cpu); +- } +- case TRACE_PRINT: { +- struct print_entry *field; +- trace_assign_type(field, entry); +- return print_graph_comment(field, s, entry, iter); ++ return print_graph_return(&field->ret, s, entry, iter); + } + default: +- return TRACE_TYPE_UNHANDLED; ++ return print_graph_comment(s, entry, iter); + } ++ ++ return TRACE_TYPE_HANDLED; + } + + static void print_graph_headers(struct seq_file *s) + { + /* 1st line */ + seq_printf(s, "# "); ++ if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) ++ seq_printf(s, " TIME "); + if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) +- seq_printf(s, "CPU "); ++ seq_printf(s, "CPU"); + if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) +- seq_printf(s, "TASK/PID "); +- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) +- seq_printf(s, "OVERHEAD/"); +- seq_printf(s, "DURATION FUNCTION CALLS\n"); ++ seq_printf(s, " TASK/PID "); ++ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) ++ seq_printf(s, " DURATION "); ++ seq_printf(s, " FUNCTION CALLS\n"); + + /* 2nd line */ + seq_printf(s, "# "); ++ if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) ++ seq_printf(s, " | "); + if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) +- seq_printf(s, "| "); ++ seq_printf(s, "| "); + if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) +- seq_printf(s, "| | "); +- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) { +- seq_printf(s, "| "); +- seq_printf(s, "| | | | |\n"); +- } else +- seq_printf(s, " | | | | |\n"); ++ seq_printf(s, " | | "); ++ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) ++ seq_printf(s, " | | "); ++ seq_printf(s, " | | | |\n"); ++} ++ ++static void graph_trace_open(struct trace_iterator *iter) ++{ ++ /* pid and depth on the last trace processed */ ++ struct fgraph_data *data = alloc_percpu(struct fgraph_data); ++ int cpu; ++ ++ if (!data) ++ pr_warning("function graph tracer: not enough memory\n"); ++ else ++ for_each_possible_cpu(cpu) { ++ pid_t *pid = &(per_cpu_ptr(data, cpu)->last_pid); ++ int *depth = &(per_cpu_ptr(data, cpu)->depth); ++ *pid = -1; ++ *depth = 0; ++ } ++ ++ iter->private = data; + } ++ ++static void graph_trace_close(struct trace_iterator *iter) ++{ ++ free_percpu(iter->private); ++} ++ + static struct tracer 
graph_trace __read_mostly = { +- .name = "function_graph", +- .init = graph_trace_init, +- .reset = graph_trace_reset, ++ .name = "function_graph", ++ .open = graph_trace_open, ++ .close = graph_trace_close, ++ .wait_pipe = poll_wait_pipe, ++ .init = graph_trace_init, ++ .reset = graph_trace_reset, + .print_line = print_graph_function, + .print_header = print_graph_headers, + .flags = &tracer_flags, ++#ifdef CONFIG_FTRACE_SELFTEST ++ .selftest = trace_selftest_startup_function_graph, ++#endif + }; + + static __init int init_graph_trace(void) +Index: linux-2.6-tip/kernel/trace/trace_hw_branches.c +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/trace_hw_branches.c ++++ linux-2.6-tip/kernel/trace/trace_hw_branches.c +@@ -1,30 +1,53 @@ + /* + * h/w branch tracer for x86 based on bts + * +- * Copyright (C) 2008 Markus Metzger +- * ++ * Copyright (C) 2008-2009 Intel Corporation. ++ * Markus Metzger , 2008-2009 + */ +- +-#include +-#include ++#include ++#include + #include + #include +-#include ++#include ++#include ++#include ++#include + + #include + + #include "trace.h" ++#include "trace_output.h" + + + #define SIZEOF_BTS (1 << 13) + ++/* ++ * The tracer lock protects the below per-cpu tracer array. ++ * It needs to be held to: ++ * - start tracing on all cpus ++ * - stop tracing on all cpus ++ * - start tracing on a single hotplug cpu ++ * - stop tracing on a single hotplug cpu ++ * - read the trace from all cpus ++ * - read the trace from a single cpu ++ */ ++static DEFINE_SPINLOCK(bts_tracer_lock); + static DEFINE_PER_CPU(struct bts_tracer *, tracer); + static DEFINE_PER_CPU(unsigned char[SIZEOF_BTS], buffer); + + #define this_tracer per_cpu(tracer, smp_processor_id()) + #define this_buffer per_cpu(buffer, smp_processor_id()) + ++static int __read_mostly trace_hw_branches_enabled; ++static struct trace_array *hw_branch_trace __read_mostly; ++ + ++/* ++ * Start tracing on the current cpu. ++ * The argument is ignored. ++ * ++ * pre: bts_tracer_lock must be locked. ++ */ + static void bts_trace_start_cpu(void *arg) + { + if (this_tracer) +@@ -42,14 +65,20 @@ static void bts_trace_start_cpu(void *ar + + static void bts_trace_start(struct trace_array *tr) + { +- int cpu; ++ spin_lock(&bts_tracer_lock); + +- tracing_reset_online_cpus(tr); ++ on_each_cpu(bts_trace_start_cpu, NULL, 1); ++ trace_hw_branches_enabled = 1; + +- for_each_cpu(cpu, cpu_possible_mask) +- smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1); ++ spin_unlock(&bts_tracer_lock); + } + ++/* ++ * Stop tracing on the current cpu. ++ * The argument is ignored. ++ * ++ * pre: bts_tracer_lock must be locked. 
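++ * (The lock also serializes these per-cpu start/stop calls against
++ * the cpu-hotplug notifier registered below.)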
++ */ + static void bts_trace_stop_cpu(void *arg) + { + if (this_tracer) { +@@ -60,26 +89,60 @@ static void bts_trace_stop_cpu(void *arg + + static void bts_trace_stop(struct trace_array *tr) + { +- int cpu; ++ spin_lock(&bts_tracer_lock); ++ ++ trace_hw_branches_enabled = 0; ++ on_each_cpu(bts_trace_stop_cpu, NULL, 1); ++ ++ spin_unlock(&bts_tracer_lock); ++} ++ ++static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb, ++ unsigned long action, void *hcpu) ++{ ++ unsigned int cpu = (unsigned long)hcpu; + +- for_each_cpu(cpu, cpu_possible_mask) ++ spin_lock(&bts_tracer_lock); ++ ++ if (!trace_hw_branches_enabled) ++ goto out; ++ ++ switch (action) { ++ case CPU_ONLINE: ++ case CPU_DOWN_FAILED: ++ smp_call_function_single(cpu, bts_trace_start_cpu, NULL, 1); ++ break; ++ case CPU_DOWN_PREPARE: + smp_call_function_single(cpu, bts_trace_stop_cpu, NULL, 1); ++ break; ++ } ++ ++ out: ++ spin_unlock(&bts_tracer_lock); ++ return NOTIFY_DONE; + } + ++static struct notifier_block bts_hotcpu_notifier __cpuinitdata = { ++ .notifier_call = bts_hotcpu_handler ++}; ++ + static int bts_trace_init(struct trace_array *tr) + { +- tracing_reset_online_cpus(tr); ++ hw_branch_trace = tr; ++ + bts_trace_start(tr); + + return 0; + } + ++static void bts_trace_reset(struct trace_array *tr) ++{ ++ bts_trace_stop(tr); ++} ++ + static void bts_trace_print_header(struct seq_file *m) + { +- seq_puts(m, +- "# CPU# FROM TO FUNCTION\n"); +- seq_puts(m, +- "# | | | |\n"); ++ seq_puts(m, "# CPU# TO <- FROM\n"); + } + + static enum print_line_t bts_trace_print_line(struct trace_iterator *iter) +@@ -87,15 +150,15 @@ static enum print_line_t bts_trace_print + struct trace_entry *entry = iter->ent; + struct trace_seq *seq = &iter->seq; + struct hw_branch_entry *it; ++ unsigned long symflags = TRACE_ITER_SYM_OFFSET; + + trace_assign_type(it, entry); + + if (entry->type == TRACE_HW_BRANCHES) { +- if (trace_seq_printf(seq, "%4d ", entry->cpu) && +- trace_seq_printf(seq, "0x%016llx -> 0x%016llx ", +- it->from, it->to) && +- (!it->from || +- seq_print_ip_sym(seq, it->from, /* sym_flags = */ 0)) && ++ if (trace_seq_printf(seq, "%4d ", iter->cpu) && ++ seq_print_ip_sym(seq, it->to, symflags) && ++ trace_seq_printf(seq, "\t <- ") && ++ seq_print_ip_sym(seq, it->from, symflags) && + trace_seq_printf(seq, "\n")) + return TRACE_TYPE_HANDLED; + return TRACE_TYPE_PARTIAL_LINE;; +@@ -103,26 +166,42 @@ static enum print_line_t bts_trace_print + return TRACE_TYPE_UNHANDLED; + } + +-void trace_hw_branch(struct trace_array *tr, u64 from, u64 to) ++void trace_hw_branch(u64 from, u64 to) + { ++ struct trace_array *tr = hw_branch_trace; + struct ring_buffer_event *event; + struct hw_branch_entry *entry; +- unsigned long irq; ++ unsigned long irq1; ++ int cpu; + +- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), &irq); +- if (!event) ++ if (unlikely(!tr)) + return; ++ ++ if (unlikely(!trace_hw_branches_enabled)) ++ return; ++ ++ local_irq_save(irq1); ++ cpu = raw_smp_processor_id(); ++ if (atomic_inc_return(&tr->data[cpu]->disabled) != 1) ++ goto out; ++ ++ event = trace_buffer_lock_reserve(tr, TRACE_HW_BRANCHES, ++ sizeof(*entry), 0, 0); ++ if (!event) ++ goto out; + entry = ring_buffer_event_data(event); + tracing_generic_entry_update(&entry->ent, 0, from); + entry->ent.type = TRACE_HW_BRANCHES; +- entry->ent.cpu = smp_processor_id(); + entry->from = from; + entry->to = to; +- ring_buffer_unlock_commit(tr->buffer, event, irq); ++ trace_buffer_unlock_commit(tr, event, 0, 0); ++ ++ out: ++ atomic_dec(&tr->data[cpu]->disabled); 
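++	/* irqs were disabled with local_irq_save() above; restore them last */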
++ local_irq_restore(irq1); + } + +-static void trace_bts_at(struct trace_array *tr, +- const struct bts_trace *trace, void *at) ++static void trace_bts_at(const struct bts_trace *trace, void *at) + { + struct bts_struct bts; + int err = 0; +@@ -137,18 +216,29 @@ static void trace_bts_at(struct trace_ar + + switch (bts.qualifier) { + case BTS_BRANCH: +- trace_hw_branch(tr, bts.variant.lbr.from, bts.variant.lbr.to); ++ trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to); + break; + } + } + ++/* ++ * Collect the trace on the current cpu and write it into the ftrace buffer. ++ * ++ * pre: bts_tracer_lock must be locked ++ */ + static void trace_bts_cpu(void *arg) + { + struct trace_array *tr = (struct trace_array *) arg; + const struct bts_trace *trace; + unsigned char *at; + +- if (!this_tracer) ++ if (unlikely(!tr)) ++ return; ++ ++ if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled))) ++ return; ++ ++ if (unlikely(!this_tracer)) + return; + + ds_suspend_bts(this_tracer); +@@ -158,11 +248,11 @@ static void trace_bts_cpu(void *arg) + + for (at = trace->ds.top; (void *)at < trace->ds.end; + at += trace->ds.size) +- trace_bts_at(tr, trace, at); ++ trace_bts_at(trace, at); + + for (at = trace->ds.begin; (void *)at < trace->ds.top; + at += trace->ds.size) +- trace_bts_at(tr, trace, at); ++ trace_bts_at(trace, at); + + out: + ds_resume_bts(this_tracer); +@@ -170,26 +260,43 @@ out: + + static void trace_bts_prepare(struct trace_iterator *iter) + { +- int cpu; ++ spin_lock(&bts_tracer_lock); ++ ++ on_each_cpu(trace_bts_cpu, iter->tr, 1); ++ ++ spin_unlock(&bts_tracer_lock); ++} ++ ++static void trace_bts_close(struct trace_iterator *iter) ++{ ++ tracing_reset_online_cpus(iter->tr); ++} ++ ++void trace_hw_branch_oops(void) ++{ ++ spin_lock(&bts_tracer_lock); ++ ++ trace_bts_cpu(hw_branch_trace); + +- for_each_cpu(cpu, cpu_possible_mask) +- smp_call_function_single(cpu, trace_bts_cpu, iter->tr, 1); ++ spin_unlock(&bts_tracer_lock); + } + + struct tracer bts_tracer __read_mostly = + { + .name = "hw-branch-tracer", + .init = bts_trace_init, +- .reset = bts_trace_stop, ++ .reset = bts_trace_reset, + .print_header = bts_trace_print_header, + .print_line = bts_trace_print_line, + .start = bts_trace_start, + .stop = bts_trace_stop, +- .open = trace_bts_prepare ++ .open = trace_bts_prepare, ++ .close = trace_bts_close + }; + + __init static int init_bts_trace(void) + { ++ register_hotcpu_notifier(&bts_hotcpu_notifier); + return register_tracer(&bts_tracer); + } + device_initcall(init_bts_trace); +Index: linux-2.6-tip/kernel/trace/trace_irqsoff.c +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/trace_irqsoff.c ++++ linux-2.6-tip/kernel/trace/trace_irqsoff.c +@@ -1,5 +1,5 @@ + /* +- * trace irqs off criticall timings ++ * trace irqs off critical timings + * + * Copyright (C) 2007-2008 Steven Rostedt + * Copyright (C) 2008 Ingo Molnar +@@ -17,13 +17,14 @@ + #include + + #include "trace.h" ++#include "trace_hist.h" + + static struct trace_array *irqsoff_trace __read_mostly; + static int tracer_enabled __read_mostly; + + static DEFINE_PER_CPU(int, tracing_cpu); + +-static DEFINE_SPINLOCK(max_trace_lock); ++static DEFINE_RAW_SPINLOCK(max_trace_lock); + + enum { + TRACER_IRQS_OFF = (1 << 1), +@@ -32,6 +33,8 @@ enum { + + static int trace_type __read_mostly; + ++static int save_lat_flag; ++ + #ifdef CONFIG_PREEMPT_TRACER + static inline int + preempt_trace(void) +@@ -95,7 +98,7 @@ irqsoff_tracer_call(unsigned long ip, un + disabled = 
atomic_inc_return(&data->disabled); + + if (likely(disabled == 1)) +- trace_function(tr, data, ip, parent_ip, flags, preempt_count()); ++ trace_function(tr, ip, parent_ip, flags, preempt_count()); + + atomic_dec(&data->disabled); + } +@@ -153,7 +156,7 @@ check_critical_timing(struct trace_array + if (!report_latency(delta)) + goto out_unlock; + +- trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc); ++ trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + + latency = nsecs_to_usecs(delta); + +@@ -177,7 +180,7 @@ out: + data->critical_sequence = max_sequence; + data->preempt_timestamp = ftrace_now(cpu); + tracing_reset(tr, cpu); +- trace_function(tr, data, CALLER_ADDR0, parent_ip, flags, pc); ++ trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc); + } + + static inline void +@@ -210,7 +213,7 @@ start_critical_timing(unsigned long ip, + + local_save_flags(flags); + +- trace_function(tr, data, ip, parent_ip, flags, preempt_count()); ++ trace_function(tr, ip, parent_ip, flags, preempt_count()); + + per_cpu(tracing_cpu, cpu) = 1; + +@@ -244,7 +247,7 @@ stop_critical_timing(unsigned long ip, u + atomic_inc(&data->disabled); + + local_save_flags(flags); +- trace_function(tr, data, ip, parent_ip, flags, preempt_count()); ++ trace_function(tr, ip, parent_ip, flags, preempt_count()); + check_critical_timing(tr, data, parent_ip ? : ip, cpu); + data->critical_start = 0; + atomic_dec(&data->disabled); +@@ -253,6 +256,7 @@ stop_critical_timing(unsigned long ip, u + /* start and stop critical timings used to for stoppage (in idle) */ + void start_critical_timings(void) + { ++ tracing_hist_preempt_start(); + if (preempt_trace() || irq_trace()) + start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); + } +@@ -260,6 +264,7 @@ EXPORT_SYMBOL_GPL(start_critical_timings + + void stop_critical_timings(void) + { ++ tracing_hist_preempt_stop(TRACE_STOP); + if (preempt_trace() || irq_trace()) + stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); + } +@@ -269,12 +274,14 @@ EXPORT_SYMBOL_GPL(stop_critical_timings) + #ifdef CONFIG_PROVE_LOCKING + void time_hardirqs_on(unsigned long a0, unsigned long a1) + { ++ tracing_hist_preempt_stop(1); + if (!preempt_trace() && irq_trace()) + stop_critical_timing(a0, a1); + } + + void time_hardirqs_off(unsigned long a0, unsigned long a1) + { ++ tracing_hist_preempt_start(); + if (!preempt_trace() && irq_trace()) + start_critical_timing(a0, a1); + } +@@ -310,6 +317,7 @@ inline void print_irqtrace_events(struct + */ + void trace_hardirqs_on(void) + { ++ tracing_hist_preempt_stop(1); + if (!preempt_trace() && irq_trace()) + stop_critical_timing(CALLER_ADDR0, CALLER_ADDR1); + } +@@ -317,6 +325,7 @@ EXPORT_SYMBOL(trace_hardirqs_on); + + void trace_hardirqs_off(void) + { ++ tracing_hist_preempt_start(); + if (!preempt_trace() && irq_trace()) + start_critical_timing(CALLER_ADDR0, CALLER_ADDR1); + } +@@ -324,6 +333,7 @@ EXPORT_SYMBOL(trace_hardirqs_off); + + void trace_hardirqs_on_caller(unsigned long caller_addr) + { ++ tracing_hist_preempt_stop(1); + if (!preempt_trace() && irq_trace()) + stop_critical_timing(CALLER_ADDR0, caller_addr); + } +@@ -331,6 +341,7 @@ EXPORT_SYMBOL(trace_hardirqs_on_caller); + + void trace_hardirqs_off_caller(unsigned long caller_addr) + { ++ tracing_hist_preempt_start(); + if (!preempt_trace() && irq_trace()) + start_critical_timing(CALLER_ADDR0, caller_addr); + } +@@ -342,44 +353,39 @@ EXPORT_SYMBOL(trace_hardirqs_off_caller) + #ifdef CONFIG_PREEMPT_TRACER + void trace_preempt_on(unsigned long a0, unsigned long a1) + { ++ 
tracing_hist_preempt_stop(0); + if (preempt_trace()) + stop_critical_timing(a0, a1); + } + + void trace_preempt_off(unsigned long a0, unsigned long a1) + { ++ tracing_hist_preempt_start(); + if (preempt_trace()) + start_critical_timing(a0, a1); + } + #endif /* CONFIG_PREEMPT_TRACER */ + +-/* +- * save_tracer_enabled is used to save the state of the tracer_enabled +- * variable when we disable it when we open a trace output file. +- */ +-static int save_tracer_enabled; +- + static void start_irqsoff_tracer(struct trace_array *tr) + { + register_ftrace_function(&trace_ops); +- if (tracing_is_enabled()) { ++ if (tracing_is_enabled()) + tracer_enabled = 1; +- save_tracer_enabled = 1; +- } else { ++ else + tracer_enabled = 0; +- save_tracer_enabled = 0; +- } + } + + static void stop_irqsoff_tracer(struct trace_array *tr) + { + tracer_enabled = 0; +- save_tracer_enabled = 0; + unregister_ftrace_function(&trace_ops); + } + + static void __irqsoff_tracer_init(struct trace_array *tr) + { ++ save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT; ++ trace_flags |= TRACE_ITER_LATENCY_FMT; ++ + tracing_max_latency = 0; + irqsoff_trace = tr; + /* make sure that the tracer is visible */ +@@ -390,30 +396,19 @@ static void __irqsoff_tracer_init(struct + static void irqsoff_tracer_reset(struct trace_array *tr) + { + stop_irqsoff_tracer(tr); ++ ++ if (!save_lat_flag) ++ trace_flags &= ~TRACE_ITER_LATENCY_FMT; + } + + static void irqsoff_tracer_start(struct trace_array *tr) + { + tracer_enabled = 1; +- save_tracer_enabled = 1; + } + + static void irqsoff_tracer_stop(struct trace_array *tr) + { + tracer_enabled = 0; +- save_tracer_enabled = 0; +-} +- +-static void irqsoff_tracer_open(struct trace_iterator *iter) +-{ +- /* stop the trace while dumping */ +- tracer_enabled = 0; +-} +- +-static void irqsoff_tracer_close(struct trace_iterator *iter) +-{ +- /* restart tracing */ +- tracer_enabled = save_tracer_enabled; + } + + #ifdef CONFIG_IRQSOFF_TRACER +@@ -431,8 +426,6 @@ static struct tracer irqsoff_tracer __re + .reset = irqsoff_tracer_reset, + .start = irqsoff_tracer_start, + .stop = irqsoff_tracer_stop, +- .open = irqsoff_tracer_open, +- .close = irqsoff_tracer_close, + .print_max = 1, + #ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_irqsoff, +@@ -459,8 +452,6 @@ static struct tracer preemptoff_tracer _ + .reset = irqsoff_tracer_reset, + .start = irqsoff_tracer_start, + .stop = irqsoff_tracer_stop, +- .open = irqsoff_tracer_open, +- .close = irqsoff_tracer_close, + .print_max = 1, + #ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_preemptoff, +@@ -489,8 +480,6 @@ static struct tracer preemptirqsoff_trac + .reset = irqsoff_tracer_reset, + .start = irqsoff_tracer_start, + .stop = irqsoff_tracer_stop, +- .open = irqsoff_tracer_open, +- .close = irqsoff_tracer_close, + .print_max = 1, + #ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_preemptirqsoff, +Index: linux-2.6-tip/kernel/trace/trace_mmiotrace.c +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/trace_mmiotrace.c ++++ linux-2.6-tip/kernel/trace/trace_mmiotrace.c +@@ -12,6 +12,7 @@ + #include + + #include "trace.h" ++#include "trace_output.h" + + struct header_iter { + struct pci_dev *dev; +@@ -183,21 +184,22 @@ static enum print_line_t mmio_print_rw(s + switch (rw->opcode) { + case MMIO_READ: + ret = trace_seq_printf(s, +- "R %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", ++ "R %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", + rw->width, secs, usec_rem, 
rw->map_id, + (unsigned long long)rw->phys, + rw->value, rw->pc, 0); + break; + case MMIO_WRITE: + ret = trace_seq_printf(s, +- "W %d %lu.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", ++ "W %d %u.%06lu %d 0x%llx 0x%lx 0x%lx %d\n", + rw->width, secs, usec_rem, rw->map_id, + (unsigned long long)rw->phys, + rw->value, rw->pc, 0); + break; + case MMIO_UNKNOWN_OP: + ret = trace_seq_printf(s, +- "UNKNOWN %lu.%06lu %d 0x%llx %02x,%02x,%02x 0x%lx %d\n", ++ "UNKNOWN %u.%06lu %d 0x%llx %02lx,%02lx," ++ "%02lx 0x%lx %d\n", + secs, usec_rem, rw->map_id, + (unsigned long long)rw->phys, + (rw->value >> 16) & 0xff, (rw->value >> 8) & 0xff, +@@ -229,14 +231,14 @@ static enum print_line_t mmio_print_map( + switch (m->opcode) { + case MMIO_PROBE: + ret = trace_seq_printf(s, +- "MAP %lu.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", ++ "MAP %u.%06lu %d 0x%llx 0x%lx 0x%lx 0x%lx %d\n", + secs, usec_rem, m->map_id, + (unsigned long long)m->phys, m->virt, m->len, + 0UL, 0); + break; + case MMIO_UNPROBE: + ret = trace_seq_printf(s, +- "UNMAP %lu.%06lu %d 0x%lx %d\n", ++ "UNMAP %u.%06lu %d 0x%lx %d\n", + secs, usec_rem, m->map_id, 0UL, 0); + break; + default: +@@ -255,18 +257,15 @@ static enum print_line_t mmio_print_mark + const char *msg = print->buf; + struct trace_seq *s = &iter->seq; + unsigned long long t = ns2usecs(iter->ts); +- unsigned long usec_rem = do_div(t, 1000000ULL); ++ unsigned long usec_rem = do_div(t, USEC_PER_SEC); + unsigned secs = (unsigned long)t; + int ret; + + /* The trailing newline must be in the message. */ +- ret = trace_seq_printf(s, "MARK %lu.%06lu %s", secs, usec_rem, msg); ++ ret = trace_seq_printf(s, "MARK %u.%06lu %s", secs, usec_rem, msg); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + +- if (entry->flags & TRACE_FLAG_CONT) +- trace_seq_print_cont(s, iter); +- + return TRACE_TYPE_HANDLED; + } + +@@ -308,21 +307,17 @@ static void __trace_mmiotrace_rw(struct + { + struct ring_buffer_event *event; + struct trace_mmiotrace_rw *entry; +- unsigned long irq_flags; ++ int pc = preempt_count(); + +- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), +- &irq_flags); ++ event = trace_buffer_lock_reserve(tr, TRACE_MMIO_RW, ++ sizeof(*entry), 0, pc); + if (!event) { + atomic_inc(&dropped_count); + return; + } + entry = ring_buffer_event_data(event); +- tracing_generic_entry_update(&entry->ent, 0, preempt_count()); +- entry->ent.type = TRACE_MMIO_RW; + entry->rw = *rw; +- ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +- +- trace_wake_up(); ++ trace_buffer_unlock_commit(tr, event, 0, pc); + } + + void mmio_trace_rw(struct mmiotrace_rw *rw) +@@ -338,21 +333,17 @@ static void __trace_mmiotrace_map(struct + { + struct ring_buffer_event *event; + struct trace_mmiotrace_map *entry; +- unsigned long irq_flags; ++ int pc = preempt_count(); + +- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), +- &irq_flags); ++ event = trace_buffer_lock_reserve(tr, TRACE_MMIO_MAP, ++ sizeof(*entry), 0, pc); + if (!event) { + atomic_inc(&dropped_count); + return; + } + entry = ring_buffer_event_data(event); +- tracing_generic_entry_update(&entry->ent, 0, preempt_count()); +- entry->ent.type = TRACE_MMIO_MAP; + entry->map = *map; +- ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +- +- trace_wake_up(); ++ trace_buffer_unlock_commit(tr, event, 0, pc); + } + + void mmio_trace_mapping(struct mmiotrace_map *map) +@@ -368,5 +359,5 @@ void mmio_trace_mapping(struct mmiotrace + + int mmio_trace_printk(const char *fmt, va_list args) + { +- return trace_vprintk(0, -1, fmt, args); ++ return 
trace_vprintk(0, fmt, args); + } +Index: linux-2.6-tip/kernel/trace/trace_nop.c +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/trace_nop.c ++++ linux-2.6-tip/kernel/trace/trace_nop.c +@@ -47,12 +47,7 @@ static void stop_nop_trace(struct trace_ + + static int nop_trace_init(struct trace_array *tr) + { +- int cpu; + ctx_trace = tr; +- +- for_each_online_cpu(cpu) +- tracing_reset(tr, cpu); +- + start_nop_trace(tr); + return 0; + } +@@ -96,6 +91,7 @@ struct tracer nop_trace __read_mostly = + .name = "nop", + .init = nop_trace_init, + .reset = nop_trace_reset, ++ .wait_pipe = poll_wait_pipe, + #ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_nop, + #endif +Index: linux-2.6-tip/kernel/trace/trace_output.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/trace_output.c +@@ -0,0 +1,1027 @@ ++/* ++ * trace_output.c ++ * ++ * Copyright (C) 2008 Red Hat Inc, Steven Rostedt ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include "trace_output.h" ++ ++/* must be a power of 2 */ ++#define EVENT_HASHSIZE 128 ++ ++static DEFINE_MUTEX(trace_event_mutex); ++static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly; ++ ++static int next_event_type = __TRACE_LAST_TYPE + 1; ++ ++void trace_print_seq(struct seq_file *m, struct trace_seq *s) ++{ ++ int len = s->len >= PAGE_SIZE ? PAGE_SIZE - 1 : s->len; ++ ++ s->buffer[len] = 0; ++ seq_puts(m, s->buffer); ++ ++ trace_seq_init(s); ++} ++ ++enum print_line_t trace_print_bprintk_msg_only(struct trace_iterator *iter) ++{ ++ struct trace_seq *s = &iter->seq; ++ struct trace_entry *entry = iter->ent; ++ struct bprint_entry *field; ++ int ret; ++ ++ trace_assign_type(field, entry); ++ ++ ret = trace_seq_bprintf(s, field->fmt, field->buf); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++enum print_line_t trace_print_printk_msg_only(struct trace_iterator *iter) ++{ ++ struct trace_seq *s = &iter->seq; ++ struct trace_entry *entry = iter->ent; ++ struct print_entry *field; ++ int ret; ++ ++ trace_assign_type(field, entry); ++ ++ ret = trace_seq_printf(s, "%s", field->buf); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++/** ++ * trace_seq_printf - sequence printing of trace information ++ * @s: trace sequence descriptor ++ * @fmt: printf format string ++ * ++ * The tracer may use either sequence operations or its own ++ * copy to user routines. To simplify formating of a trace ++ * trace_seq_printf is used to store strings into a special ++ * buffer (@s). Then the output may be either used by ++ * the sequencer or pulled into another buffer. ++ */ ++int ++trace_seq_printf(struct trace_seq *s, const char *fmt, ...) 
++{ ++ int len = (PAGE_SIZE - 1) - s->len; ++ va_list ap; ++ int ret; ++ ++ if (!len) ++ return 0; ++ ++ va_start(ap, fmt); ++ ret = vsnprintf(s->buffer + s->len, len, fmt, ap); ++ va_end(ap); ++ ++ /* If we can't write it all, don't bother writing anything */ ++ if (ret >= len) ++ return 0; ++ ++ s->len += ret; ++ ++ return len; ++} ++ ++int trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary) ++{ ++ int len = (PAGE_SIZE - 1) - s->len; ++ int ret; ++ ++ if (!len) ++ return 0; ++ ++ ret = bstr_printf(s->buffer + s->len, len, fmt, binary); ++ ++ /* If we can't write it all, don't bother writing anything */ ++ if (ret >= len) ++ return 0; ++ ++ s->len += ret; ++ ++ return len; ++} ++ ++/** ++ * trace_seq_puts - trace sequence printing of simple string ++ * @s: trace sequence descriptor ++ * @str: simple string to record ++ * ++ * The tracer may use either the sequence operations or its own ++ * copy to user routines. This function records a simple string ++ * into a special buffer (@s) for later retrieval by a sequencer ++ * or other mechanism. ++ */ ++int trace_seq_puts(struct trace_seq *s, const char *str) ++{ ++ int len = strlen(str); ++ ++ if (len > ((PAGE_SIZE - 1) - s->len)) ++ return 0; ++ ++ memcpy(s->buffer + s->len, str, len); ++ s->len += len; ++ ++ return len; ++} ++ ++int trace_seq_putc(struct trace_seq *s, unsigned char c) ++{ ++ if (s->len >= (PAGE_SIZE - 1)) ++ return 0; ++ ++ s->buffer[s->len++] = c; ++ ++ return 1; ++} ++ ++int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len) ++{ ++ if (len > ((PAGE_SIZE - 1) - s->len)) ++ return 0; ++ ++ memcpy(s->buffer + s->len, mem, len); ++ s->len += len; ++ ++ return len; ++} ++ ++int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len) ++{ ++ unsigned char hex[HEX_CHARS]; ++ unsigned char *data = mem; ++ int i, j; ++ ++#ifdef __BIG_ENDIAN ++ for (i = 0, j = 0; i < len; i++) { ++#else ++ for (i = len-1, j = 0; i >= 0; i--) { ++#endif ++ hex[j++] = hex_asc_hi(data[i]); ++ hex[j++] = hex_asc_lo(data[i]); ++ } ++ hex[j++] = ' '; ++ ++ return trace_seq_putmem(s, hex, j); ++} ++ ++void *trace_seq_reserve(struct trace_seq *s, size_t len) ++{ ++ void *ret; ++ ++ if (len > ((PAGE_SIZE - 1) - s->len)) ++ return NULL; ++ ++ ret = s->buffer + s->len; ++ s->len += len; ++ ++ return ret; ++} ++ ++int trace_seq_path(struct trace_seq *s, struct path *path) ++{ ++ unsigned char *p; ++ ++ if (s->len >= (PAGE_SIZE - 1)) ++ return 0; ++ p = d_path(path, s->buffer + s->len, PAGE_SIZE - s->len); ++ if (!IS_ERR(p)) { ++ p = mangle_path(s->buffer + s->len, p, "\n"); ++ if (p) { ++ s->len = p - s->buffer; ++ return 1; ++ } ++ } else { ++ s->buffer[s->len++] = '?'; ++ return 1; ++ } ++ ++ return 0; ++} ++ ++#ifdef CONFIG_KRETPROBES ++static inline const char *kretprobed(const char *name) ++{ ++ static const char tramp_name[] = "kretprobe_trampoline"; ++ int size = sizeof(tramp_name); ++ ++ if (strncmp(tramp_name, name, size) == 0) ++ return "[unknown/kretprobe'd]"; ++ return name; ++} ++#else ++static inline const char *kretprobed(const char *name) ++{ ++ return name; ++} ++#endif /* CONFIG_KRETPROBES */ ++ ++static int ++seq_print_sym_short(struct trace_seq *s, const char *fmt, unsigned long address) ++{ ++#ifdef CONFIG_KALLSYMS ++ char str[KSYM_SYMBOL_LEN]; ++ const char *name; ++ ++ kallsyms_lookup(address, NULL, NULL, NULL, str); ++ ++ name = kretprobed(str); ++ ++ return trace_seq_printf(s, fmt, name); ++#endif ++ return 1; ++} ++ ++static int ++seq_print_sym_offset(struct trace_seq *s, const char *fmt, ++ 
unsigned long address) ++{ ++#ifdef CONFIG_KALLSYMS ++ char str[KSYM_SYMBOL_LEN]; ++ const char *name; ++ ++ sprint_symbol(str, address); ++ name = kretprobed(str); ++ ++ return trace_seq_printf(s, fmt, name); ++#endif ++ return 1; ++} ++ ++#ifndef CONFIG_64BIT ++# define IP_FMT "%08lx" ++#else ++# define IP_FMT "%016lx" ++#endif ++ ++int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, ++ unsigned long ip, unsigned long sym_flags) ++{ ++ struct file *file = NULL; ++ unsigned long vmstart = 0; ++ int ret = 1; ++ ++ if (mm) { ++ const struct vm_area_struct *vma; ++ ++ down_read(&mm->mmap_sem); ++ vma = find_vma(mm, ip); ++ if (vma) { ++ file = vma->vm_file; ++ vmstart = vma->vm_start; ++ } ++ if (file) { ++ ret = trace_seq_path(s, &file->f_path); ++ if (ret) ++ ret = trace_seq_printf(s, "[+0x%lx]", ++ ip - vmstart); ++ } ++ up_read(&mm->mmap_sem); ++ } ++ if (ret && ((sym_flags & TRACE_ITER_SYM_ADDR) || !file)) ++ ret = trace_seq_printf(s, " <" IP_FMT ">", ip); ++ return ret; ++} ++ ++int ++seq_print_userip_objs(const struct userstack_entry *entry, struct trace_seq *s, ++ unsigned long sym_flags) ++{ ++ struct mm_struct *mm = NULL; ++ int ret = 1; ++ unsigned int i; ++ ++ if (trace_flags & TRACE_ITER_SYM_USEROBJ) { ++ struct task_struct *task; ++ /* ++ * we do the lookup on the thread group leader, ++ * since individual threads might have already quit! ++ */ ++ rcu_read_lock(); ++ task = find_task_by_vpid(entry->ent.tgid); ++ if (task) ++ mm = get_task_mm(task); ++ rcu_read_unlock(); ++ } ++ ++ for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { ++ unsigned long ip = entry->caller[i]; ++ ++ if (ip == ULONG_MAX || !ret) ++ break; ++ if (i && ret) ++ ret = trace_seq_puts(s, " <- "); ++ if (!ip) { ++ if (ret) ++ ret = trace_seq_puts(s, "??"); ++ continue; ++ } ++ if (!ret) ++ break; ++ if (ret) ++ ret = seq_print_user_ip(s, mm, ip, sym_flags); ++ } ++ ++ if (mm) ++ mmput(mm); ++ return ret; ++} ++ ++int ++seq_print_ip_sym(struct trace_seq *s, unsigned long ip, unsigned long sym_flags) ++{ ++ int ret; ++ ++ if (!ip) ++ return trace_seq_printf(s, "0"); ++ ++ if (sym_flags & TRACE_ITER_SYM_OFFSET) ++ ret = seq_print_sym_offset(s, "%s", ip); ++ else ++ ret = seq_print_sym_short(s, "%s", ip); ++ ++ if (!ret) ++ return 0; ++ ++ if (sym_flags & TRACE_ITER_SYM_ADDR) ++ ret = trace_seq_printf(s, " <" IP_FMT ">", ip); ++ return ret; ++} ++ ++static int ++lat_print_generic(struct trace_seq *s, struct trace_entry *entry, int cpu) ++{ ++ int hardirq, softirq; ++ char comm[TASK_COMM_LEN]; ++ ++ trace_find_cmdline(entry->pid, comm); ++ hardirq = entry->flags & TRACE_FLAG_HARDIRQ; ++ softirq = entry->flags & TRACE_FLAG_SOFTIRQ; ++ ++ if (!trace_seq_printf(s, "%8.8s-%-5d %3d%c%c%c", ++ comm, entry->pid, cpu, ++ (entry->flags & TRACE_FLAG_IRQS_OFF) ? 'd' : ++ (entry->flags & TRACE_FLAG_IRQS_NOSUPPORT) ? ++ 'X' : '.', ++ (entry->flags & TRACE_FLAG_NEED_RESCHED) ? ++ 'N' : '.', ++ (hardirq && softirq) ? 'H' : ++ hardirq ? 'h' : softirq ? 's' : '.')) ++ return 0; ++ ++ if (entry->preempt_count) ++ return trace_seq_printf(s, "%x", entry->preempt_count); ++ return trace_seq_puts(s, "."); ++} ++ ++static unsigned long preempt_mark_thresh = 100; ++ ++static int ++lat_print_timestamp(struct trace_seq *s, u64 abs_usecs, ++ unsigned long rel_usecs) ++{ ++ return trace_seq_printf(s, " %4lldus%c: ", abs_usecs, ++ rel_usecs > preempt_mark_thresh ? '!' : ++ rel_usecs > 1 ? 
'+' : ' '); ++} ++ ++int trace_print_context(struct trace_iterator *iter) ++{ ++ struct trace_seq *s = &iter->seq; ++ struct trace_entry *entry = iter->ent; ++ unsigned long long t = ns2usecs(iter->ts); ++ unsigned long usec_rem = do_div(t, USEC_PER_SEC); ++ unsigned long secs = (unsigned long)t; ++ char comm[TASK_COMM_LEN]; ++ ++ trace_find_cmdline(entry->pid, comm); ++ ++ return trace_seq_printf(s, "%16s-%-5d [%03d] %5lu.%06lu: ", ++ comm, entry->pid, iter->cpu, secs, usec_rem); ++} ++ ++int trace_print_lat_context(struct trace_iterator *iter) ++{ ++ u64 next_ts; ++ int ret; ++ struct trace_seq *s = &iter->seq; ++ struct trace_entry *entry = iter->ent, ++ *next_entry = trace_find_next_entry(iter, NULL, ++ &next_ts); ++ unsigned long verbose = (trace_flags & TRACE_ITER_VERBOSE); ++ unsigned long abs_usecs = ns2usecs(iter->ts - iter->tr->time_start); ++ unsigned long rel_usecs; ++ ++ if (!next_entry) ++ next_ts = iter->ts; ++ rel_usecs = ns2usecs(next_ts - iter->ts); ++ ++ if (verbose) { ++ char comm[TASK_COMM_LEN]; ++ ++ trace_find_cmdline(entry->pid, comm); ++ ++ ret = trace_seq_printf(s, "%16s %5d %3d %d %08x %08lx [%08lx]" ++ " %ld.%03ldms (+%ld.%03ldms): ", comm, ++ entry->pid, iter->cpu, entry->flags, ++ entry->preempt_count, iter->idx, ++ ns2usecs(iter->ts), ++ abs_usecs / USEC_PER_MSEC, ++ abs_usecs % USEC_PER_MSEC, ++ rel_usecs / USEC_PER_MSEC, ++ rel_usecs % USEC_PER_MSEC); ++ } else { ++ ret = lat_print_generic(s, entry, iter->cpu); ++ if (ret) ++ ret = lat_print_timestamp(s, abs_usecs, rel_usecs); ++ } ++ ++ return ret; ++} ++ ++static const char state_to_char[] = TASK_STATE_TO_CHAR_STR; ++ ++static int task_state_char(unsigned long state) ++{ ++ int bit = state ? __ffs(state) + 1 : 0; ++ ++ return bit < sizeof(state_to_char) - 1 ? state_to_char[bit] : '?'; ++} ++ ++/** ++ * ftrace_find_event - find a registered event ++ * @type: the type of event to look for ++ * ++ * Returns an event of type @type otherwise NULL ++ */ ++struct trace_event *ftrace_find_event(int type) ++{ ++ struct trace_event *event; ++ struct hlist_node *n; ++ unsigned key; ++ ++ key = type & (EVENT_HASHSIZE - 1); ++ ++ hlist_for_each_entry_rcu(event, n, &event_hash[key], node) { ++ if (event->type == type) ++ return event; ++ } ++ ++ return NULL; ++} ++ ++/** ++ * register_ftrace_event - register output for an event type ++ * @event: the event type to register ++ * ++ * Event types are stored in a hash and this hash is used to ++ * find a way to print an event. If the @event->type is set ++ * then it will use that type, otherwise it will assign a ++ * type to use. ++ * ++ * If you assign your own type, please make sure it is added ++ * to the trace_type enum in trace.h, to avoid collisions ++ * with the dynamic types. ++ * ++ * Returns the event type number or zero on error. 
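++ * (Calling this with a NULL @event only reserves and returns the next
++ * free event type number.)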
++ */ ++int register_ftrace_event(struct trace_event *event) ++{ ++ unsigned key; ++ int ret = 0; ++ ++ mutex_lock(&trace_event_mutex); ++ ++ if (!event) { ++ ret = next_event_type++; ++ goto out; ++ } ++ ++ if (!event->type) ++ event->type = next_event_type++; ++ else if (event->type > __TRACE_LAST_TYPE) { ++ printk(KERN_WARNING "Need to add type to trace.h\n"); ++ WARN_ON(1); ++ } ++ ++ if (ftrace_find_event(event->type)) ++ goto out; ++ ++ if (event->trace == NULL) ++ event->trace = trace_nop_print; ++ if (event->raw == NULL) ++ event->raw = trace_nop_print; ++ if (event->hex == NULL) ++ event->hex = trace_nop_print; ++ if (event->binary == NULL) ++ event->binary = trace_nop_print; ++ ++ key = event->type & (EVENT_HASHSIZE - 1); ++ ++ hlist_add_head_rcu(&event->node, &event_hash[key]); ++ ++ ret = event->type; ++ out: ++ mutex_unlock(&trace_event_mutex); ++ ++ return ret; ++} ++ ++/** ++ * unregister_ftrace_event - remove a no longer used event ++ * @event: the event to remove ++ */ ++int unregister_ftrace_event(struct trace_event *event) ++{ ++ mutex_lock(&trace_event_mutex); ++ hlist_del(&event->node); ++ mutex_unlock(&trace_event_mutex); ++ ++ return 0; ++} ++ ++/* ++ * Standard events ++ */ ++ ++enum print_line_t trace_nop_print(struct trace_iterator *iter, int flags) ++{ ++ return TRACE_TYPE_HANDLED; ++} ++ ++/* TRACE_FN */ ++static enum print_line_t trace_fn_trace(struct trace_iterator *iter, int flags) ++{ ++ struct ftrace_entry *field; ++ struct trace_seq *s = &iter->seq; ++ ++ trace_assign_type(field, iter->ent); ++ ++ if (!seq_print_ip_sym(s, field->ip, flags)) ++ goto partial; ++ ++ if ((flags & TRACE_ITER_PRINT_PARENT) && field->parent_ip) { ++ if (!trace_seq_printf(s, " <-")) ++ goto partial; ++ if (!seq_print_ip_sym(s, ++ field->parent_ip, ++ flags)) ++ goto partial; ++ } ++ if (!trace_seq_printf(s, "\n")) ++ goto partial; ++ ++ return TRACE_TYPE_HANDLED; ++ ++ partial: ++ return TRACE_TYPE_PARTIAL_LINE; ++} ++ ++static enum print_line_t trace_fn_raw(struct trace_iterator *iter, int flags) ++{ ++ struct ftrace_entry *field; ++ ++ trace_assign_type(field, iter->ent); ++ ++ if (!trace_seq_printf(&iter->seq, "%lx %lx\n", ++ field->ip, ++ field->parent_ip)) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++static enum print_line_t trace_fn_hex(struct trace_iterator *iter, int flags) ++{ ++ struct ftrace_entry *field; ++ struct trace_seq *s = &iter->seq; ++ ++ trace_assign_type(field, iter->ent); ++ ++ SEQ_PUT_HEX_FIELD_RET(s, field->ip); ++ SEQ_PUT_HEX_FIELD_RET(s, field->parent_ip); ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++static enum print_line_t trace_fn_bin(struct trace_iterator *iter, int flags) ++{ ++ struct ftrace_entry *field; ++ struct trace_seq *s = &iter->seq; ++ ++ trace_assign_type(field, iter->ent); ++ ++ SEQ_PUT_FIELD_RET(s, field->ip); ++ SEQ_PUT_FIELD_RET(s, field->parent_ip); ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++static struct trace_event trace_fn_event = { ++ .type = TRACE_FN, ++ .trace = trace_fn_trace, ++ .raw = trace_fn_raw, ++ .hex = trace_fn_hex, ++ .binary = trace_fn_bin, ++}; ++ ++/* TRACE_CTX an TRACE_WAKE */ ++static enum print_line_t trace_ctxwake_print(struct trace_iterator *iter, ++ char *delim) ++{ ++ struct ctx_switch_entry *field; ++ char comm[TASK_COMM_LEN]; ++ int S, T; ++ ++ ++ trace_assign_type(field, iter->ent); ++ ++ T = task_state_char(field->next_state); ++ S = task_state_char(field->prev_state); ++ trace_find_cmdline(field->next_pid, comm); ++ if (!trace_seq_printf(&iter->seq, ++ " %5d:%3d:%c %s [%03d] 
%5d:%3d:%c %s\n", ++ field->prev_pid, ++ field->prev_prio, ++ S, delim, ++ field->next_cpu, ++ field->next_pid, ++ field->next_prio, ++ T, comm)) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++static enum print_line_t trace_ctx_print(struct trace_iterator *iter, int flags) ++{ ++ return trace_ctxwake_print(iter, "==>"); ++} ++ ++static enum print_line_t trace_wake_print(struct trace_iterator *iter, ++ int flags) ++{ ++ return trace_ctxwake_print(iter, " +"); ++} ++ ++static int trace_ctxwake_raw(struct trace_iterator *iter, char S) ++{ ++ struct ctx_switch_entry *field; ++ int T; ++ ++ trace_assign_type(field, iter->ent); ++ ++ if (!S) ++ task_state_char(field->prev_state); ++ T = task_state_char(field->next_state); ++ if (!trace_seq_printf(&iter->seq, "%d %d %c %d %d %d %c\n", ++ field->prev_pid, ++ field->prev_prio, ++ S, ++ field->next_cpu, ++ field->next_pid, ++ field->next_prio, ++ T)) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++static enum print_line_t trace_ctx_raw(struct trace_iterator *iter, int flags) ++{ ++ return trace_ctxwake_raw(iter, 0); ++} ++ ++static enum print_line_t trace_wake_raw(struct trace_iterator *iter, int flags) ++{ ++ return trace_ctxwake_raw(iter, '+'); ++} ++ ++ ++static int trace_ctxwake_hex(struct trace_iterator *iter, char S) ++{ ++ struct ctx_switch_entry *field; ++ struct trace_seq *s = &iter->seq; ++ int T; ++ ++ trace_assign_type(field, iter->ent); ++ ++ if (!S) ++ task_state_char(field->prev_state); ++ T = task_state_char(field->next_state); ++ ++ SEQ_PUT_HEX_FIELD_RET(s, field->prev_pid); ++ SEQ_PUT_HEX_FIELD_RET(s, field->prev_prio); ++ SEQ_PUT_HEX_FIELD_RET(s, S); ++ SEQ_PUT_HEX_FIELD_RET(s, field->next_cpu); ++ SEQ_PUT_HEX_FIELD_RET(s, field->next_pid); ++ SEQ_PUT_HEX_FIELD_RET(s, field->next_prio); ++ SEQ_PUT_HEX_FIELD_RET(s, T); ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++static enum print_line_t trace_ctx_hex(struct trace_iterator *iter, int flags) ++{ ++ return trace_ctxwake_hex(iter, 0); ++} ++ ++static enum print_line_t trace_wake_hex(struct trace_iterator *iter, int flags) ++{ ++ return trace_ctxwake_hex(iter, '+'); ++} ++ ++static enum print_line_t trace_ctxwake_bin(struct trace_iterator *iter, ++ int flags) ++{ ++ struct ctx_switch_entry *field; ++ struct trace_seq *s = &iter->seq; ++ ++ trace_assign_type(field, iter->ent); ++ ++ SEQ_PUT_FIELD_RET(s, field->prev_pid); ++ SEQ_PUT_FIELD_RET(s, field->prev_prio); ++ SEQ_PUT_FIELD_RET(s, field->prev_state); ++ SEQ_PUT_FIELD_RET(s, field->next_pid); ++ SEQ_PUT_FIELD_RET(s, field->next_prio); ++ SEQ_PUT_FIELD_RET(s, field->next_state); ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++static struct trace_event trace_ctx_event = { ++ .type = TRACE_CTX, ++ .trace = trace_ctx_print, ++ .raw = trace_ctx_raw, ++ .hex = trace_ctx_hex, ++ .binary = trace_ctxwake_bin, ++}; ++ ++static struct trace_event trace_wake_event = { ++ .type = TRACE_WAKE, ++ .trace = trace_wake_print, ++ .raw = trace_wake_raw, ++ .hex = trace_wake_hex, ++ .binary = trace_ctxwake_bin, ++}; ++ ++/* TRACE_SPECIAL */ ++static enum print_line_t trace_special_print(struct trace_iterator *iter, ++ int flags) ++{ ++ struct special_entry *field; ++ ++ trace_assign_type(field, iter->ent); ++ ++ if (!trace_seq_printf(&iter->seq, "# %ld %ld %ld\n", ++ field->arg1, ++ field->arg2, ++ field->arg3)) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++static enum print_line_t trace_special_hex(struct trace_iterator *iter, ++ int flags) ++{ ++ struct special_entry 
*field; ++ struct trace_seq *s = &iter->seq; ++ ++ trace_assign_type(field, iter->ent); ++ ++ SEQ_PUT_HEX_FIELD_RET(s, field->arg1); ++ SEQ_PUT_HEX_FIELD_RET(s, field->arg2); ++ SEQ_PUT_HEX_FIELD_RET(s, field->arg3); ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++static enum print_line_t trace_special_bin(struct trace_iterator *iter, ++ int flags) ++{ ++ struct special_entry *field; ++ struct trace_seq *s = &iter->seq; ++ ++ trace_assign_type(field, iter->ent); ++ ++ SEQ_PUT_FIELD_RET(s, field->arg1); ++ SEQ_PUT_FIELD_RET(s, field->arg2); ++ SEQ_PUT_FIELD_RET(s, field->arg3); ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++static struct trace_event trace_special_event = { ++ .type = TRACE_SPECIAL, ++ .trace = trace_special_print, ++ .raw = trace_special_print, ++ .hex = trace_special_hex, ++ .binary = trace_special_bin, ++}; ++ ++/* TRACE_STACK */ ++ ++static enum print_line_t trace_stack_print(struct trace_iterator *iter, ++ int flags) ++{ ++ struct stack_entry *field; ++ struct trace_seq *s = &iter->seq; ++ int i; ++ ++ trace_assign_type(field, iter->ent); ++ ++ for (i = 0; i < FTRACE_STACK_ENTRIES; i++) { ++ if (i) { ++ if (!trace_seq_puts(s, " <= ")) ++ goto partial; ++ ++ if (!seq_print_ip_sym(s, field->caller[i], flags)) ++ goto partial; ++ } ++ if (!trace_seq_puts(s, "\n")) ++ goto partial; ++ } ++ ++ return TRACE_TYPE_HANDLED; ++ ++ partial: ++ return TRACE_TYPE_PARTIAL_LINE; ++} ++ ++static struct trace_event trace_stack_event = { ++ .type = TRACE_STACK, ++ .trace = trace_stack_print, ++ .raw = trace_special_print, ++ .hex = trace_special_hex, ++ .binary = trace_special_bin, ++}; ++ ++/* TRACE_USER_STACK */ ++static enum print_line_t trace_user_stack_print(struct trace_iterator *iter, ++ int flags) ++{ ++ struct userstack_entry *field; ++ struct trace_seq *s = &iter->seq; ++ ++ trace_assign_type(field, iter->ent); ++ ++ if (!seq_print_userip_objs(field, s, flags)) ++ goto partial; ++ ++ if (!trace_seq_putc(s, '\n')) ++ goto partial; ++ ++ return TRACE_TYPE_HANDLED; ++ ++ partial: ++ return TRACE_TYPE_PARTIAL_LINE; ++} ++ ++static struct trace_event trace_user_stack_event = { ++ .type = TRACE_USER_STACK, ++ .trace = trace_user_stack_print, ++ .raw = trace_special_print, ++ .hex = trace_special_hex, ++ .binary = trace_special_bin, ++}; ++ ++/* TRACE_BPRINT */ ++static enum print_line_t ++trace_bprint_print(struct trace_iterator *iter, int flags) ++{ ++ struct trace_entry *entry = iter->ent; ++ struct trace_seq *s = &iter->seq; ++ struct bprint_entry *field; ++ ++ trace_assign_type(field, entry); ++ ++ if (!seq_print_ip_sym(s, field->ip, flags)) ++ goto partial; ++ ++ if (!trace_seq_puts(s, ": ")) ++ goto partial; ++ ++ if (!trace_seq_bprintf(s, field->fmt, field->buf)) ++ goto partial; ++ ++ return TRACE_TYPE_HANDLED; ++ ++ partial: ++ return TRACE_TYPE_PARTIAL_LINE; ++} ++ ++ ++static enum print_line_t ++trace_bprint_raw(struct trace_iterator *iter, int flags) ++{ ++ struct bprint_entry *field; ++ struct trace_seq *s = &iter->seq; ++ ++ trace_assign_type(field, iter->ent); ++ ++ if (!trace_seq_printf(s, ": %lx : ", field->ip)) ++ goto partial; ++ ++ if (!trace_seq_bprintf(s, field->fmt, field->buf)) ++ goto partial; ++ ++ return TRACE_TYPE_HANDLED; ++ ++ partial: ++ return TRACE_TYPE_PARTIAL_LINE; ++} ++ ++ ++static struct trace_event trace_bprint_event = { ++ .type = TRACE_BPRINT, ++ .trace = trace_bprint_print, ++ .raw = trace_bprint_raw, ++}; ++ ++/* TRACE_PRINT */ ++static enum print_line_t trace_print_print(struct trace_iterator *iter, ++ int flags) ++{ ++ struct print_entry *field; ++ 
struct trace_seq *s = &iter->seq; ++ ++ trace_assign_type(field, iter->ent); ++ ++ if (!seq_print_ip_sym(s, field->ip, flags)) ++ goto partial; ++ ++ if (!trace_seq_printf(s, ": %s", field->buf)) ++ goto partial; ++ ++ return TRACE_TYPE_HANDLED; ++ ++ partial: ++ return TRACE_TYPE_PARTIAL_LINE; ++} ++ ++static enum print_line_t trace_print_raw(struct trace_iterator *iter, int flags) ++{ ++ struct print_entry *field; ++ ++ trace_assign_type(field, iter->ent); ++ ++ if (!trace_seq_printf(&iter->seq, "# %lx %s", field->ip, field->buf)) ++ goto partial; ++ ++ return TRACE_TYPE_HANDLED; ++ ++ partial: ++ return TRACE_TYPE_PARTIAL_LINE; ++} ++ ++static struct trace_event trace_print_event = { ++ .type = TRACE_PRINT, ++ .trace = trace_print_print, ++ .raw = trace_print_raw, ++}; ++ ++ ++static struct trace_event *events[] __initdata = { ++ &trace_fn_event, ++ &trace_ctx_event, ++ &trace_wake_event, ++ &trace_special_event, ++ &trace_stack_event, ++ &trace_user_stack_event, ++ &trace_bprint_event, ++ &trace_print_event, ++ NULL ++}; ++ ++__init static int init_events(void) ++{ ++ struct trace_event *event; ++ int i, ret; ++ ++ for (i = 0; events[i]; i++) { ++ event = events[i]; ++ ++ ret = register_ftrace_event(event); ++ if (!ret) { ++ printk(KERN_WARNING "event %d failed to register\n", ++ event->type); ++ WARN_ON_ONCE(1); ++ } ++ } ++ ++ return 0; ++} ++device_initcall(init_events); +Index: linux-2.6-tip/kernel/trace/trace_output.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/trace_output.h +@@ -0,0 +1,72 @@ ++#ifndef __TRACE_EVENTS_H ++#define __TRACE_EVENTS_H ++ ++#include "trace.h" ++ ++typedef enum print_line_t (*trace_print_func)(struct trace_iterator *iter, ++ int flags); ++ ++struct trace_event { ++ struct hlist_node node; ++ int type; ++ trace_print_func trace; ++ trace_print_func raw; ++ trace_print_func hex; ++ trace_print_func binary; ++}; ++ ++extern enum print_line_t ++trace_print_bprintk_msg_only(struct trace_iterator *iter); ++extern enum print_line_t ++trace_print_printk_msg_only(struct trace_iterator *iter); ++ ++extern void trace_print_seq(struct seq_file *m, struct trace_seq *s); ++ ++extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...) 
++ __attribute__ ((format (printf, 2, 3))); ++extern int ++trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary); ++extern int ++seq_print_ip_sym(struct trace_seq *s, unsigned long ip, ++ unsigned long sym_flags); ++extern ssize_t trace_seq_to_user(struct trace_seq *s, char __user *ubuf, ++ size_t cnt); ++extern int trace_seq_puts(struct trace_seq *s, const char *str); ++extern int trace_seq_putc(struct trace_seq *s, unsigned char c); ++extern int trace_seq_putmem(struct trace_seq *s, void *mem, size_t len); ++extern int trace_seq_putmem_hex(struct trace_seq *s, void *mem, size_t len); ++extern void *trace_seq_reserve(struct trace_seq *s, size_t len); ++extern int trace_seq_path(struct trace_seq *s, struct path *path); ++extern int seq_print_userip_objs(const struct userstack_entry *entry, ++ struct trace_seq *s, unsigned long sym_flags); ++extern int seq_print_user_ip(struct trace_seq *s, struct mm_struct *mm, ++ unsigned long ip, unsigned long sym_flags); ++ ++extern int trace_print_context(struct trace_iterator *iter); ++extern int trace_print_lat_context(struct trace_iterator *iter); ++ ++extern struct trace_event *ftrace_find_event(int type); ++extern int register_ftrace_event(struct trace_event *event); ++extern int unregister_ftrace_event(struct trace_event *event); ++ ++extern enum print_line_t trace_nop_print(struct trace_iterator *iter, ++ int flags); ++ ++#define MAX_MEMHEX_BYTES 8 ++#define HEX_CHARS (MAX_MEMHEX_BYTES*2 + 1) ++ ++#define SEQ_PUT_FIELD_RET(s, x) \ ++do { \ ++ if (!trace_seq_putmem(s, &(x), sizeof(x))) \ ++ return TRACE_TYPE_PARTIAL_LINE; \ ++} while (0) ++ ++#define SEQ_PUT_HEX_FIELD_RET(s, x) \ ++do { \ ++ BUILD_BUG_ON(sizeof(x) > MAX_MEMHEX_BYTES); \ ++ if (!trace_seq_putmem_hex(s, &(x), sizeof(x))) \ ++ return TRACE_TYPE_PARTIAL_LINE; \ ++} while (0) ++ ++#endif ++ +Index: linux-2.6-tip/kernel/trace/trace_power.c +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/trace_power.c ++++ linux-2.6-tip/kernel/trace/trace_power.c +@@ -11,15 +11,113 @@ + + #include + #include +-#include ++#include + #include + #include + + #include "trace.h" ++#include "trace_output.h" + + static struct trace_array *power_trace; + static int __read_mostly trace_power_enabled; + ++static void probe_power_start(struct power_trace *it, unsigned int type, ++ unsigned int level) ++{ ++ if (!trace_power_enabled) ++ return; ++ ++ memset(it, 0, sizeof(struct power_trace)); ++ it->state = level; ++ it->type = type; ++ it->stamp = ktime_get(); ++} ++ ++ ++static void probe_power_end(struct power_trace *it) ++{ ++ struct ring_buffer_event *event; ++ struct trace_power *entry; ++ struct trace_array_cpu *data; ++ struct trace_array *tr = power_trace; ++ ++ if (!trace_power_enabled) ++ return; ++ ++ preempt_disable(); ++ it->end = ktime_get(); ++ data = tr->data[smp_processor_id()]; ++ ++ event = trace_buffer_lock_reserve(tr, TRACE_POWER, ++ sizeof(*entry), 0, 0); ++ if (!event) ++ goto out; ++ entry = ring_buffer_event_data(event); ++ entry->state_data = *it; ++ trace_buffer_unlock_commit(tr, event, 0, 0); ++ out: ++ preempt_enable(); ++} ++ ++static void probe_power_mark(struct power_trace *it, unsigned int type, ++ unsigned int level) ++{ ++ struct ring_buffer_event *event; ++ struct trace_power *entry; ++ struct trace_array_cpu *data; ++ struct trace_array *tr = power_trace; ++ ++ if (!trace_power_enabled) ++ return; ++ ++ memset(it, 0, sizeof(struct power_trace)); ++ it->state = level; ++ it->type = type; ++ it->stamp 
= ktime_get(); ++ preempt_disable(); ++ it->end = it->stamp; ++ data = tr->data[smp_processor_id()]; ++ ++ event = trace_buffer_lock_reserve(tr, TRACE_POWER, ++ sizeof(*entry), 0, 0); ++ if (!event) ++ goto out; ++ entry = ring_buffer_event_data(event); ++ entry->state_data = *it; ++ trace_buffer_unlock_commit(tr, event, 0, 0); ++ out: ++ preempt_enable(); ++} ++ ++static int tracing_power_register(void) ++{ ++ int ret; ++ ++ ret = register_trace_power_start(probe_power_start); ++ if (ret) { ++ pr_info("power trace: Couldn't activate tracepoint" ++ " probe to trace_power_start\n"); ++ return ret; ++ } ++ ret = register_trace_power_end(probe_power_end); ++ if (ret) { ++ pr_info("power trace: Couldn't activate tracepoint" ++ " probe to trace_power_end\n"); ++ goto fail_start; ++ } ++ ret = register_trace_power_mark(probe_power_mark); ++ if (ret) { ++ pr_info("power trace: Couldn't activate tracepoint" ++ " probe to trace_power_mark\n"); ++ goto fail_end; ++ } ++ return ret; ++fail_end: ++ unregister_trace_power_end(probe_power_end); ++fail_start: ++ unregister_trace_power_start(probe_power_start); ++ return ret; ++} + + static void start_power_trace(struct trace_array *tr) + { +@@ -31,6 +129,14 @@ static void stop_power_trace(struct trac + trace_power_enabled = 0; + } + ++static void power_trace_reset(struct trace_array *tr) ++{ ++ trace_power_enabled = 0; ++ unregister_trace_power_start(probe_power_start); ++ unregister_trace_power_end(probe_power_end); ++ unregister_trace_power_mark(probe_power_mark); ++} ++ + + static int power_trace_init(struct trace_array *tr) + { +@@ -38,6 +144,7 @@ static int power_trace_init(struct trace + power_trace = tr; + + trace_power_enabled = 1; ++ tracing_power_register(); + + for_each_cpu(cpu, cpu_possible_mask) + tracing_reset(tr, cpu); +@@ -85,7 +192,7 @@ static struct tracer power_tracer __read + .init = power_trace_init, + .start = start_power_trace, + .stop = stop_power_trace, +- .reset = stop_power_trace, ++ .reset = power_trace_reset, + .print_line = power_print_line, + }; + +@@ -94,86 +201,3 @@ static int init_power_trace(void) + return register_tracer(&power_tracer); + } + device_initcall(init_power_trace); +- +-void trace_power_start(struct power_trace *it, unsigned int type, +- unsigned int level) +-{ +- if (!trace_power_enabled) +- return; +- +- memset(it, 0, sizeof(struct power_trace)); +- it->state = level; +- it->type = type; +- it->stamp = ktime_get(); +-} +-EXPORT_SYMBOL_GPL(trace_power_start); +- +- +-void trace_power_end(struct power_trace *it) +-{ +- struct ring_buffer_event *event; +- struct trace_power *entry; +- struct trace_array_cpu *data; +- unsigned long irq_flags; +- struct trace_array *tr = power_trace; +- +- if (!trace_power_enabled) +- return; +- +- preempt_disable(); +- it->end = ktime_get(); +- data = tr->data[smp_processor_id()]; +- +- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), +- &irq_flags); +- if (!event) +- goto out; +- entry = ring_buffer_event_data(event); +- tracing_generic_entry_update(&entry->ent, 0, 0); +- entry->ent.type = TRACE_POWER; +- entry->state_data = *it; +- ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +- +- trace_wake_up(); +- +- out: +- preempt_enable(); +-} +-EXPORT_SYMBOL_GPL(trace_power_end); +- +-void trace_power_mark(struct power_trace *it, unsigned int type, +- unsigned int level) +-{ +- struct ring_buffer_event *event; +- struct trace_power *entry; +- struct trace_array_cpu *data; +- unsigned long irq_flags; +- struct trace_array *tr = power_trace; +- +- if 
(!trace_power_enabled) +- return; +- +- memset(it, 0, sizeof(struct power_trace)); +- it->state = level; +- it->type = type; +- it->stamp = ktime_get(); +- preempt_disable(); +- it->end = it->stamp; +- data = tr->data[smp_processor_id()]; +- +- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry), +- &irq_flags); +- if (!event) +- goto out; +- entry = ring_buffer_event_data(event); +- tracing_generic_entry_update(&entry->ent, 0, 0); +- entry->ent.type = TRACE_POWER; +- entry->state_data = *it; +- ring_buffer_unlock_commit(tr->buffer, event, irq_flags); +- +- trace_wake_up(); +- +- out: +- preempt_enable(); +-} +-EXPORT_SYMBOL_GPL(trace_power_mark); +Index: linux-2.6-tip/kernel/trace/trace_printk.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/trace_printk.c +@@ -0,0 +1,270 @@ ++/* ++ * trace binary printk ++ * ++ * Copyright (C) 2008 Lai Jiangshan ++ * ++ */ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "trace.h" ++ ++#ifdef CONFIG_MODULES ++ ++/* ++ * modules trace_printk()'s formats are autosaved in struct trace_bprintk_fmt ++ * which are queued on trace_bprintk_fmt_list. ++ */ ++static LIST_HEAD(trace_bprintk_fmt_list); ++ ++/* serialize accesses to trace_bprintk_fmt_list */ ++static DEFINE_MUTEX(btrace_mutex); ++ ++struct trace_bprintk_fmt { ++ struct list_head list; ++ char fmt[0]; ++}; ++ ++static inline struct trace_bprintk_fmt *lookup_format(const char *fmt) ++{ ++ struct trace_bprintk_fmt *pos; ++ list_for_each_entry(pos, &trace_bprintk_fmt_list, list) { ++ if (!strcmp(pos->fmt, fmt)) ++ return pos; ++ } ++ return NULL; ++} ++ ++static ++void hold_module_trace_bprintk_format(const char **start, const char **end) ++{ ++ const char **iter; ++ ++ mutex_lock(&btrace_mutex); ++ for (iter = start; iter < end; iter++) { ++ struct trace_bprintk_fmt *tb_fmt = lookup_format(*iter); ++ if (tb_fmt) { ++ *iter = tb_fmt->fmt; ++ continue; ++ } ++ ++ tb_fmt = kmalloc(offsetof(struct trace_bprintk_fmt, fmt) ++ + strlen(*iter) + 1, GFP_KERNEL); ++ if (tb_fmt) { ++ list_add_tail(&tb_fmt->list, &trace_bprintk_fmt_list); ++ strcpy(tb_fmt->fmt, *iter); ++ *iter = tb_fmt->fmt; ++ } else ++ *iter = NULL; ++ } ++ mutex_unlock(&btrace_mutex); ++} ++ ++static int module_trace_bprintk_format_notify(struct notifier_block *self, ++ unsigned long val, void *data) ++{ ++ struct module *mod = data; ++ if (mod->num_trace_bprintk_fmt) { ++ const char **start = mod->trace_bprintk_fmt_start; ++ const char **end = start + mod->num_trace_bprintk_fmt; ++ ++ if (val == MODULE_STATE_COMING) ++ hold_module_trace_bprintk_format(start, end); ++ } ++ return 0; ++} ++ ++#else /* !CONFIG_MODULES */ ++__init static int ++module_trace_bprintk_format_notify(struct notifier_block *self, ++ unsigned long val, void *data) ++{ ++ return 0; ++} ++#endif /* CONFIG_MODULES */ ++ ++ ++__initdata_or_module static ++struct notifier_block module_trace_bprintk_format_nb = { ++ .notifier_call = module_trace_bprintk_format_notify, ++}; ++ ++int __trace_bprintk(unsigned long ip, const char *fmt, ...) 
++ { ++ int ret; ++ va_list ap; ++ ++ if (unlikely(!fmt)) ++ return 0; ++ ++ if (!(trace_flags & TRACE_ITER_PRINTK)) ++ return 0; ++ ++ va_start(ap, fmt); ++ ret = trace_vbprintk(ip, fmt, ap); ++ va_end(ap); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(__trace_bprintk); ++ ++int __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap) ++ { ++ if (unlikely(!fmt)) ++ return 0; ++ ++ if (!(trace_flags & TRACE_ITER_PRINTK)) ++ return 0; ++ ++ return trace_vbprintk(ip, fmt, ap); ++} ++EXPORT_SYMBOL_GPL(__ftrace_vbprintk); ++ ++int __trace_printk(unsigned long ip, const char *fmt, ...) ++{ ++ int ret; ++ va_list ap; ++ ++ if (!(trace_flags & TRACE_ITER_PRINTK)) ++ return 0; ++ ++ va_start(ap, fmt); ++ ret = trace_vprintk(ip, fmt, ap); ++ va_end(ap); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(__trace_printk); ++ ++int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap) ++{ ++ if (!(trace_flags & TRACE_ITER_PRINTK)) ++ return 0; ++ ++ return trace_vprintk(ip, fmt, ap); ++} ++EXPORT_SYMBOL_GPL(__ftrace_vprintk); ++ ++static void * ++t_next(struct seq_file *m, void *v, loff_t *pos) ++{ ++ const char **fmt = m->private; ++ const char **next = fmt; ++ ++ (*pos)++; ++ ++ if ((unsigned long)fmt >= (unsigned long)__stop___trace_bprintk_fmt) ++ return NULL; ++ ++ next = fmt; ++ m->private = ++next; ++ ++ return fmt; ++} ++ ++static void *t_start(struct seq_file *m, loff_t *pos) ++{ ++ return t_next(m, NULL, pos); ++} ++ ++static int t_show(struct seq_file *m, void *v) ++{ ++ const char **fmt = v; ++ const char *str = *fmt; ++ int i; ++ ++ seq_printf(m, "0x%lx : \"", (unsigned long)fmt); ++ ++ /* ++ * Tabs and new lines need to be converted. ++ */ ++ for (i = 0; str[i]; i++) { ++ switch (str[i]) { ++ case '\n': ++ seq_puts(m, "\\n"); ++ break; ++ case '\t': ++ seq_puts(m, "\\t"); ++ break; ++ case '\\': ++ seq_puts(m, "\\"); ++ break; ++ case '"': ++ seq_puts(m, "\\\""); ++ break; ++ default: ++ seq_putc(m, str[i]); ++ } ++ } ++ seq_puts(m, "\"\n"); ++ ++ return 0; ++} ++ ++static void t_stop(struct seq_file *m, void *p) ++{ ++} ++ ++static const struct seq_operations show_format_seq_ops = { ++ .start = t_start, ++ .next = t_next, ++ .show = t_show, ++ .stop = t_stop, ++}; ++ ++static int ++ftrace_formats_open(struct inode *inode, struct file *file) ++{ ++ int ret; ++ ++ ret = seq_open(file, &show_format_seq_ops); ++ if (!ret) { ++ struct seq_file *m = file->private_data; ++ ++ m->private = __start___trace_bprintk_fmt; ++ } ++ return ret; ++} ++ ++static const struct file_operations ftrace_formats_fops = { ++ .open = ftrace_formats_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = seq_release, ++}; ++ ++static __init int init_trace_printk_function_export(void) ++{ ++ struct dentry *d_tracer; ++ struct dentry *entry; ++ ++ d_tracer = tracing_init_dentry(); ++ if (!d_tracer) ++ return 0; ++ ++ entry = debugfs_create_file("printk_formats", 0444, d_tracer, ++ NULL, &ftrace_formats_fops); ++ if (!entry) ++ pr_warning("Could not create debugfs " ++ "'printk_formats' entry\n"); ++ ++ return 0; ++} ++ ++fs_initcall(init_trace_printk_function_export); ++ ++static __init int init_trace_printk(void) ++{ ++ return register_module_notifier(&module_trace_bprintk_format_nb); ++} ++ ++early_initcall(init_trace_printk); +Index: linux-2.6-tip/kernel/trace/trace_sched_switch.c +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/trace_sched_switch.c ++++ linux-2.6-tip/kernel/trace/trace_sched_switch.c +@@ -18,6 +18,7 @@ static struct trace_array 
*ctx_trace; + static int __read_mostly tracer_enabled; + static int sched_ref; + static DEFINE_MUTEX(sched_register_mutex); ++static int sched_stopped; + + static void + probe_sched_switch(struct rq *__rq, struct task_struct *prev, +@@ -28,7 +29,7 @@ probe_sched_switch(struct rq *__rq, stru + int cpu; + int pc; + +- if (!sched_ref) ++ if (!sched_ref || sched_stopped) + return; + + tracing_record_cmdline(prev); +@@ -43,7 +44,7 @@ probe_sched_switch(struct rq *__rq, stru + data = ctx_trace->data[cpu]; + + if (likely(!atomic_read(&data->disabled))) +- tracing_sched_switch_trace(ctx_trace, data, prev, next, flags, pc); ++ tracing_sched_switch_trace(ctx_trace, prev, next, flags, pc); + + local_irq_restore(flags); + } +@@ -66,7 +67,7 @@ probe_sched_wakeup(struct rq *__rq, stru + data = ctx_trace->data[cpu]; + + if (likely(!atomic_read(&data->disabled))) +- tracing_sched_wakeup_trace(ctx_trace, data, wakee, current, ++ tracing_sched_wakeup_trace(ctx_trace, wakee, current, + flags, pc); + + local_irq_restore(flags); +@@ -93,7 +94,7 @@ static int tracing_sched_register(void) + ret = register_trace_sched_switch(probe_sched_switch); + if (ret) { + pr_info("sched trace: Couldn't activate tracepoint" +- " probe to kernel_sched_schedule\n"); ++ " probe to kernel_sched_switch\n"); + goto fail_deprobe_wake_new; + } + +@@ -185,12 +186,6 @@ void tracing_sched_switch_assign_trace(s + ctx_trace = tr; + } + +-static void start_sched_trace(struct trace_array *tr) +-{ +- tracing_reset_online_cpus(tr); +- tracing_start_sched_switch_record(); +-} +- + static void stop_sched_trace(struct trace_array *tr) + { + tracing_stop_sched_switch_record(); +@@ -199,7 +194,8 @@ static void stop_sched_trace(struct trac + static int sched_switch_trace_init(struct trace_array *tr) + { + ctx_trace = tr; +- start_sched_trace(tr); ++ tracing_reset_online_cpus(tr); ++ tracing_start_sched_switch_record(); + return 0; + } + +@@ -211,13 +207,12 @@ static void sched_switch_trace_reset(str + + static void sched_switch_trace_start(struct trace_array *tr) + { +- tracing_reset_online_cpus(tr); +- tracing_start_sched_switch(); ++ sched_stopped = 0; + } + + static void sched_switch_trace_stop(struct trace_array *tr) + { +- tracing_stop_sched_switch(); ++ sched_stopped = 1; + } + + static struct tracer sched_switch_trace __read_mostly = +@@ -227,6 +222,7 @@ static struct tracer sched_switch_trace + .reset = sched_switch_trace_reset, + .start = sched_switch_trace_start, + .stop = sched_switch_trace_stop, ++ .wait_pipe = poll_wait_pipe, + #ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_sched_switch, + #endif +Index: linux-2.6-tip/kernel/trace/trace_sched_wakeup.c +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/trace_sched_wakeup.c ++++ linux-2.6-tip/kernel/trace/trace_sched_wakeup.c +@@ -25,12 +25,14 @@ static int __read_mostly tracer_enabled; + static struct task_struct *wakeup_task; + static int wakeup_cpu; + static unsigned wakeup_prio = -1; ++static int wakeup_rt; + +-static raw_spinlock_t wakeup_lock = +- (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; ++static __raw_spinlock_t wakeup_lock = __RAW_SPIN_LOCK_UNLOCKED; + + static void __wakeup_reset(struct trace_array *tr); + ++static int save_lat_flag; ++ + #ifdef CONFIG_FUNCTION_TRACER + /* + * irqsoff uses its own tracer function to keep the overhead down: +@@ -71,7 +73,7 @@ wakeup_tracer_call(unsigned long ip, uns + if (task_cpu(wakeup_task) != cpu) + goto unlock; + +- trace_function(tr, data, ip, parent_ip, flags, pc); 
++ trace_function(tr, ip, parent_ip, flags, pc); + + unlock: + __raw_spin_unlock(&wakeup_lock); +@@ -151,7 +153,8 @@ probe_wakeup_sched_switch(struct rq *rq, + if (unlikely(!tracer_enabled || next != wakeup_task)) + goto out_unlock; + +- trace_function(wakeup_trace, data, CALLER_ADDR1, CALLER_ADDR2, flags, pc); ++ trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); ++ tracing_sched_switch_trace(wakeup_trace, prev, next, flags, pc); + + /* + * usecs conversion is slow so we try to delay the conversion +@@ -182,13 +185,10 @@ out: + + static void __wakeup_reset(struct trace_array *tr) + { +- struct trace_array_cpu *data; + int cpu; + +- for_each_possible_cpu(cpu) { +- data = tr->data[cpu]; ++ for_each_possible_cpu(cpu) + tracing_reset(tr, cpu); +- } + + wakeup_cpu = -1; + wakeup_prio = -1; +@@ -213,6 +213,7 @@ static void wakeup_reset(struct trace_ar + static void + probe_wakeup(struct rq *rq, struct task_struct *p, int success) + { ++ struct trace_array_cpu *data; + int cpu = smp_processor_id(); + unsigned long flags; + long disabled; +@@ -224,7 +225,7 @@ probe_wakeup(struct rq *rq, struct task_ + tracing_record_cmdline(p); + tracing_record_cmdline(current); + +- if (likely(!rt_task(p)) || ++ if ((wakeup_rt && !rt_task(p)) || + p->prio >= wakeup_prio || + p->prio >= current->prio) + return; +@@ -252,9 +253,10 @@ probe_wakeup(struct rq *rq, struct task_ + + local_save_flags(flags); + +- wakeup_trace->data[wakeup_cpu]->preempt_timestamp = ftrace_now(cpu); +- trace_function(wakeup_trace, wakeup_trace->data[wakeup_cpu], +- CALLER_ADDR1, CALLER_ADDR2, flags, pc); ++ data = wakeup_trace->data[wakeup_cpu]; ++ data->preempt_timestamp = ftrace_now(cpu); ++ tracing_sched_wakeup_trace(wakeup_trace, p, current, flags, pc); ++ trace_function(wakeup_trace, CALLER_ADDR1, CALLER_ADDR2, flags, pc); + + out_locked: + __raw_spin_unlock(&wakeup_lock); +@@ -262,12 +264,6 @@ out: + atomic_dec(&wakeup_trace->data[cpu]->disabled); + } + +-/* +- * save_tracer_enabled is used to save the state of the tracer_enabled +- * variable when we disable it when we open a trace output file. 
+- */ +-static int save_tracer_enabled; +- + static void start_wakeup_tracer(struct trace_array *tr) + { + int ret; +@@ -289,7 +285,7 @@ static void start_wakeup_tracer(struct t + ret = register_trace_sched_switch(probe_wakeup_sched_switch); + if (ret) { + pr_info("sched trace: Couldn't activate tracepoint" +- " probe to kernel_sched_schedule\n"); ++ " probe to kernel_sched_switch\n"); + goto fail_deprobe_wake_new; + } + +@@ -306,13 +302,10 @@ static void start_wakeup_tracer(struct t + + register_ftrace_function(&trace_ops); + +- if (tracing_is_enabled()) { ++ if (tracing_is_enabled()) + tracer_enabled = 1; +- save_tracer_enabled = 1; +- } else { ++ else + tracer_enabled = 0; +- save_tracer_enabled = 0; +- } + + return; + fail_deprobe_wake_new: +@@ -324,54 +317,54 @@ fail_deprobe: + static void stop_wakeup_tracer(struct trace_array *tr) + { + tracer_enabled = 0; +- save_tracer_enabled = 0; + unregister_ftrace_function(&trace_ops); + unregister_trace_sched_switch(probe_wakeup_sched_switch); + unregister_trace_sched_wakeup_new(probe_wakeup); + unregister_trace_sched_wakeup(probe_wakeup); + } + +-static int wakeup_tracer_init(struct trace_array *tr) ++static int __wakeup_tracer_init(struct trace_array *tr) + { ++ save_lat_flag = trace_flags & TRACE_ITER_LATENCY_FMT; ++ trace_flags |= TRACE_ITER_LATENCY_FMT; ++ + tracing_max_latency = 0; + wakeup_trace = tr; + start_wakeup_tracer(tr); + return 0; + } + ++static int wakeup_tracer_init(struct trace_array *tr) ++{ ++ wakeup_rt = 0; ++ return __wakeup_tracer_init(tr); ++} ++ ++static int wakeup_rt_tracer_init(struct trace_array *tr) ++{ ++ wakeup_rt = 1; ++ return __wakeup_tracer_init(tr); ++} ++ + static void wakeup_tracer_reset(struct trace_array *tr) + { + stop_wakeup_tracer(tr); + /* make sure we put back any tasks we are tracing */ + wakeup_reset(tr); ++ ++ if (!save_lat_flag) ++ trace_flags &= ~TRACE_ITER_LATENCY_FMT; + } + + static void wakeup_tracer_start(struct trace_array *tr) + { + wakeup_reset(tr); + tracer_enabled = 1; +- save_tracer_enabled = 1; + } + + static void wakeup_tracer_stop(struct trace_array *tr) + { + tracer_enabled = 0; +- save_tracer_enabled = 0; +-} +- +-static void wakeup_tracer_open(struct trace_iterator *iter) +-{ +- /* stop the trace while dumping */ +- tracer_enabled = 0; +-} +- +-static void wakeup_tracer_close(struct trace_iterator *iter) +-{ +- /* forget about any processes we were recording */ +- if (save_tracer_enabled) { +- wakeup_reset(iter->tr); +- tracer_enabled = 1; +- } + } + + static struct tracer wakeup_tracer __read_mostly = +@@ -381,8 +374,20 @@ static struct tracer wakeup_tracer __rea + .reset = wakeup_tracer_reset, + .start = wakeup_tracer_start, + .stop = wakeup_tracer_stop, +- .open = wakeup_tracer_open, +- .close = wakeup_tracer_close, ++ .print_max = 1, ++#ifdef CONFIG_FTRACE_SELFTEST ++ .selftest = trace_selftest_startup_wakeup, ++#endif ++}; ++ ++static struct tracer wakeup_rt_tracer __read_mostly = ++{ ++ .name = "wakeup_rt", ++ .init = wakeup_rt_tracer_init, ++ .reset = wakeup_tracer_reset, ++ .start = wakeup_tracer_start, ++ .stop = wakeup_tracer_stop, ++ .wait_pipe = poll_wait_pipe, + .print_max = 1, + #ifdef CONFIG_FTRACE_SELFTEST + .selftest = trace_selftest_startup_wakeup, +@@ -397,6 +402,10 @@ __init static int init_wakeup_tracer(voi + if (ret) + return ret; + ++ ret = register_tracer(&wakeup_rt_tracer); ++ if (ret) ++ return ret; ++ + return 0; + } + device_initcall(init_wakeup_tracer); +Index: linux-2.6-tip/kernel/trace/trace_selftest.c 
+=================================================================== +--- linux-2.6-tip.orig/kernel/trace/trace_selftest.c ++++ linux-2.6-tip/kernel/trace/trace_selftest.c +@@ -1,5 +1,6 @@ + /* Include in trace.c */ + ++#include + #include + #include + +@@ -9,11 +10,12 @@ static inline int trace_valid_entry(stru + case TRACE_FN: + case TRACE_CTX: + case TRACE_WAKE: +- case TRACE_CONT: + case TRACE_STACK: + case TRACE_PRINT: + case TRACE_SPECIAL: + case TRACE_BRANCH: ++ case TRACE_GRAPH_ENT: ++ case TRACE_GRAPH_RET: + return 1; + } + return 0; +@@ -99,9 +101,6 @@ static inline void warn_failed_init_trac + + #ifdef CONFIG_DYNAMIC_FTRACE + +-#define __STR(x) #x +-#define STR(x) __STR(x) +- + /* Test dynamic code modification and ftrace filters */ + int trace_selftest_startup_dynamic_tracing(struct tracer *trace, + struct trace_array *tr, +@@ -125,17 +124,17 @@ int trace_selftest_startup_dynamic_traci + func(); + + /* +- * Some archs *cough*PowerPC*cough* add charachters to the ++ * Some archs *cough*PowerPC*cough* add characters to the + * start of the function names. We simply put a '*' to +- * accomodate them. ++ * accommodate them. + */ +- func_name = "*" STR(DYN_FTRACE_TEST_NAME); ++ func_name = "*" __stringify(DYN_FTRACE_TEST_NAME); + + /* filter only on our function */ + ftrace_set_filter(func_name, strlen(func_name), 1); + + /* enable tracing */ +- ret = trace->init(tr); ++ ret = tracer_init(trace, tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + goto out; +@@ -209,7 +208,7 @@ trace_selftest_startup_function(struct t + ftrace_enabled = 1; + tracer_enabled = 1; + +- ret = trace->init(tr); ++ ret = tracer_init(trace, tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + goto out; +@@ -247,6 +246,90 @@ trace_selftest_startup_function(struct t + } + #endif /* CONFIG_FUNCTION_TRACER */ + ++ ++#ifdef CONFIG_FUNCTION_GRAPH_TRACER ++ ++/* Maximum number of functions to trace before diagnosing a hang */ ++#define GRAPH_MAX_FUNC_TEST 100000000 ++ ++static void __ftrace_dump(bool disable_tracing); ++static unsigned int graph_hang_thresh; ++ ++/* Wrap the real function entry probe to avoid possible hanging */ ++static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace) ++{ ++ /* This is harmlessly racy, we want to approximately detect a hang */ ++ if (unlikely(++graph_hang_thresh > GRAPH_MAX_FUNC_TEST)) { ++ ftrace_graph_stop(); ++ printk(KERN_WARNING "BUG: Function graph tracer hang!\n"); ++ if (ftrace_dump_on_oops) ++ __ftrace_dump(false); ++ return 0; ++ } ++ ++ return trace_graph_entry(trace); ++} ++ ++/* ++ * Pretty much the same than for the function tracer from which the selftest ++ * has been borrowed. ++ */ ++int ++trace_selftest_startup_function_graph(struct tracer *trace, ++ struct trace_array *tr) ++{ ++ int ret; ++ unsigned long count; ++ ++ /* ++ * Simulate the init() callback but we attach a watchdog callback ++ * to detect and recover from possible hangs ++ */ ++ tracing_reset_online_cpus(tr); ++ ret = register_ftrace_graph(&trace_graph_return, ++ &trace_graph_entry_watchdog); ++ if (ret) { ++ warn_failed_init_tracer(trace, ret); ++ goto out; ++ } ++ tracing_start_cmdline_record(); ++ ++ /* Sleep for a 1/10 of a second */ ++ msleep(100); ++ ++ /* Have we just recovered from a hang? 
*/ ++ if (graph_hang_thresh > GRAPH_MAX_FUNC_TEST) { ++ tracing_selftest_disabled = true; ++ ret = -1; ++ goto out; ++ } ++ ++ tracing_stop(); ++ ++ /* check the trace buffer */ ++ ret = trace_test_buffer(tr, &count); ++ ++ trace->reset(tr); ++ tracing_start(); ++ ++ if (!ret && !count) { ++ printk(KERN_CONT ".. no entries found .."); ++ ret = -1; ++ goto out; ++ } ++ ++ /* Don't test dynamic tracing, the function tracer already did */ ++ ++out: ++ /* Stop it if we failed */ ++ if (ret) ++ ftrace_graph_stop(); ++ ++ return ret; ++} ++#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ ++ ++ + #ifdef CONFIG_IRQSOFF_TRACER + int + trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) +@@ -256,7 +339,7 @@ trace_selftest_startup_irqsoff(struct tr + int ret; + + /* start the tracing */ +- ret = trace->init(tr); ++ ret = tracer_init(trace, tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; +@@ -268,6 +351,14 @@ trace_selftest_startup_irqsoff(struct tr + local_irq_disable(); + udelay(100); + local_irq_enable(); ++ ++ /* ++ * Stop the tracer to avoid a warning subsequent ++ * to buffer flipping failure because tracing_stop() ++ * disables the tr and max buffers, making flipping impossible ++ * in case of parallels max irqs off latencies. ++ */ ++ trace->stop(tr); + /* stop the tracing. */ + tracing_stop(); + /* check both trace buffers */ +@@ -310,7 +401,7 @@ trace_selftest_startup_preemptoff(struct + } + + /* start the tracing */ +- ret = trace->init(tr); ++ ret = tracer_init(trace, tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; +@@ -322,6 +413,14 @@ trace_selftest_startup_preemptoff(struct + preempt_disable(); + udelay(100); + preempt_enable(); ++ ++ /* ++ * Stop the tracer to avoid a warning subsequent ++ * to buffer flipping failure because tracing_stop() ++ * disables the tr and max buffers, making flipping impossible ++ * in case of parallels max preempt off latencies. ++ */ ++ trace->stop(tr); + /* stop the tracing. */ + tracing_stop(); + /* check both trace buffers */ +@@ -364,10 +463,10 @@ trace_selftest_startup_preemptirqsoff(st + } + + /* start the tracing */ +- ret = trace->init(tr); ++ ret = tracer_init(trace, tr); + if (ret) { + warn_failed_init_tracer(trace, ret); +- goto out; ++ goto out_no_start; + } + + /* reset the max latency */ +@@ -381,31 +480,35 @@ trace_selftest_startup_preemptirqsoff(st + /* reverse the order of preempt vs irqs */ + local_irq_enable(); + ++ /* ++ * Stop the tracer to avoid a warning subsequent ++ * to buffer flipping failure because tracing_stop() ++ * disables the tr and max buffers, making flipping impossible ++ * in case of parallels max irqs/preempt off latencies. ++ */ ++ trace->stop(tr); + /* stop the tracing. */ + tracing_stop(); + /* check both trace buffers */ + ret = trace_test_buffer(tr, NULL); +- if (ret) { +- tracing_start(); ++ if (ret) + goto out; +- } + + ret = trace_test_buffer(&max_tr, &count); +- if (ret) { +- tracing_start(); ++ if (ret) + goto out; +- } + + if (!ret && !count) { + printk(KERN_CONT ".. no entries found .."); + ret = -1; +- tracing_start(); + goto out; + } + + /* do the test by disabling interrupts first this time */ + tracing_max_latency = 0; + tracing_start(); ++ trace->start(tr); ++ + preempt_disable(); + local_irq_disable(); + udelay(100); +@@ -413,6 +516,7 @@ trace_selftest_startup_preemptirqsoff(st + /* reverse the order of preempt vs irqs */ + local_irq_enable(); + ++ trace->stop(tr); + /* stop the tracing. 
*/ + tracing_stop(); + /* check both trace buffers */ +@@ -428,9 +532,10 @@ trace_selftest_startup_preemptirqsoff(st + goto out; + } + +- out: +- trace->reset(tr); ++out: + tracing_start(); ++out_no_start: ++ trace->reset(tr); + tracing_max_latency = save_max; + + return ret; +@@ -496,7 +601,7 @@ trace_selftest_startup_wakeup(struct tra + wait_for_completion(&isrt); + + /* start the tracing */ +- ret = trace->init(tr); ++ ret = tracer_init(trace, tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; +@@ -557,7 +662,7 @@ trace_selftest_startup_sched_switch(stru + int ret; + + /* start the tracing */ +- ret = trace->init(tr); ++ ret = tracer_init(trace, tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; +@@ -589,10 +694,10 @@ trace_selftest_startup_sysprof(struct tr + int ret; + + /* start the tracing */ +- ret = trace->init(tr); ++ ret = tracer_init(trace, tr); + if (ret) { + warn_failed_init_tracer(trace, ret); +- return 0; ++ return ret; + } + + /* Sleep for a 1/10 of a second */ +@@ -604,6 +709,11 @@ trace_selftest_startup_sysprof(struct tr + trace->reset(tr); + tracing_start(); + ++ if (!ret && !count) { ++ printk(KERN_CONT ".. no entries found .."); ++ ret = -1; ++ } ++ + return ret; + } + #endif /* CONFIG_SYSPROF_TRACER */ +@@ -616,7 +726,7 @@ trace_selftest_startup_branch(struct tra + int ret; + + /* start the tracing */ +- ret = trace->init(tr); ++ ret = tracer_init(trace, tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; +@@ -631,6 +741,11 @@ trace_selftest_startup_branch(struct tra + trace->reset(tr); + tracing_start(); + ++ if (!ret && !count) { ++ printk(KERN_CONT ".. no entries found .."); ++ ret = -1; ++ } ++ + return ret; + } + #endif /* CONFIG_BRANCH_TRACER */ +Index: linux-2.6-tip/kernel/trace/trace_stack.c +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/trace_stack.c ++++ linux-2.6-tip/kernel/trace/trace_stack.c +@@ -27,8 +27,7 @@ static struct stack_trace max_stack_trac + }; + + static unsigned long max_stack_size; +-static raw_spinlock_t max_stack_lock = +- (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; ++static __raw_spinlock_t max_stack_lock = __RAW_SPIN_LOCK_UNLOCKED; + + static int stack_trace_disabled __read_mostly; + static DEFINE_PER_CPU(int, trace_active); +@@ -245,16 +244,31 @@ static int trace_lookup_stack(struct seq + #endif + } + ++static void print_disabled(struct seq_file *m) ++{ ++ seq_puts(m, "#\n" ++ "# Stack tracer disabled\n" ++ "#\n" ++ "# To enable the stack tracer, either add 'stacktrace' to the\n" ++ "# kernel command line\n" ++ "# or 'echo 1 > /proc/sys/kernel/stack_tracer_enabled'\n" ++ "#\n"); ++} ++ + static int t_show(struct seq_file *m, void *v) + { + long i; + int size; + + if (v == SEQ_START_TOKEN) { +- seq_printf(m, " Depth Size Location" ++ seq_printf(m, " Depth Size Location" + " (%d entries)\n" +- " ----- ---- --------\n", ++ " ----- ---- --------\n", + max_stack_trace.nr_entries); ++ ++ if (!stack_tracer_enabled && !max_stack_size) ++ print_disabled(m); ++ + return 0; + } + +Index: linux-2.6-tip/kernel/trace/trace_stat.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/trace_stat.c +@@ -0,0 +1,326 @@ ++/* ++ * Infrastructure for statistic tracing (histogram output). 
++ * ++ * Copyright (C) 2008 Frederic Weisbecker ++ * ++ * Based on the code from trace_branch.c which is ++ * Copyright (C) 2008 Steven Rostedt ++ * ++ */ ++ ++ ++#include ++#include ++#include "trace_stat.h" ++#include "trace.h" ++ ++ ++/* List of stat entries from a tracer */ ++struct trace_stat_list { ++ struct list_head list; ++ void *stat; ++}; ++ ++/* A stat session is the stats output in one file */ ++struct tracer_stat_session { ++ struct list_head session_list; ++ struct tracer_stat *ts; ++ struct list_head stat_list; ++ struct mutex stat_mutex; ++ struct dentry *file; ++}; ++ ++/* All of the sessions currently in use. Each stat file embed one session */ ++static LIST_HEAD(all_stat_sessions); ++static DEFINE_MUTEX(all_stat_sessions_mutex); ++ ++/* The root directory for all stat files */ ++static struct dentry *stat_dir; ++ ++ ++static void reset_stat_session(struct tracer_stat_session *session) ++{ ++ struct trace_stat_list *node, *next; ++ ++ list_for_each_entry_safe(node, next, &session->stat_list, list) ++ kfree(node); ++ ++ INIT_LIST_HEAD(&session->stat_list); ++} ++ ++static void destroy_session(struct tracer_stat_session *session) ++{ ++ debugfs_remove(session->file); ++ reset_stat_session(session); ++ mutex_destroy(&session->stat_mutex); ++ kfree(session); ++} ++ ++/* ++ * For tracers that don't provide a stat_cmp callback. ++ * This one will force an immediate insertion on tail of ++ * the list. ++ */ ++static int dummy_cmp(void *p1, void *p2) ++{ ++ return 1; ++} ++ ++/* ++ * Initialize the stat list at each trace_stat file opening. ++ * All of these copies and sorting are required on all opening ++ * since the stats could have changed between two file sessions. ++ */ ++static int stat_seq_init(struct tracer_stat_session *session) ++{ ++ struct trace_stat_list *iter_entry, *new_entry; ++ struct tracer_stat *ts = session->ts; ++ void *stat; ++ int ret = 0; ++ int i; ++ ++ mutex_lock(&session->stat_mutex); ++ reset_stat_session(session); ++ ++ if (!ts->stat_cmp) ++ ts->stat_cmp = dummy_cmp; ++ ++ stat = ts->stat_start(ts); ++ if (!stat) ++ goto exit; ++ ++ /* ++ * The first entry. Actually this is the second, but the first ++ * one (the stat_list head) is pointless. ++ */ ++ new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL); ++ if (!new_entry) { ++ ret = -ENOMEM; ++ goto exit; ++ } ++ ++ INIT_LIST_HEAD(&new_entry->list); ++ ++ list_add(&new_entry->list, &session->stat_list); ++ ++ new_entry->stat = stat; ++ ++ /* ++ * Iterate over the tracer stat entries and store them in a sorted ++ * list. 
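++ * Entries are pulled with stat_next() and inserted by walking the
++ * already-sorted list in reverse: the new entry is placed after the
++ * first existing entry that stat_cmp() reports as greater than or
++ * equal to it, keeping the list in descending order; if no such entry
++ * is found, the new entry becomes the new head.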
++ */ ++ for (i = 1; ; i++) { ++ stat = ts->stat_next(stat, i); ++ ++ /* End of insertion */ ++ if (!stat) ++ break; ++ ++ new_entry = kmalloc(sizeof(struct trace_stat_list), GFP_KERNEL); ++ if (!new_entry) { ++ ret = -ENOMEM; ++ goto exit_free_list; ++ } ++ ++ INIT_LIST_HEAD(&new_entry->list); ++ new_entry->stat = stat; ++ ++ list_for_each_entry_reverse(iter_entry, &session->stat_list, ++ list) { ++ ++ /* Insertion with a descendent sorting */ ++ if (ts->stat_cmp(iter_entry->stat, ++ new_entry->stat) >= 0) { ++ ++ list_add(&new_entry->list, &iter_entry->list); ++ break; ++ } ++ } ++ ++ /* The current larger value */ ++ if (list_empty(&new_entry->list)) ++ list_add(&new_entry->list, &session->stat_list); ++ } ++exit: ++ mutex_unlock(&session->stat_mutex); ++ return ret; ++ ++exit_free_list: ++ reset_stat_session(session); ++ mutex_unlock(&session->stat_mutex); ++ return ret; ++} ++ ++ ++static void *stat_seq_start(struct seq_file *s, loff_t *pos) ++{ ++ struct tracer_stat_session *session = s->private; ++ ++ /* Prevent from tracer switch or stat_list modification */ ++ mutex_lock(&session->stat_mutex); ++ ++ /* If we are in the beginning of the file, print the headers */ ++ if (!*pos && session->ts->stat_headers) ++ return SEQ_START_TOKEN; ++ ++ return seq_list_start(&session->stat_list, *pos); ++} ++ ++static void *stat_seq_next(struct seq_file *s, void *p, loff_t *pos) ++{ ++ struct tracer_stat_session *session = s->private; ++ ++ if (p == SEQ_START_TOKEN) ++ return seq_list_start(&session->stat_list, *pos); ++ ++ return seq_list_next(p, &session->stat_list, pos); ++} ++ ++static void stat_seq_stop(struct seq_file *s, void *p) ++{ ++ struct tracer_stat_session *session = s->private; ++ mutex_unlock(&session->stat_mutex); ++} ++ ++static int stat_seq_show(struct seq_file *s, void *v) ++{ ++ struct tracer_stat_session *session = s->private; ++ struct trace_stat_list *l = list_entry(v, struct trace_stat_list, list); ++ ++ if (v == SEQ_START_TOKEN) ++ return session->ts->stat_headers(s); ++ ++ return session->ts->stat_show(s, l->stat); ++} ++ ++static const struct seq_operations trace_stat_seq_ops = { ++ .start = stat_seq_start, ++ .next = stat_seq_next, ++ .stop = stat_seq_stop, ++ .show = stat_seq_show ++}; ++ ++/* The session stat is refilled and resorted at each stat file opening */ ++static int tracing_stat_open(struct inode *inode, struct file *file) ++{ ++ int ret; ++ ++ struct tracer_stat_session *session = inode->i_private; ++ ++ ret = seq_open(file, &trace_stat_seq_ops); ++ if (!ret) { ++ struct seq_file *m = file->private_data; ++ m->private = session; ++ ret = stat_seq_init(session); ++ } ++ ++ return ret; ++} ++ ++/* ++ * Avoid consuming memory with our now useless list. 
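++ * The sorted snapshot is rebuilt by stat_seq_init() on the next open(),
++ * so dropping it here only costs a re-sort, not any statistics.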
++ */ ++static int tracing_stat_release(struct inode *i, struct file *f) ++{ ++ struct tracer_stat_session *session = i->i_private; ++ ++ mutex_lock(&session->stat_mutex); ++ reset_stat_session(session); ++ mutex_unlock(&session->stat_mutex); ++ ++ return 0; ++} ++ ++static const struct file_operations tracing_stat_fops = { ++ .open = tracing_stat_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = tracing_stat_release ++}; ++ ++static int tracing_stat_init(void) ++{ ++ struct dentry *d_tracing; ++ ++ d_tracing = tracing_init_dentry(); ++ ++ stat_dir = debugfs_create_dir("trace_stat", d_tracing); ++ if (!stat_dir) ++ pr_warning("Could not create debugfs " ++ "'trace_stat' entry\n"); ++ return 0; ++} ++ ++static int init_stat_file(struct tracer_stat_session *session) ++{ ++ if (!stat_dir && tracing_stat_init()) ++ return -ENODEV; ++ ++ session->file = debugfs_create_file(session->ts->name, 0644, ++ stat_dir, ++ session, &tracing_stat_fops); ++ if (!session->file) ++ return -ENOMEM; ++ return 0; ++} ++ ++int register_stat_tracer(struct tracer_stat *trace) ++{ ++ struct tracer_stat_session *session, *node, *tmp; ++ int ret; ++ ++ if (!trace) ++ return -EINVAL; ++ ++ if (!trace->stat_start || !trace->stat_next || !trace->stat_show) ++ return -EINVAL; ++ ++ /* Already registered? */ ++ mutex_lock(&all_stat_sessions_mutex); ++ list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) { ++ if (node->ts == trace) { ++ mutex_unlock(&all_stat_sessions_mutex); ++ return -EINVAL; ++ } ++ } ++ mutex_unlock(&all_stat_sessions_mutex); ++ ++ /* Init the session */ ++ session = kmalloc(sizeof(struct tracer_stat_session), GFP_KERNEL); ++ if (!session) ++ return -ENOMEM; ++ ++ session->ts = trace; ++ INIT_LIST_HEAD(&session->session_list); ++ INIT_LIST_HEAD(&session->stat_list); ++ mutex_init(&session->stat_mutex); ++ session->file = NULL; ++ ++ ret = init_stat_file(session); ++ if (ret) { ++ destroy_session(session); ++ return ret; ++ } ++ ++ /* Register */ ++ mutex_lock(&all_stat_sessions_mutex); ++ list_add_tail(&session->session_list, &all_stat_sessions); ++ mutex_unlock(&all_stat_sessions_mutex); ++ ++ return 0; ++} ++ ++void unregister_stat_tracer(struct tracer_stat *trace) ++{ ++ struct tracer_stat_session *node, *tmp; ++ ++ mutex_lock(&all_stat_sessions_mutex); ++ list_for_each_entry_safe(node, tmp, &all_stat_sessions, session_list) { ++ if (node->ts == trace) { ++ list_del(&node->session_list); ++ destroy_session(node); ++ break; ++ } ++ } ++ mutex_unlock(&all_stat_sessions_mutex); ++} +Index: linux-2.6-tip/kernel/trace/trace_stat.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/trace_stat.h +@@ -0,0 +1,31 @@ ++#ifndef __TRACE_STAT_H ++#define __TRACE_STAT_H ++ ++#include ++ ++/* ++ * If you want to provide a stat file (one-shot statistics), fill ++ * an iterator with stat_start/stat_next and a stat_show callbacks. ++ * The others callbacks are optional. 
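++ *
++ * A minimal usage sketch (illustrative only; the my_* names below are
++ * hypothetical -- see the workqueue stats in trace_workqueue.c of this
++ * patch for a real user):
++ *
++ *	static struct tracer_stat my_stats = {
++ *		.name		= "my_stats",
++ *		.stat_start	= my_stat_start,
++ *		.stat_next	= my_stat_next,
++ *		.stat_show	= my_stat_show,
++ *		.stat_headers	= my_stat_headers,
++ *	};
++ *
++ *	register_stat_tracer(&my_stats);
++ *		(creates the debugfs file tracing/trace_stat/my_stats)
++ *	unregister_stat_tracer(&my_stats);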
++ */ ++struct tracer_stat { ++ /* The name of your stat file */ ++ const char *name; ++ /* Iteration over statistic entries */ ++ void *(*stat_start)(struct tracer_stat *trace); ++ void *(*stat_next)(void *prev, int idx); ++ /* Compare two entries for stats sorting */ ++ int (*stat_cmp)(void *p1, void *p2); ++ /* Print a stat entry */ ++ int (*stat_show)(struct seq_file *s, void *p); ++ /* Print the headers of your stat entries */ ++ int (*stat_headers)(struct seq_file *s); ++}; ++ ++/* ++ * Destroy or create a stat file ++ */ ++extern int register_stat_tracer(struct tracer_stat *trace); ++extern void unregister_stat_tracer(struct tracer_stat *trace); ++ ++#endif /* __TRACE_STAT_H */ +Index: linux-2.6-tip/kernel/trace/trace_syscalls.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/trace_syscalls.c +@@ -0,0 +1,250 @@ ++#include ++#include ++#include ++ ++#include "trace_output.h" ++#include "trace.h" ++ ++/* Keep a counter of the syscall tracing users */ ++static int refcount; ++ ++/* Prevent from races on thread flags toggling */ ++static DEFINE_MUTEX(syscall_trace_lock); ++ ++/* Option to display the parameters types */ ++enum { ++ TRACE_SYSCALLS_OPT_TYPES = 0x1, ++}; ++ ++static struct tracer_opt syscalls_opts[] = { ++ { TRACER_OPT(syscall_arg_type, TRACE_SYSCALLS_OPT_TYPES) }, ++ { } ++}; ++ ++static struct tracer_flags syscalls_flags = { ++ .val = 0, /* By default: no parameters types */ ++ .opts = syscalls_opts ++}; ++ ++enum print_line_t ++print_syscall_enter(struct trace_iterator *iter, int flags) ++{ ++ struct trace_seq *s = &iter->seq; ++ struct trace_entry *ent = iter->ent; ++ struct syscall_trace_enter *trace; ++ struct syscall_metadata *entry; ++ int i, ret, syscall; ++ ++ trace_assign_type(trace, ent); ++ ++ syscall = trace->nr; ++ ++ entry = syscall_nr_to_meta(syscall); ++ if (!entry) ++ goto end; ++ ++ ret = trace_seq_printf(s, "%s(", entry->name); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ for (i = 0; i < entry->nb_args; i++) { ++ /* parameter types */ ++ if (syscalls_flags.val & TRACE_SYSCALLS_OPT_TYPES) { ++ ret = trace_seq_printf(s, "%s ", entry->types[i]); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ } ++ /* parameter values */ ++ ret = trace_seq_printf(s, "%s: %lx%s ", entry->args[i], ++ trace->args[i], ++ i == entry->nb_args - 1 ? 
")" : ","); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ } ++ ++end: ++ trace_seq_printf(s, "\n"); ++ return TRACE_TYPE_HANDLED; ++} ++ ++enum print_line_t ++print_syscall_exit(struct trace_iterator *iter, int flags) ++{ ++ struct trace_seq *s = &iter->seq; ++ struct trace_entry *ent = iter->ent; ++ struct syscall_trace_exit *trace; ++ int syscall; ++ struct syscall_metadata *entry; ++ int ret; ++ ++ trace_assign_type(trace, ent); ++ ++ syscall = trace->nr; ++ ++ entry = syscall_nr_to_meta(syscall); ++ if (!entry) { ++ trace_seq_printf(s, "\n"); ++ return TRACE_TYPE_HANDLED; ++ } ++ ++ ret = trace_seq_printf(s, "%s -> 0x%lx\n", entry->name, ++ trace->ret); ++ if (!ret) ++ return TRACE_TYPE_PARTIAL_LINE; ++ ++ return TRACE_TYPE_HANDLED; ++} ++ ++void start_ftrace_syscalls(void) ++{ ++ unsigned long flags; ++ struct task_struct *g, *t; ++ ++ mutex_lock(&syscall_trace_lock); ++ ++ /* Don't enable the flag on the tasks twice */ ++ if (++refcount != 1) ++ goto unlock; ++ ++ arch_init_ftrace_syscalls(); ++ read_lock_irqsave(&tasklist_lock, flags); ++ ++ do_each_thread(g, t) { ++ set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); ++ } while_each_thread(g, t); ++ ++ read_unlock_irqrestore(&tasklist_lock, flags); ++ ++unlock: ++ mutex_unlock(&syscall_trace_lock); ++} ++ ++void stop_ftrace_syscalls(void) ++{ ++ unsigned long flags; ++ struct task_struct *g, *t; ++ ++ mutex_lock(&syscall_trace_lock); ++ ++ /* There are perhaps still some users */ ++ if (--refcount) ++ goto unlock; ++ ++ read_lock_irqsave(&tasklist_lock, flags); ++ ++ do_each_thread(g, t) { ++ clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE); ++ } while_each_thread(g, t); ++ ++ read_unlock_irqrestore(&tasklist_lock, flags); ++ ++unlock: ++ mutex_unlock(&syscall_trace_lock); ++} ++ ++void ftrace_syscall_enter(struct pt_regs *regs) ++{ ++ struct syscall_trace_enter *entry; ++ struct syscall_metadata *sys_data; ++ struct ring_buffer_event *event; ++ int size; ++ int syscall_nr; ++ ++ syscall_nr = syscall_get_nr(current, regs); ++ ++ sys_data = syscall_nr_to_meta(syscall_nr); ++ if (!sys_data) ++ return; ++ ++ size = sizeof(*entry) + sizeof(unsigned long) * sys_data->nb_args; ++ ++ event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_ENTER, size, ++ 0, 0); ++ if (!event) ++ return; ++ ++ entry = ring_buffer_event_data(event); ++ entry->nr = syscall_nr; ++ syscall_get_arguments(current, regs, 0, sys_data->nb_args, entry->args); ++ ++ trace_current_buffer_unlock_commit(event, 0, 0); ++ trace_wake_up(); ++} ++ ++void ftrace_syscall_exit(struct pt_regs *regs) ++{ ++ struct syscall_trace_exit *entry; ++ struct syscall_metadata *sys_data; ++ struct ring_buffer_event *event; ++ int syscall_nr; ++ ++ syscall_nr = syscall_get_nr(current, regs); ++ ++ sys_data = syscall_nr_to_meta(syscall_nr); ++ if (!sys_data) ++ return; ++ ++ event = trace_current_buffer_lock_reserve(TRACE_SYSCALL_EXIT, ++ sizeof(*entry), 0, 0); ++ if (!event) ++ return; ++ ++ entry = ring_buffer_event_data(event); ++ entry->nr = syscall_nr; ++ entry->ret = syscall_get_return_value(current, regs); ++ ++ trace_current_buffer_unlock_commit(event, 0, 0); ++ trace_wake_up(); ++} ++ ++static int init_syscall_tracer(struct trace_array *tr) ++{ ++ start_ftrace_syscalls(); ++ ++ return 0; ++} ++ ++static void reset_syscall_tracer(struct trace_array *tr) ++{ ++ stop_ftrace_syscalls(); ++ tracing_reset_online_cpus(tr); ++} ++ ++static struct trace_event syscall_enter_event = { ++ .type = TRACE_SYSCALL_ENTER, ++ .trace = print_syscall_enter, ++}; ++ ++static struct trace_event 
syscall_exit_event = { ++ .type = TRACE_SYSCALL_EXIT, ++ .trace = print_syscall_exit, ++}; ++ ++static struct tracer syscall_tracer __read_mostly = { ++ .name = "syscall", ++ .init = init_syscall_tracer, ++ .reset = reset_syscall_tracer, ++ .flags = &syscalls_flags, ++}; ++ ++__init int register_ftrace_syscalls(void) ++{ ++ int ret; ++ ++ ret = register_ftrace_event(&syscall_enter_event); ++ if (!ret) { ++ printk(KERN_WARNING "event %d failed to register\n", ++ syscall_enter_event.type); ++ WARN_ON_ONCE(1); ++ } ++ ++ ret = register_ftrace_event(&syscall_exit_event); ++ if (!ret) { ++ printk(KERN_WARNING "event %d failed to register\n", ++ syscall_exit_event.type); ++ WARN_ON_ONCE(1); ++ } ++ ++ return register_tracer(&syscall_tracer); ++} ++device_initcall(register_ftrace_syscalls); +Index: linux-2.6-tip/kernel/trace/trace_sysprof.c +=================================================================== +--- linux-2.6-tip.orig/kernel/trace/trace_sysprof.c ++++ linux-2.6-tip/kernel/trace/trace_sysprof.c +@@ -88,7 +88,7 @@ static void backtrace_address(void *data + } + } + +-const static struct stacktrace_ops backtrace_ops = { ++static const struct stacktrace_ops backtrace_ops = { + .warning = backtrace_warning, + .warning_symbol = backtrace_warning_symbol, + .stack = backtrace_stack, +@@ -226,15 +226,6 @@ static void stop_stack_timers(void) + stop_stack_timer(cpu); + } + +-static void start_stack_trace(struct trace_array *tr) +-{ +- mutex_lock(&sample_timer_lock); +- tracing_reset_online_cpus(tr); +- start_stack_timers(); +- tracer_enabled = 1; +- mutex_unlock(&sample_timer_lock); +-} +- + static void stop_stack_trace(struct trace_array *tr) + { + mutex_lock(&sample_timer_lock); +@@ -247,12 +238,18 @@ static int stack_trace_init(struct trace + { + sysprof_trace = tr; + +- start_stack_trace(tr); ++ tracing_start_cmdline_record(); ++ ++ mutex_lock(&sample_timer_lock); ++ start_stack_timers(); ++ tracer_enabled = 1; ++ mutex_unlock(&sample_timer_lock); + return 0; + } + + static void stack_trace_reset(struct trace_array *tr) + { ++ tracing_stop_cmdline_record(); + stop_stack_trace(tr); + } + +@@ -317,7 +314,7 @@ sysprof_sample_write(struct file *filp, + return cnt; + } + +-static struct file_operations sysprof_sample_fops = { ++static const struct file_operations sysprof_sample_fops = { + .read = sysprof_sample_read, + .write = sysprof_sample_write, + }; +@@ -330,5 +327,5 @@ void init_tracer_sysprof_debugfs(struct + d_tracer, NULL, &sysprof_sample_fops); + if (entry) + return; +- pr_warning("Could not create debugfs 'dyn_ftrace_total_info' entry\n"); ++ pr_warning("Could not create debugfs 'sysprof_sample_period' entry\n"); + } +Index: linux-2.6-tip/kernel/trace/trace_workqueue.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/trace/trace_workqueue.c +@@ -0,0 +1,288 @@ ++/* ++ * Workqueue statistical tracer. ++ * ++ * Copyright (C) 2008 Frederic Weisbecker ++ * ++ */ ++ ++ ++#include ++#include ++#include ++#include "trace_stat.h" ++#include "trace.h" ++ ++ ++/* A cpu workqueue thread */ ++struct cpu_workqueue_stats { ++ struct list_head list; ++/* Useful to know if we print the cpu headers */ ++ bool first_entry; ++ int cpu; ++ pid_t pid; ++/* Can be inserted from interrupt or user context, need to be atomic */ ++ atomic_t inserted; ++/* ++ * Don't need to be atomic, works are serialized in a single workqueue thread ++ * on a single CPU. 
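++ * (Insertion and removal of these per-cpu list entries are serialized
++ * by workqueue_cpu_stat(cpu)->lock.)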
++ */ ++ unsigned int executed; ++}; ++ ++/* List of workqueue threads on one cpu */ ++struct workqueue_global_stats { ++ struct list_head list; ++ spinlock_t lock; ++}; ++ ++/* Don't need a global lock because allocated before the workqueues, and ++ * never freed. ++ */ ++static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat); ++#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu)) ++ ++/* Insertion of a work */ ++static void ++probe_workqueue_insertion(struct task_struct *wq_thread, ++ struct work_struct *work) ++{ ++ int cpu = cpumask_first(&wq_thread->cpus_allowed); ++ struct cpu_workqueue_stats *node, *next; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); ++ list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list, ++ list) { ++ if (node->pid == wq_thread->pid) { ++ atomic_inc(&node->inserted); ++ goto found; ++ } ++ } ++ pr_debug("trace_workqueue: entry not found\n"); ++found: ++ spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); ++} ++ ++/* Execution of a work */ ++static void ++probe_workqueue_execution(struct task_struct *wq_thread, ++ struct work_struct *work) ++{ ++ int cpu = cpumask_first(&wq_thread->cpus_allowed); ++ struct cpu_workqueue_stats *node, *next; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); ++ list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list, ++ list) { ++ if (node->pid == wq_thread->pid) { ++ node->executed++; ++ goto found; ++ } ++ } ++ pr_debug("trace_workqueue: entry not found\n"); ++found: ++ spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); ++} ++ ++/* Creation of a cpu workqueue thread */ ++static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu) ++{ ++ struct cpu_workqueue_stats *cws; ++ unsigned long flags; ++ ++ WARN_ON(cpu < 0); ++ ++ /* Workqueues are sometimes created in atomic context */ ++ cws = kzalloc(sizeof(struct cpu_workqueue_stats), GFP_ATOMIC); ++ if (!cws) { ++ pr_warning("trace_workqueue: not enough memory\n"); ++ return; ++ } ++ INIT_LIST_HEAD(&cws->list); ++ cws->cpu = cpu; ++ ++ cws->pid = wq_thread->pid; ++ ++ spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); ++ if (list_empty(&workqueue_cpu_stat(cpu)->list)) ++ cws->first_entry = true; ++ list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list); ++ spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); ++} ++ ++/* Destruction of a cpu workqueue thread */ ++static void probe_workqueue_destruction(struct task_struct *wq_thread) ++{ ++ /* Workqueue only execute on one cpu */ ++ int cpu = cpumask_first(&wq_thread->cpus_allowed); ++ struct cpu_workqueue_stats *node, *next; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); ++ list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list, ++ list) { ++ if (node->pid == wq_thread->pid) { ++ list_del(&node->list); ++ kfree(node); ++ goto found; ++ } ++ } ++ ++ pr_debug("trace_workqueue: don't find workqueue to destroy\n"); ++found: ++ spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); ++ ++} ++ ++static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu) ++{ ++ unsigned long flags; ++ struct cpu_workqueue_stats *ret = NULL; ++ ++ ++ spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); ++ ++ if (!list_empty(&workqueue_cpu_stat(cpu)->list)) ++ ret = list_entry(workqueue_cpu_stat(cpu)->list.next, ++ struct cpu_workqueue_stats, list); ++ ++ 
spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); ++ ++ return ret; ++} ++ ++static void *workqueue_stat_start(void) ++{ ++ int cpu; ++ void *ret = NULL; ++ ++ for_each_possible_cpu(cpu) { ++ ret = workqueue_stat_start_cpu(cpu); ++ if (ret) ++ return ret; ++ } ++ return NULL; ++} ++ ++static void *workqueue_stat_next(void *prev, int idx) ++{ ++ struct cpu_workqueue_stats *prev_cws = prev; ++ int cpu = prev_cws->cpu; ++ unsigned long flags; ++ void *ret = NULL; ++ ++ spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); ++ if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) { ++ spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); ++ do { ++ cpu = cpumask_next(cpu, cpu_possible_mask); ++ if (cpu >= nr_cpu_ids) ++ return NULL; ++ } while (!(ret = workqueue_stat_start_cpu(cpu))); ++ return ret; ++ } ++ spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); ++ ++ return list_entry(prev_cws->list.next, struct cpu_workqueue_stats, ++ list); ++} ++ ++static int workqueue_stat_show(struct seq_file *s, void *p) ++{ ++ struct cpu_workqueue_stats *cws = p; ++ unsigned long flags; ++ int cpu = cws->cpu; ++ struct pid *pid; ++ struct task_struct *tsk; ++ ++ spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags); ++ if (&cws->list == workqueue_cpu_stat(cpu)->list.next) ++ seq_printf(s, "\n"); ++ spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags); ++ ++ pid = find_get_pid(cws->pid); ++ if (pid) { ++ tsk = get_pid_task(pid, PIDTYPE_PID); ++ if (tsk) { ++ seq_printf(s, "%3d %6d %6u %s\n", cws->cpu, ++ atomic_read(&cws->inserted), cws->executed, ++ tsk->comm); ++ put_task_struct(tsk); ++ } ++ put_pid(pid); ++ } ++ ++ return 0; ++} ++ ++static int workqueue_stat_headers(struct seq_file *s) ++{ ++ seq_printf(s, "# CPU INSERTED EXECUTED NAME\n"); ++ seq_printf(s, "# | | | |\n"); ++ return 0; ++} ++ ++struct tracer_stat workqueue_stats __read_mostly = { ++ .name = "workqueues", ++ .stat_start = workqueue_stat_start, ++ .stat_next = workqueue_stat_next, ++ .stat_show = workqueue_stat_show, ++ .stat_headers = workqueue_stat_headers ++}; ++ ++ ++int __init stat_workqueue_init(void) ++{ ++ if (register_stat_tracer(&workqueue_stats)) { ++ pr_warning("Unable to register workqueue stat tracer\n"); ++ return 1; ++ } ++ ++ return 0; ++} ++fs_initcall(stat_workqueue_init); ++ ++/* ++ * Workqueues are created very early, just after pre-smp initcalls. ++ * So we must register our tracepoints at this stage. 
++ */ ++int __init trace_workqueue_early_init(void) ++{ ++ int ret, cpu; ++ ++ ret = register_trace_workqueue_insertion(probe_workqueue_insertion); ++ if (ret) ++ goto out; ++ ++ ret = register_trace_workqueue_execution(probe_workqueue_execution); ++ if (ret) ++ goto no_insertion; ++ ++ ret = register_trace_workqueue_creation(probe_workqueue_creation); ++ if (ret) ++ goto no_execution; ++ ++ ret = register_trace_workqueue_destruction(probe_workqueue_destruction); ++ if (ret) ++ goto no_creation; ++ ++ for_each_possible_cpu(cpu) { ++ spin_lock_init(&workqueue_cpu_stat(cpu)->lock); ++ INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list); ++ } ++ ++ return 0; ++ ++no_creation: ++ unregister_trace_workqueue_creation(probe_workqueue_creation); ++no_execution: ++ unregister_trace_workqueue_execution(probe_workqueue_execution); ++no_insertion: ++ unregister_trace_workqueue_insertion(probe_workqueue_insertion); ++out: ++ pr_warning("trace_workqueue: unable to trace workqueues\n"); ++ ++ return 1; ++} ++early_initcall(trace_workqueue_early_init); +Index: linux-2.6-tip/kernel/tracepoint.c +=================================================================== +--- linux-2.6-tip.orig/kernel/tracepoint.c ++++ linux-2.6-tip/kernel/tracepoint.c +@@ -272,12 +272,15 @@ static void disable_tracepoint(struct tr + * + * Updates the probe callback corresponding to a range of tracepoints. + */ +-void tracepoint_update_probe_range(struct tracepoint *begin, +- struct tracepoint *end) ++void ++tracepoint_update_probe_range(struct tracepoint *begin, struct tracepoint *end) + { + struct tracepoint *iter; + struct tracepoint_entry *mark_entry; + ++ if (!begin) ++ return; ++ + mutex_lock(&tracepoints_mutex); + for (iter = begin; iter < end; iter++) { + mark_entry = get_tracepoint(iter->name); +Index: linux-2.6-tip/kernel/workqueue.c +=================================================================== +--- linux-2.6-tip.orig/kernel/workqueue.c ++++ linux-2.6-tip/kernel/workqueue.c +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -33,6 +34,9 @@ + #include + #include + #include ++#include ++ ++#include + + /* + * The per-CPU workqueue (if single thread, we always use the first +@@ -125,9 +129,13 @@ struct cpu_workqueue_struct *get_wq_data + return (void *) (atomic_long_read(&work->data) & WORK_STRUCT_WQ_DATA_MASK); + } + ++DEFINE_TRACE(workqueue_insertion); ++ + static void insert_work(struct cpu_workqueue_struct *cwq, + struct work_struct *work, struct list_head *head) + { ++ trace_workqueue_insertion(cwq->thread, work); ++ + set_wq_data(work, cwq); + /* + * Ensure that we get the right work->data if we see the +@@ -157,13 +165,14 @@ static void __queue_work(struct cpu_work + * + * We queue the work to the CPU on which it was submitted, but if the CPU dies + * it can be processed by another CPU. ++ * ++ * Especially no such guarantee on PREEMPT_RT. 
+ */ + int queue_work(struct workqueue_struct *wq, struct work_struct *work) + { +- int ret; ++ int ret = 0, cpu = raw_smp_processor_id(); + +- ret = queue_work_on(get_cpu(), wq, work); +- put_cpu(); ++ ret = queue_work_on(cpu, wq, work); + + return ret; + } +@@ -200,7 +209,7 @@ static void delayed_work_timer_fn(unsign + struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work); + struct workqueue_struct *wq = cwq->wq; + +- __queue_work(wq_per_cpu(wq, smp_processor_id()), &dwork->work); ++ __queue_work(wq_per_cpu(wq, raw_smp_processor_id()), &dwork->work); + } + + /** +@@ -259,6 +268,8 @@ int queue_delayed_work_on(int cpu, struc + } + EXPORT_SYMBOL_GPL(queue_delayed_work_on); + ++DEFINE_TRACE(workqueue_execution); ++ + static void run_workqueue(struct cpu_workqueue_struct *cwq) + { + spin_lock_irq(&cwq->lock); +@@ -284,7 +295,7 @@ static void run_workqueue(struct cpu_wor + */ + struct lockdep_map lockdep_map = work->lockdep_map; + #endif +- ++ trace_workqueue_execution(cwq->thread, work); + cwq->current_work = work; + list_del_init(cwq->worklist.next); + spin_unlock_irq(&cwq->lock); +@@ -765,6 +776,8 @@ init_cpu_workqueue(struct workqueue_stru + return cwq; + } + ++DEFINE_TRACE(workqueue_creation); ++ + static int create_workqueue_thread(struct cpu_workqueue_struct *cwq, int cpu) + { + struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 }; +@@ -787,6 +800,8 @@ static int create_workqueue_thread(struc + sched_setscheduler_nocheck(p, SCHED_FIFO, ¶m); + cwq->thread = p; + ++ trace_workqueue_creation(cwq->thread, cpu); ++ + return 0; + } + +@@ -868,6 +883,8 @@ struct workqueue_struct *__create_workqu + } + EXPORT_SYMBOL_GPL(__create_workqueue_key); + ++DEFINE_TRACE(workqueue_destruction); ++ + static void cleanup_workqueue_thread(struct cpu_workqueue_struct *cwq) + { + /* +@@ -891,10 +908,54 @@ static void cleanup_workqueue_thread(str + * checks list_empty(), and a "normal" queue_work() can't use + * a dead CPU. + */ ++ trace_workqueue_destruction(cwq->thread); + kthread_stop(cwq->thread); + cwq->thread = NULL; + } + ++void set_workqueue_thread_prio(struct workqueue_struct *wq, int cpu, ++ int policy, int rt_priority, int nice) ++{ ++ struct sched_param param = { .sched_priority = rt_priority }; ++ struct cpu_workqueue_struct *cwq; ++ mm_segment_t oldfs = get_fs(); ++ struct task_struct *p; ++ unsigned long flags; ++ int ret; ++ ++ cwq = per_cpu_ptr(wq->cpu_wq, cpu); ++ spin_lock_irqsave(&cwq->lock, flags); ++ p = cwq->thread; ++ spin_unlock_irqrestore(&cwq->lock, flags); ++ ++ set_user_nice(p, nice); ++ ++ set_fs(KERNEL_DS); ++ ret = sys_sched_setscheduler(p->pid, policy, ¶m); ++ set_fs(oldfs); ++ ++ WARN_ON(ret); ++} ++ ++void set_workqueue_prio(struct workqueue_struct *wq, int policy, ++ int rt_priority, int nice) ++{ ++ int cpu; ++ ++ /* We don't need the distraction of CPUs appearing and vanishing. 
*/ ++ get_online_cpus(); ++ spin_lock(&workqueue_lock); ++ if (is_wq_single_threaded(wq)) ++ set_workqueue_thread_prio(wq, 0, policy, rt_priority, nice); ++ else { ++ for_each_online_cpu(cpu) ++ set_workqueue_thread_prio(wq, cpu, policy, ++ rt_priority, nice); ++ } ++ spin_unlock(&workqueue_lock); ++ put_online_cpus(); ++} ++ + /** + * destroy_workqueue - safely terminate a workqueue + * @wq: target workqueue +@@ -1021,6 +1082,7 @@ void __init init_workqueues(void) + hotcpu_notifier(workqueue_cpu_callback, 0); + keventd_wq = create_workqueue("events"); + BUG_ON(!keventd_wq); ++ set_workqueue_prio(keventd_wq, SCHED_FIFO, 1, -20); + #ifdef CONFIG_SMP + work_on_cpu_wq = create_workqueue("work_on_cpu"); + BUG_ON(!work_on_cpu_wq); +Index: linux-2.6-tip/lib/Kconfig +=================================================================== +--- linux-2.6-tip.orig/lib/Kconfig ++++ linux-2.6-tip/lib/Kconfig +@@ -2,6 +2,9 @@ + # Library configuration + # + ++config BINARY_PRINTF ++ def_bool n ++ + menu "Library routines" + + config BITREVERSE +@@ -98,6 +101,20 @@ config LZO_DECOMPRESS + tristate + + # ++# These all provide a common interface (hence the apparent duplication with ++# ZLIB_INFLATE; DECOMPRESS_GZIP is just a wrapper.) ++# ++config DECOMPRESS_GZIP ++ select ZLIB_INFLATE ++ tristate ++ ++config DECOMPRESS_BZIP2 ++ tristate ++ ++config DECOMPRESS_LZMA ++ tristate ++ ++# + # Generic allocator support is selected if needed + # + config GENERIC_ALLOCATOR +@@ -136,12 +153,6 @@ config TEXTSEARCH_BM + config TEXTSEARCH_FSM + tristate + +-# +-# plist support is select#ed if needed +-# +-config PLIST +- boolean +- + config HAS_IOMEM + boolean + depends on !NO_IOMEM +@@ -165,6 +176,7 @@ config HAVE_LMB + + config CPUMASK_OFFSTACK + bool "Force CPU masks off stack" if DEBUG_PER_CPU_MAPS ++ depends on !PREEMPT_RT && BROKEN + help + Use dynamic allocation for cpumask_var_t, instead of putting + them on the stack. This is a bit more expensive, but avoids +Index: linux-2.6-tip/lib/Kconfig.debug +=================================================================== +--- linux-2.6-tip.orig/lib/Kconfig.debug ++++ linux-2.6-tip/lib/Kconfig.debug +@@ -9,8 +9,20 @@ config PRINTK_TIME + operations. This is useful for identifying long delays + in kernel startup. + ++config ALLOW_WARNINGS ++ bool "Continue building despite compiler warnings" ++ default y ++ help ++ By disabling this option you will enable -Werror on building C ++ files. This causes all warnings to abort the compilation, just as ++ errors do. (It is generally not recommended to disable this option as ++ the overwhelming majority of warnings is harmless and also gcc puts ++ out false-positive warnings. It is useful for automated testing ++ though.) ++ + config ENABLE_WARN_DEPRECATED + bool "Enable __deprecated logic" ++ depends on ALLOW_WARNINGS + default y + help + Enable the __deprecated logic in the kernel build. +@@ -19,12 +31,13 @@ config ENABLE_WARN_DEPRECATED + + config ENABLE_MUST_CHECK + bool "Enable __must_check logic" +- default y ++ depends on ALLOW_WARNINGS + help + Enable the __must_check logic in the kernel build. Disable this to + suppress the "warning: ignoring return value of 'foo', declared with + attribute warn_unused_result" messages. + ++ + config FRAME_WARN + int "Warn for stack frames larger than (needs gcc 4.4)" + range 0 8192 +@@ -95,7 +108,6 @@ config HEADERS_CHECK + + config DEBUG_SECTION_MISMATCH + bool "Enable full Section mismatch analysis" +- depends on UNDEFINED + # This option is on purpose disabled for now. 
+ # It will be enabled when we are down to a resonable number
+ # of section mismatch warnings (< 10 for an allyesconfig build)
+@@ -186,6 +198,44 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE
+ default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC
+ default 1 if BOOTPARAM_SOFTLOCKUP_PANIC
+ 
++config DETECT_HUNG_TASK
++ bool "Detect Hung Tasks"
++ depends on DEBUG_KERNEL
++ default y
++ help
++ Say Y here to enable the kernel to detect "hung tasks",
++ which are bugs that cause the task to be stuck in
++ uninterruptible "D" state indefinitely.
++
++ When a hung task is detected, the kernel will print the
++ current stack trace (which you should report), but the
++ task will stay in uninterruptible state. If lockdep is
++ enabled then all held locks will also be reported. This
++ feature has negligible overhead.
++
++config BOOTPARAM_HUNG_TASK_PANIC
++ bool "Panic (Reboot) On Hung Tasks"
++ depends on DETECT_HUNG_TASK
++ help
++ Say Y here to enable the kernel to panic on "hung tasks",
++ which are bugs that cause the kernel to leave a task stuck
++ in uninterruptible "D" state.
++
++ The panic can be used in combination with panic_timeout,
++ to cause the system to reboot automatically after a
++ hung task has been detected. This feature is useful for
++ high-availability systems that have uptime guarantees and
++ where hung tasks must be resolved ASAP.
++
++ Say N if unsure.
++
++config BOOTPARAM_HUNG_TASK_PANIC_VALUE
++ int
++ depends on DETECT_HUNG_TASK
++ range 0 1
++ default 0 if !BOOTPARAM_HUNG_TASK_PANIC
++ default 1 if BOOTPARAM_HUNG_TASK_PANIC
++
+ config SCHED_DEBUG
+ bool "Collect scheduler debugging info"
+ depends on DEBUG_KERNEL && PROC_FS
+@@ -262,7 +312,7 @@ config DEBUG_OBJECTS_ENABLE_DEFAULT
+ 
+ config DEBUG_SLAB
+ bool "Debug slab memory allocations"
+- depends on DEBUG_KERNEL && SLAB
++ depends on DEBUG_KERNEL && SLAB && !KMEMCHECK
+ help
+ Say Y here to have the kernel do limited verification on memory
+ allocation as well as poisoning memory on free to catch use of freed
+@@ -274,7 +324,7 @@ config DEBUG_SLAB_LEAK
+ 
+ config SLUB_DEBUG_ON
+ bool "SLUB debugging on by default"
+- depends on SLUB && SLUB_DEBUG
++ depends on SLUB && SLUB_DEBUG && !KMEMCHECK
+ default n
+ help
+ Boot with debugging on by default. SLUB boots by default with
+@@ -314,6 +364,8 @@ config DEBUG_RT_MUTEXES
+ help
+ This allows rt mutex semantics violations and rt mutex related
+ deadlocks (lockups) to be detected and reported automatically.
++ When realtime preemption is enabled this includes spinlocks,
++ rwlocks, mutexes and (rw)semaphores.
+ 
+ config DEBUG_PI_LIST
+ bool
+@@ -337,7 +389,7 @@ config DEBUG_SPINLOCK
+ 
+ config DEBUG_MUTEXES
+ bool "Mutex debugging: basic checks"
+- depends on DEBUG_KERNEL
++ depends on DEBUG_KERNEL && !PREEMPT_RT
+ help
+ This feature allows mutex semantics violations to be detected and
+ reported.
+@@ -402,7 +454,7 @@ config LOCKDEP
+ bool
+ depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
+ select STACKTRACE
+- select FRAME_POINTER if !X86 && !MIPS && !PPC
++ select FRAME_POINTER if !MIPS && !PPC
+ select KALLSYMS
+ select KALLSYMS_ALL
+ 
+@@ -902,6 +954,19 @@ config DYNAMIC_PRINTK_DEBUG
+ debugging for all modules. This mode can be turned off via the above
+ disable command.
+ 
++config DMA_API_DEBUG
++ bool "Enable debugging of DMA-API usage"
++ depends on HAVE_DMA_API_DEBUG
++ help
++ Enable this option to debug the use of the DMA API by device drivers.
++ With this option you will be able to detect common bugs in device
++ drivers like double-freeing of DMA mappings or freeing mappings that
++ were never allocated.
++ This option causes a performance degradation. Use only if you want
++ to debug device drivers. If unsure, say N.
++
+ source "samples/Kconfig"
+ 
+ source "lib/Kconfig.kgdb"
++
++source "lib/Kconfig.kmemcheck"
+Index: linux-2.6-tip/lib/Kconfig.kmemcheck
+===================================================================
+--- /dev/null
++++ linux-2.6-tip/lib/Kconfig.kmemcheck
+@@ -0,0 +1,91 @@
++config HAVE_ARCH_KMEMCHECK
++ bool
++
++menuconfig KMEMCHECK
++ bool "kmemcheck: trap use of uninitialized memory"
++ depends on DEBUG_KERNEL
++ depends on !X86_USE_3DNOW
++ depends on SLUB || SLAB
++ depends on !CC_OPTIMIZE_FOR_SIZE
++ depends on !FUNCTION_TRACER
++ select FRAME_POINTER
++ select STACKTRACE
++ default n
++ help
++ This option enables tracing of dynamically allocated kernel memory
++ to see if memory is used before it has been given an initial value.
++ Be aware that this requires half of your memory for bookkeeping and
++ will insert extra code at *every* read and write to tracked memory,
++ thus slowing down the kernel code (but user code is unaffected).
++
++ The kernel may be started with kmemcheck=0 or kmemcheck=1 to disable
++ or enable kmemcheck at boot-time. If the kernel is started with
++ kmemcheck=0, the large memory and CPU overhead is not incurred.
++
++choice
++ prompt "kmemcheck: default mode at boot"
++ depends on KMEMCHECK
++ default KMEMCHECK_ONESHOT_BY_DEFAULT
++ help
++ This option controls the default behaviour of kmemcheck when the
++ kernel boots and no kmemcheck= parameter is given.
++
++config KMEMCHECK_DISABLED_BY_DEFAULT
++ bool "disabled"
++ depends on KMEMCHECK
++
++config KMEMCHECK_ENABLED_BY_DEFAULT
++ bool "enabled"
++ depends on KMEMCHECK
++
++config KMEMCHECK_ONESHOT_BY_DEFAULT
++ bool "one-shot"
++ depends on KMEMCHECK
++ help
++ In one-shot mode, only the first error detected is reported before
++ kmemcheck is disabled.
++
++endchoice
++
++config KMEMCHECK_QUEUE_SIZE
++ int "kmemcheck: error queue size"
++ depends on KMEMCHECK
++ default 64
++ help
++ Select the maximum number of errors to store in the queue. Since
++ errors can occur virtually anywhere and in any context, we need a
++ temporary storage area which is guaranteed not to generate any
++ other faults. The queue will be emptied as soon as a tasklet may
++ be scheduled. If the queue is full, new error reports will be
++ lost.
++
++config KMEMCHECK_SHADOW_COPY_SHIFT
++ int "kmemcheck: shadow copy size (5 => 32 bytes, 6 => 64 bytes)"
++ depends on KMEMCHECK
++ range 2 8
++ default 5
++ help
++ Select the number of shadow bytes to save along with each entry of
++ the queue. These bytes indicate what parts of an allocation are
++ initialized, uninitialized, etc. and will be displayed when an
++ error is detected to help the debugging of a particular problem.
++
++config KMEMCHECK_PARTIAL_OK
++ bool "kmemcheck: allow partially uninitialized memory"
++ depends on KMEMCHECK
++ default y
++ help
++ This option works around certain GCC optimizations that produce
++ 32-bit reads from 16-bit variables where the upper 16 bits are
++ thrown away afterwards. This may of course also hide some real
++ bugs.
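Illustrative aside, not part of the patch: a minimal C sketch of the access pattern the KMEMCHECK_PARTIAL_OK help text above describes. The struct and function names are hypothetical; the point is that the C source only reads an initialized 16-bit member, while the compiler may emit a wider load that also covers neighbouring, still-uninitialized bytes and then discards them.

#include <linux/types.h>

/* Hypothetical layout: only "len" has been written so far. */
struct pkt_hdr {
	u16 len;	/* initialized by the caller */
	u16 flags;	/* not yet initialized */
};

static u16 pkt_len(const struct pkt_hdr *hdr)
{
	/*
	 * The source touches only hdr->len, but GCC may load the full
	 * 32-bit word containing both members and mask off the upper
	 * half.  Without KMEMCHECK_PARTIAL_OK, kmemcheck would report
	 * the uninitialized "flags" bytes covered by that wider read.
	 */
	return hdr->len;
}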
++ ++config KMEMCHECK_BITOPS_OK ++ bool "kmemcheck: allow bit-field manipulation" ++ depends on KMEMCHECK ++ default n ++ help ++ This option silences warnings that would be generated for bit-field ++ accesses where not all the bits are initialized at the same time. ++ This may also hide some real bugs. ++ +Index: linux-2.6-tip/lib/Makefile +=================================================================== +--- linux-2.6-tip.orig/lib/Makefile ++++ linux-2.6-tip/lib/Makefile +@@ -11,7 +11,8 @@ lib-y := ctype.o string.o vsprintf.o cmd + rbtree.o radix-tree.o dump_stack.o \ + idr.o int_sqrt.o extable.o prio_tree.o \ + sha1.o irq_regs.o reciprocal_div.o argv_split.o \ +- proportions.o prio_heap.o ratelimit.o show_mem.o is_single_threaded.o ++ proportions.o prio_heap.o ratelimit.o show_mem.o \ ++ is_single_threaded.o plist.o decompress.o + + lib-$(CONFIG_MMU) += ioremap.o + lib-$(CONFIG_SMP) += cpumask.o +@@ -33,14 +34,14 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o + obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o + obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o + obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o +-lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o ++obj-$(CONFIG_PREEMPT_RT) += plist.o ++obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o + lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o + lib-$(CONFIG_GENERIC_FIND_FIRST_BIT) += find_next_bit.o + lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o + lib-$(CONFIG_GENERIC_FIND_LAST_BIT) += find_last_bit.o + obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o + obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o +-obj-$(CONFIG_PLIST) += plist.o + obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o + obj-$(CONFIG_DEBUG_LIST) += list_debug.o + obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o +@@ -65,6 +66,10 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solom + obj-$(CONFIG_LZO_COMPRESS) += lzo/ + obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ + ++lib-$(CONFIG_DECOMPRESS_GZIP) += decompress_inflate.o ++lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o ++lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o ++ + obj-$(CONFIG_TEXTSEARCH) += textsearch.o + obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o + obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o +@@ -84,6 +89,8 @@ obj-$(CONFIG_HAVE_ARCH_TRACEHOOK) += sys + + obj-$(CONFIG_DYNAMIC_PRINTK_DEBUG) += dynamic_printk.o + ++obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o ++ + hostprogs-y := gen_crc32table + clean-files := crc32table.h + +Index: linux-2.6-tip/lib/debugobjects.c +=================================================================== +--- linux-2.6-tip.orig/lib/debugobjects.c ++++ linux-2.6-tip/lib/debugobjects.c +@@ -25,14 +25,14 @@ + + struct debug_bucket { + struct hlist_head list; +- spinlock_t lock; ++ raw_spinlock_t lock; + }; + + static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE]; + +-static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE]; ++static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata; + +-static DEFINE_SPINLOCK(pool_lock); ++static DEFINE_RAW_SPINLOCK(pool_lock); + + static HLIST_HEAD(obj_pool); + +@@ -50,12 +50,23 @@ static int debug_objects_enabled __rea + + static struct debug_obj_descr *descr_test __read_mostly; + ++static void free_obj_work(struct work_struct *work); ++static DECLARE_WORK(debug_obj_work, free_obj_work); ++ + static int __init enable_object_debug(char *str) + { + debug_objects_enabled = 1; + return 0; + } ++ ++static int __init disable_object_debug(char *str) ++{ ++ debug_objects_enabled = 0; ++ return 0; ++} ++ + 
early_param("debug_objects", enable_object_debug); ++early_param("no_debug_objects", disable_object_debug); + + static const char *obj_states[ODEBUG_STATE_MAX] = { + [ODEBUG_STATE_NONE] = "none", +@@ -146,25 +157,51 @@ alloc_object(void *addr, struct debug_bu + } + + /* +- * Put the object back into the pool or give it back to kmem_cache: ++ * workqueue function to free objects. + */ +-static void free_object(struct debug_obj *obj) ++static void free_obj_work(struct work_struct *work) + { +- unsigned long idx = (unsigned long)(obj - obj_static_pool); ++ struct debug_obj *obj; + unsigned long flags; + +- if (obj_pool_free < ODEBUG_POOL_SIZE || idx < ODEBUG_POOL_SIZE) { +- spin_lock_irqsave(&pool_lock, flags); +- hlist_add_head(&obj->node, &obj_pool); +- obj_pool_free++; +- obj_pool_used--; +- spin_unlock_irqrestore(&pool_lock, flags); +- } else { +- spin_lock_irqsave(&pool_lock, flags); +- obj_pool_used--; ++ spin_lock_irqsave(&pool_lock, flags); ++ while (obj_pool_free > ODEBUG_POOL_SIZE) { ++ obj = hlist_entry(obj_pool.first, typeof(*obj), node); ++ hlist_del(&obj->node); ++ obj_pool_free--; ++ /* ++ * We release pool_lock across kmem_cache_free() to ++ * avoid contention on pool_lock. ++ */ + spin_unlock_irqrestore(&pool_lock, flags); + kmem_cache_free(obj_cache, obj); ++ spin_lock_irqsave(&pool_lock, flags); + } ++ spin_unlock_irqrestore(&pool_lock, flags); ++} ++ ++/* ++ * Put the object back into the pool and schedule work to free objects ++ * if necessary. ++ */ ++static void free_object(struct debug_obj *obj) ++{ ++ unsigned long flags; ++ int sched = 0; ++ ++ spin_lock_irqsave(&pool_lock, flags); ++ /* ++ * schedule work when the pool is filled and the cache is ++ * initialized: ++ */ ++ if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache) ++ sched = !work_pending(&debug_obj_work); ++ hlist_add_head(&obj->node, &obj_pool); ++ obj_pool_free++; ++ obj_pool_used--; ++ spin_unlock_irqrestore(&pool_lock, flags); ++ if (sched) ++ schedule_work(&debug_obj_work); + } + + /* +@@ -876,6 +913,63 @@ void __init debug_objects_early_init(voi + } + + /* ++ * Convert the statically allocated objects to dynamic ones: ++ */ ++static int debug_objects_replace_static_objects(void) ++{ ++ struct debug_bucket *db = obj_hash; ++ struct hlist_node *node, *tmp; ++ struct debug_obj *obj, *new; ++ HLIST_HEAD(objects); ++ int i, cnt = 0; ++ ++ for (i = 0; i < ODEBUG_POOL_SIZE; i++) { ++ obj = kmem_cache_zalloc(obj_cache, GFP_KERNEL); ++ if (!obj) ++ goto free; ++ hlist_add_head(&obj->node, &objects); ++ } ++ ++ /* ++ * When debug_objects_mem_init() is called we know that only ++ * one CPU is up, so disabling interrupts is enough ++ * protection. This avoids the lockdep hell of lock ordering. 
++ */ ++ local_irq_disable(); ++ ++ /* Remove the statically allocated objects from the pool */ ++ hlist_for_each_entry_safe(obj, node, tmp, &obj_pool, node) ++ hlist_del(&obj->node); ++ /* Move the allocated objects to the pool */ ++ hlist_move_list(&objects, &obj_pool); ++ ++ /* Replace the active object references */ ++ for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { ++ hlist_move_list(&db->list, &objects); ++ ++ hlist_for_each_entry(obj, node, &objects, node) { ++ new = hlist_entry(obj_pool.first, typeof(*obj), node); ++ hlist_del(&new->node); ++ /* copy object data */ ++ *new = *obj; ++ hlist_add_head(&new->node, &db->list); ++ cnt++; ++ } ++ } ++ ++ printk(KERN_DEBUG "ODEBUG: %d of %d active objects replaced\n", cnt, ++ obj_pool_used); ++ local_irq_enable(); ++ return 0; ++free: ++ hlist_for_each_entry_safe(obj, node, tmp, &objects, node) { ++ hlist_del(&obj->node); ++ kmem_cache_free(obj_cache, obj); ++ } ++ return -ENOMEM; ++} ++ ++/* + * Called after the kmem_caches are functional to setup a dedicated + * cache pool, which has the SLAB_DEBUG_OBJECTS flag set. This flag + * prevents that the debug code is called on kmem_cache_free() for the +@@ -890,8 +984,11 @@ void __init debug_objects_mem_init(void) + sizeof (struct debug_obj), 0, + SLAB_DEBUG_OBJECTS, NULL); + +- if (!obj_cache) ++ if (!obj_cache || debug_objects_replace_static_objects()) { + debug_objects_enabled = 0; +- else ++ if (obj_cache) ++ kmem_cache_destroy(obj_cache); ++ printk(KERN_WARNING "ODEBUG: out of memory.\n"); ++ } else + debug_objects_selftest(); + } +Index: linux-2.6-tip/lib/decompress.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/lib/decompress.c +@@ -0,0 +1,54 @@ ++/* ++ * decompress.c ++ * ++ * Detect the decompression method based on magic number ++ */ ++ ++#include ++ ++#include ++#include ++#include ++ ++#include ++#include ++ ++#ifndef CONFIG_DECOMPRESS_GZIP ++# define gunzip NULL ++#endif ++#ifndef CONFIG_DECOMPRESS_BZIP2 ++# define bunzip2 NULL ++#endif ++#ifndef CONFIG_DECOMPRESS_LZMA ++# define unlzma NULL ++#endif ++ ++static const struct compress_format { ++ unsigned char magic[2]; ++ const char *name; ++ decompress_fn decompressor; ++} compressed_formats[] = { ++ { {037, 0213}, "gzip", gunzip }, ++ { {037, 0236}, "gzip", gunzip }, ++ { {0x42, 0x5a}, "bzip2", bunzip2 }, ++ { {0x5d, 0x00}, "lzma", unlzma }, ++ { {0, 0}, NULL, NULL } ++}; ++ ++decompress_fn decompress_method(const unsigned char *inbuf, int len, ++ const char **name) ++{ ++ const struct compress_format *cf; ++ ++ if (len < 2) ++ return NULL; /* Need at least this much... */ ++ ++ for (cf = compressed_formats; cf->name; cf++) { ++ if (!memcmp(inbuf, cf->magic, 2)) ++ break; ++ ++ } ++ if (name) ++ *name = cf->name; ++ return cf->decompressor; ++} +Index: linux-2.6-tip/lib/decompress_bunzip2.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/lib/decompress_bunzip2.c +@@ -0,0 +1,736 @@ ++/* vi: set sw = 4 ts = 4: */ ++/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net). ++ ++ Based on bzip2 decompression code by Julian R Seward (jseward@acm.org), ++ which also acknowledges contributions by Mike Burrows, David Wheeler, ++ Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten, ++ Robert Sedgewick, and Jon L. Bentley. ++ ++ This code is licensed under the LGPLv2: ++ LGPL (http://www.gnu.org/copyleft/lgpl.html ++*/ ++ ++/* ++ Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org). 
++ ++ More efficient reading of Huffman codes, a streamlined read_bunzip() ++ function, and various other tweaks. In (limited) tests, approximately ++ 20% faster than bzcat on x86 and about 10% faster on arm. ++ ++ Note that about 2/3 of the time is spent in read_unzip() reversing ++ the Burrows-Wheeler transformation. Much of that time is delay ++ resulting from cache misses. ++ ++ I would ask that anyone benefiting from this work, especially those ++ using it in commercial products, consider making a donation to my local ++ non-profit hospice organization in the name of the woman I loved, who ++ passed away Feb. 12, 2003. ++ ++ In memory of Toni W. Hagan ++ ++ Hospice of Acadiana, Inc. ++ 2600 Johnston St., Suite 200 ++ Lafayette, LA 70503-3240 ++ ++ Phone (337) 232-1234 or 1-800-738-2226 ++ Fax (337) 232-1297 ++ ++ http://www.hospiceacadiana.com/ ++ ++ Manuel ++ */ ++ ++/* ++ Made it fit for running in Linux Kernel by Alain Knaff (alain@knaff.lu) ++*/ ++ ++ ++#ifndef STATIC ++#include ++#endif /* !STATIC */ ++ ++#include ++#include ++ ++#ifndef INT_MAX ++#define INT_MAX 0x7fffffff ++#endif ++ ++/* Constants for Huffman coding */ ++#define MAX_GROUPS 6 ++#define GROUP_SIZE 50 /* 64 would have been more efficient */ ++#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */ ++#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */ ++#define SYMBOL_RUNA 0 ++#define SYMBOL_RUNB 1 ++ ++/* Status return values */ ++#define RETVAL_OK 0 ++#define RETVAL_LAST_BLOCK (-1) ++#define RETVAL_NOT_BZIP_DATA (-2) ++#define RETVAL_UNEXPECTED_INPUT_EOF (-3) ++#define RETVAL_UNEXPECTED_OUTPUT_EOF (-4) ++#define RETVAL_DATA_ERROR (-5) ++#define RETVAL_OUT_OF_MEMORY (-6) ++#define RETVAL_OBSOLETE_INPUT (-7) ++ ++/* Other housekeeping constants */ ++#define BZIP2_IOBUF_SIZE 4096 ++ ++/* This is what we know about each Huffman coding group */ ++struct group_data { ++ /* We have an extra slot at the end of limit[] for a sentinal value. */ ++ int limit[MAX_HUFCODE_BITS+1]; ++ int base[MAX_HUFCODE_BITS]; ++ int permute[MAX_SYMBOLS]; ++ int minLen, maxLen; ++}; ++ ++/* Structure holding all the housekeeping data, including IO buffers and ++ memory that persists between calls to bunzip */ ++struct bunzip_data { ++ /* State for interrupting output loop */ ++ int writeCopies, writePos, writeRunCountdown, writeCount, writeCurrent; ++ /* I/O tracking data (file handles, buffers, positions, etc.) */ ++ int (*fill)(void*, unsigned int); ++ int inbufCount, inbufPos /*, outbufPos*/; ++ unsigned char *inbuf /*,*outbuf*/; ++ unsigned int inbufBitCount, inbufBits; ++ /* The CRC values stored in the block header and calculated from the ++ data */ ++ unsigned int crc32Table[256], headerCRC, totalCRC, writeCRC; ++ /* Intermediate buffer and its size (in bytes) */ ++ unsigned int *dbuf, dbufSize; ++ /* These things are a bit too big to go on the stack */ ++ unsigned char selectors[32768]; /* nSelectors = 15 bits */ ++ struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ ++ int io_error; /* non-zero if we have IO error */ ++}; ++ ++ ++/* Return the next nnn bits of input. All reads from the compressed input ++ are done through this function. All reads are big endian */ ++static unsigned int INIT get_bits(struct bunzip_data *bd, char bits_wanted) ++{ ++ unsigned int bits = 0; ++ ++ /* If we need to get more data from the byte buffer, do so. ++ (Loop getting one byte at a time to enforce endianness and avoid ++ unaligned access.) 
*/ ++ while (bd->inbufBitCount < bits_wanted) { ++ /* If we need to read more data from file into byte buffer, do ++ so */ ++ if (bd->inbufPos == bd->inbufCount) { ++ if (bd->io_error) ++ return 0; ++ bd->inbufCount = bd->fill(bd->inbuf, BZIP2_IOBUF_SIZE); ++ if (bd->inbufCount <= 0) { ++ bd->io_error = RETVAL_UNEXPECTED_INPUT_EOF; ++ return 0; ++ } ++ bd->inbufPos = 0; ++ } ++ /* Avoid 32-bit overflow (dump bit buffer to top of output) */ ++ if (bd->inbufBitCount >= 24) { ++ bits = bd->inbufBits&((1 << bd->inbufBitCount)-1); ++ bits_wanted -= bd->inbufBitCount; ++ bits <<= bits_wanted; ++ bd->inbufBitCount = 0; ++ } ++ /* Grab next 8 bits of input from buffer. */ ++ bd->inbufBits = (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; ++ bd->inbufBitCount += 8; ++ } ++ /* Calculate result */ ++ bd->inbufBitCount -= bits_wanted; ++ bits |= (bd->inbufBits >> bd->inbufBitCount)&((1 << bits_wanted)-1); ++ ++ return bits; ++} ++ ++/* Unpacks the next block and sets up for the inverse burrows-wheeler step. */ ++ ++static int INIT get_next_block(struct bunzip_data *bd) ++{ ++ struct group_data *hufGroup = NULL; ++ int *base = NULL; ++ int *limit = NULL; ++ int dbufCount, nextSym, dbufSize, groupCount, selector, ++ i, j, k, t, runPos, symCount, symTotal, nSelectors, ++ byteCount[256]; ++ unsigned char uc, symToByte[256], mtfSymbol[256], *selectors; ++ unsigned int *dbuf, origPtr; ++ ++ dbuf = bd->dbuf; ++ dbufSize = bd->dbufSize; ++ selectors = bd->selectors; ++ ++ /* Read in header signature and CRC, then validate signature. ++ (last block signature means CRC is for whole file, return now) */ ++ i = get_bits(bd, 24); ++ j = get_bits(bd, 24); ++ bd->headerCRC = get_bits(bd, 32); ++ if ((i == 0x177245) && (j == 0x385090)) ++ return RETVAL_LAST_BLOCK; ++ if ((i != 0x314159) || (j != 0x265359)) ++ return RETVAL_NOT_BZIP_DATA; ++ /* We can add support for blockRandomised if anybody complains. ++ There was some code for this in busybox 1.0.0-pre3, but nobody ever ++ noticed that it didn't actually work. */ ++ if (get_bits(bd, 1)) ++ return RETVAL_OBSOLETE_INPUT; ++ origPtr = get_bits(bd, 24); ++ if (origPtr > dbufSize) ++ return RETVAL_DATA_ERROR; ++ /* mapping table: if some byte values are never used (encoding things ++ like ascii text), the compression code removes the gaps to have fewer ++ symbols to deal with, and writes a sparse bitfield indicating which ++ values were present. We make a translation table to convert the ++ symbols back to the corresponding bytes. */ ++ t = get_bits(bd, 16); ++ symTotal = 0; ++ for (i = 0; i < 16; i++) { ++ if (t&(1 << (15-i))) { ++ k = get_bits(bd, 16); ++ for (j = 0; j < 16; j++) ++ if (k&(1 << (15-j))) ++ symToByte[symTotal++] = (16*i)+j; ++ } ++ } ++ /* How many different Huffman coding groups does this block use? */ ++ groupCount = get_bits(bd, 3); ++ if (groupCount < 2 || groupCount > MAX_GROUPS) ++ return RETVAL_DATA_ERROR; ++ /* nSelectors: Every GROUP_SIZE many symbols we select a new ++ Huffman coding group. Read in the group selector list, ++ which is stored as MTF encoded bit runs. (MTF = Move To ++ Front, as each value is used it's moved to the start of the ++ list.) 
*/ ++ nSelectors = get_bits(bd, 15); ++ if (!nSelectors) ++ return RETVAL_DATA_ERROR; ++ for (i = 0; i < groupCount; i++) ++ mtfSymbol[i] = i; ++ for (i = 0; i < nSelectors; i++) { ++ /* Get next value */ ++ for (j = 0; get_bits(bd, 1); j++) ++ if (j >= groupCount) ++ return RETVAL_DATA_ERROR; ++ /* Decode MTF to get the next selector */ ++ uc = mtfSymbol[j]; ++ for (; j; j--) ++ mtfSymbol[j] = mtfSymbol[j-1]; ++ mtfSymbol[0] = selectors[i] = uc; ++ } ++ /* Read the Huffman coding tables for each group, which code ++ for symTotal literal symbols, plus two run symbols (RUNA, ++ RUNB) */ ++ symCount = symTotal+2; ++ for (j = 0; j < groupCount; j++) { ++ unsigned char length[MAX_SYMBOLS], temp[MAX_HUFCODE_BITS+1]; ++ int minLen, maxLen, pp; ++ /* Read Huffman code lengths for each symbol. They're ++ stored in a way similar to mtf; record a starting ++ value for the first symbol, and an offset from the ++ previous value for everys symbol after that. ++ (Subtracting 1 before the loop and then adding it ++ back at the end is an optimization that makes the ++ test inside the loop simpler: symbol length 0 ++ becomes negative, so an unsigned inequality catches ++ it.) */ ++ t = get_bits(bd, 5)-1; ++ for (i = 0; i < symCount; i++) { ++ for (;;) { ++ if (((unsigned)t) > (MAX_HUFCODE_BITS-1)) ++ return RETVAL_DATA_ERROR; ++ ++ /* If first bit is 0, stop. Else ++ second bit indicates whether to ++ increment or decrement the value. ++ Optimization: grab 2 bits and unget ++ the second if the first was 0. */ ++ ++ k = get_bits(bd, 2); ++ if (k < 2) { ++ bd->inbufBitCount++; ++ break; ++ } ++ /* Add one if second bit 1, else ++ * subtract 1. Avoids if/else */ ++ t += (((k+1)&2)-1); ++ } ++ /* Correct for the initial -1, to get the ++ * final symbol length */ ++ length[i] = t+1; ++ } ++ /* Find largest and smallest lengths in this group */ ++ minLen = maxLen = length[0]; ++ ++ for (i = 1; i < symCount; i++) { ++ if (length[i] > maxLen) ++ maxLen = length[i]; ++ else if (length[i] < minLen) ++ minLen = length[i]; ++ } ++ ++ /* Calculate permute[], base[], and limit[] tables from ++ * length[]. ++ * ++ * permute[] is the lookup table for converting ++ * Huffman coded symbols into decoded symbols. base[] ++ * is the amount to subtract from the value of a ++ * Huffman symbol of a given length when using ++ * permute[]. ++ * ++ * limit[] indicates the largest numerical value a ++ * symbol with a given number of bits can have. This ++ * is how the Huffman codes can vary in length: each ++ * code with a value > limit[length] needs another ++ * bit. ++ */ ++ hufGroup = bd->groups+j; ++ hufGroup->minLen = minLen; ++ hufGroup->maxLen = maxLen; ++ /* Note that minLen can't be smaller than 1, so we ++ adjust the base and limit array pointers so we're ++ not always wasting the first entry. We do this ++ again when using them (during symbol decoding).*/ ++ base = hufGroup->base-1; ++ limit = hufGroup->limit-1; ++ /* Calculate permute[]. Concurently, initialize ++ * temp[] and limit[]. 
*/ ++ pp = 0; ++ for (i = minLen; i <= maxLen; i++) { ++ temp[i] = limit[i] = 0; ++ for (t = 0; t < symCount; t++) ++ if (length[t] == i) ++ hufGroup->permute[pp++] = t; ++ } ++ /* Count symbols coded for at each bit length */ ++ for (i = 0; i < symCount; i++) ++ temp[length[i]]++; ++ /* Calculate limit[] (the largest symbol-coding value ++ *at each bit length, which is (previous limit << ++ *1)+symbols at this level), and base[] (number of ++ *symbols to ignore at each bit length, which is limit ++ *minus the cumulative count of symbols coded for ++ *already). */ ++ pp = t = 0; ++ for (i = minLen; i < maxLen; i++) { ++ pp += temp[i]; ++ /* We read the largest possible symbol size ++ and then unget bits after determining how ++ many we need, and those extra bits could be ++ set to anything. (They're noise from ++ future symbols.) At each level we're ++ really only interested in the first few ++ bits, so here we set all the trailing ++ to-be-ignored bits to 1 so they don't ++ affect the value > limit[length] ++ comparison. */ ++ limit[i] = (pp << (maxLen - i)) - 1; ++ pp <<= 1; ++ base[i+1] = pp-(t += temp[i]); ++ } ++ limit[maxLen+1] = INT_MAX; /* Sentinal value for ++ * reading next sym. */ ++ limit[maxLen] = pp+temp[maxLen]-1; ++ base[minLen] = 0; ++ } ++ /* We've finished reading and digesting the block header. Now ++ read this block's Huffman coded symbols from the file and ++ undo the Huffman coding and run length encoding, saving the ++ result into dbuf[dbufCount++] = uc */ ++ ++ /* Initialize symbol occurrence counters and symbol Move To ++ * Front table */ ++ for (i = 0; i < 256; i++) { ++ byteCount[i] = 0; ++ mtfSymbol[i] = (unsigned char)i; ++ } ++ /* Loop through compressed symbols. */ ++ runPos = dbufCount = symCount = selector = 0; ++ for (;;) { ++ /* Determine which Huffman coding group to use. */ ++ if (!(symCount--)) { ++ symCount = GROUP_SIZE-1; ++ if (selector >= nSelectors) ++ return RETVAL_DATA_ERROR; ++ hufGroup = bd->groups+selectors[selector++]; ++ base = hufGroup->base-1; ++ limit = hufGroup->limit-1; ++ } ++ /* Read next Huffman-coded symbol. */ ++ /* Note: It is far cheaper to read maxLen bits and ++ back up than it is to read minLen bits and then an ++ additional bit at a time, testing as we go. ++ Because there is a trailing last block (with file ++ CRC), there is no danger of the overread causing an ++ unexpected EOF for a valid compressed file. As a ++ further optimization, we do the read inline ++ (falling back to a call to get_bits if the buffer ++ runs dry). 
The following (up to got_huff_bits:) is ++ equivalent to j = get_bits(bd, hufGroup->maxLen); ++ */ ++ while (bd->inbufBitCount < hufGroup->maxLen) { ++ if (bd->inbufPos == bd->inbufCount) { ++ j = get_bits(bd, hufGroup->maxLen); ++ goto got_huff_bits; ++ } ++ bd->inbufBits = ++ (bd->inbufBits << 8)|bd->inbuf[bd->inbufPos++]; ++ bd->inbufBitCount += 8; ++ }; ++ bd->inbufBitCount -= hufGroup->maxLen; ++ j = (bd->inbufBits >> bd->inbufBitCount)& ++ ((1 << hufGroup->maxLen)-1); ++got_huff_bits: ++ /* Figure how how many bits are in next symbol and ++ * unget extras */ ++ i = hufGroup->minLen; ++ while (j > limit[i]) ++ ++i; ++ bd->inbufBitCount += (hufGroup->maxLen - i); ++ /* Huffman decode value to get nextSym (with bounds checking) */ ++ if ((i > hufGroup->maxLen) ++ || (((unsigned)(j = (j>>(hufGroup->maxLen-i))-base[i])) ++ >= MAX_SYMBOLS)) ++ return RETVAL_DATA_ERROR; ++ nextSym = hufGroup->permute[j]; ++ /* We have now decoded the symbol, which indicates ++ either a new literal byte, or a repeated run of the ++ most recent literal byte. First, check if nextSym ++ indicates a repeated run, and if so loop collecting ++ how many times to repeat the last literal. */ ++ if (((unsigned)nextSym) <= SYMBOL_RUNB) { /* RUNA or RUNB */ ++ /* If this is the start of a new run, zero out ++ * counter */ ++ if (!runPos) { ++ runPos = 1; ++ t = 0; ++ } ++ /* Neat trick that saves 1 symbol: instead of ++ or-ing 0 or 1 at each bit position, add 1 ++ or 2 instead. For example, 1011 is 1 << 0 ++ + 1 << 1 + 2 << 2. 1010 is 2 << 0 + 2 << 1 ++ + 1 << 2. You can make any bit pattern ++ that way using 1 less symbol than the basic ++ or 0/1 method (except all bits 0, which ++ would use no symbols, but a run of length 0 ++ doesn't mean anything in this context). ++ Thus space is saved. */ ++ t += (runPos << nextSym); ++ /* +runPos if RUNA; +2*runPos if RUNB */ ++ ++ runPos <<= 1; ++ continue; ++ } ++ /* When we hit the first non-run symbol after a run, ++ we now know how many times to repeat the last ++ literal, so append that many copies to our buffer ++ of decoded symbols (dbuf) now. (The last literal ++ used is the one at the head of the mtfSymbol ++ array.) */ ++ if (runPos) { ++ runPos = 0; ++ if (dbufCount+t >= dbufSize) ++ return RETVAL_DATA_ERROR; ++ ++ uc = symToByte[mtfSymbol[0]]; ++ byteCount[uc] += t; ++ while (t--) ++ dbuf[dbufCount++] = uc; ++ } ++ /* Is this the terminating symbol? */ ++ if (nextSym > symTotal) ++ break; ++ /* At this point, nextSym indicates a new literal ++ character. Subtract one to get the position in the ++ MTF array at which this literal is currently to be ++ found. (Note that the result can't be -1 or 0, ++ because 0 and 1 are RUNA and RUNB. But another ++ instance of the first symbol in the mtf array, ++ position 0, would have been handled as part of a ++ run above. Therefore 1 unused mtf position minus 2 ++ non-literal nextSym values equals -1.) */ ++ if (dbufCount >= dbufSize) ++ return RETVAL_DATA_ERROR; ++ i = nextSym - 1; ++ uc = mtfSymbol[i]; ++ /* Adjust the MTF array. Since we typically expect to ++ *move only a small number of symbols, and are bound ++ *by 256 in any case, using memmove here would ++ *typically be bigger and slower due to function call ++ *overhead and other assorted setup costs. */ ++ do { ++ mtfSymbol[i] = mtfSymbol[i-1]; ++ } while (--i); ++ mtfSymbol[0] = uc; ++ uc = symToByte[uc]; ++ /* We have our literal byte. Save it into dbuf. 
*/ ++ byteCount[uc]++; ++ dbuf[dbufCount++] = (unsigned int)uc; ++ } ++ /* At this point, we've read all the Huffman-coded symbols ++ (and repeated runs) for this block from the input stream, ++ and decoded them into the intermediate buffer. There are ++ dbufCount many decoded bytes in dbuf[]. Now undo the ++ Burrows-Wheeler transform on dbuf. See ++ http://dogma.net/markn/articles/bwt/bwt.htm ++ */ ++ /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ ++ j = 0; ++ for (i = 0; i < 256; i++) { ++ k = j+byteCount[i]; ++ byteCount[i] = j; ++ j = k; ++ } ++ /* Figure out what order dbuf would be in if we sorted it. */ ++ for (i = 0; i < dbufCount; i++) { ++ uc = (unsigned char)(dbuf[i] & 0xff); ++ dbuf[byteCount[uc]] |= (i << 8); ++ byteCount[uc]++; ++ } ++ /* Decode first byte by hand to initialize "previous" byte. ++ Note that it doesn't get output, and if the first three ++ characters are identical it doesn't qualify as a run (hence ++ writeRunCountdown = 5). */ ++ if (dbufCount) { ++ if (origPtr >= dbufCount) ++ return RETVAL_DATA_ERROR; ++ bd->writePos = dbuf[origPtr]; ++ bd->writeCurrent = (unsigned char)(bd->writePos&0xff); ++ bd->writePos >>= 8; ++ bd->writeRunCountdown = 5; ++ } ++ bd->writeCount = dbufCount; ++ ++ return RETVAL_OK; ++} ++ ++/* Undo burrows-wheeler transform on intermediate buffer to produce output. ++ If start_bunzip was initialized with out_fd =-1, then up to len bytes of ++ data are written to outbuf. Return value is number of bytes written or ++ error (all errors are negative numbers). If out_fd!=-1, outbuf and len ++ are ignored, data is written to out_fd and return is RETVAL_OK or error. ++*/ ++ ++static int INIT read_bunzip(struct bunzip_data *bd, char *outbuf, int len) ++{ ++ const unsigned int *dbuf; ++ int pos, xcurrent, previous, gotcount; ++ ++ /* If last read was short due to end of file, return last block now */ ++ if (bd->writeCount < 0) ++ return bd->writeCount; ++ ++ gotcount = 0; ++ dbuf = bd->dbuf; ++ pos = bd->writePos; ++ xcurrent = bd->writeCurrent; ++ ++ /* We will always have pending decoded data to write into the output ++ buffer unless this is the very first call (in which case we haven't ++ Huffman-decoded a block into the intermediate buffer yet). */ ++ ++ if (bd->writeCopies) { ++ /* Inside the loop, writeCopies means extra copies (beyond 1) */ ++ --bd->writeCopies; ++ /* Loop outputting bytes */ ++ for (;;) { ++ /* If the output buffer is full, snapshot ++ * state and return */ ++ if (gotcount >= len) { ++ bd->writePos = pos; ++ bd->writeCurrent = xcurrent; ++ bd->writeCopies++; ++ return len; ++ } ++ /* Write next byte into output buffer, updating CRC */ ++ outbuf[gotcount++] = xcurrent; ++ bd->writeCRC = (((bd->writeCRC) << 8) ++ ^bd->crc32Table[((bd->writeCRC) >> 24) ++ ^xcurrent]); ++ /* Loop now if we're outputting multiple ++ * copies of this byte */ ++ if (bd->writeCopies) { ++ --bd->writeCopies; ++ continue; ++ } ++decode_next_byte: ++ if (!bd->writeCount--) ++ break; ++ /* Follow sequence vector to undo ++ * Burrows-Wheeler transform */ ++ previous = xcurrent; ++ pos = dbuf[pos]; ++ xcurrent = pos&0xff; ++ pos >>= 8; ++ /* After 3 consecutive copies of the same ++ byte, the 4th is a repeat count. 
We count ++ down from 4 instead *of counting up because ++ testing for non-zero is faster */ ++ if (--bd->writeRunCountdown) { ++ if (xcurrent != previous) ++ bd->writeRunCountdown = 4; ++ } else { ++ /* We have a repeated run, this byte ++ * indicates the count */ ++ bd->writeCopies = xcurrent; ++ xcurrent = previous; ++ bd->writeRunCountdown = 5; ++ /* Sometimes there are just 3 bytes ++ * (run length 0) */ ++ if (!bd->writeCopies) ++ goto decode_next_byte; ++ /* Subtract the 1 copy we'd output ++ * anyway to get extras */ ++ --bd->writeCopies; ++ } ++ } ++ /* Decompression of this block completed successfully */ ++ bd->writeCRC = ~bd->writeCRC; ++ bd->totalCRC = ((bd->totalCRC << 1) | ++ (bd->totalCRC >> 31)) ^ bd->writeCRC; ++ /* If this block had a CRC error, force file level CRC error. */ ++ if (bd->writeCRC != bd->headerCRC) { ++ bd->totalCRC = bd->headerCRC+1; ++ return RETVAL_LAST_BLOCK; ++ } ++ } ++ ++ /* Refill the intermediate buffer by Huffman-decoding next ++ * block of input */ ++ /* (previous is just a convenient unused temp variable here) */ ++ previous = get_next_block(bd); ++ if (previous) { ++ bd->writeCount = previous; ++ return (previous != RETVAL_LAST_BLOCK) ? previous : gotcount; ++ } ++ bd->writeCRC = 0xffffffffUL; ++ pos = bd->writePos; ++ xcurrent = bd->writeCurrent; ++ goto decode_next_byte; ++} ++ ++static int INIT nofill(void *buf, unsigned int len) ++{ ++ return -1; ++} ++ ++/* Allocate the structure, read file header. If in_fd ==-1, inbuf must contain ++ a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are ++ ignored, and data is read from file handle into temporary buffer. */ ++static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, int len, ++ int (*fill)(void*, unsigned int)) ++{ ++ struct bunzip_data *bd; ++ unsigned int i, j, c; ++ const unsigned int BZh0 = ++ (((unsigned int)'B') << 24)+(((unsigned int)'Z') << 16) ++ +(((unsigned int)'h') << 8)+(unsigned int)'0'; ++ ++ /* Figure out how much data to allocate */ ++ i = sizeof(struct bunzip_data); ++ ++ /* Allocate bunzip_data. Most fields initialize to zero. */ ++ bd = *bdp = malloc(i); ++ memset(bd, 0, sizeof(struct bunzip_data)); ++ /* Setup input buffer */ ++ bd->inbuf = inbuf; ++ bd->inbufCount = len; ++ if (fill != NULL) ++ bd->fill = fill; ++ else ++ bd->fill = nofill; ++ ++ /* Init the CRC32 table (big endian) */ ++ for (i = 0; i < 256; i++) { ++ c = i << 24; ++ for (j = 8; j; j--) ++ c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1); ++ bd->crc32Table[i] = c; ++ } ++ ++ /* Ensure that file starts with "BZh['1'-'9']." */ ++ i = get_bits(bd, 32); ++ if (((unsigned int)(i-BZh0-1)) >= 9) ++ return RETVAL_NOT_BZIP_DATA; ++ ++ /* Fourth byte (ascii '1'-'9'), indicates block size in units of 100k of ++ uncompressed data. Allocate intermediate buffer for block. */ ++ bd->dbufSize = 100000*(i-BZh0); ++ ++ bd->dbuf = large_malloc(bd->dbufSize * sizeof(int)); ++ return RETVAL_OK; ++} ++ ++/* Example usage: decompress src_fd to dst_fd. (Stops at end of bzip2 data, ++ not end of file.) 
*/ ++STATIC int INIT bunzip2(unsigned char *buf, int len, ++ int(*fill)(void*, unsigned int), ++ int(*flush)(void*, unsigned int), ++ unsigned char *outbuf, ++ int *pos, ++ void(*error_fn)(char *x)) ++{ ++ struct bunzip_data *bd; ++ int i = -1; ++ unsigned char *inbuf; ++ ++ set_error_fn(error_fn); ++ if (flush) ++ outbuf = malloc(BZIP2_IOBUF_SIZE); ++ else ++ len -= 4; /* Uncompressed size hack active in pre-boot ++ environment */ ++ if (!outbuf) { ++ error("Could not allocate output bufer"); ++ return -1; ++ } ++ if (buf) ++ inbuf = buf; ++ else ++ inbuf = malloc(BZIP2_IOBUF_SIZE); ++ if (!inbuf) { ++ error("Could not allocate input bufer"); ++ goto exit_0; ++ } ++ i = start_bunzip(&bd, inbuf, len, fill); ++ if (!i) { ++ for (;;) { ++ i = read_bunzip(bd, outbuf, BZIP2_IOBUF_SIZE); ++ if (i <= 0) ++ break; ++ if (!flush) ++ outbuf += i; ++ else ++ if (i != flush(outbuf, i)) { ++ i = RETVAL_UNEXPECTED_OUTPUT_EOF; ++ break; ++ } ++ } ++ } ++ /* Check CRC and release memory */ ++ if (i == RETVAL_LAST_BLOCK) { ++ if (bd->headerCRC != bd->totalCRC) ++ error("Data integrity error when decompressing."); ++ else ++ i = RETVAL_OK; ++ } else if (i == RETVAL_UNEXPECTED_OUTPUT_EOF) { ++ error("Compressed file ends unexpectedly"); ++ } ++ if (bd->dbuf) ++ large_free(bd->dbuf); ++ if (pos) ++ *pos = bd->inbufPos; ++ free(bd); ++ if (!buf) ++ free(inbuf); ++exit_0: ++ if (flush) ++ free(outbuf); ++ return i; ++} ++ ++#define decompress bunzip2 +Index: linux-2.6-tip/lib/decompress_inflate.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/lib/decompress_inflate.c +@@ -0,0 +1,168 @@ ++#ifdef STATIC ++/* Pre-boot environment: included */ ++ ++/* prevent inclusion of _LINUX_KERNEL_H in pre-boot environment: lots ++ * errors about console_printk etc... on ARM */ ++#define _LINUX_KERNEL_H ++ ++#include "zlib_inflate/inftrees.c" ++#include "zlib_inflate/inffast.c" ++#include "zlib_inflate/inflate.c" ++ ++#else /* STATIC */ ++/* initramfs et al: linked */ ++ ++#include ++ ++#include "zlib_inflate/inftrees.h" ++#include "zlib_inflate/inffast.h" ++#include "zlib_inflate/inflate.h" ++ ++#include "zlib_inflate/infutil.h" ++ ++#endif /* STATIC */ ++ ++#include ++#include ++ ++#define INBUF_LEN (16*1024) ++ ++/* Included from initramfs et al code */ ++STATIC int INIT gunzip(unsigned char *buf, int len, ++ int(*fill)(void*, unsigned int), ++ int(*flush)(void*, unsigned int), ++ unsigned char *out_buf, ++ int *pos, ++ void(*error_fn)(char *x)) { ++ u8 *zbuf; ++ struct z_stream_s *strm; ++ int rc; ++ size_t out_len; ++ ++ set_error_fn(error_fn); ++ rc = -1; ++ if (flush) { ++ out_len = 0x8000; /* 32 K */ ++ out_buf = malloc(out_len); ++ } else { ++ out_len = 0x7fffffff; /* no limit */ ++ } ++ if (!out_buf) { ++ error("Out of memory while allocating output buffer"); ++ goto gunzip_nomem1; ++ } ++ ++ if (buf) ++ zbuf = buf; ++ else { ++ zbuf = malloc(INBUF_LEN); ++ len = 0; ++ } ++ if (!zbuf) { ++ error("Out of memory while allocating input buffer"); ++ goto gunzip_nomem2; ++ } ++ ++ strm = malloc(sizeof(*strm)); ++ if (strm == NULL) { ++ error("Out of memory while allocating z_stream"); ++ goto gunzip_nomem3; ++ } ++ ++ strm->workspace = malloc(flush ? 
zlib_inflate_workspacesize() : ++ sizeof(struct inflate_state)); ++ if (strm->workspace == NULL) { ++ error("Out of memory while allocating workspace"); ++ goto gunzip_nomem4; ++ } ++ ++ if (len == 0) ++ len = fill(zbuf, INBUF_LEN); ++ ++ /* verify the gzip header */ ++ if (len < 10 || ++ zbuf[0] != 0x1f || zbuf[1] != 0x8b || zbuf[2] != 0x08) { ++ if (pos) ++ *pos = 0; ++ error("Not a gzip file"); ++ goto gunzip_5; ++ } ++ ++ /* skip over gzip header (1f,8b,08... 10 bytes total + ++ * possible asciz filename) ++ */ ++ strm->next_in = zbuf + 10; ++ /* skip over asciz filename */ ++ if (zbuf[3] & 0x8) { ++ while (strm->next_in[0]) ++ strm->next_in++; ++ strm->next_in++; ++ } ++ strm->avail_in = len - (strm->next_in - zbuf); ++ ++ strm->next_out = out_buf; ++ strm->avail_out = out_len; ++ ++ rc = zlib_inflateInit2(strm, -MAX_WBITS); ++ ++ if (!flush) { ++ WS(strm)->inflate_state.wsize = 0; ++ WS(strm)->inflate_state.window = NULL; ++ } ++ ++ while (rc == Z_OK) { ++ if (strm->avail_in == 0) { ++ /* TODO: handle case where both pos and fill are set */ ++ len = fill(zbuf, INBUF_LEN); ++ if (len < 0) { ++ rc = -1; ++ error("read error"); ++ break; ++ } ++ strm->next_in = zbuf; ++ strm->avail_in = len; ++ } ++ rc = zlib_inflate(strm, 0); ++ ++ /* Write any data generated */ ++ if (flush && strm->next_out > out_buf) { ++ int l = strm->next_out - out_buf; ++ if (l != flush(out_buf, l)) { ++ rc = -1; ++ error("write error"); ++ break; ++ } ++ strm->next_out = out_buf; ++ strm->avail_out = out_len; ++ } ++ ++ /* after Z_FINISH, only Z_STREAM_END is "we unpacked it all" */ ++ if (rc == Z_STREAM_END) { ++ rc = 0; ++ break; ++ } else if (rc != Z_OK) { ++ error("uncompression error"); ++ rc = -1; ++ } ++ } ++ ++ zlib_inflateEnd(strm); ++ if (pos) ++ /* add + 8 to skip over trailer */ ++ *pos = strm->next_in - zbuf+8; ++ ++gunzip_5: ++ free(strm->workspace); ++gunzip_nomem4: ++ free(strm); ++gunzip_nomem3: ++ if (!buf) ++ free(zbuf); ++gunzip_nomem2: ++ if (flush) ++ free(out_buf); ++gunzip_nomem1: ++ return rc; /* returns Z_OK (0) if successful */ ++} ++ ++#define decompress gunzip +Index: linux-2.6-tip/lib/decompress_unlzma.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/lib/decompress_unlzma.c +@@ -0,0 +1,648 @@ ++/* Lzma decompressor for Linux kernel. Shamelessly snarfed ++ *from busybox 1.1.1 ++ * ++ *Linux kernel adaptation ++ *Copyright (C) 2006 Alain < alain@knaff.lu > ++ * ++ *Based on small lzma deflate implementation/Small range coder ++ *implementation for lzma. ++ *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > ++ * ++ *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) ++ *Copyright (C) 1999-2005 Igor Pavlov ++ * ++ *Copyrights of the parts, see headers below. ++ * ++ * ++ *This program is free software; you can redistribute it and/or ++ *modify it under the terms of the GNU Lesser General Public ++ *License as published by the Free Software Foundation; either ++ *version 2.1 of the License, or (at your option) any later version. ++ * ++ *This program is distributed in the hope that it will be useful, ++ *but WITHOUT ANY WARRANTY; without even the implied warranty of ++ *MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ *Lesser General Public License for more details. 
++ * ++ *You should have received a copy of the GNU Lesser General Public ++ *License along with this library; if not, write to the Free Software ++ *Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++ */ ++ ++#ifndef STATIC ++#include ++#endif /* STATIC */ ++ ++#include ++#include ++ ++#define MIN(a, b) (((a) < (b)) ? (a) : (b)) ++ ++static long long INIT read_int(unsigned char *ptr, int size) ++{ ++ int i; ++ long long ret = 0; ++ ++ for (i = 0; i < size; i++) ++ ret = (ret << 8) | ptr[size-i-1]; ++ return ret; ++} ++ ++#define ENDIAN_CONVERT(x) \ ++ x = (typeof(x))read_int((unsigned char *)&x, sizeof(x)) ++ ++ ++/* Small range coder implementation for lzma. ++ *Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > ++ * ++ *Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) ++ *Copyright (c) 1999-2005 Igor Pavlov ++ */ ++ ++#include ++ ++#define LZMA_IOBUF_SIZE 0x10000 ++ ++struct rc { ++ int (*fill)(void*, unsigned int); ++ uint8_t *ptr; ++ uint8_t *buffer; ++ uint8_t *buffer_end; ++ int buffer_size; ++ uint32_t code; ++ uint32_t range; ++ uint32_t bound; ++}; ++ ++ ++#define RC_TOP_BITS 24 ++#define RC_MOVE_BITS 5 ++#define RC_MODEL_TOTAL_BITS 11 ++ ++ ++/* Called twice: once at startup and once in rc_normalize() */ ++static void INIT rc_read(struct rc *rc) ++{ ++ rc->buffer_size = rc->fill((char *)rc->buffer, LZMA_IOBUF_SIZE); ++ if (rc->buffer_size <= 0) ++ error("unexpected EOF"); ++ rc->ptr = rc->buffer; ++ rc->buffer_end = rc->buffer + rc->buffer_size; ++} ++ ++/* Called once */ ++static inline void INIT rc_init(struct rc *rc, ++ int (*fill)(void*, unsigned int), ++ char *buffer, int buffer_size) ++{ ++ rc->fill = fill; ++ rc->buffer = (uint8_t *)buffer; ++ rc->buffer_size = buffer_size; ++ rc->buffer_end = rc->buffer + rc->buffer_size; ++ rc->ptr = rc->buffer; ++ ++ rc->code = 0; ++ rc->range = 0xFFFFFFFF; ++} ++ ++static inline void INIT rc_init_code(struct rc *rc) ++{ ++ int i; ++ ++ for (i = 0; i < 5; i++) { ++ if (rc->ptr >= rc->buffer_end) ++ rc_read(rc); ++ rc->code = (rc->code << 8) | *rc->ptr++; ++ } ++} ++ ++ ++/* Called once. TODO: bb_maybe_free() */ ++static inline void INIT rc_free(struct rc *rc) ++{ ++ free(rc->buffer); ++} ++ ++/* Called twice, but one callsite is in inline'd rc_is_bit_0_helper() */ ++static void INIT rc_do_normalize(struct rc *rc) ++{ ++ if (rc->ptr >= rc->buffer_end) ++ rc_read(rc); ++ rc->range <<= 8; ++ rc->code = (rc->code << 8) | *rc->ptr++; ++} ++static inline void INIT rc_normalize(struct rc *rc) ++{ ++ if (rc->range < (1 << RC_TOP_BITS)) ++ rc_do_normalize(rc); ++} ++ ++/* Called 9 times */ ++/* Why rc_is_bit_0_helper exists? 
++ *Because we want to always expose (rc->code < rc->bound) to optimizer ++ */ ++static inline uint32_t INIT rc_is_bit_0_helper(struct rc *rc, uint16_t *p) ++{ ++ rc_normalize(rc); ++ rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); ++ return rc->bound; ++} ++static inline int INIT rc_is_bit_0(struct rc *rc, uint16_t *p) ++{ ++ uint32_t t = rc_is_bit_0_helper(rc, p); ++ return rc->code < t; ++} ++ ++/* Called ~10 times, but very small, thus inlined */ ++static inline void INIT rc_update_bit_0(struct rc *rc, uint16_t *p) ++{ ++ rc->range = rc->bound; ++ *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; ++} ++static inline void rc_update_bit_1(struct rc *rc, uint16_t *p) ++{ ++ rc->range -= rc->bound; ++ rc->code -= rc->bound; ++ *p -= *p >> RC_MOVE_BITS; ++} ++ ++/* Called 4 times in unlzma loop */ ++static int INIT rc_get_bit(struct rc *rc, uint16_t *p, int *symbol) ++{ ++ if (rc_is_bit_0(rc, p)) { ++ rc_update_bit_0(rc, p); ++ *symbol *= 2; ++ return 0; ++ } else { ++ rc_update_bit_1(rc, p); ++ *symbol = *symbol * 2 + 1; ++ return 1; ++ } ++} ++ ++/* Called once */ ++static inline int INIT rc_direct_bit(struct rc *rc) ++{ ++ rc_normalize(rc); ++ rc->range >>= 1; ++ if (rc->code >= rc->range) { ++ rc->code -= rc->range; ++ return 1; ++ } ++ return 0; ++} ++ ++/* Called twice */ ++static inline void INIT ++rc_bit_tree_decode(struct rc *rc, uint16_t *p, int num_levels, int *symbol) ++{ ++ int i = num_levels; ++ ++ *symbol = 1; ++ while (i--) ++ rc_get_bit(rc, p + *symbol, symbol); ++ *symbol -= 1 << num_levels; ++} ++ ++ ++/* ++ * Small lzma deflate implementation. ++ * Copyright (C) 2006 Aurelien Jacobs < aurel@gnuage.org > ++ * ++ * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) ++ * Copyright (C) 1999-2005 Igor Pavlov ++ */ ++ ++ ++struct lzma_header { ++ uint8_t pos; ++ uint32_t dict_size; ++ uint64_t dst_size; ++} __attribute__ ((packed)) ; ++ ++ ++#define LZMA_BASE_SIZE 1846 ++#define LZMA_LIT_SIZE 768 ++ ++#define LZMA_NUM_POS_BITS_MAX 4 ++ ++#define LZMA_LEN_NUM_LOW_BITS 3 ++#define LZMA_LEN_NUM_MID_BITS 3 ++#define LZMA_LEN_NUM_HIGH_BITS 8 ++ ++#define LZMA_LEN_CHOICE 0 ++#define LZMA_LEN_CHOICE_2 (LZMA_LEN_CHOICE + 1) ++#define LZMA_LEN_LOW (LZMA_LEN_CHOICE_2 + 1) ++#define LZMA_LEN_MID (LZMA_LEN_LOW \ ++ + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))) ++#define LZMA_LEN_HIGH (LZMA_LEN_MID \ ++ +(1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))) ++#define LZMA_NUM_LEN_PROBS (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)) ++ ++#define LZMA_NUM_STATES 12 ++#define LZMA_NUM_LIT_STATES 7 ++ ++#define LZMA_START_POS_MODEL_INDEX 4 ++#define LZMA_END_POS_MODEL_INDEX 14 ++#define LZMA_NUM_FULL_DISTANCES (1 << (LZMA_END_POS_MODEL_INDEX >> 1)) ++ ++#define LZMA_NUM_POS_SLOT_BITS 6 ++#define LZMA_NUM_LEN_TO_POS_STATES 4 ++ ++#define LZMA_NUM_ALIGN_BITS 4 ++ ++#define LZMA_MATCH_MIN_LEN 2 ++ ++#define LZMA_IS_MATCH 0 ++#define LZMA_IS_REP (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) ++#define LZMA_IS_REP_G0 (LZMA_IS_REP + LZMA_NUM_STATES) ++#define LZMA_IS_REP_G1 (LZMA_IS_REP_G0 + LZMA_NUM_STATES) ++#define LZMA_IS_REP_G2 (LZMA_IS_REP_G1 + LZMA_NUM_STATES) ++#define LZMA_IS_REP_0_LONG (LZMA_IS_REP_G2 + LZMA_NUM_STATES) ++#define LZMA_POS_SLOT (LZMA_IS_REP_0_LONG \ ++ + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)) ++#define LZMA_SPEC_POS (LZMA_POS_SLOT \ ++ +(LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)) ++#define LZMA_ALIGN (LZMA_SPEC_POS \ ++ + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX) ++#define LZMA_LEN_CODER 
(LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)) ++#define LZMA_REP_LEN_CODER (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS) ++#define LZMA_LITERAL (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS) ++ ++ ++struct writer { ++ uint8_t *buffer; ++ uint8_t previous_byte; ++ size_t buffer_pos; ++ int bufsize; ++ size_t global_pos; ++ int(*flush)(void*, unsigned int); ++ struct lzma_header *header; ++}; ++ ++struct cstate { ++ int state; ++ uint32_t rep0, rep1, rep2, rep3; ++}; ++ ++static inline size_t INIT get_pos(struct writer *wr) ++{ ++ return ++ wr->global_pos + wr->buffer_pos; ++} ++ ++static inline uint8_t INIT peek_old_byte(struct writer *wr, ++ uint32_t offs) ++{ ++ if (!wr->flush) { ++ int32_t pos; ++ while (offs > wr->header->dict_size) ++ offs -= wr->header->dict_size; ++ pos = wr->buffer_pos - offs; ++ return wr->buffer[pos]; ++ } else { ++ uint32_t pos = wr->buffer_pos - offs; ++ while (pos >= wr->header->dict_size) ++ pos += wr->header->dict_size; ++ return wr->buffer[pos]; ++ } ++ ++} ++ ++static inline void INIT write_byte(struct writer *wr, uint8_t byte) ++{ ++ wr->buffer[wr->buffer_pos++] = wr->previous_byte = byte; ++ if (wr->flush && wr->buffer_pos == wr->header->dict_size) { ++ wr->buffer_pos = 0; ++ wr->global_pos += wr->header->dict_size; ++ wr->flush((char *)wr->buffer, wr->header->dict_size); ++ } ++} ++ ++ ++static inline void INIT copy_byte(struct writer *wr, uint32_t offs) ++{ ++ write_byte(wr, peek_old_byte(wr, offs)); ++} ++ ++static inline void INIT copy_bytes(struct writer *wr, ++ uint32_t rep0, int len) ++{ ++ do { ++ copy_byte(wr, rep0); ++ len--; ++ } while (len != 0 && wr->buffer_pos < wr->header->dst_size); ++} ++ ++static inline void INIT process_bit0(struct writer *wr, struct rc *rc, ++ struct cstate *cst, uint16_t *p, ++ int pos_state, uint16_t *prob, ++ int lc, uint32_t literal_pos_mask) { ++ int mi = 1; ++ rc_update_bit_0(rc, prob); ++ prob = (p + LZMA_LITERAL + ++ (LZMA_LIT_SIZE ++ * (((get_pos(wr) & literal_pos_mask) << lc) ++ + (wr->previous_byte >> (8 - lc)))) ++ ); ++ ++ if (cst->state >= LZMA_NUM_LIT_STATES) { ++ int match_byte = peek_old_byte(wr, cst->rep0); ++ do { ++ int bit; ++ uint16_t *prob_lit; ++ ++ match_byte <<= 1; ++ bit = match_byte & 0x100; ++ prob_lit = prob + 0x100 + bit + mi; ++ if (rc_get_bit(rc, prob_lit, &mi)) { ++ if (!bit) ++ break; ++ } else { ++ if (bit) ++ break; ++ } ++ } while (mi < 0x100); ++ } ++ while (mi < 0x100) { ++ uint16_t *prob_lit = prob + mi; ++ rc_get_bit(rc, prob_lit, &mi); ++ } ++ write_byte(wr, mi); ++ if (cst->state < 4) ++ cst->state = 0; ++ else if (cst->state < 10) ++ cst->state -= 3; ++ else ++ cst->state -= 6; ++} ++ ++static inline void INIT process_bit1(struct writer *wr, struct rc *rc, ++ struct cstate *cst, uint16_t *p, ++ int pos_state, uint16_t *prob) { ++ int offset; ++ uint16_t *prob_len; ++ int num_bits; ++ int len; ++ ++ rc_update_bit_1(rc, prob); ++ prob = p + LZMA_IS_REP + cst->state; ++ if (rc_is_bit_0(rc, prob)) { ++ rc_update_bit_0(rc, prob); ++ cst->rep3 = cst->rep2; ++ cst->rep2 = cst->rep1; ++ cst->rep1 = cst->rep0; ++ cst->state = cst->state < LZMA_NUM_LIT_STATES ? 0 : 3; ++ prob = p + LZMA_LEN_CODER; ++ } else { ++ rc_update_bit_1(rc, prob); ++ prob = p + LZMA_IS_REP_G0 + cst->state; ++ if (rc_is_bit_0(rc, prob)) { ++ rc_update_bit_0(rc, prob); ++ prob = (p + LZMA_IS_REP_0_LONG ++ + (cst->state << ++ LZMA_NUM_POS_BITS_MAX) + ++ pos_state); ++ if (rc_is_bit_0(rc, prob)) { ++ rc_update_bit_0(rc, prob); ++ ++ cst->state = cst->state < LZMA_NUM_LIT_STATES ? 
++ 9 : 11; ++ copy_byte(wr, cst->rep0); ++ return; ++ } else { ++ rc_update_bit_1(rc, prob); ++ } ++ } else { ++ uint32_t distance; ++ ++ rc_update_bit_1(rc, prob); ++ prob = p + LZMA_IS_REP_G1 + cst->state; ++ if (rc_is_bit_0(rc, prob)) { ++ rc_update_bit_0(rc, prob); ++ distance = cst->rep1; ++ } else { ++ rc_update_bit_1(rc, prob); ++ prob = p + LZMA_IS_REP_G2 + cst->state; ++ if (rc_is_bit_0(rc, prob)) { ++ rc_update_bit_0(rc, prob); ++ distance = cst->rep2; ++ } else { ++ rc_update_bit_1(rc, prob); ++ distance = cst->rep3; ++ cst->rep3 = cst->rep2; ++ } ++ cst->rep2 = cst->rep1; ++ } ++ cst->rep1 = cst->rep0; ++ cst->rep0 = distance; ++ } ++ cst->state = cst->state < LZMA_NUM_LIT_STATES ? 8 : 11; ++ prob = p + LZMA_REP_LEN_CODER; ++ } ++ ++ prob_len = prob + LZMA_LEN_CHOICE; ++ if (rc_is_bit_0(rc, prob_len)) { ++ rc_update_bit_0(rc, prob_len); ++ prob_len = (prob + LZMA_LEN_LOW ++ + (pos_state << ++ LZMA_LEN_NUM_LOW_BITS)); ++ offset = 0; ++ num_bits = LZMA_LEN_NUM_LOW_BITS; ++ } else { ++ rc_update_bit_1(rc, prob_len); ++ prob_len = prob + LZMA_LEN_CHOICE_2; ++ if (rc_is_bit_0(rc, prob_len)) { ++ rc_update_bit_0(rc, prob_len); ++ prob_len = (prob + LZMA_LEN_MID ++ + (pos_state << ++ LZMA_LEN_NUM_MID_BITS)); ++ offset = 1 << LZMA_LEN_NUM_LOW_BITS; ++ num_bits = LZMA_LEN_NUM_MID_BITS; ++ } else { ++ rc_update_bit_1(rc, prob_len); ++ prob_len = prob + LZMA_LEN_HIGH; ++ offset = ((1 << LZMA_LEN_NUM_LOW_BITS) ++ + (1 << LZMA_LEN_NUM_MID_BITS)); ++ num_bits = LZMA_LEN_NUM_HIGH_BITS; ++ } ++ } ++ ++ rc_bit_tree_decode(rc, prob_len, num_bits, &len); ++ len += offset; ++ ++ if (cst->state < 4) { ++ int pos_slot; ++ ++ cst->state += LZMA_NUM_LIT_STATES; ++ prob = ++ p + LZMA_POS_SLOT + ++ ((len < ++ LZMA_NUM_LEN_TO_POS_STATES ? len : ++ LZMA_NUM_LEN_TO_POS_STATES - 1) ++ << LZMA_NUM_POS_SLOT_BITS); ++ rc_bit_tree_decode(rc, prob, ++ LZMA_NUM_POS_SLOT_BITS, ++ &pos_slot); ++ if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { ++ int i, mi; ++ num_bits = (pos_slot >> 1) - 1; ++ cst->rep0 = 2 | (pos_slot & 1); ++ if (pos_slot < LZMA_END_POS_MODEL_INDEX) { ++ cst->rep0 <<= num_bits; ++ prob = p + LZMA_SPEC_POS + ++ cst->rep0 - pos_slot - 1; ++ } else { ++ num_bits -= LZMA_NUM_ALIGN_BITS; ++ while (num_bits--) ++ cst->rep0 = (cst->rep0 << 1) | ++ rc_direct_bit(rc); ++ prob = p + LZMA_ALIGN; ++ cst->rep0 <<= LZMA_NUM_ALIGN_BITS; ++ num_bits = LZMA_NUM_ALIGN_BITS; ++ } ++ i = 1; ++ mi = 1; ++ while (num_bits--) { ++ if (rc_get_bit(rc, prob + mi, &mi)) ++ cst->rep0 |= i; ++ i <<= 1; ++ } ++ } else ++ cst->rep0 = pos_slot; ++ if (++(cst->rep0) == 0) ++ return; ++ } ++ ++ len += LZMA_MATCH_MIN_LEN; ++ ++ copy_bytes(wr, cst->rep0, len); ++} ++ ++ ++ ++STATIC inline int INIT unlzma(unsigned char *buf, int in_len, ++ int(*fill)(void*, unsigned int), ++ int(*flush)(void*, unsigned int), ++ unsigned char *output, ++ int *posp, ++ void(*error_fn)(char *x) ++ ) ++{ ++ struct lzma_header header; ++ int lc, pb, lp; ++ uint32_t pos_state_mask; ++ uint32_t literal_pos_mask; ++ uint16_t *p; ++ int num_probs; ++ struct rc rc; ++ int i, mi; ++ struct writer wr; ++ struct cstate cst; ++ unsigned char *inbuf; ++ int ret = -1; ++ ++ set_error_fn(error_fn); ++ if (!flush) ++ in_len -= 4; /* Uncompressed size hack active in pre-boot ++ environment */ ++ if (buf) ++ inbuf = buf; ++ else ++ inbuf = malloc(LZMA_IOBUF_SIZE); ++ if (!inbuf) { ++ error("Could not allocate input bufer"); ++ goto exit_0; ++ } ++ ++ cst.state = 0; ++ cst.rep0 = cst.rep1 = cst.rep2 = cst.rep3 = 1; ++ ++ wr.header = &header; ++ wr.flush = flush; ++ 
wr.global_pos = 0; ++ wr.previous_byte = 0; ++ wr.buffer_pos = 0; ++ ++ rc_init(&rc, fill, inbuf, in_len); ++ ++ for (i = 0; i < sizeof(header); i++) { ++ if (rc.ptr >= rc.buffer_end) ++ rc_read(&rc); ++ ((unsigned char *)&header)[i] = *rc.ptr++; ++ } ++ ++ if (header.pos >= (9 * 5 * 5)) ++ error("bad header"); ++ ++ mi = 0; ++ lc = header.pos; ++ while (lc >= 9) { ++ mi++; ++ lc -= 9; ++ } ++ pb = 0; ++ lp = mi; ++ while (lp >= 5) { ++ pb++; ++ lp -= 5; ++ } ++ pos_state_mask = (1 << pb) - 1; ++ literal_pos_mask = (1 << lp) - 1; ++ ++ ENDIAN_CONVERT(header.dict_size); ++ ENDIAN_CONVERT(header.dst_size); ++ ++ if (header.dict_size == 0) ++ header.dict_size = 1; ++ ++ if (output) ++ wr.buffer = output; ++ else { ++ wr.bufsize = MIN(header.dst_size, header.dict_size); ++ wr.buffer = large_malloc(wr.bufsize); ++ } ++ if (wr.buffer == NULL) ++ goto exit_1; ++ ++ num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); ++ p = (uint16_t *) large_malloc(num_probs * sizeof(*p)); ++ if (p == 0) ++ goto exit_2; ++ num_probs = LZMA_LITERAL + (LZMA_LIT_SIZE << (lc + lp)); ++ for (i = 0; i < num_probs; i++) ++ p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; ++ ++ rc_init_code(&rc); ++ ++ while (get_pos(&wr) < header.dst_size) { ++ int pos_state = get_pos(&wr) & pos_state_mask; ++ uint16_t *prob = p + LZMA_IS_MATCH + ++ (cst.state << LZMA_NUM_POS_BITS_MAX) + pos_state; ++ if (rc_is_bit_0(&rc, prob)) ++ process_bit0(&wr, &rc, &cst, p, pos_state, prob, ++ lc, literal_pos_mask); ++ else { ++ process_bit1(&wr, &rc, &cst, p, pos_state, prob); ++ if (cst.rep0 == 0) ++ break; ++ } ++ } ++ ++ if (posp) ++ *posp = rc.ptr-rc.buffer; ++ if (wr.flush) ++ wr.flush(wr.buffer, wr.buffer_pos); ++ ret = 0; ++ large_free(p); ++exit_2: ++ if (!output) ++ large_free(wr.buffer); ++exit_1: ++ if (!buf) ++ free(inbuf); ++exit_0: ++ return ret; ++} ++ ++#define decompress unlzma +Index: linux-2.6-tip/lib/dma-debug.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/lib/dma-debug.c +@@ -0,0 +1,955 @@ ++/* ++ * Copyright (C) 2008 Advanced Micro Devices, Inc. ++ * ++ * Author: Joerg Roedel ++ * ++ * This program is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 as published ++ * by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
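/*
 * Illustrative sketch, not part of the patch: the loop above unpacks the
 * LZMA properties byte, which encodes pos = (pb * 5 + lp) * 9 + lc (hence
 * the "bad header" limit of 9 * 5 * 5). The common default byte 0x5d gives
 * lc=3, lp=0, pb=2; the divide/modulo form below is equivalent:
 */
#include <stdio.h>

int main(void)
{
        int pos = 0x5d;                 /* default LZMA properties byte */
        int lc = pos % 9;               /* literal context bits */
        int lp = (pos / 9) % 5;         /* literal position bits */
        int pb = (pos / 9) / 5;         /* position bits */

        printf("lc=%d lp=%d pb=%d\n", lc, lp, pb);      /* lc=3 lp=0 pb=2 */
        return 0;
}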
++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++ ++#define HASH_SIZE 1024ULL ++#define HASH_FN_SHIFT 13 ++#define HASH_FN_MASK (HASH_SIZE - 1) ++ ++enum { ++ dma_debug_single, ++ dma_debug_page, ++ dma_debug_sg, ++ dma_debug_coherent, ++}; ++ ++#define DMA_DEBUG_STACKTRACE_ENTRIES 5 ++ ++struct dma_debug_entry { ++ struct list_head list; ++ struct device *dev; ++ int type; ++ phys_addr_t paddr; ++ u64 dev_addr; ++ u64 size; ++ int direction; ++ int sg_call_ents; ++ int sg_mapped_ents; ++#ifdef CONFIG_STACKTRACE ++ struct stack_trace stacktrace; ++ unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; ++#endif ++}; ++ ++struct hash_bucket { ++ struct list_head list; ++ spinlock_t lock; ++} ____cacheline_aligned_in_smp; ++ ++/* Hash list to save the allocated dma addresses */ ++static struct hash_bucket dma_entry_hash[HASH_SIZE]; ++/* List of pre-allocated dma_debug_entry's */ ++static LIST_HEAD(free_entries); ++/* Lock for the list above */ ++static DEFINE_SPINLOCK(free_entries_lock); ++ ++/* Global disable flag - will be set in case of an error */ ++static bool global_disable __read_mostly; ++ ++/* Global error count */ ++static u32 error_count; ++ ++/* Global error show enable*/ ++static u32 show_all_errors __read_mostly; ++/* Number of errors to show */ ++static u32 show_num_errors = 1; ++ ++static u32 num_free_entries; ++static u32 min_free_entries; ++ ++/* number of preallocated entries requested by kernel cmdline */ ++static u32 req_entries; ++ ++/* debugfs dentry's for the stuff above */ ++static struct dentry *dma_debug_dent __read_mostly; ++static struct dentry *global_disable_dent __read_mostly; ++static struct dentry *error_count_dent __read_mostly; ++static struct dentry *show_all_errors_dent __read_mostly; ++static struct dentry *show_num_errors_dent __read_mostly; ++static struct dentry *num_free_entries_dent __read_mostly; ++static struct dentry *min_free_entries_dent __read_mostly; ++ ++static const char *type2name[4] = { "single", "page", ++ "scather-gather", "coherent" }; ++ ++static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", ++ "DMA_FROM_DEVICE", "DMA_NONE" }; ++ ++/* ++ * The access to some variables in this macro is racy. We can't use atomic_t ++ * here because all these variables are exported to debugfs. Some of them even ++ * writeable. This is also the reason why a lock won't help much. But anyway, ++ * the races are no big deal. Here is why: ++ * ++ * error_count: the addition is racy, but the worst thing that can happen is ++ * that we don't count some errors ++ * show_num_errors: the subtraction is racy. Also no big deal because in ++ * worst case this will result in one warning more in the ++ * system log than the user configured. This variable is ++ * writeable via debugfs. ++ */ ++static inline void dump_entry_trace(struct dma_debug_entry *entry) ++{ ++#ifdef CONFIG_STACKTRACE ++ if (entry) { ++ printk(KERN_WARNING "Mapped at:\n"); ++ print_stack_trace(&entry->stacktrace, 0); ++ } ++#endif ++} ++ ++#define err_printk(dev, entry, format, arg...) 
do { \ ++ error_count += 1; \ ++ if (show_all_errors || show_num_errors > 0) { \ ++ WARN(1, "%s %s: " format, \ ++ dev_driver_string(dev), \ ++ dev_name(dev) , ## arg); \ ++ dump_entry_trace(entry); \ ++ } \ ++ if (!show_all_errors && show_num_errors > 0) \ ++ show_num_errors -= 1; \ ++ } while (0); ++ ++/* ++ * Hash related functions ++ * ++ * Every DMA-API request is saved into a struct dma_debug_entry. To ++ * have quick access to these structs they are stored into a hash. ++ */ ++static int hash_fn(struct dma_debug_entry *entry) ++{ ++ /* ++ * Hash function is based on the dma address. ++ * We use bits 20-27 here as the index into the hash ++ */ ++ return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK; ++} ++ ++/* ++ * Request exclusive access to a hash bucket for a given dma_debug_entry. ++ */ ++static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry, ++ unsigned long *flags) ++{ ++ int idx = hash_fn(entry); ++ unsigned long __flags; ++ ++ spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags); ++ *flags = __flags; ++ return &dma_entry_hash[idx]; ++} ++ ++/* ++ * Give up exclusive access to the hash bucket ++ */ ++static void put_hash_bucket(struct hash_bucket *bucket, ++ unsigned long *flags) ++{ ++ unsigned long __flags = *flags; ++ ++ spin_unlock_irqrestore(&bucket->lock, __flags); ++} ++ ++/* ++ * Search a given entry in the hash bucket list ++ */ ++static struct dma_debug_entry *hash_bucket_find(struct hash_bucket *bucket, ++ struct dma_debug_entry *ref) ++{ ++ struct dma_debug_entry *entry; ++ ++ list_for_each_entry(entry, &bucket->list, list) { ++ if ((entry->dev_addr == ref->dev_addr) && ++ (entry->dev == ref->dev)) ++ return entry; ++ } ++ ++ return NULL; ++} ++ ++/* ++ * Add an entry to a hash bucket ++ */ ++static void hash_bucket_add(struct hash_bucket *bucket, ++ struct dma_debug_entry *entry) ++{ ++ list_add_tail(&entry->list, &bucket->list); ++} ++ ++/* ++ * Remove entry from a hash bucket list ++ */ ++static void hash_bucket_del(struct dma_debug_entry *entry) ++{ ++ list_del(&entry->list); ++} ++ ++/* ++ * Dump mapping entries for debugging purposes ++ */ ++void debug_dma_dump_mappings(struct device *dev) ++{ ++ int idx; ++ ++ for (idx = 0; idx < HASH_SIZE; idx++) { ++ struct hash_bucket *bucket = &dma_entry_hash[idx]; ++ struct dma_debug_entry *entry; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&bucket->lock, flags); ++ ++ list_for_each_entry(entry, &bucket->list, list) { ++ if (!dev || dev == entry->dev) { ++ dev_info(entry->dev, ++ "%s idx %d P=%Lx D=%Lx L=%Lx %s\n", ++ type2name[entry->type], idx, ++ (unsigned long long)entry->paddr, ++ entry->dev_addr, entry->size, ++ dir2name[entry->direction]); ++ } ++ } ++ ++ spin_unlock_irqrestore(&bucket->lock, flags); ++ } ++} ++EXPORT_SYMBOL(debug_dma_dump_mappings); ++ ++/* ++ * Wrapper function for adding an entry to the hash. ++ * This function takes care of locking itself. ++ */ ++static void add_dma_entry(struct dma_debug_entry *entry) ++{ ++ struct hash_bucket *bucket; ++ unsigned long flags; ++ ++ bucket = get_hash_bucket(entry, &flags); ++ hash_bucket_add(bucket, entry); ++ put_hash_bucket(bucket, &flags); ++} ++ ++/* struct dma_entry allocator ++ * ++ * The next two functions implement the allocator for ++ * struct dma_debug_entries. 
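/*
 * Illustrative sketch, not part of the patch: with HASH_SIZE == 1024 and
 * HASH_FN_SHIFT == 13, hash_fn() above picks the bucket from bits 13-22 of
 * the DMA address (the "bits 20-27" note in its comment does not match the
 * current constants), so addresses within the same 8 KiB window land in the
 * same bucket:
 */
#include <stdint.h>
#include <stdio.h>

#define HASH_SIZE     1024ULL
#define HASH_FN_SHIFT 13
#define HASH_FN_MASK  (HASH_SIZE - 1)

static int bucket_index(uint64_t dev_addr)
{
        return (dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK;
}

int main(void)
{
        /* both addresses lie in the same 8 KiB window -> same bucket */
        printf("%d %d\n", bucket_index(0x12340000ULL),
               bucket_index(0x12341fffULL));
        return 0;
}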
++ */ ++static struct dma_debug_entry *dma_entry_alloc(void) ++{ ++ struct dma_debug_entry *entry = NULL; ++ unsigned long flags; ++ ++ spin_lock_irqsave(&free_entries_lock, flags); ++ ++ if (list_empty(&free_entries)) { ++ printk(KERN_ERR "DMA-API: debugging out of memory " ++ "- disabling\n"); ++ global_disable = true; ++ goto out; ++ } ++ ++ entry = list_entry(free_entries.next, struct dma_debug_entry, list); ++ list_del(&entry->list); ++ memset(entry, 0, sizeof(*entry)); ++ ++#ifdef CONFIG_STACKTRACE ++ entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; ++ entry->stacktrace.entries = entry->st_entries; ++ entry->stacktrace.skip = 2; ++ save_stack_trace(&entry->stacktrace); ++#endif ++ num_free_entries -= 1; ++ if (num_free_entries < min_free_entries) ++ min_free_entries = num_free_entries; ++ ++out: ++ spin_unlock_irqrestore(&free_entries_lock, flags); ++ ++ return entry; ++} ++ ++static void dma_entry_free(struct dma_debug_entry *entry) ++{ ++ unsigned long flags; ++ ++ /* ++ * add to beginning of the list - this way the entries are ++ * more likely cache hot when they are reallocated. ++ */ ++ spin_lock_irqsave(&free_entries_lock, flags); ++ list_add(&entry->list, &free_entries); ++ num_free_entries += 1; ++ spin_unlock_irqrestore(&free_entries_lock, flags); ++} ++ ++/* ++ * DMA-API debugging init code ++ * ++ * The init code does two things: ++ * 1. Initialize core data structures ++ * 2. Preallocate a given number of dma_debug_entry structs ++ */ ++ ++static int prealloc_memory(u32 num_entries) ++{ ++ struct dma_debug_entry *entry, *next_entry; ++ int i; ++ ++ for (i = 0; i < num_entries; ++i) { ++ entry = kzalloc(sizeof(*entry), GFP_KERNEL); ++ if (!entry) ++ goto out_err; ++ ++ list_add_tail(&entry->list, &free_entries); ++ } ++ ++ num_free_entries = num_entries; ++ min_free_entries = num_entries; ++ ++ printk(KERN_INFO "DMA-API: preallocated %d debug entries\n", ++ num_entries); ++ ++ return 0; ++ ++out_err: ++ ++ list_for_each_entry_safe(entry, next_entry, &free_entries, list) { ++ list_del(&entry->list); ++ kfree(entry); ++ } ++ ++ return -ENOMEM; ++} ++ ++static int dma_debug_fs_init(void) ++{ ++ dma_debug_dent = debugfs_create_dir("dma-api", NULL); ++ if (!dma_debug_dent) { ++ printk(KERN_ERR "DMA-API: can not create debugfs directory\n"); ++ return -ENOMEM; ++ } ++ ++ global_disable_dent = debugfs_create_bool("disabled", 0444, ++ dma_debug_dent, ++ (u32 *)&global_disable); ++ if (!global_disable_dent) ++ goto out_err; ++ ++ error_count_dent = debugfs_create_u32("error_count", 0444, ++ dma_debug_dent, &error_count); ++ if (!error_count_dent) ++ goto out_err; ++ ++ show_all_errors_dent = debugfs_create_u32("all_errors", 0644, ++ dma_debug_dent, ++ &show_all_errors); ++ if (!show_all_errors_dent) ++ goto out_err; ++ ++ show_num_errors_dent = debugfs_create_u32("num_errors", 0644, ++ dma_debug_dent, ++ &show_num_errors); ++ if (!show_num_errors_dent) ++ goto out_err; ++ ++ num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444, ++ dma_debug_dent, ++ &num_free_entries); ++ if (!num_free_entries_dent) ++ goto out_err; ++ ++ min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444, ++ dma_debug_dent, ++ &min_free_entries); ++ if (!min_free_entries_dent) ++ goto out_err; ++ ++ return 0; ++ ++out_err: ++ debugfs_remove_recursive(dma_debug_dent); ++ ++ return -ENOMEM; ++} ++ ++static int device_dma_allocations(struct device *dev) ++{ ++ struct dma_debug_entry *entry; ++ unsigned long flags; ++ int count = 0, i; ++ ++ for (i = 0; i < HASH_SIZE; ++i) 
{ ++ spin_lock_irqsave(&dma_entry_hash[i].lock, flags); ++ list_for_each_entry(entry, &dma_entry_hash[i].list, list) { ++ if (entry->dev == dev) ++ count += 1; ++ } ++ spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags); ++ } ++ ++ return count; ++} ++ ++static int dma_debug_device_change(struct notifier_block *nb, ++ unsigned long action, void *data) ++{ ++ struct device *dev = data; ++ int count; ++ ++ ++ switch (action) { ++ case BUS_NOTIFY_UNBIND_DRIVER: ++ count = device_dma_allocations(dev); ++ if (count == 0) ++ break; ++ err_printk(dev, NULL, "DMA-API: device driver has pending " ++ "DMA allocations while released from device " ++ "[count=%d]\n", count); ++ break; ++ default: ++ break; ++ } ++ ++ return 0; ++} ++ ++void dma_debug_add_bus(struct bus_type *bus) ++{ ++ struct notifier_block *nb; ++ ++ nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); ++ if (nb == NULL) { ++ printk(KERN_ERR "dma_debug_add_bus: out of memory\n"); ++ return; ++ } ++ ++ nb->notifier_call = dma_debug_device_change; ++ ++ bus_register_notifier(bus, nb); ++} ++ ++/* ++ * Let the architectures decide how many entries should be preallocated. ++ */ ++void dma_debug_init(u32 num_entries) ++{ ++ int i; ++ ++ if (global_disable) ++ return; ++ ++ for (i = 0; i < HASH_SIZE; ++i) { ++ INIT_LIST_HEAD(&dma_entry_hash[i].list); ++ dma_entry_hash[i].lock = SPIN_LOCK_UNLOCKED; ++ } ++ ++ if (dma_debug_fs_init() != 0) { ++ printk(KERN_ERR "DMA-API: error creating debugfs entries " ++ "- disabling\n"); ++ global_disable = true; ++ ++ return; ++ } ++ ++ if (req_entries) ++ num_entries = req_entries; ++ ++ if (prealloc_memory(num_entries) != 0) { ++ printk(KERN_ERR "DMA-API: debugging out of memory error " ++ "- disabled\n"); ++ global_disable = true; ++ ++ return; ++ } ++ ++ printk(KERN_INFO "DMA-API: debugging enabled by kernel config\n"); ++} ++ ++static __init int dma_debug_cmdline(char *str) ++{ ++ if (!str) ++ return -EINVAL; ++ ++ if (strncmp(str, "off", 3) == 0) { ++ printk(KERN_INFO "DMA-API: debugging disabled on kernel " ++ "command line\n"); ++ global_disable = true; ++ } ++ ++ return 0; ++} ++ ++static __init int dma_debug_entries_cmdline(char *str) ++{ ++ int res; ++ ++ if (!str) ++ return -EINVAL; ++ ++ res = get_option(&str, &req_entries); ++ ++ if (!res) ++ req_entries = 0; ++ ++ return 0; ++} ++ ++__setup("dma_debug=", dma_debug_cmdline); ++__setup("dma_debug_entries=", dma_debug_entries_cmdline); ++ ++static void check_unmap(struct dma_debug_entry *ref) ++{ ++ struct dma_debug_entry *entry; ++ struct hash_bucket *bucket; ++ unsigned long flags; ++ ++ if (dma_mapping_error(ref->dev, ref->dev_addr)) { ++ err_printk(ref->dev, NULL, "DMA-API: device driver tries " ++ "to free an invalid DMA memory address\n"); ++ return; ++ } ++ ++ bucket = get_hash_bucket(ref, &flags); ++ entry = hash_bucket_find(bucket, ref); ++ ++ if (!entry) { ++ err_printk(ref->dev, NULL, "DMA-API: device driver tries " ++ "to free DMA memory it has not allocated " ++ "[device address=0x%016llx] [size=%llu bytes]\n", ++ ref->dev_addr, ref->size); ++ goto out; ++ } ++ ++ if (ref->size != entry->size) { ++ err_printk(ref->dev, entry, "DMA-API: device driver frees " ++ "DMA memory with different size " ++ "[device address=0x%016llx] [map size=%llu bytes] " ++ "[unmap size=%llu bytes]\n", ++ ref->dev_addr, entry->size, ref->size); ++ } ++ ++ if (ref->type != entry->type) { ++ err_printk(ref->dev, entry, "DMA-API: device driver frees " ++ "DMA memory with wrong function " ++ "[device address=0x%016llx] [size=%llu bytes] " ++ "[mapped 
as %s] [unmapped as %s]\n", ++ ref->dev_addr, ref->size, ++ type2name[entry->type], type2name[ref->type]); ++ } else if ((entry->type == dma_debug_coherent) && ++ (ref->paddr != entry->paddr)) { ++ err_printk(ref->dev, entry, "DMA-API: device driver frees " ++ "DMA memory with different CPU address " ++ "[device address=0x%016llx] [size=%llu bytes] " ++ "[cpu alloc address=%p] [cpu free address=%p]", ++ ref->dev_addr, ref->size, ++ (void *)entry->paddr, (void *)ref->paddr); ++ } ++ ++ if (ref->sg_call_ents && ref->type == dma_debug_sg && ++ ref->sg_call_ents != entry->sg_call_ents) { ++ err_printk(ref->dev, entry, "DMA-API: device driver frees " ++ "DMA sg list with different entry count " ++ "[map count=%d] [unmap count=%d]\n", ++ entry->sg_call_ents, ref->sg_call_ents); ++ } ++ ++ /* ++ * This may be no bug in reality - but most implementations of the ++ * DMA API don't handle this properly, so check for it here ++ */ ++ if (ref->direction != entry->direction) { ++ err_printk(ref->dev, entry, "DMA-API: device driver frees " ++ "DMA memory with different direction " ++ "[device address=0x%016llx] [size=%llu bytes] " ++ "[mapped with %s] [unmapped with %s]\n", ++ ref->dev_addr, ref->size, ++ dir2name[entry->direction], ++ dir2name[ref->direction]); ++ } ++ ++ hash_bucket_del(entry); ++ dma_entry_free(entry); ++ ++out: ++ put_hash_bucket(bucket, &flags); ++} ++ ++static void check_for_stack(struct device *dev, void *addr) ++{ ++ if (object_is_on_stack(addr)) ++ err_printk(dev, NULL, "DMA-API: device driver maps memory from" ++ "stack [addr=%p]\n", addr); ++} ++ ++static inline bool overlap(void *addr, u64 size, void *start, void *end) ++{ ++ void *addr2 = (char *)addr + size; ++ ++ return ((addr >= start && addr < end) || ++ (addr2 >= start && addr2 < end) || ++ ((addr < start) && (addr2 >= end))); ++} ++ ++static void check_for_illegal_area(struct device *dev, void *addr, u64 size) ++{ ++ if (overlap(addr, size, _text, _etext) || ++ overlap(addr, size, __start_rodata, __end_rodata)) ++ err_printk(dev, NULL, "DMA-API: device driver maps " ++ "memory from kernel text or rodata " ++ "[addr=%p] [size=%llu]\n", addr, size); ++} ++ ++static void check_sync(struct device *dev, dma_addr_t addr, ++ u64 size, u64 offset, int direction, bool to_cpu) ++{ ++ struct dma_debug_entry ref = { ++ .dev = dev, ++ .dev_addr = addr, ++ .size = size, ++ .direction = direction, ++ }; ++ struct dma_debug_entry *entry; ++ struct hash_bucket *bucket; ++ unsigned long flags; ++ ++ bucket = get_hash_bucket(&ref, &flags); ++ ++ entry = hash_bucket_find(bucket, &ref); ++ ++ if (!entry) { ++ err_printk(dev, NULL, "DMA-API: device driver tries " ++ "to sync DMA memory it has not allocated " ++ "[device address=0x%016llx] [size=%llu bytes]\n", ++ addr, size); ++ goto out; ++ } ++ ++ if ((offset + size) > entry->size) { ++ err_printk(dev, entry, "DMA-API: device driver syncs" ++ " DMA memory outside allocated range " ++ "[device address=0x%016llx] " ++ "[allocation size=%llu bytes] [sync offset=%llu] " ++ "[sync size=%llu]\n", entry->dev_addr, entry->size, ++ offset, size); ++ } ++ ++ if (direction != entry->direction) { ++ err_printk(dev, entry, "DMA-API: device driver syncs " ++ "DMA memory with different direction " ++ "[device address=0x%016llx] [size=%llu bytes] " ++ "[mapped with %s] [synced with %s]\n", ++ addr, entry->size, ++ dir2name[entry->direction], ++ dir2name[direction]); ++ } ++ ++ if (entry->direction == DMA_BIDIRECTIONAL) ++ goto out; ++ ++ if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) && ++ 
!(direction == DMA_TO_DEVICE)) ++ err_printk(dev, entry, "DMA-API: device driver syncs " ++ "device read-only DMA memory for cpu " ++ "[device address=0x%016llx] [size=%llu bytes] " ++ "[mapped with %s] [synced with %s]\n", ++ addr, entry->size, ++ dir2name[entry->direction], ++ dir2name[direction]); ++ ++ if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) && ++ !(direction == DMA_FROM_DEVICE)) ++ err_printk(dev, entry, "DMA-API: device driver syncs " ++ "device write-only DMA memory to device " ++ "[device address=0x%016llx] [size=%llu bytes] " ++ "[mapped with %s] [synced with %s]\n", ++ addr, entry->size, ++ dir2name[entry->direction], ++ dir2name[direction]); ++ ++out: ++ put_hash_bucket(bucket, &flags); ++ ++} ++ ++void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, ++ size_t size, int direction, dma_addr_t dma_addr, ++ bool map_single) ++{ ++ struct dma_debug_entry *entry; ++ ++ if (unlikely(global_disable)) ++ return; ++ ++ if (unlikely(dma_mapping_error(dev, dma_addr))) ++ return; ++ ++ entry = dma_entry_alloc(); ++ if (!entry) ++ return; ++ ++ entry->dev = dev; ++ entry->type = dma_debug_page; ++ entry->paddr = page_to_phys(page) + offset; ++ entry->dev_addr = dma_addr; ++ entry->size = size; ++ entry->direction = direction; ++ ++ if (map_single) ++ entry->type = dma_debug_single; ++ ++ if (!PageHighMem(page)) { ++ void *addr = ((char *)page_address(page)) + offset; ++ check_for_stack(dev, addr); ++ check_for_illegal_area(dev, addr, size); ++ } ++ ++ add_dma_entry(entry); ++} ++EXPORT_SYMBOL(debug_dma_map_page); ++ ++void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, ++ size_t size, int direction, bool map_single) ++{ ++ struct dma_debug_entry ref = { ++ .type = dma_debug_page, ++ .dev = dev, ++ .dev_addr = addr, ++ .size = size, ++ .direction = direction, ++ }; ++ ++ if (unlikely(global_disable)) ++ return; ++ ++ if (map_single) ++ ref.type = dma_debug_single; ++ ++ check_unmap(&ref); ++} ++EXPORT_SYMBOL(debug_dma_unmap_page); ++ ++void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, ++ int nents, int mapped_ents, int direction) ++{ ++ struct dma_debug_entry *entry; ++ struct scatterlist *s; ++ int i; ++ ++ if (unlikely(global_disable)) ++ return; ++ ++ for_each_sg(sg, s, mapped_ents, i) { ++ entry = dma_entry_alloc(); ++ if (!entry) ++ return; ++ ++ entry->type = dma_debug_sg; ++ entry->dev = dev; ++ entry->paddr = sg_phys(s); ++ entry->size = s->length; ++ entry->dev_addr = s->dma_address; ++ entry->direction = direction; ++ entry->sg_call_ents = nents; ++ entry->sg_mapped_ents = mapped_ents; ++ ++ if (!PageHighMem(sg_page(s))) { ++ check_for_stack(dev, sg_virt(s)); ++ check_for_illegal_area(dev, sg_virt(s), s->length); ++ } ++ ++ add_dma_entry(entry); ++ } ++} ++EXPORT_SYMBOL(debug_dma_map_sg); ++ ++void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, ++ int nelems, int dir) ++{ ++ struct dma_debug_entry *entry; ++ struct scatterlist *s; ++ int mapped_ents = 0, i; ++ unsigned long flags; ++ ++ if (unlikely(global_disable)) ++ return; ++ ++ for_each_sg(sglist, s, nelems, i) { ++ ++ struct dma_debug_entry ref = { ++ .type = dma_debug_sg, ++ .dev = dev, ++ .paddr = sg_phys(s), ++ .dev_addr = s->dma_address, ++ .size = s->length, ++ .direction = dir, ++ .sg_call_ents = 0, ++ }; ++ ++ if (mapped_ents && i >= mapped_ents) ++ break; ++ ++ if (mapped_ents == 0) { ++ struct hash_bucket *bucket; ++ ref.sg_call_ents = nelems; ++ bucket = get_hash_bucket(&ref, &flags); ++ entry = hash_bucket_find(bucket, &ref); ++ if 
(entry) ++ mapped_ents = entry->sg_mapped_ents; ++ put_hash_bucket(bucket, &flags); ++ } ++ ++ check_unmap(&ref); ++ } ++} ++EXPORT_SYMBOL(debug_dma_unmap_sg); ++ ++void debug_dma_alloc_coherent(struct device *dev, size_t size, ++ dma_addr_t dma_addr, void *virt) ++{ ++ struct dma_debug_entry *entry; ++ ++ if (unlikely(global_disable)) ++ return; ++ ++ if (unlikely(virt == NULL)) ++ return; ++ ++ entry = dma_entry_alloc(); ++ if (!entry) ++ return; ++ ++ entry->type = dma_debug_coherent; ++ entry->dev = dev; ++ entry->paddr = virt_to_phys(virt); ++ entry->size = size; ++ entry->dev_addr = dma_addr; ++ entry->direction = DMA_BIDIRECTIONAL; ++ ++ add_dma_entry(entry); ++} ++EXPORT_SYMBOL(debug_dma_alloc_coherent); ++ ++void debug_dma_free_coherent(struct device *dev, size_t size, ++ void *virt, dma_addr_t addr) ++{ ++ struct dma_debug_entry ref = { ++ .type = dma_debug_coherent, ++ .dev = dev, ++ .paddr = virt_to_phys(virt), ++ .dev_addr = addr, ++ .size = size, ++ .direction = DMA_BIDIRECTIONAL, ++ }; ++ ++ if (unlikely(global_disable)) ++ return; ++ ++ check_unmap(&ref); ++} ++EXPORT_SYMBOL(debug_dma_free_coherent); ++ ++void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, ++ size_t size, int direction) ++{ ++ if (unlikely(global_disable)) ++ return; ++ ++ check_sync(dev, dma_handle, size, 0, direction, true); ++} ++EXPORT_SYMBOL(debug_dma_sync_single_for_cpu); ++ ++void debug_dma_sync_single_for_device(struct device *dev, ++ dma_addr_t dma_handle, size_t size, ++ int direction) ++{ ++ if (unlikely(global_disable)) ++ return; ++ ++ check_sync(dev, dma_handle, size, 0, direction, false); ++} ++EXPORT_SYMBOL(debug_dma_sync_single_for_device); ++ ++void debug_dma_sync_single_range_for_cpu(struct device *dev, ++ dma_addr_t dma_handle, ++ unsigned long offset, size_t size, ++ int direction) ++{ ++ if (unlikely(global_disable)) ++ return; ++ ++ check_sync(dev, dma_handle, size, offset, direction, true); ++} ++EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu); ++ ++void debug_dma_sync_single_range_for_device(struct device *dev, ++ dma_addr_t dma_handle, ++ unsigned long offset, ++ size_t size, int direction) ++{ ++ if (unlikely(global_disable)) ++ return; ++ ++ check_sync(dev, dma_handle, size, offset, direction, false); ++} ++EXPORT_SYMBOL(debug_dma_sync_single_range_for_device); ++ ++void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, ++ int nelems, int direction) ++{ ++ struct scatterlist *s; ++ int i; ++ ++ if (unlikely(global_disable)) ++ return; ++ ++ for_each_sg(sg, s, nelems, i) { ++ check_sync(dev, s->dma_address, s->dma_length, 0, ++ direction, true); ++ } ++} ++EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu); ++ ++void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, ++ int nelems, int direction) ++{ ++ struct scatterlist *s; ++ int i; ++ ++ if (unlikely(global_disable)) ++ return; ++ ++ for_each_sg(sg, s, nelems, i) { ++ check_sync(dev, s->dma_address, s->dma_length, 0, ++ direction, false); ++ } ++} ++EXPORT_SYMBOL(debug_dma_sync_sg_for_device); ++ +Index: linux-2.6-tip/lib/kernel_lock.c +=================================================================== +--- linux-2.6-tip.orig/lib/kernel_lock.c ++++ linux-2.6-tip/lib/kernel_lock.c +@@ -11,121 +11,90 @@ + #include + + /* +- * The 'big kernel lock' ++ * The 'big kernel semaphore' + * +- * This spinlock is taken and released recursively by lock_kernel() ++ * This mutex is taken and released recursively by lock_kernel() + * and unlock_kernel(). 
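/*
 * Illustrative sketch, not part of the patch: the lock_kernel() /
 * unlock_kernel() rework in this hunk makes recursion cheap by keeping a
 * per-task lock_depth and only taking the underlying semaphore on the
 * outermost acquisition. The same idea in userspace, with a pthread mutex
 * standing in for kernel_sem and a thread-local counter for lock_depth:
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
static __thread int lock_depth = -1;    /* -1 == not held by this thread */

static void sketch_lock_kernel(void)
{
        if (++lock_depth == 0)          /* outermost acquisition only */
                pthread_mutex_lock(&big_lock);
}

static void sketch_unlock_kernel(void)
{
        if (--lock_depth == -1)         /* outermost release only */
                pthread_mutex_unlock(&big_lock);
}

int main(void)
{
        sketch_lock_kernel();
        sketch_lock_kernel();           /* recursive call does not deadlock */
        printf("nested depth: %d\n", lock_depth);       /* prints 1 */
        sketch_unlock_kernel();
        sketch_unlock_kernel();
        return 0;
}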
It is transparently dropped and reacquired + * over schedule(). It is used to protect legacy code that hasn't + * been migrated to a proper locking design yet. + * ++ * Note: code locked by this semaphore will only be serialized against ++ * other code using the same locking facility. The code guarantees that ++ * the task remains on the same CPU. ++ * + * Don't use in new code. + */ +-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kernel_flag); +- ++DECLARE_MUTEX(kernel_sem); + + /* +- * Acquire/release the underlying lock from the scheduler. ++ * Re-acquire the kernel semaphore. + * +- * This is called with preemption disabled, and should +- * return an error value if it cannot get the lock and +- * TIF_NEED_RESCHED gets set. ++ * This function is called with preemption off. + * +- * If it successfully gets the lock, it should increment +- * the preemption count like any spinlock does. ++ * We are executing in schedule() so the code must be extremely careful ++ * about recursion, both due to the down() and due to the enabling of ++ * preemption. schedule() will re-check the preemption flag after ++ * reacquiring the semaphore. + * +- * (This works on UP too - _raw_spin_trylock will never +- * return false in that case) ++ * Called with interrupts disabled. + */ + int __lockfunc __reacquire_kernel_lock(void) + { +- while (!_raw_spin_trylock(&kernel_flag)) { +- if (test_thread_flag(TIF_NEED_RESCHED)) +- return -EAGAIN; +- cpu_relax(); +- } +- preempt_disable(); ++ struct task_struct *task = current; ++ int saved_lock_depth = task->lock_depth; ++ ++ local_irq_enable(); ++ BUG_ON(saved_lock_depth < 0); ++ ++ task->lock_depth = -1; ++ ++ down(&kernel_sem); ++ ++ task->lock_depth = saved_lock_depth; ++ ++ local_irq_disable(); ++ + return 0; + } + + void __lockfunc __release_kernel_lock(void) + { +- _raw_spin_unlock(&kernel_flag); +- preempt_enable_no_resched(); ++ up(&kernel_sem); + } + + /* +- * These are the BKL spinlocks - we try to be polite about preemption. +- * If SMP is not on (ie UP preemption), this all goes away because the +- * _raw_spin_trylock() will always succeed. ++ * Getting the big kernel semaphore. + */ +-#ifdef CONFIG_PREEMPT +-static inline void __lock_kernel(void) ++void __lockfunc lock_kernel(void) + { +- preempt_disable(); +- if (unlikely(!_raw_spin_trylock(&kernel_flag))) { +- /* +- * If preemption was disabled even before this +- * was called, there's nothing we can be polite +- * about - just spin. +- */ +- if (preempt_count() > 1) { +- _raw_spin_lock(&kernel_flag); +- return; +- } ++ struct task_struct *task = current; ++ int depth = task->lock_depth + 1; + ++ if (likely(!depth)) { + /* +- * Otherwise, let's wait for the kernel lock +- * with preemption enabled.. 
++ * No recursion worries - we set up lock_depth _after_ + */ +- do { +- preempt_enable(); +- while (spin_is_locked(&kernel_flag)) +- cpu_relax(); +- preempt_disable(); +- } while (!_raw_spin_trylock(&kernel_flag)); ++ down(&kernel_sem); ++#ifdef CONFIG_DEBUG_RT_MUTEXES ++ current->last_kernel_lock = __builtin_return_address(0); ++#endif + } +-} + +-#else +- +-/* +- * Non-preemption case - just get the spinlock +- */ +-static inline void __lock_kernel(void) +-{ +- _raw_spin_lock(&kernel_flag); ++ task->lock_depth = depth; + } +-#endif + +-static inline void __unlock_kernel(void) ++void __lockfunc unlock_kernel(void) + { +- /* +- * the BKL is not covered by lockdep, so we open-code the +- * unlocking sequence (and thus avoid the dep-chain ops): +- */ +- _raw_spin_unlock(&kernel_flag); +- preempt_enable(); +-} ++ struct task_struct *task = current; + +-/* +- * Getting the big kernel lock. +- * +- * This cannot happen asynchronously, so we only need to +- * worry about other CPU's. +- */ +-void __lockfunc lock_kernel(void) +-{ +- int depth = current->lock_depth+1; +- if (likely(!depth)) +- __lock_kernel(); +- current->lock_depth = depth; +-} ++ BUG_ON(task->lock_depth < 0); + +-void __lockfunc unlock_kernel(void) +-{ +- BUG_ON(current->lock_depth < 0); +- if (likely(--current->lock_depth < 0)) +- __unlock_kernel(); ++ if (likely(--task->lock_depth == -1)) { ++#ifdef CONFIG_DEBUG_RT_MUTEXES ++ current->last_kernel_lock = NULL; ++#endif ++ up(&kernel_sem); ++ } + } + + EXPORT_SYMBOL(lock_kernel); +Index: linux-2.6-tip/lib/locking-selftest.c +=================================================================== +--- linux-2.6-tip.orig/lib/locking-selftest.c ++++ linux-2.6-tip/lib/locking-selftest.c +@@ -157,11 +157,11 @@ static void init_shared_classes(void) + #define SOFTIRQ_ENTER() \ + local_bh_disable(); \ + local_irq_disable(); \ +- trace_softirq_enter(); \ +- WARN_ON(!in_softirq()); ++ lockdep_softirq_enter(); \ ++ /* FIXME: preemptible softirqs. 
WARN_ON(!in_softirq()); */ + + #define SOFTIRQ_EXIT() \ +- trace_softirq_exit(); \ ++ lockdep_softirq_exit(); \ + local_irq_enable(); \ + local_bh_enable(); + +@@ -550,6 +550,11 @@ GENERATE_TESTCASE(init_held_rsem) + #undef E + + /* ++ * FIXME: turns these into raw-spinlock tests on -rt ++ */ ++#ifndef CONFIG_PREEMPT_RT ++ ++/* + * locking an irq-safe lock with irqs enabled: + */ + #define E1() \ +@@ -890,6 +895,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_ + #include "locking-selftest-softirq.h" + // GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft) + ++#endif /* !CONFIG_PREEMPT_RT */ ++ + #ifdef CONFIG_DEBUG_LOCK_ALLOC + # define I_SPINLOCK(x) lockdep_reset_lock(&lock_##x.dep_map) + # define I_RWLOCK(x) lockdep_reset_lock(&rwlock_##x.dep_map) +@@ -940,6 +947,9 @@ static void dotest(void (*testcase_fn)(v + { + unsigned long saved_preempt_count = preempt_count(); + int expected_failure = 0; ++#if defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_DEBUG_RT_MUTEXES) ++ long saved_lock_count = atomic_read(¤t->lock_count); ++#endif + + WARN_ON(irqs_disabled()); + +@@ -989,6 +999,9 @@ static void dotest(void (*testcase_fn)(v + #endif + + reset_locks(); ++#if defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_DEBUG_RT_MUTEXES) ++ atomic_set(¤t->lock_count, saved_lock_count); ++#endif + } + + static inline void print_testname(const char *testname) +@@ -998,7 +1011,7 @@ static inline void print_testname(const + + #define DO_TESTCASE_1(desc, name, nr) \ + print_testname(desc"/"#nr); \ +- dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ ++ dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ + printk("\n"); + + #define DO_TESTCASE_1B(desc, name, nr) \ +@@ -1006,17 +1019,17 @@ static inline void print_testname(const + dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK); \ + printk("\n"); + +-#define DO_TESTCASE_3(desc, name, nr) \ +- print_testname(desc"/"#nr); \ +- dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN); \ +- dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ ++#define DO_TESTCASE_3(desc, name, nr) \ ++ print_testname(desc"/"#nr); \ ++ dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN); \ ++ dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ + dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ + printk("\n"); + +-#define DO_TESTCASE_3RW(desc, name, nr) \ +- print_testname(desc"/"#nr); \ ++#define DO_TESTCASE_3RW(desc, name, nr) \ ++ print_testname(desc"/"#nr); \ + dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN|LOCKTYPE_RWLOCK);\ +- dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ ++ dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK); \ + dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK); \ + printk("\n"); + +@@ -1047,7 +1060,7 @@ static inline void print_testname(const + print_testname(desc); \ + dotest(name##_spin, FAILURE, LOCKTYPE_SPIN); \ + dotest(name##_wlock, FAILURE, LOCKTYPE_RWLOCK); \ +- dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \ ++ dotest(name##_rlock, SUCCESS, LOCKTYPE_RWLOCK); \ + dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \ + dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \ + dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \ +@@ -1179,6 +1192,7 @@ void locking_selftest(void) + /* + * irq-context testcases: + */ ++#ifndef CONFIG_PREEMPT_RT + DO_TESTCASE_2x6("irqs-on + irq-safe-A", irqsafe1); + DO_TESTCASE_2x3("sirq-safe-A => hirqs-on", irqsafe2A); + DO_TESTCASE_2x6("safe-A + irqs-on", irqsafe2B); +@@ -1188,6 +1202,7 @@ void locking_selftest(void) + + DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion); + // DO_TESTCASE_6x2B("irq read-recursion #2", 
irq_read_recursion2); ++#endif + + if (unexpected_testcase_failures) { + printk("-----------------------------------------------------------------\n"); +Index: linux-2.6-tip/lib/swiotlb.c +=================================================================== +--- linux-2.6-tip.orig/lib/swiotlb.c ++++ linux-2.6-tip/lib/swiotlb.c +@@ -145,7 +145,7 @@ static void *swiotlb_bus_to_virt(dma_add + return phys_to_virt(swiotlb_bus_to_phys(address)); + } + +-int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) ++int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) + { + return 0; + } +@@ -315,9 +315,9 @@ address_needs_mapping(struct device *hwd + return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size); + } + +-static inline int range_needs_mapping(void *ptr, size_t size) ++static inline int range_needs_mapping(phys_addr_t paddr, size_t size) + { +- return swiotlb_force || swiotlb_arch_range_needs_mapping(ptr, size); ++ return swiotlb_force || swiotlb_arch_range_needs_mapping(paddr, size); + } + + static int is_swiotlb_buffer(char *addr) +@@ -636,11 +636,14 @@ swiotlb_full(struct device *dev, size_t + * Once the device is given the dma address, the device owns this memory until + * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed. + */ +-dma_addr_t +-swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size, +- int dir, struct dma_attrs *attrs) +-{ +- dma_addr_t dev_addr = swiotlb_virt_to_bus(hwdev, ptr); ++dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, ++ unsigned long offset, size_t size, ++ enum dma_data_direction dir, ++ struct dma_attrs *attrs) ++{ ++ phys_addr_t phys = page_to_phys(page) + offset; ++ void *ptr = page_address(page) + offset; ++ dma_addr_t dev_addr = swiotlb_phys_to_bus(dev, phys); + void *map; + + BUG_ON(dir == DMA_NONE); +@@ -649,37 +652,30 @@ swiotlb_map_single_attrs(struct device * + * we can safely return the device addr and not worry about bounce + * buffering it. + */ +- if (!address_needs_mapping(hwdev, dev_addr, size) && +- !range_needs_mapping(ptr, size)) ++ if (!address_needs_mapping(dev, dev_addr, size) && ++ !range_needs_mapping(virt_to_phys(ptr), size)) + return dev_addr; + + /* + * Oh well, have to allocate and map a bounce buffer. + */ +- map = map_single(hwdev, virt_to_phys(ptr), size, dir); ++ map = map_single(dev, phys, size, dir); + if (!map) { +- swiotlb_full(hwdev, size, dir, 1); ++ swiotlb_full(dev, size, dir, 1); + map = io_tlb_overflow_buffer; + } + +- dev_addr = swiotlb_virt_to_bus(hwdev, map); ++ dev_addr = swiotlb_virt_to_bus(dev, map); + + /* + * Ensure that the address returned is DMA'ble + */ +- if (address_needs_mapping(hwdev, dev_addr, size)) ++ if (address_needs_mapping(dev, dev_addr, size)) + panic("map_single: bounce buffer is not DMA'ble"); + + return dev_addr; + } +-EXPORT_SYMBOL(swiotlb_map_single_attrs); +- +-dma_addr_t +-swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir) +-{ +- return swiotlb_map_single_attrs(hwdev, ptr, size, dir, NULL); +-} +-EXPORT_SYMBOL(swiotlb_map_single); ++EXPORT_SYMBOL_GPL(swiotlb_map_page); + + /* + * Unmap a single streaming mode DMA translation. The dma_addr and size must +@@ -689,9 +685,9 @@ EXPORT_SYMBOL(swiotlb_map_single); + * After this call, reads by the cpu to the buffer are guaranteed to see + * whatever the device wrote there. 
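/*
 * Illustrative sketch, not part of the patch: swiotlb_map_page() above
 * returns the buffer's own bus address when the device can reach it and
 * only falls back to a bounce buffer when it cannot (or when bouncing is
 * forced). The reachability test is essentially the range-vs-mask check
 * below; the 32-bit mask is just an example:
 */
#include <stdint.h>
#include <stdio.h>

/* 1 if [addr, addr + size) fits under the device's DMA mask */
static int dma_reachable(uint64_t mask, uint64_t addr, uint64_t size)
{
        return addr + size - 1 <= mask;
}

int main(void)
{
        uint64_t mask = 0xffffffffULL;  /* e.g. a 32-bit-only device */

        printf("%d\n", dma_reachable(mask, 0x00100000ULL, 4096));  /* 1: direct */
        printf("%d\n", dma_reachable(mask, 0x100000000ULL, 4096)); /* 0: bounce */
        return 0;
}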
+ */ +-void +-swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr, +- size_t size, int dir, struct dma_attrs *attrs) ++void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, ++ size_t size, enum dma_data_direction dir, ++ struct dma_attrs *attrs) + { + char *dma_addr = swiotlb_bus_to_virt(dev_addr); + +@@ -701,15 +697,7 @@ swiotlb_unmap_single_attrs(struct device + else if (dir == DMA_FROM_DEVICE) + dma_mark_clean(dma_addr, size); + } +-EXPORT_SYMBOL(swiotlb_unmap_single_attrs); +- +-void +-swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size, +- int dir) +-{ +- return swiotlb_unmap_single_attrs(hwdev, dev_addr, size, dir, NULL); +-} +-EXPORT_SYMBOL(swiotlb_unmap_single); ++EXPORT_SYMBOL_GPL(swiotlb_unmap_page); + + /* + * Make physical memory consistent for a single streaming mode DMA translation +@@ -736,7 +724,7 @@ swiotlb_sync_single(struct device *hwdev + + void + swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, +- size_t size, int dir) ++ size_t size, enum dma_data_direction dir) + { + swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); + } +@@ -744,7 +732,7 @@ EXPORT_SYMBOL(swiotlb_sync_single_for_cp + + void + swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, +- size_t size, int dir) ++ size_t size, enum dma_data_direction dir) + { + swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); + } +@@ -769,7 +757,8 @@ swiotlb_sync_single_range(struct device + + void + swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, +- unsigned long offset, size_t size, int dir) ++ unsigned long offset, size_t size, ++ enum dma_data_direction dir) + { + swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, + SYNC_FOR_CPU); +@@ -778,7 +767,8 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_ra + + void + swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, +- unsigned long offset, size_t size, int dir) ++ unsigned long offset, size_t size, ++ enum dma_data_direction dir) + { + swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, + SYNC_FOR_DEVICE); +@@ -803,7 +793,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_ra + */ + int + swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, +- int dir, struct dma_attrs *attrs) ++ enum dma_data_direction dir, struct dma_attrs *attrs) + { + struct scatterlist *sg; + int i; +@@ -811,10 +801,10 @@ swiotlb_map_sg_attrs(struct device *hwde + BUG_ON(dir == DMA_NONE); + + for_each_sg(sgl, sg, nelems, i) { +- void *addr = sg_virt(sg); +- dma_addr_t dev_addr = swiotlb_virt_to_bus(hwdev, addr); ++ phys_addr_t paddr = sg_phys(sg); ++ dma_addr_t dev_addr = swiotlb_phys_to_bus(hwdev, paddr); + +- if (range_needs_mapping(addr, sg->length) || ++ if (range_needs_mapping(paddr, sg->length) || + address_needs_mapping(hwdev, dev_addr, sg->length)) { + void *map = map_single(hwdev, sg_phys(sg), + sg->length, dir); +@@ -850,7 +840,7 @@ EXPORT_SYMBOL(swiotlb_map_sg); + */ + void + swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, +- int nelems, int dir, struct dma_attrs *attrs) ++ int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) + { + struct scatterlist *sg; + int i; +@@ -858,11 +848,11 @@ swiotlb_unmap_sg_attrs(struct device *hw + BUG_ON(dir == DMA_NONE); + + for_each_sg(sgl, sg, nelems, i) { +- if (sg->dma_address != swiotlb_virt_to_bus(hwdev, sg_virt(sg))) ++ if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg))) + unmap_single(hwdev, 
swiotlb_bus_to_virt(sg->dma_address), + sg->dma_length, dir); + else if (dir == DMA_FROM_DEVICE) +- dma_mark_clean(sg_virt(sg), sg->dma_length); ++ dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length); + } + } + EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); +@@ -892,17 +882,17 @@ swiotlb_sync_sg(struct device *hwdev, st + BUG_ON(dir == DMA_NONE); + + for_each_sg(sgl, sg, nelems, i) { +- if (sg->dma_address != swiotlb_virt_to_bus(hwdev, sg_virt(sg))) ++ if (sg->dma_address != swiotlb_phys_to_bus(hwdev, sg_phys(sg))) + sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address), + sg->dma_length, dir, target); + else if (dir == DMA_FROM_DEVICE) +- dma_mark_clean(sg_virt(sg), sg->dma_length); ++ dma_mark_clean(swiotlb_bus_to_virt(sg->dma_address), sg->dma_length); + } + } + + void + swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, +- int nelems, int dir) ++ int nelems, enum dma_data_direction dir) + { + swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); + } +@@ -910,7 +900,7 @@ EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu); + + void + swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, +- int nelems, int dir) ++ int nelems, enum dma_data_direction dir) + { + swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); + } +Index: linux-2.6-tip/lib/vsprintf.c +=================================================================== +--- linux-2.6-tip.orig/lib/vsprintf.c ++++ linux-2.6-tip/lib/vsprintf.c +@@ -396,7 +396,38 @@ static noinline char* put_dec(char *buf, + #define SMALL 32 /* Must be 32 == 0x20 */ + #define SPECIAL 64 /* 0x */ + +-static char *number(char *buf, char *end, unsigned long long num, int base, int size, int precision, int type) ++enum format_type { ++ FORMAT_TYPE_NONE, /* Just a string part */ ++ FORMAT_TYPE_WIDTH, ++ FORMAT_TYPE_PRECISION, ++ FORMAT_TYPE_CHAR, ++ FORMAT_TYPE_STR, ++ FORMAT_TYPE_PTR, ++ FORMAT_TYPE_PERCENT_CHAR, ++ FORMAT_TYPE_INVALID, ++ FORMAT_TYPE_LONG_LONG, ++ FORMAT_TYPE_ULONG, ++ FORMAT_TYPE_LONG, ++ FORMAT_TYPE_USHORT, ++ FORMAT_TYPE_SHORT, ++ FORMAT_TYPE_UINT, ++ FORMAT_TYPE_INT, ++ FORMAT_TYPE_NRCHARS, ++ FORMAT_TYPE_SIZE_T, ++ FORMAT_TYPE_PTRDIFF ++}; ++ ++struct printf_spec { ++ enum format_type type; ++ int flags; /* flags to number() */ ++ int field_width; /* width of output field */ ++ int base; ++ int precision; /* # of digits/chars */ ++ int qualifier; ++}; ++ ++static char *number(char *buf, char *end, unsigned long long num, ++ struct printf_spec spec) + { + /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ + static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ +@@ -404,32 +435,32 @@ static char *number(char *buf, char *end + char tmp[66]; + char sign; + char locase; +- int need_pfx = ((type & SPECIAL) && base != 10); ++ int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10); + int i; + + /* locase = 0 or 0x20. 
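/*
 * Illustrative aside, not part of the patch: the 'locase' trick works
 * because in ASCII OR-ing an upper-case letter with 0x20 lower-cases it,
 * while the digits 0x30-0x39 already have that bit set and pass through
 * unchanged:
 */
#include <stdio.h>

int main(void)
{
        printf("%c %c\n", 'F' | 0x20, '7' | 0x20);      /* prints "f 7" */
        return 0;
}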
ORing digits or letters with 'locase' + * produces same digits or (maybe lowercased) letters */ +- locase = (type & SMALL); +- if (type & LEFT) +- type &= ~ZEROPAD; ++ locase = (spec.flags & SMALL); ++ if (spec.flags & LEFT) ++ spec.flags &= ~ZEROPAD; + sign = 0; +- if (type & SIGN) { ++ if (spec.flags & SIGN) { + if ((signed long long) num < 0) { + sign = '-'; + num = - (signed long long) num; +- size--; +- } else if (type & PLUS) { ++ spec.field_width--; ++ } else if (spec.flags & PLUS) { + sign = '+'; +- size--; +- } else if (type & SPACE) { ++ spec.field_width--; ++ } else if (spec.flags & SPACE) { + sign = ' '; +- size--; ++ spec.field_width--; + } + } + if (need_pfx) { +- size--; +- if (base == 16) +- size--; ++ spec.field_width--; ++ if (spec.base == 16) ++ spec.field_width--; + } + + /* generate full string in tmp[], in reverse order */ +@@ -441,10 +472,10 @@ static char *number(char *buf, char *end + tmp[i++] = (digits[do_div(num,base)] | locase); + } while (num != 0); + */ +- else if (base != 10) { /* 8 or 16 */ +- int mask = base - 1; ++ else if (spec.base != 10) { /* 8 or 16 */ ++ int mask = spec.base - 1; + int shift = 3; +- if (base == 16) shift = 4; ++ if (spec.base == 16) shift = 4; + do { + tmp[i++] = (digits[((unsigned char)num) & mask] | locase); + num >>= shift; +@@ -454,12 +485,12 @@ static char *number(char *buf, char *end + } + + /* printing 100 using %2d gives "100", not "00" */ +- if (i > precision) +- precision = i; ++ if (i > spec.precision) ++ spec.precision = i; + /* leading space padding */ +- size -= precision; +- if (!(type & (ZEROPAD+LEFT))) { +- while(--size >= 0) { ++ spec.field_width -= spec.precision; ++ if (!(spec.flags & (ZEROPAD+LEFT))) { ++ while(--spec.field_width >= 0) { + if (buf < end) + *buf = ' '; + ++buf; +@@ -476,23 +507,23 @@ static char *number(char *buf, char *end + if (buf < end) + *buf = '0'; + ++buf; +- if (base == 16) { ++ if (spec.base == 16) { + if (buf < end) + *buf = ('X' | locase); + ++buf; + } + } + /* zero or space padding */ +- if (!(type & LEFT)) { +- char c = (type & ZEROPAD) ? '0' : ' '; +- while (--size >= 0) { ++ if (!(spec.flags & LEFT)) { ++ char c = (spec.flags & ZEROPAD) ? '0' : ' '; ++ while (--spec.field_width >= 0) { + if (buf < end) + *buf = c; + ++buf; + } + } + /* hmm even more zero padding? 
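/*
 * Illustrative aside, not part of the patch: number() above follows the
 * usual printf rules for integers - precision pads with zeros first, the
 * field width then pads with spaces (or with zeros when ZEROPAD is set).
 * The same behaviour from userspace printf:
 */
#include <stdio.h>

int main(void)
{
        printf("[%8.5d]\n", 42);        /* [   00042]  width 8, precision 5 */
        printf("[%-8.5d]\n", 42);       /* [00042   ]  LEFT flag */
        printf("[%08d]\n", 42);         /* [00000042]  ZEROPAD */
        return 0;
}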
*/ +- while (i <= --precision) { ++ while (i <= --spec.precision) { + if (buf < end) + *buf = '0'; + ++buf; +@@ -504,7 +535,7 @@ static char *number(char *buf, char *end + ++buf; + } + /* trailing space padding */ +- while (--size >= 0) { ++ while (--spec.field_width >= 0) { + if (buf < end) + *buf = ' '; + ++buf; +@@ -512,17 +543,17 @@ static char *number(char *buf, char *end + return buf; + } + +-static char *string(char *buf, char *end, char *s, int field_width, int precision, int flags) ++static char *string(char *buf, char *end, char *s, struct printf_spec spec) + { + int len, i; + + if ((unsigned long)s < PAGE_SIZE) + s = ""; + +- len = strnlen(s, precision); ++ len = strnlen(s, spec.precision); + +- if (!(flags & LEFT)) { +- while (len < field_width--) { ++ if (!(spec.flags & LEFT)) { ++ while (len < spec.field_width--) { + if (buf < end) + *buf = ' '; + ++buf; +@@ -533,7 +564,7 @@ static char *string(char *buf, char *end + *buf = *s; + ++buf; ++s; + } +- while (len < field_width--) { ++ while (len < spec.field_width--) { + if (buf < end) + *buf = ' '; + ++buf; +@@ -541,21 +572,24 @@ static char *string(char *buf, char *end + return buf; + } + +-static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int precision, int flags) ++static char *symbol_string(char *buf, char *end, void *ptr, ++ struct printf_spec spec) + { + unsigned long value = (unsigned long) ptr; + #ifdef CONFIG_KALLSYMS + char sym[KSYM_SYMBOL_LEN]; + sprint_symbol(sym, value); +- return string(buf, end, sym, field_width, precision, flags); ++ return string(buf, end, sym, spec); + #else +- field_width = 2*sizeof(void *); +- flags |= SPECIAL | SMALL | ZEROPAD; +- return number(buf, end, value, 16, field_width, precision, flags); ++ spec.field_width = 2*sizeof(void *); ++ spec.flags |= SPECIAL | SMALL | ZEROPAD; ++ spec.base = 16; ++ return number(buf, end, value, spec); + #endif + } + +-static char *resource_string(char *buf, char *end, struct resource *res, int field_width, int precision, int flags) ++static char *resource_string(char *buf, char *end, struct resource *res, ++ struct printf_spec spec) + { + #ifndef IO_RSRC_PRINTK_SIZE + #define IO_RSRC_PRINTK_SIZE 4 +@@ -564,7 +598,11 @@ static char *resource_string(char *buf, + #ifndef MEM_RSRC_PRINTK_SIZE + #define MEM_RSRC_PRINTK_SIZE 8 + #endif +- ++ struct printf_spec num_spec = { ++ .base = 16, ++ .precision = -1, ++ .flags = SPECIAL | SMALL | ZEROPAD, ++ }; + /* room for the actual numbers, the two "0x", -, [, ] and the final zero */ + char sym[4*sizeof(resource_size_t) + 8]; + char *p = sym, *pend = sym + sizeof(sym); +@@ -576,17 +614,18 @@ static char *resource_string(char *buf, + size = MEM_RSRC_PRINTK_SIZE; + + *p++ = '['; +- p = number(p, pend, res->start, 16, size, -1, SPECIAL | SMALL | ZEROPAD); ++ num_spec.field_width = size; ++ p = number(p, pend, res->start, num_spec); + *p++ = '-'; +- p = number(p, pend, res->end, 16, size, -1, SPECIAL | SMALL | ZEROPAD); ++ p = number(p, pend, res->end, num_spec); + *p++ = ']'; + *p = 0; + +- return string(buf, end, sym, field_width, precision, flags); ++ return string(buf, end, sym, spec); + } + +-static char *mac_address_string(char *buf, char *end, u8 *addr, int field_width, +- int precision, int flags) ++static char *mac_address_string(char *buf, char *end, u8 *addr, ++ struct printf_spec spec) + { + char mac_addr[6 * 3]; /* (6 * 2 hex digits), 5 colons and trailing zero */ + char *p = mac_addr; +@@ -594,16 +633,17 @@ static char *mac_address_string(char *bu + + for (i = 0; i < 6; i++) { + p 
= pack_hex_byte(p, addr[i]); +- if (!(flags & SPECIAL) && i != 5) ++ if (!(spec.flags & SPECIAL) && i != 5) + *p++ = ':'; + } + *p = '\0'; ++ spec.flags &= ~SPECIAL; + +- return string(buf, end, mac_addr, field_width, precision, flags & ~SPECIAL); ++ return string(buf, end, mac_addr, spec); + } + +-static char *ip6_addr_string(char *buf, char *end, u8 *addr, int field_width, +- int precision, int flags) ++static char *ip6_addr_string(char *buf, char *end, u8 *addr, ++ struct printf_spec spec) + { + char ip6_addr[8 * 5]; /* (8 * 4 hex digits), 7 colons and trailing zero */ + char *p = ip6_addr; +@@ -612,16 +652,17 @@ static char *ip6_addr_string(char *buf, + for (i = 0; i < 8; i++) { + p = pack_hex_byte(p, addr[2 * i]); + p = pack_hex_byte(p, addr[2 * i + 1]); +- if (!(flags & SPECIAL) && i != 7) ++ if (!(spec.flags & SPECIAL) && i != 7) + *p++ = ':'; + } + *p = '\0'; ++ spec.flags &= ~SPECIAL; + +- return string(buf, end, ip6_addr, field_width, precision, flags & ~SPECIAL); ++ return string(buf, end, ip6_addr, spec); + } + +-static char *ip4_addr_string(char *buf, char *end, u8 *addr, int field_width, +- int precision, int flags) ++static char *ip4_addr_string(char *buf, char *end, u8 *addr, ++ struct printf_spec spec) + { + char ip4_addr[4 * 4]; /* (4 * 3 decimal digits), 3 dots and trailing zero */ + char temp[3]; /* hold each IP quad in reverse order */ +@@ -637,8 +678,9 @@ static char *ip4_addr_string(char *buf, + *p++ = '.'; + } + *p = '\0'; ++ spec.flags &= ~SPECIAL; + +- return string(buf, end, ip4_addr, field_width, precision, flags & ~SPECIAL); ++ return string(buf, end, ip4_addr, spec); + } + + /* +@@ -663,41 +705,233 @@ static char *ip4_addr_string(char *buf, + * function pointers are really function descriptors, which contain a + * pointer to the real address. + */ +-static char *pointer(const char *fmt, char *buf, char *end, void *ptr, int field_width, int precision, int flags) ++static char *pointer(const char *fmt, char *buf, char *end, void *ptr, ++ struct printf_spec spec) + { + if (!ptr) +- return string(buf, end, "(null)", field_width, precision, flags); ++ return string(buf, end, "(null)", spec); + + switch (*fmt) { + case 'F': + ptr = dereference_function_descriptor(ptr); + /* Fallthrough */ + case 'S': +- return symbol_string(buf, end, ptr, field_width, precision, flags); ++ return symbol_string(buf, end, ptr, spec); + case 'R': +- return resource_string(buf, end, ptr, field_width, precision, flags); ++ return resource_string(buf, end, ptr, spec); + case 'm': +- flags |= SPECIAL; ++ spec.flags |= SPECIAL; + /* Fallthrough */ + case 'M': +- return mac_address_string(buf, end, ptr, field_width, precision, flags); ++ return mac_address_string(buf, end, ptr, spec); + case 'i': +- flags |= SPECIAL; ++ spec.flags |= SPECIAL; + /* Fallthrough */ + case 'I': + if (fmt[1] == '6') +- return ip6_addr_string(buf, end, ptr, field_width, precision, flags); ++ return ip6_addr_string(buf, end, ptr, spec); + if (fmt[1] == '4') +- return ip4_addr_string(buf, end, ptr, field_width, precision, flags); +- flags &= ~SPECIAL; ++ return ip4_addr_string(buf, end, ptr, spec); ++ spec.flags &= ~SPECIAL; ++ break; ++ } ++ spec.flags |= SMALL; ++ if (spec.field_width == -1) { ++ spec.field_width = 2*sizeof(void *); ++ spec.flags |= ZEROPAD; ++ } ++ spec.base = 16; ++ ++ return number(buf, end, (unsigned long) ptr, spec); ++} ++ ++/* ++ * Helper function to decode printf style format. 
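
In the hunk above, pointer() dispatches on the character that follows %p ('F', 'S', 'R', 'M'/'m', 'I'/'i') and now simply hands the shared printf_spec to the matching helper. Below is a minimal userspace sketch of that kind of dispatch; print_pointer() and its single 'M' case are illustrative only, not the kernel's formatter.

#include <stdio.h>

/* Print a 6-byte MAC address as "aa:bb:cc:dd:ee:ff". */
static void print_mac(const unsigned char *mac)
{
    int i;

    for (i = 0; i < 6; i++)
        printf("%02x%c", (unsigned int)mac[i], i == 5 ? '\n' : ':');
}

/* Toy %p-style dispatcher: look at the character after 'p'. */
static void print_pointer(char ext, void *ptr)
{
    switch (ext) {
    case 'M':
        print_mac(ptr);
        break;
    default:
        printf("%p\n", ptr);  /* plain pointer value */
        break;
    }
}

int main(void)
{
    unsigned char mac[6] = { 0x00, 0x1a, 0x2b, 0x3c, 0x4d, 0x5e };

    print_pointer('M', mac);
    print_pointer(0, mac);
    return 0;
}
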
++ * Each call decode a token from the format and return the ++ * number of characters read (or likely the delta where it wants ++ * to go on the next call). ++ * The decoded token is returned through the parameters ++ * ++ * 'h', 'l', or 'L' for integer fields ++ * 'z' support added 23/7/1999 S.H. ++ * 'z' changed to 'Z' --davidm 1/25/99 ++ * 't' added for ptrdiff_t ++ * ++ * @fmt: the format string ++ * @type of the token returned ++ * @flags: various flags such as +, -, # tokens.. ++ * @field_width: overwritten width ++ * @base: base of the number (octal, hex, ...) ++ * @precision: precision of a number ++ * @qualifier: qualifier of a number (long, size_t, ...) ++ */ ++static int format_decode(const char *fmt, struct printf_spec *spec) ++{ ++ const char *start = fmt; ++ ++ /* we finished early by reading the field width */ ++ if (spec->type == FORMAT_TYPE_WIDTH) { ++ if (spec->field_width < 0) { ++ spec->field_width = -spec->field_width; ++ spec->flags |= LEFT; ++ } ++ spec->type = FORMAT_TYPE_NONE; ++ goto precision; ++ } ++ ++ /* we finished early by reading the precision */ ++ if (spec->type == FORMAT_TYPE_PRECISION) { ++ if (spec->precision < 0) ++ spec->precision = 0; ++ ++ spec->type = FORMAT_TYPE_NONE; ++ goto qualifier; ++ } ++ ++ /* By default */ ++ spec->type = FORMAT_TYPE_NONE; ++ ++ for (; *fmt ; ++fmt) { ++ if (*fmt == '%') ++ break; ++ } ++ ++ /* Return the current non-format string */ ++ if (fmt != start || !*fmt) ++ return fmt - start; ++ ++ /* Process flags */ ++ spec->flags = 0; ++ ++ while (1) { /* this also skips first '%' */ ++ bool found = true; ++ ++ ++fmt; ++ ++ switch (*fmt) { ++ case '-': spec->flags |= LEFT; break; ++ case '+': spec->flags |= PLUS; break; ++ case ' ': spec->flags |= SPACE; break; ++ case '#': spec->flags |= SPECIAL; break; ++ case '0': spec->flags |= ZEROPAD; break; ++ default: found = false; ++ } ++ ++ if (!found) ++ break; ++ } ++ ++ /* get field width */ ++ spec->field_width = -1; ++ ++ if (isdigit(*fmt)) ++ spec->field_width = skip_atoi(&fmt); ++ else if (*fmt == '*') { ++ /* it's the next argument */ ++ spec->type = FORMAT_TYPE_WIDTH; ++ return ++fmt - start; ++ } ++ ++precision: ++ /* get the precision */ ++ spec->precision = -1; ++ if (*fmt == '.') { ++ ++fmt; ++ if (isdigit(*fmt)) { ++ spec->precision = skip_atoi(&fmt); ++ if (spec->precision < 0) ++ spec->precision = 0; ++ } else if (*fmt == '*') { ++ /* it's the next argument */ ++ spec->type = FORMAT_TYPE_PRECISION; ++ return ++fmt - start; ++ } ++ } ++ ++qualifier: ++ /* get the conversion qualifier */ ++ spec->qualifier = -1; ++ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || ++ *fmt == 'Z' || *fmt == 'z' || *fmt == 't') { ++ spec->qualifier = *fmt; ++ ++fmt; ++ if (spec->qualifier == 'l' && *fmt == 'l') { ++ spec->qualifier = 'L'; ++ ++fmt; ++ } ++ } ++ ++ /* default base */ ++ spec->base = 10; ++ switch (*fmt) { ++ case 'c': ++ spec->type = FORMAT_TYPE_CHAR; ++ return ++fmt - start; ++ ++ case 's': ++ spec->type = FORMAT_TYPE_STR; ++ return ++fmt - start; ++ ++ case 'p': ++ spec->type = FORMAT_TYPE_PTR; ++ return fmt - start; ++ /* skip alnum */ ++ ++ case 'n': ++ spec->type = FORMAT_TYPE_NRCHARS; ++ return ++fmt - start; ++ ++ case '%': ++ spec->type = FORMAT_TYPE_PERCENT_CHAR; ++ return ++fmt - start; ++ ++ /* integer number formats - set up the flags and "break" */ ++ case 'o': ++ spec->base = 8; ++ break; ++ ++ case 'x': ++ spec->flags |= SMALL; ++ ++ case 'X': ++ spec->base = 16; ++ break; ++ ++ case 'd': ++ case 'i': ++ spec->flags |= SIGN; ++ case 'u': + break; ++ ++ 
default: ++ spec->type = FORMAT_TYPE_INVALID; ++ return fmt - start; + } +- flags |= SMALL; +- if (field_width == -1) { +- field_width = 2*sizeof(void *); +- flags |= ZEROPAD; ++ ++ if (spec->qualifier == 'L') ++ spec->type = FORMAT_TYPE_LONG_LONG; ++ else if (spec->qualifier == 'l') { ++ if (spec->flags & SIGN) ++ spec->type = FORMAT_TYPE_LONG; ++ else ++ spec->type = FORMAT_TYPE_ULONG; ++ } else if (spec->qualifier == 'Z' || spec->qualifier == 'z') { ++ spec->type = FORMAT_TYPE_SIZE_T; ++ } else if (spec->qualifier == 't') { ++ spec->type = FORMAT_TYPE_PTRDIFF; ++ } else if (spec->qualifier == 'h') { ++ if (spec->flags & SIGN) ++ spec->type = FORMAT_TYPE_SHORT; ++ else ++ spec->type = FORMAT_TYPE_USHORT; ++ } else { ++ if (spec->flags & SIGN) ++ spec->type = FORMAT_TYPE_INT; ++ else ++ spec->type = FORMAT_TYPE_UINT; + } +- return number(buf, end, (unsigned long) ptr, 16, field_width, precision, flags); ++ ++ return ++fmt - start; + } + + /** +@@ -726,18 +960,9 @@ static char *pointer(const char *fmt, ch + int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) + { + unsigned long long num; +- int base; + char *str, *end, c; +- +- int flags; /* flags to number() */ +- +- int field_width; /* width of output field */ +- int precision; /* min. # of digits for integers; max +- number of chars for from string */ +- int qualifier; /* 'h', 'l', or 'L' for integer fields */ +- /* 'z' support added 23/7/1999 S.H. */ +- /* 'z' changed to 'Z' --davidm 1/25/99 */ +- /* 't' added for ptrdiff_t */ ++ int read; ++ struct printf_spec spec = {0}; + + /* Reject out-of-range values early. Large positive sizes are + used for unknown buffer sizes. */ +@@ -758,184 +983,144 @@ int vsnprintf(char *buf, size_t size, co + size = end - buf; + } + +- for (; *fmt ; ++fmt) { +- if (*fmt != '%') { +- if (str < end) +- *str = *fmt; +- ++str; +- continue; +- } ++ while (*fmt) { ++ const char *old_fmt = fmt; + +- /* process flags */ +- flags = 0; +- repeat: +- ++fmt; /* this also skips first '%' */ +- switch (*fmt) { +- case '-': flags |= LEFT; goto repeat; +- case '+': flags |= PLUS; goto repeat; +- case ' ': flags |= SPACE; goto repeat; +- case '#': flags |= SPECIAL; goto repeat; +- case '0': flags |= ZEROPAD; goto repeat; +- } ++ read = format_decode(fmt, &spec); + +- /* get field width */ +- field_width = -1; +- if (isdigit(*fmt)) +- field_width = skip_atoi(&fmt); +- else if (*fmt == '*') { +- ++fmt; +- /* it's the next argument */ +- field_width = va_arg(args, int); +- if (field_width < 0) { +- field_width = -field_width; +- flags |= LEFT; +- } +- } ++ fmt += read; + +- /* get the precision */ +- precision = -1; +- if (*fmt == '.') { +- ++fmt; +- if (isdigit(*fmt)) +- precision = skip_atoi(&fmt); +- else if (*fmt == '*') { +- ++fmt; +- /* it's the next argument */ +- precision = va_arg(args, int); ++ switch (spec.type) { ++ case FORMAT_TYPE_NONE: { ++ int copy = read; ++ if (str < end) { ++ if (copy > end - str) ++ copy = end - str; ++ memcpy(str, old_fmt, copy); + } +- if (precision < 0) +- precision = 0; ++ str += read; ++ break; + } + +- /* get the conversion qualifier */ +- qualifier = -1; +- if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || +- *fmt =='Z' || *fmt == 'z' || *fmt == 't') { +- qualifier = *fmt; +- ++fmt; +- if (qualifier == 'l' && *fmt == 'l') { +- qualifier = 'L'; +- ++fmt; +- } +- } ++ case FORMAT_TYPE_WIDTH: ++ spec.field_width = va_arg(args, int); ++ break; + +- /* default base */ +- base = 10; ++ case FORMAT_TYPE_PRECISION: ++ spec.precision = va_arg(args, int); ++ break; + +- 
switch (*fmt) { +- case 'c': +- if (!(flags & LEFT)) { +- while (--field_width > 0) { +- if (str < end) +- *str = ' '; +- ++str; +- } +- } +- c = (unsigned char) va_arg(args, int); +- if (str < end) +- *str = c; +- ++str; +- while (--field_width > 0) { ++ case FORMAT_TYPE_CHAR: ++ if (!(spec.flags & LEFT)) { ++ while (--spec.field_width > 0) { + if (str < end) + *str = ' '; + ++str; +- } +- continue; + +- case 's': +- str = string(str, end, va_arg(args, char *), field_width, precision, flags); +- continue; +- +- case 'p': +- str = pointer(fmt+1, str, end, +- va_arg(args, void *), +- field_width, precision, flags); +- /* Skip all alphanumeric pointer suffixes */ +- while (isalnum(fmt[1])) +- fmt++; +- continue; +- +- case 'n': +- /* FIXME: +- * What does C99 say about the overflow case here? */ +- if (qualifier == 'l') { +- long * ip = va_arg(args, long *); +- *ip = (str - buf); +- } else if (qualifier == 'Z' || qualifier == 'z') { +- size_t * ip = va_arg(args, size_t *); +- *ip = (str - buf); +- } else { +- int * ip = va_arg(args, int *); +- *ip = (str - buf); + } +- continue; +- +- case '%': ++ } ++ c = (unsigned char) va_arg(args, int); ++ if (str < end) ++ *str = c; ++ ++str; ++ while (--spec.field_width > 0) { + if (str < end) +- *str = '%'; ++ *str = ' '; + ++str; +- continue; ++ } ++ break; + +- /* integer number formats - set up the flags and "break" */ +- case 'o': +- base = 8; +- break; ++ case FORMAT_TYPE_STR: ++ str = string(str, end, va_arg(args, char *), spec); ++ break; + +- case 'x': +- flags |= SMALL; +- case 'X': +- base = 16; +- break; ++ case FORMAT_TYPE_PTR: ++ str = pointer(fmt+1, str, end, va_arg(args, void *), ++ spec); ++ while (isalnum(*fmt)) ++ fmt++; ++ break; + +- case 'd': +- case 'i': +- flags |= SIGN; +- case 'u': +- break; ++ case FORMAT_TYPE_PERCENT_CHAR: ++ if (str < end) ++ *str = '%'; ++ ++str; ++ break; + +- default: ++ case FORMAT_TYPE_INVALID: ++ if (str < end) ++ *str = '%'; ++ ++str; ++ if (*fmt) { + if (str < end) +- *str = '%'; ++ *str = *fmt; + ++str; +- if (*fmt) { +- if (str < end) +- *str = *fmt; +- ++str; +- } else { +- --fmt; +- } +- continue; ++ } else { ++ --fmt; ++ } ++ break; ++ ++ case FORMAT_TYPE_NRCHARS: { ++ int qualifier = spec.qualifier; ++ ++ if (qualifier == 'l') { ++ long *ip = va_arg(args, long *); ++ *ip = (str - buf); ++ } else if (qualifier == 'Z' || ++ qualifier == 'z') { ++ size_t *ip = va_arg(args, size_t *); ++ *ip = (str - buf); ++ } else { ++ int *ip = va_arg(args, int *); ++ *ip = (str - buf); ++ } ++ break; + } +- if (qualifier == 'L') +- num = va_arg(args, long long); +- else if (qualifier == 'l') { +- num = va_arg(args, unsigned long); +- if (flags & SIGN) +- num = (signed long) num; +- } else if (qualifier == 'Z' || qualifier == 'z') { +- num = va_arg(args, size_t); +- } else if (qualifier == 't') { +- num = va_arg(args, ptrdiff_t); +- } else if (qualifier == 'h') { +- num = (unsigned short) va_arg(args, int); +- if (flags & SIGN) +- num = (signed short) num; +- } else { +- num = va_arg(args, unsigned int); +- if (flags & SIGN) +- num = (signed int) num; ++ ++ default: ++ switch (spec.type) { ++ case FORMAT_TYPE_LONG_LONG: ++ num = va_arg(args, long long); ++ break; ++ case FORMAT_TYPE_ULONG: ++ num = va_arg(args, unsigned long); ++ break; ++ case FORMAT_TYPE_LONG: ++ num = va_arg(args, long); ++ break; ++ case FORMAT_TYPE_SIZE_T: ++ num = va_arg(args, size_t); ++ break; ++ case FORMAT_TYPE_PTRDIFF: ++ num = va_arg(args, ptrdiff_t); ++ break; ++ case FORMAT_TYPE_USHORT: ++ num = (unsigned short) va_arg(args, int); 
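
The FORMAT_TYPE_CHAR and FORMAT_TYPE_USHORT/SHORT cases above fetch the argument with va_arg(args, int) and cast afterwards because C's default argument promotions widen char and short to int in variadic calls; reading them as the narrow type would be undefined. A small self-contained demonstration of that rule (ordinary userspace C, unrelated to the kernel sources):

#include <stdarg.h>
#include <stdio.h>

/* Variadic callee: narrow integer arguments arrive promoted to int,
 * so they must be fetched as int and narrowed explicitly. */
static void show_short(int count, ...)
{
    va_list ap;
    int i;

    va_start(ap, count);
    for (i = 0; i < count; i++) {
        short val = (short)va_arg(ap, int);
        printf("%hd\n", val);
    }
    va_end(ap);
}

int main(void)
{
    short a = -5, b = 300;

    show_short(2, a, b);    /* prints -5 and 300 */
    return 0;
}
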
++ break; ++ case FORMAT_TYPE_SHORT: ++ num = (short) va_arg(args, int); ++ break; ++ case FORMAT_TYPE_INT: ++ num = (int) va_arg(args, int); ++ break; ++ default: ++ num = va_arg(args, unsigned int); ++ } ++ ++ str = number(str, end, num, spec); + } +- str = number(str, end, num, base, +- field_width, precision, flags); + } ++ + if (size > 0) { + if (str < end) + *str = '\0'; + else + end[-1] = '\0'; + } ++ + /* the trailing null byte doesn't count towards the total */ + return str-buf; ++ + } + EXPORT_SYMBOL(vsnprintf); + +@@ -1058,6 +1243,372 @@ int sprintf(char * buf, const char *fmt, + } + EXPORT_SYMBOL(sprintf); + ++#ifdef CONFIG_BINARY_PRINTF ++/* ++ * bprintf service: ++ * vbin_printf() - VA arguments to binary data ++ * bstr_printf() - Binary data to text string ++ */ ++ ++/** ++ * vbin_printf - Parse a format string and place args' binary value in a buffer ++ * @bin_buf: The buffer to place args' binary value ++ * @size: The size of the buffer(by words(32bits), not characters) ++ * @fmt: The format string to use ++ * @args: Arguments for the format string ++ * ++ * The format follows C99 vsnprintf, except %n is ignored, and its argument ++ * is skiped. ++ * ++ * The return value is the number of words(32bits) which would be generated for ++ * the given input. ++ * ++ * NOTE: ++ * If the return value is greater than @size, the resulting bin_buf is NOT ++ * valid for bstr_printf(). ++ */ ++int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) ++{ ++ struct printf_spec spec = {0}; ++ char *str, *end; ++ int read; ++ ++ str = (char *)bin_buf; ++ end = (char *)(bin_buf + size); ++ ++#define save_arg(type) \ ++do { \ ++ if (sizeof(type) == 8) { \ ++ unsigned long long value; \ ++ str = PTR_ALIGN(str, sizeof(u32)); \ ++ value = va_arg(args, unsigned long long); \ ++ if (str + sizeof(type) <= end) { \ ++ *(u32 *)str = *(u32 *)&value; \ ++ *(u32 *)(str + 4) = *((u32 *)&value + 1); \ ++ } \ ++ } else { \ ++ unsigned long value; \ ++ str = PTR_ALIGN(str, sizeof(type)); \ ++ value = va_arg(args, int); \ ++ if (str + sizeof(type) <= end) \ ++ *(typeof(type) *)str = (type)value; \ ++ } \ ++ str += sizeof(type); \ ++} while (0) ++ ++ ++ while (*fmt) { ++ read = format_decode(fmt, &spec); ++ ++ fmt += read; ++ ++ switch (spec.type) { ++ case FORMAT_TYPE_NONE: ++ break; ++ ++ case FORMAT_TYPE_WIDTH: ++ case FORMAT_TYPE_PRECISION: ++ save_arg(int); ++ break; ++ ++ case FORMAT_TYPE_CHAR: ++ save_arg(char); ++ break; ++ ++ case FORMAT_TYPE_STR: { ++ const char *save_str = va_arg(args, char *); ++ size_t len; ++ if ((unsigned long)save_str > (unsigned long)-PAGE_SIZE ++ || (unsigned long)save_str < PAGE_SIZE) ++ save_str = ""; ++ len = strlen(save_str); ++ if (str + len + 1 < end) ++ memcpy(str, save_str, len + 1); ++ str += len + 1; ++ break; ++ } ++ ++ case FORMAT_TYPE_PTR: ++ save_arg(void *); ++ /* skip all alphanumeric pointer suffixes */ ++ while (isalnum(*fmt)) ++ fmt++; ++ break; ++ ++ case FORMAT_TYPE_PERCENT_CHAR: ++ break; ++ ++ case FORMAT_TYPE_INVALID: ++ if (!*fmt) ++ --fmt; ++ break; ++ ++ case FORMAT_TYPE_NRCHARS: { ++ /* skip %n 's argument */ ++ int qualifier = spec.qualifier; ++ void *skip_arg; ++ if (qualifier == 'l') ++ skip_arg = va_arg(args, long *); ++ else if (qualifier == 'Z' || qualifier == 'z') ++ skip_arg = va_arg(args, size_t *); ++ else ++ skip_arg = va_arg(args, int *); ++ break; ++ } ++ ++ default: ++ switch (spec.type) { ++ ++ case FORMAT_TYPE_LONG_LONG: ++ save_arg(long long); ++ break; ++ case FORMAT_TYPE_ULONG: ++ case FORMAT_TYPE_LONG: ++ 
save_arg(unsigned long); ++ break; ++ case FORMAT_TYPE_SIZE_T: ++ save_arg(size_t); ++ break; ++ case FORMAT_TYPE_PTRDIFF: ++ save_arg(ptrdiff_t); ++ break; ++ case FORMAT_TYPE_USHORT: ++ case FORMAT_TYPE_SHORT: ++ save_arg(short); ++ break; ++ default: ++ save_arg(int); ++ } ++ } ++ } ++ return (u32 *)(PTR_ALIGN(str, sizeof(u32))) - bin_buf; ++ ++#undef save_arg ++} ++EXPORT_SYMBOL_GPL(vbin_printf); ++ ++/** ++ * bstr_printf - Format a string from binary arguments and place it in a buffer ++ * @buf: The buffer to place the result into ++ * @size: The size of the buffer, including the trailing null space ++ * @fmt: The format string to use ++ * @bin_buf: Binary arguments for the format string ++ * ++ * This function like C99 vsnprintf, but the difference is that vsnprintf gets ++ * arguments from stack, and bstr_printf gets arguments from @bin_buf which is ++ * a binary buffer that generated by vbin_printf. ++ * ++ * The format follows C99 vsnprintf, but has some extensions: ++ * %pS output the name of a text symbol ++ * %pF output the name of a function pointer ++ * %pR output the address range in a struct resource ++ * %n is ignored ++ * ++ * The return value is the number of characters which would ++ * be generated for the given input, excluding the trailing ++ * '\0', as per ISO C99. If you want to have the exact ++ * number of characters written into @buf as return value ++ * (not including the trailing '\0'), use vscnprintf(). If the ++ * return is greater than or equal to @size, the resulting ++ * string is truncated. ++ */ ++int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) ++{ ++ unsigned long long num; ++ char *str, *end, c; ++ const char *args = (const char *)bin_buf; ++ ++ struct printf_spec spec = {0}; ++ ++ if (unlikely((int) size < 0)) { ++ /* There can be only one.. 
*/ ++ static char warn = 1; ++ WARN_ON(warn); ++ warn = 0; ++ return 0; ++ } ++ ++ str = buf; ++ end = buf + size; ++ ++#define get_arg(type) \ ++({ \ ++ typeof(type) value; \ ++ if (sizeof(type) == 8) { \ ++ args = PTR_ALIGN(args, sizeof(u32)); \ ++ *(u32 *)&value = *(u32 *)args; \ ++ *((u32 *)&value + 1) = *(u32 *)(args + 4); \ ++ } else { \ ++ args = PTR_ALIGN(args, sizeof(type)); \ ++ value = *(typeof(type) *)args; \ ++ } \ ++ args += sizeof(type); \ ++ value; \ ++}) ++ ++ /* Make sure end is always >= buf */ ++ if (end < buf) { ++ end = ((void *)-1); ++ size = end - buf; ++ } ++ ++ while (*fmt) { ++ int read; ++ const char *old_fmt = fmt; ++ ++ read = format_decode(fmt, &spec); ++ ++ fmt += read; ++ ++ switch (spec.type) { ++ case FORMAT_TYPE_NONE: { ++ int copy = read; ++ if (str < end) { ++ if (copy > end - str) ++ copy = end - str; ++ memcpy(str, old_fmt, copy); ++ } ++ str += read; ++ break; ++ } ++ ++ case FORMAT_TYPE_WIDTH: ++ spec.field_width = get_arg(int); ++ break; ++ ++ case FORMAT_TYPE_PRECISION: ++ spec.precision = get_arg(int); ++ break; ++ ++ case FORMAT_TYPE_CHAR: ++ if (!(spec.flags & LEFT)) { ++ while (--spec.field_width > 0) { ++ if (str < end) ++ *str = ' '; ++ ++str; ++ } ++ } ++ c = (unsigned char) get_arg(char); ++ if (str < end) ++ *str = c; ++ ++str; ++ while (--spec.field_width > 0) { ++ if (str < end) ++ *str = ' '; ++ ++str; ++ } ++ break; ++ ++ case FORMAT_TYPE_STR: { ++ const char *str_arg = args; ++ size_t len = strlen(str_arg); ++ args += len + 1; ++ str = string(str, end, (char *)str_arg, spec); ++ break; ++ } ++ ++ case FORMAT_TYPE_PTR: ++ str = pointer(fmt+1, str, end, get_arg(void *), spec); ++ while (isalnum(*fmt)) ++ fmt++; ++ break; ++ ++ case FORMAT_TYPE_PERCENT_CHAR: ++ if (str < end) ++ *str = '%'; ++ ++str; ++ break; ++ ++ case FORMAT_TYPE_INVALID: ++ if (str < end) ++ *str = '%'; ++ ++str; ++ if (*fmt) { ++ if (str < end) ++ *str = *fmt; ++ ++str; ++ } else { ++ --fmt; ++ } ++ break; ++ ++ case FORMAT_TYPE_NRCHARS: ++ /* skip */ ++ break; ++ ++ default: ++ switch (spec.type) { ++ ++ case FORMAT_TYPE_LONG_LONG: ++ num = get_arg(long long); ++ break; ++ case FORMAT_TYPE_ULONG: ++ num = get_arg(unsigned long); ++ break; ++ case FORMAT_TYPE_LONG: ++ num = get_arg(unsigned long); ++ break; ++ case FORMAT_TYPE_SIZE_T: ++ num = get_arg(size_t); ++ break; ++ case FORMAT_TYPE_PTRDIFF: ++ num = get_arg(ptrdiff_t); ++ break; ++ case FORMAT_TYPE_USHORT: ++ num = get_arg(unsigned short); ++ break; ++ case FORMAT_TYPE_SHORT: ++ num = get_arg(short); ++ break; ++ case FORMAT_TYPE_UINT: ++ num = get_arg(unsigned int); ++ break; ++ default: ++ num = get_arg(int); ++ } ++ ++ str = number(str, end, num, spec); ++ } ++ } ++ ++ if (size > 0) { ++ if (str < end) ++ *str = '\0'; ++ else ++ end[-1] = '\0'; ++ } ++ ++#undef get_arg ++ ++ /* the trailing null byte doesn't count towards the total */ ++ return str - buf; ++} ++EXPORT_SYMBOL_GPL(bstr_printf); ++ ++/** ++ * bprintf - Parse a format string and place args' binary value in a buffer ++ * @bin_buf: The buffer to place args' binary value ++ * @size: The size of the buffer(by words(32bits), not characters) ++ * @fmt: The format string to use ++ * @...: Arguments for the format string ++ * ++ * The function returns the number of words(u32) written ++ * into @bin_buf. ++ */ ++int bprintf(u32 *bin_buf, size_t size, const char *fmt, ...) 
++{ ++ va_list args; ++ int ret; ++ ++ va_start(args, fmt); ++ ret = vbin_printf(bin_buf, size, fmt, args); ++ va_end(args); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(bprintf); ++ ++#endif /* CONFIG_BINARY_PRINTF */ ++ + /** + * vsscanf - Unformat a buffer into a list of arguments + * @buf: input buffer +Index: linux-2.6-tip/lib/zlib_inflate/inflate.h +=================================================================== +--- linux-2.6-tip.orig/lib/zlib_inflate/inflate.h ++++ linux-2.6-tip/lib/zlib_inflate/inflate.h +@@ -1,3 +1,6 @@ ++#ifndef INFLATE_H ++#define INFLATE_H ++ + /* inflate.h -- internal inflate state definition + * Copyright (C) 1995-2004 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h +@@ -105,3 +108,4 @@ struct inflate_state { + unsigned short work[288]; /* work area for code table building */ + code codes[ENOUGH]; /* space for code tables */ + }; ++#endif +Index: linux-2.6-tip/lib/zlib_inflate/inftrees.h +=================================================================== +--- linux-2.6-tip.orig/lib/zlib_inflate/inftrees.h ++++ linux-2.6-tip/lib/zlib_inflate/inftrees.h +@@ -1,3 +1,6 @@ ++#ifndef INFTREES_H ++#define INFTREES_H ++ + /* inftrees.h -- header to use inftrees.c + * Copyright (C) 1995-2005 Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h +@@ -53,3 +56,4 @@ typedef enum { + extern int zlib_inflate_table (codetype type, unsigned short *lens, + unsigned codes, code **table, + unsigned *bits, unsigned short *work); ++#endif +Index: linux-2.6-tip/mm/Makefile +=================================================================== +--- linux-2.6-tip.orig/mm/Makefile ++++ linux-2.6-tip/mm/Makefile +@@ -26,10 +26,15 @@ obj-$(CONFIG_SLOB) += slob.o + obj-$(CONFIG_MMU_NOTIFIER) += mmu_notifier.o + obj-$(CONFIG_SLAB) += slab.o + obj-$(CONFIG_SLUB) += slub.o ++obj-$(CONFIG_KMEMCHECK) += kmemcheck.o + obj-$(CONFIG_FAILSLAB) += failslab.o + obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o + obj-$(CONFIG_FS_XIP) += filemap_xip.o + obj-$(CONFIG_MIGRATION) += migrate.o ++ifdef CONFIG_HAVE_DYNAMIC_PER_CPU_AREA ++obj-$(CONFIG_SMP) += percpu.o ++else + obj-$(CONFIG_SMP) += allocpercpu.o ++endif + obj-$(CONFIG_QUICKLIST) += quicklist.o + obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o +Index: linux-2.6-tip/mm/allocpercpu.c +=================================================================== +--- linux-2.6-tip.orig/mm/allocpercpu.c ++++ linux-2.6-tip/mm/allocpercpu.c +@@ -99,45 +99,51 @@ static int __percpu_populate_mask(void * + __percpu_populate_mask((__pdata), (size), (gfp), &(mask)) + + /** +- * percpu_alloc_mask - initial setup of per-cpu data ++ * alloc_percpu - initial setup of per-cpu data + * @size: size of per-cpu object +- * @gfp: may sleep or not etc. +- * @mask: populate per-data for cpu's selected through mask bits ++ * @align: alignment + * +- * Populating per-cpu data for all online cpu's would be a typical use case, +- * which is simplified by the percpu_alloc() wrapper. +- * Per-cpu objects are populated with zeroed buffers. ++ * Allocate dynamic percpu area. Percpu objects are populated with ++ * zeroed buffers. 
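
The allocpercpu.c rework around this point keeps the older per-CPU scheme: __alloc_percpu() builds an array with one pointer per possible CPU, populates each slot with a zeroed object, and returns a disguised pointer to that array. A loose userspace analog of the layout, with calloc() and a fixed NR_CPUS standing in for the kernel allocators (all names below are invented for the sketch):

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 4

/* Allocate one zeroed object of 'size' bytes per CPU and return the
 * array of per-CPU pointers (the kernel additionally "disguises" it). */
static void **alloc_percpu_array(size_t size)
{
    void **pdata = calloc(NR_CPUS, sizeof(void *));
    int cpu;

    if (!pdata)
        return NULL;
    for (cpu = 0; cpu < NR_CPUS; cpu++) {
        pdata[cpu] = calloc(1, size);
        if (!pdata[cpu])
            goto fail;
    }
    return pdata;
fail:
    while (cpu--)
        free(pdata[cpu]);
    free(pdata);
    return NULL;
}

static void free_percpu_array(void **pdata)
{
    int cpu;

    if (!pdata)
        return;
    for (cpu = 0; cpu < NR_CPUS; cpu++)
        free(pdata[cpu]);
    free(pdata);
}

int main(void)
{
    void **counters = alloc_percpu_array(sizeof(long));

    if (!counters)
        return 1;
    *(long *)counters[2] += 1;  /* touch "CPU 2"'s copy */
    printf("cpu2 counter = %ld\n", *(long *)counters[2]);
    free_percpu_array(counters);
    return 0;
}
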
+ */ +-void *__percpu_alloc_mask(size_t size, gfp_t gfp, cpumask_t *mask) ++void *__alloc_percpu(size_t size, size_t align) + { + /* + * We allocate whole cache lines to avoid false sharing + */ + size_t sz = roundup(nr_cpu_ids * sizeof(void *), cache_line_size()); +- void *pdata = kzalloc(sz, gfp); ++ void *pdata = kzalloc(sz, GFP_KERNEL); + void *__pdata = __percpu_disguise(pdata); + ++ /* ++ * Can't easily make larger alignment work with kmalloc. WARN ++ * on it. Larger alignment should only be used for module ++ * percpu sections on SMP for which this path isn't used. ++ */ ++ WARN_ON_ONCE(align > SMP_CACHE_BYTES); ++ + if (unlikely(!pdata)) + return NULL; +- if (likely(!__percpu_populate_mask(__pdata, size, gfp, mask))) ++ if (likely(!__percpu_populate_mask(__pdata, size, GFP_KERNEL, ++ &cpu_possible_map))) + return __pdata; + kfree(pdata); + return NULL; + } +-EXPORT_SYMBOL_GPL(__percpu_alloc_mask); ++EXPORT_SYMBOL_GPL(__alloc_percpu); + + /** +- * percpu_free - final cleanup of per-cpu data ++ * free_percpu - final cleanup of per-cpu data + * @__pdata: object to clean up + * + * We simply clean up any per-cpu object left. No need for the client to + * track and specify through a bis mask which per-cpu objects are to free. + */ +-void percpu_free(void *__pdata) ++void free_percpu(void *__pdata) + { + if (unlikely(!__pdata)) + return; + __percpu_depopulate_mask(__pdata, &cpu_possible_map); + kfree(__percpu_disguise(__pdata)); + } +-EXPORT_SYMBOL_GPL(percpu_free); ++EXPORT_SYMBOL_GPL(free_percpu); +Index: linux-2.6-tip/mm/bootmem.c +=================================================================== +--- linux-2.6-tip.orig/mm/bootmem.c ++++ linux-2.6-tip/mm/bootmem.c +@@ -318,6 +318,8 @@ static int __init mark_bootmem(unsigned + pos = bdata->node_low_pfn; + } + BUG(); ++ ++ return 0; + } + + /** +@@ -382,7 +384,6 @@ int __init reserve_bootmem_node(pg_data_ + return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); + } + +-#ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE + /** + * reserve_bootmem - mark a page range as usable + * @addr: starting address of the range +@@ -403,7 +404,6 @@ int __init reserve_bootmem(unsigned long + + return mark_bootmem(start, end, 1, flags); + } +-#endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ + + static unsigned long align_idx(struct bootmem_data *bdata, unsigned long idx, + unsigned long step) +@@ -429,8 +429,8 @@ static unsigned long align_off(struct bo + } + + static void * __init alloc_bootmem_core(struct bootmem_data *bdata, +- unsigned long size, unsigned long align, +- unsigned long goal, unsigned long limit) ++ unsigned long size, unsigned long align, ++ unsigned long goal, unsigned long limit) + { + unsigned long fallback = 0; + unsigned long min, max, start, sidx, midx, step; +@@ -530,17 +530,34 @@ find_block: + return NULL; + } + ++static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata, ++ unsigned long size, unsigned long align, ++ unsigned long goal, unsigned long limit) ++{ ++#ifdef CONFIG_HAVE_ARCH_BOOTMEM ++ bootmem_data_t *p_bdata; ++ ++ p_bdata = bootmem_arch_preferred_node(bdata, size, align, goal, limit); ++ if (p_bdata) ++ return alloc_bootmem_core(p_bdata, size, align, goal, limit); ++#endif ++ return NULL; ++} ++ + static void * __init ___alloc_bootmem_nopanic(unsigned long size, + unsigned long align, + unsigned long goal, + unsigned long limit) + { + bootmem_data_t *bdata; ++ void *region; + + restart: +- list_for_each_entry(bdata, &bdata_list, list) { +- void *region; ++ region = alloc_arch_preferred_bootmem(NULL, size, 
align, goal, limit); ++ if (region) ++ return region; + ++ list_for_each_entry(bdata, &bdata_list, list) { + if (goal && bdata->node_low_pfn <= PFN_DOWN(goal)) + continue; + if (limit && bdata->node_min_pfn >= PFN_DOWN(limit)) +@@ -618,6 +635,10 @@ static void * __init ___alloc_bootmem_no + { + void *ptr; + ++ ptr = alloc_arch_preferred_bootmem(bdata, size, align, goal, limit); ++ if (ptr) ++ return ptr; ++ + ptr = alloc_bootmem_core(bdata, size, align, goal, limit); + if (ptr) + return ptr; +@@ -674,6 +695,10 @@ void * __init __alloc_bootmem_node_nopan + { + void *ptr; + ++ ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); ++ if (ptr) ++ return ptr; ++ + ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); + if (ptr) + return ptr; +Index: linux-2.6-tip/mm/failslab.c +=================================================================== +--- linux-2.6-tip.orig/mm/failslab.c ++++ linux-2.6-tip/mm/failslab.c +@@ -1,4 +1,5 @@ + #include ++#include + + static struct { + struct fault_attr attr; +Index: linux-2.6-tip/mm/filemap.c +=================================================================== +--- linux-2.6-tip.orig/mm/filemap.c ++++ linux-2.6-tip/mm/filemap.c +@@ -1823,7 +1823,7 @@ static size_t __iovec_copy_from_user_ina + int copy = min(bytes, iov->iov_len - base); + + base = 0; +- left = __copy_from_user_inatomic_nocache(vaddr, buf, copy); ++ left = __copy_from_user_inatomic(vaddr, buf, copy); + copied += copy; + bytes -= copy; + vaddr += copy; +@@ -1846,13 +1846,12 @@ size_t iov_iter_copy_from_user_atomic(st + char *kaddr; + size_t copied; + +- BUG_ON(!in_atomic()); ++// BUG_ON(!in_atomic()); + kaddr = kmap_atomic(page, KM_USER0); + if (likely(i->nr_segs == 1)) { + int left; + char __user *buf = i->iov->iov_base + i->iov_offset; +- left = __copy_from_user_inatomic_nocache(kaddr + offset, +- buf, bytes); ++ left = __copy_from_user_inatomic(kaddr + offset, buf, bytes); + copied = bytes - left; + } else { + copied = __iovec_copy_from_user_inatomic(kaddr + offset, +@@ -1880,7 +1879,7 @@ size_t iov_iter_copy_from_user(struct pa + if (likely(i->nr_segs == 1)) { + int left; + char __user *buf = i->iov->iov_base + i->iov_offset; +- left = __copy_from_user_nocache(kaddr + offset, buf, bytes); ++ left = __copy_from_user(kaddr + offset, buf, bytes); + copied = bytes - left; + } else { + copied = __iovec_copy_from_user_inatomic(kaddr + offset, +Index: linux-2.6-tip/mm/kmemcheck.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/mm/kmemcheck.c +@@ -0,0 +1,122 @@ ++#include ++#include ++#include ++#include ++#include ++ ++void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node) ++{ ++ struct page *shadow; ++ int pages; ++ int i; ++ ++ pages = 1 << order; ++ ++ /* ++ * With kmemcheck enabled, we need to allocate a memory area for the ++ * shadow bits as well. ++ */ ++ shadow = alloc_pages_node(node, flags | __GFP_NOTRACK, order); ++ if (!shadow) { ++ if (printk_ratelimit()) ++ printk(KERN_ERR "kmemcheck: failed to allocate " ++ "shadow bitmap\n"); ++ return; ++ } ++ ++ for(i = 0; i < pages; ++i) ++ page[i].shadow = page_address(&shadow[i]); ++ ++ /* ++ * Mark it as non-present for the MMU so that our accesses to ++ * this memory will trigger a page fault and let us analyze ++ * the memory accesses. 
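
kmemcheck_alloc_shadow() above pairs every tracked page with a freshly allocated shadow page and then hides the data pages from the MMU so each access faults and can be analyzed. Below is a toy userspace sketch of the shadow bookkeeping only, with one shadow byte recording whether the matching data byte was initialized; the page-fault interception that does the real work is left out, and all names are invented.

#include <stdio.h>
#include <string.h>

#define TRACKED_SIZE 16

static unsigned char data[TRACKED_SIZE];
static unsigned char shadow[TRACKED_SIZE];  /* 0 = uninitialized, 1 = initialized */

static void mark_initialized(size_t off, size_t len)
{
    memset(shadow + off, 1, len);
}

/* Report a read of an uninitialized byte, as kmemcheck would. */
static unsigned char checked_read(size_t off)
{
    if (!shadow[off])
        fprintf(stderr, "warning: read of uninitialized byte %zu\n", off);
    return data[off];
}

int main(void)
{
    data[0] = 42;
    mark_initialized(0, 1);

    checked_read(0);    /* fine: byte 0 was written first */
    checked_read(5);    /* warns: byte 5 never initialized */
    return 0;
}
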
++ */ ++ kmemcheck_hide_pages(page, pages); ++} ++ ++void kmemcheck_free_shadow(struct page *page, int order) ++{ ++ struct page *shadow; ++ int pages; ++ int i; ++ ++ if (!kmemcheck_page_is_tracked(page)) ++ return; ++ ++ pages = 1 << order; ++ ++ kmemcheck_show_pages(page, pages); ++ ++ shadow = virt_to_page(page[0].shadow); ++ ++ for(i = 0; i < pages; ++i) ++ page[i].shadow = NULL; ++ ++ __free_pages(shadow, order); ++} ++ ++void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, ++ size_t size) ++{ ++ /* ++ * Has already been memset(), which initializes the shadow for us ++ * as well. ++ */ ++ if (gfpflags & __GFP_ZERO) ++ return; ++ ++ /* No need to initialize the shadow of a non-tracked slab. */ ++ if (s->flags & SLAB_NOTRACK) ++ return; ++ ++ if (!kmemcheck_enabled || gfpflags & __GFP_NOTRACK) { ++ /* ++ * Allow notracked objects to be allocated from ++ * tracked caches. Note however that these objects ++ * will still get page faults on access, they just ++ * won't ever be flagged as uninitialized. If page ++ * faults are not acceptable, the slab cache itself ++ * should be marked NOTRACK. ++ */ ++ kmemcheck_mark_initialized(object, size); ++ } else if (!s->ctor) { ++ /* ++ * New objects should be marked uninitialized before ++ * they're returned to the called. ++ */ ++ kmemcheck_mark_uninitialized(object, size); ++ } ++} ++ ++void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size) ++{ ++ /* TODO: RCU freeing is unsupported for now; hide false positives. */ ++ if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU)) ++ kmemcheck_mark_freed(object, size); ++} ++ ++void kmemcheck_pagealloc_alloc(struct page *page, unsigned int order, ++ gfp_t gfpflags) ++{ ++ int pages; ++ ++ if (gfpflags & (__GFP_HIGHMEM | __GFP_NOTRACK)) ++ return; ++ ++ pages = 1 << order; ++ ++ /* ++ * NOTE: We choose to track GFP_ZERO pages too; in fact, they ++ * can become uninitialized by copying uninitialized memory ++ * into them. ++ */ ++ ++ /* XXX: Can use zone->node for node? */ ++ kmemcheck_alloc_shadow(page, order, gfpflags, -1); ++ ++ if (gfpflags & __GFP_ZERO) ++ kmemcheck_mark_initialized_pages(page, pages); ++ else ++ kmemcheck_mark_uninitialized_pages(page, pages); ++} +Index: linux-2.6-tip/mm/memory.c +=================================================================== +--- linux-2.6-tip.orig/mm/memory.c ++++ linux-2.6-tip/mm/memory.c +@@ -48,6 +48,8 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include +@@ -922,10 +924,13 @@ static unsigned long unmap_page_range(st + return addr; + } + +-#ifdef CONFIG_PREEMPT ++#if defined(CONFIG_PREEMPT) && !defined(CONFIG_PREEMPT_RT) + # define ZAP_BLOCK_SIZE (8 * PAGE_SIZE) + #else +-/* No preempt: go for improved straight-line efficiency */ ++/* ++ * No preempt: go for improved straight-line efficiency ++ * on PREEMPT_RT this is not a critical latency-path. ++ */ + # define ZAP_BLOCK_SIZE (1024 * PAGE_SIZE) + #endif + +@@ -955,17 +960,14 @@ static unsigned long unmap_page_range(st + * ensure that any thus-far unmapped pages are flushed before unmap_vmas() + * drops the lock and schedules. 
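
For scale, assuming 4 KiB pages, the preemptible setting of 8 * PAGE_SIZE lets unmap_vmas() cover roughly 32 KiB of address space between rescheduling checks, while the 1024 * PAGE_SIZE value used otherwise (including, per the new comment, PREEMPT_RT) batches roughly 4 MiB at a time.
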
+ */ +-unsigned long unmap_vmas(struct mmu_gather **tlbp, ++unsigned long unmap_vmas(struct mmu_gather *tlb, + struct vm_area_struct *vma, unsigned long start_addr, + unsigned long end_addr, unsigned long *nr_accounted, + struct zap_details *details) + { + long zap_work = ZAP_BLOCK_SIZE; +- unsigned long tlb_start = 0; /* For tlb_finish_mmu */ +- int tlb_start_valid = 0; + unsigned long start = start_addr; + spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL; +- int fullmm = (*tlbp)->fullmm; + struct mm_struct *mm = vma->vm_mm; + + mmu_notifier_invalidate_range_start(mm, start_addr, end_addr); +@@ -986,11 +988,6 @@ unsigned long unmap_vmas(struct mmu_gath + untrack_pfn_vma(vma, 0, 0); + + while (start != end) { +- if (!tlb_start_valid) { +- tlb_start = start; +- tlb_start_valid = 1; +- } +- + if (unlikely(is_vm_hugetlb_page(vma))) { + /* + * It is undesirable to test vma->vm_file as it +@@ -1011,7 +1008,7 @@ unsigned long unmap_vmas(struct mmu_gath + + start = end; + } else +- start = unmap_page_range(*tlbp, vma, ++ start = unmap_page_range(tlb, vma, + start, end, &zap_work, details); + + if (zap_work > 0) { +@@ -1019,19 +1016,13 @@ unsigned long unmap_vmas(struct mmu_gath + break; + } + +- tlb_finish_mmu(*tlbp, tlb_start, start); +- + if (need_resched() || + (i_mmap_lock && spin_needbreak(i_mmap_lock))) { +- if (i_mmap_lock) { +- *tlbp = NULL; ++ if (i_mmap_lock) + goto out; +- } + cond_resched(); + } + +- *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm); +- tlb_start_valid = 0; + zap_work = ZAP_BLOCK_SIZE; + } + } +@@ -1051,16 +1042,15 @@ unsigned long zap_page_range(struct vm_a + unsigned long size, struct zap_details *details) + { + struct mm_struct *mm = vma->vm_mm; +- struct mmu_gather *tlb; ++ struct mmu_gather tlb; + unsigned long end = address + size; + unsigned long nr_accounted = 0; + + lru_add_drain(); +- tlb = tlb_gather_mmu(mm, 0); ++ tlb_gather_mmu(&tlb, mm, 0); + update_hiwater_rss(mm); + end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details); +- if (tlb) +- tlb_finish_mmu(tlb, address, end); ++ tlb_finish_mmu(&tlb, address, end); + return end; + } + +@@ -1667,7 +1657,7 @@ int remap_pfn_range(struct vm_area_struc + */ + if (addr == vma->vm_start && end == vma->vm_end) { + vma->vm_pgoff = pfn; +- vma->vm_flags |= VM_PFNMAP_AT_MMAP; ++ vma->vm_flags |= VM_PFN_AT_MMAP; + } else if (is_cow_mapping(vma->vm_flags)) + return -EINVAL; + +@@ -1680,7 +1670,7 @@ int remap_pfn_range(struct vm_area_struc + * needed from higher level routine calling unmap_vmas + */ + vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP); +- vma->vm_flags &= ~VM_PFNMAP_AT_MMAP; ++ vma->vm_flags &= ~VM_PFN_AT_MMAP; + return -EINVAL; + } + +@@ -2902,6 +2892,28 @@ unlock: + return 0; + } + ++void pagefault_disable(void) ++{ ++ current->pagefault_disabled++; ++ /* ++ * make sure to have issued the store before a pagefault ++ * can hit. ++ */ ++ barrier(); ++} ++EXPORT_SYMBOL(pagefault_disable); ++ ++void pagefault_enable(void) ++{ ++ /* ++ * make sure to issue those last loads/stores before enabling ++ * the pagefault handler again. 
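
pagefault_disable()/pagefault_enable() above only increment and decrement current->pagefault_disabled, with barrier() on the inner side so the compiler cannot reorder the counter update past the code it protects. A userspace analog using a thread-local counter and a compiler-only barrier; the macro and variable names here are invented for the sketch.

#include <stdio.h>

/* Compiler-only barrier, same idea as the kernel's barrier(). */
#define compiler_barrier() __asm__ __volatile__("" ::: "memory")

static __thread int faults_disabled;

static void fault_disable(void)
{
    faults_disabled++;
    /* keep the compiler from sinking the increment below the protected code */
    compiler_barrier();
}

static void fault_enable(void)
{
    /* keep the compiler from hoisting protected code below the decrement */
    compiler_barrier();
    faults_disabled--;
}

int main(void)
{
    fault_disable();
    printf("disabled count: %d\n", faults_disabled);   /* 1 */
    fault_enable();
    printf("disabled count: %d\n", faults_disabled);   /* 0 */
    return 0;
}
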
++ */ ++ barrier(); ++ current->pagefault_disabled--; ++} ++EXPORT_SYMBOL(pagefault_enable); ++ + /* + * By the time we get here, we already hold the mm semaphore + */ +Index: linux-2.6-tip/mm/mempolicy.c +=================================================================== +--- linux-2.6-tip.orig/mm/mempolicy.c ++++ linux-2.6-tip/mm/mempolicy.c +@@ -1421,7 +1421,7 @@ unsigned slab_node(struct mempolicy *pol + } + + default: +- BUG(); ++ panic("slab_node: bad policy mode!"); + } + } + +Index: linux-2.6-tip/mm/page_alloc.c +=================================================================== +--- linux-2.6-tip.orig/mm/page_alloc.c ++++ linux-2.6-tip/mm/page_alloc.c +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -162,6 +163,53 @@ static unsigned long __meminitdata dma_r + EXPORT_SYMBOL(movable_zone); + #endif /* CONFIG_ARCH_POPULATES_NODE_MAP */ + ++#ifdef CONFIG_PREEMPT_RT ++static DEFINE_PER_CPU_LOCKED(int, pcp_locks); ++#endif ++ ++static inline void __lock_cpu_pcp(unsigned long *flags, int cpu) ++{ ++#ifdef CONFIG_PREEMPT_RT ++ spin_lock(&__get_cpu_lock(pcp_locks, cpu)); ++ flags = 0; ++#else ++ local_irq_save(*flags); ++#endif ++} ++ ++static inline void lock_cpu_pcp(unsigned long *flags, int *this_cpu) ++{ ++#ifdef CONFIG_PREEMPT_RT ++ (void)get_cpu_var_locked(pcp_locks, this_cpu); ++ flags = 0; ++#else ++ local_irq_save(*flags); ++ *this_cpu = smp_processor_id(); ++#endif ++} ++ ++static inline void unlock_cpu_pcp(unsigned long flags, int this_cpu) ++{ ++#ifdef CONFIG_PREEMPT_RT ++ put_cpu_var_locked(pcp_locks, this_cpu); ++#else ++ local_irq_restore(flags); ++#endif ++} ++ ++static struct per_cpu_pageset * ++get_zone_pcp(struct zone *zone, unsigned long *flags, int *this_cpu) ++{ ++ lock_cpu_pcp(flags, this_cpu); ++ return zone_pcp(zone, *this_cpu); ++} ++ ++static void ++put_zone_pcp(struct zone *zone, unsigned long flags, int this_cpu) ++{ ++ unlock_cpu_pcp(flags, this_cpu); ++} ++ + #if MAX_NUMNODES > 1 + int nr_node_ids __read_mostly = MAX_NUMNODES; + EXPORT_SYMBOL(nr_node_ids); +@@ -516,38 +564,45 @@ static inline int free_pages_check(struc + * And clear the zone's pages_scanned counter, to hold off the "all pages are + * pinned" detection logic. 
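
lock_cpu_pcp()/unlock_cpu_pcp() above replace local_irq_save() with a per-CPU spinlock when PREEMPT_RT is enabled, so the per-CPU page lists stay serialized while the section remains preemptible. A very loose userspace analog of the "one lock per CPU, take the lock of the CPU you are on" pattern, with pthread mutexes and sched_getcpu() standing in for the kernel primitives (build with -pthread; names invented):

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

#define NR_CPUS 8

static pthread_mutex_t pcp_locks[NR_CPUS];
static long pcp_count[NR_CPUS];

/* Serialize against other users of this CPU's data while staying
 * preemptible: take only the lock that belongs to the current CPU. */
static int lock_cpu(void)
{
    int cpu = sched_getcpu();

    if (cpu < 0 || cpu >= NR_CPUS)
        cpu = 0;            /* fall back for the sketch */
    pthread_mutex_lock(&pcp_locks[cpu]);
    return cpu;
}

static void unlock_cpu(int cpu)
{
    pthread_mutex_unlock(&pcp_locks[cpu]);
}

int main(void)
{
    int i, cpu;

    for (i = 0; i < NR_CPUS; i++)
        pthread_mutex_init(&pcp_locks[i], NULL);

    cpu = lock_cpu();
    pcp_count[cpu]++;
    printf("cpu %d count %ld\n", cpu, pcp_count[cpu]);
    unlock_cpu(cpu);
    return 0;
}
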
+ */ +-static void free_pages_bulk(struct zone *zone, int count, +- struct list_head *list, int order) ++static void ++free_pages_bulk(struct zone *zone, struct list_head *list, int order) + { +- spin_lock(&zone->lock); ++ unsigned long flags; ++ ++ spin_lock_irqsave(&zone->lock, flags); + zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); + zone->pages_scanned = 0; +- while (count--) { +- struct page *page; + +- VM_BUG_ON(list_empty(list)); +- page = list_entry(list->prev, struct page, lru); +- /* have to delete it as __free_one_page list manipulates */ ++ while (!list_empty(list)) { ++ struct page *page = list_first_entry(list, struct page, lru); ++ + list_del(&page->lru); + __free_one_page(page, zone, order); ++#ifdef CONFIG_PREEMPT_RT ++ cond_resched_lock(&zone->lock); ++#endif + } +- spin_unlock(&zone->lock); ++ spin_unlock_irqrestore(&zone->lock, flags); + } + + static void free_one_page(struct zone *zone, struct page *page, int order) + { +- spin_lock(&zone->lock); ++ unsigned long flags; ++ ++ spin_lock_irqsave(&zone->lock, flags); ++ + zone_clear_flag(zone, ZONE_ALL_UNRECLAIMABLE); + zone->pages_scanned = 0; + __free_one_page(page, zone, order); +- spin_unlock(&zone->lock); ++ spin_unlock_irqrestore(&zone->lock, flags); + } + + static void __free_pages_ok(struct page *page, unsigned int order) + { + unsigned long flags; +- int i; +- int bad = 0; ++ int i, this_cpu, bad = 0; ++ ++ kmemcheck_free_shadow(page, order); + + for (i = 0 ; i < (1 << order) ; ++i) + bad += free_pages_check(page + i); +@@ -562,10 +617,10 @@ static void __free_pages_ok(struct page + arch_free_page(page, order); + kernel_map_pages(page, 1 << order, 0); + +- local_irq_save(flags); +- __count_vm_events(PGFREE, 1 << order); ++ lock_cpu_pcp(&flags, &this_cpu); ++ count_vm_events(PGFREE, 1 << order); ++ unlock_cpu_pcp(flags, this_cpu); + free_one_page(page_zone(page), page, order); +- local_irq_restore(flags); + } + + /* +@@ -885,6 +940,16 @@ static int rmqueue_bulk(struct zone *zon + return i; + } + ++static void ++isolate_pcp_pages(int count, struct list_head *src, struct list_head *dst) ++{ ++ while (count--) { ++ struct page *page = list_last_entry(src, struct page, lru); ++ list_move(&page->lru, dst); ++ } ++} ++ ++ + #ifdef CONFIG_NUMA + /* + * Called from the vmstat counter updater to drain pagesets of this +@@ -896,17 +961,20 @@ static int rmqueue_bulk(struct zone *zon + */ + void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) + { ++ LIST_HEAD(free_list); + unsigned long flags; + int to_drain; ++ int this_cpu; + +- local_irq_save(flags); ++ lock_cpu_pcp(&flags, &this_cpu); + if (pcp->count >= pcp->batch) + to_drain = pcp->batch; + else + to_drain = pcp->count; +- free_pages_bulk(zone, to_drain, &pcp->list, 0); ++ isolate_pcp_pages(to_drain, &pcp->list, &free_list); + pcp->count -= to_drain; +- local_irq_restore(flags); ++ unlock_cpu_pcp(flags, this_cpu); ++ free_pages_bulk(zone, &free_list, 0); + } + #endif + +@@ -925,17 +993,23 @@ static void drain_pages(unsigned int cpu + for_each_zone(zone) { + struct per_cpu_pageset *pset; + struct per_cpu_pages *pcp; ++ LIST_HEAD(free_list); + + if (!populated_zone(zone)) + continue; + ++ __lock_cpu_pcp(&flags, cpu); + pset = zone_pcp(zone, cpu); +- ++ if (!pset) { ++ unlock_cpu_pcp(flags, cpu); ++ WARN_ON(1); ++ continue; ++ } + pcp = &pset->pcp; +- local_irq_save(flags); +- free_pages_bulk(zone, pcp->count, &pcp->list, 0); ++ isolate_pcp_pages(pcp->count, &pcp->list, &free_list); + pcp->count = 0; +- local_irq_restore(flags); ++ unlock_cpu_pcp(flags, 
cpu); ++ free_pages_bulk(zone, &free_list, 0); + } + } + +@@ -947,12 +1021,52 @@ void drain_local_pages(void *arg) + drain_pages(smp_processor_id()); + } + ++#ifdef CONFIG_PREEMPT_RT ++static void drain_local_pages_work(struct work_struct *wrk) ++{ ++ drain_pages(smp_processor_id()); ++} ++#endif ++ + /* + * Spill all the per-cpu pages from all CPUs back into the buddy allocator + */ + void drain_all_pages(void) + { ++#ifdef CONFIG_PREEMPT_RT ++ /* ++ * HACK!!!!! ++ * For RT we can't use IPIs to run drain_local_pages, since ++ * that code will call spin_locks that will now sleep. ++ * But, schedule_on_each_cpu will call kzalloc, which will ++ * call page_alloc which was what calls this. ++ * ++ * Luckily, there's a condition to get here, and that is if ++ * the order passed in to alloc_pages is greater than 0 ++ * (alloced more than a page size). The slabs only allocate ++ * what is needed, and the allocation made by schedule_on_each_cpu ++ * does an alloc of "sizeof(void *)*nr_cpu_ids". ++ * ++ * So we can safely call schedule_on_each_cpu if that number ++ * is less than a page. Otherwise don't bother. At least warn of ++ * this issue. ++ * ++ * And yes, this is one big hack. Please fix ;-) ++ */ ++ if (sizeof(void *)*nr_cpu_ids < PAGE_SIZE) ++ schedule_on_each_cpu(drain_local_pages_work); ++ else { ++ static int once; ++ if (!once) { ++ printk(KERN_ERR "Can't drain all CPUS due to possible recursion\n"); ++ once = 1; ++ } ++ drain_local_pages(NULL); ++ } ++ ++#else + on_each_cpu(drain_local_pages, NULL, 1); ++#endif + } + + #ifdef CONFIG_HIBERNATION +@@ -997,8 +1111,12 @@ void mark_free_pages(struct zone *zone) + static void free_hot_cold_page(struct page *page, int cold) + { + struct zone *zone = page_zone(page); ++ struct per_cpu_pageset *pset; + struct per_cpu_pages *pcp; + unsigned long flags; ++ int this_cpu; ++ ++ kmemcheck_free_shadow(page, 0); + + if (PageAnon(page)) + page->mapping = NULL; +@@ -1012,9 +1130,11 @@ static void free_hot_cold_page(struct pa + arch_free_page(page, 0); + kernel_map_pages(page, 1, 0); + +- pcp = &zone_pcp(zone, get_cpu())->pcp; +- local_irq_save(flags); +- __count_vm_event(PGFREE); ++ pset = get_zone_pcp(zone, &flags, &this_cpu); ++ pcp = &pset->pcp; ++ ++ count_vm_event(PGFREE); ++ + if (cold) + list_add_tail(&page->lru, &pcp->list); + else +@@ -1022,11 +1142,14 @@ static void free_hot_cold_page(struct pa + set_page_private(page, get_pageblock_migratetype(page)); + pcp->count++; + if (pcp->count >= pcp->high) { +- free_pages_bulk(zone, pcp->batch, &pcp->list, 0); ++ LIST_HEAD(free_list); ++ ++ isolate_pcp_pages(pcp->batch, &pcp->list, &free_list); + pcp->count -= pcp->batch; +- } +- local_irq_restore(flags); +- put_cpu(); ++ put_zone_pcp(zone, flags, this_cpu); ++ free_pages_bulk(zone, &free_list, 0); ++ } else ++ put_zone_pcp(zone, flags, this_cpu); + } + + void free_hot_page(struct page *page) +@@ -1053,6 +1176,16 @@ void split_page(struct page *page, unsig + + VM_BUG_ON(PageCompound(page)); + VM_BUG_ON(!page_count(page)); ++ ++#ifdef CONFIG_KMEMCHECK ++ /* ++ * Split shadow pages too, because free(page[0]) would ++ * otherwise free the whole shadow. 
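
free_hot_cold_page() and the drain paths above share one pattern: while the per-CPU lock is held, pages are only detached onto a private list via isolate_pcp_pages(), and free_pages_bulk() does the heavier buddy work after that lock is dropped. A small sketch of the same "detach under the lock, process outside it" idea with a plain singly linked list (userspace stand-in, not the kernel list API):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
    struct node *next;
    int val;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *pending;    /* protected by list_lock */

static void add_pending(int val)
{
    struct node *n = malloc(sizeof(*n));

    if (!n)
        return;
    n->val = val;
    pthread_mutex_lock(&list_lock);
    n->next = pending;
    pending = n;
    pthread_mutex_unlock(&list_lock);
}

static void drain_pending(void)
{
    struct node *batch;

    /* detach the whole list while holding the lock... */
    pthread_mutex_lock(&list_lock);
    batch = pending;
    pending = NULL;
    pthread_mutex_unlock(&list_lock);

    /* ...and do the slow work after dropping it */
    while (batch) {
        struct node *next = batch->next;

        printf("freeing %d\n", batch->val);
        free(batch);
        batch = next;
    }
}

int main(void)
{
    add_pending(1);
    add_pending(2);
    drain_pending();
    return 0;
}
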
++ */ ++ if (kmemcheck_page_is_tracked(page)) ++ split_page(virt_to_page(page[0].shadow), order); ++#endif ++ + for (i = 1; i < (1 << order); i++) + set_page_refcounted(page + i); + } +@@ -1068,16 +1201,15 @@ static struct page *buffered_rmqueue(str + unsigned long flags; + struct page *page; + int cold = !!(gfp_flags & __GFP_COLD); +- int cpu; ++ struct per_cpu_pageset *pset; + int migratetype = allocflags_to_migratetype(gfp_flags); ++ int this_cpu; + + again: +- cpu = get_cpu(); ++ pset = get_zone_pcp(zone, &flags, &this_cpu); + if (likely(order == 0)) { +- struct per_cpu_pages *pcp; ++ struct per_cpu_pages *pcp = &pset->pcp; + +- pcp = &zone_pcp(zone, cpu)->pcp; +- local_irq_save(flags); + if (!pcp->count) { + pcp->count = rmqueue_bulk(zone, 0, + pcp->batch, &pcp->list, migratetype); +@@ -1106,7 +1238,7 @@ again: + list_del(&page->lru); + pcp->count--; + } else { +- spin_lock_irqsave(&zone->lock, flags); ++ spin_lock(&zone->lock); + page = __rmqueue(zone, order, migratetype); + spin_unlock(&zone->lock); + if (!page) +@@ -1115,8 +1247,7 @@ again: + + __count_zone_vm_events(PGALLOC, zone, 1 << order); + zone_statistics(preferred_zone, zone); +- local_irq_restore(flags); +- put_cpu(); ++ put_zone_pcp(zone, flags, this_cpu); + + VM_BUG_ON(bad_range(zone, page)); + if (prep_new_page(page, order, gfp_flags)) +@@ -1124,8 +1255,7 @@ again: + return page; + + failed: +- local_irq_restore(flags); +- put_cpu(); ++ put_zone_pcp(zone, flags, this_cpu); + return NULL; + } + +@@ -1479,6 +1609,8 @@ __alloc_pages_internal(gfp_t gfp_mask, u + unsigned long did_some_progress; + unsigned long pages_reclaimed = 0; + ++ lockdep_trace_alloc(gfp_mask); ++ + might_sleep_if(wait); + + if (should_fail_alloc_page(gfp_mask, order)) +@@ -1578,12 +1710,15 @@ nofail_alloc: + */ + cpuset_update_task_memory_state(); + p->flags |= PF_MEMALLOC; ++ ++ lockdep_set_current_reclaim_state(gfp_mask); + reclaim_state.reclaimed_slab = 0; + p->reclaim_state = &reclaim_state; + + did_some_progress = try_to_free_pages(zonelist, order, gfp_mask); + + p->reclaim_state = NULL; ++ lockdep_clear_current_reclaim_state(); + p->flags &= ~PF_MEMALLOC; + + cond_resched(); +@@ -1667,7 +1802,10 @@ nopage: + dump_stack(); + show_mem(); + } ++ return page; + got_pg: ++ if (kmemcheck_enabled) ++ kmemcheck_pagealloc_alloc(page, order, gfp_mask); + return page; + } + EXPORT_SYMBOL(__alloc_pages_internal); +@@ -2134,7 +2272,7 @@ static int find_next_best_node(int node, + int n, val; + int min_val = INT_MAX; + int best_node = -1; +- node_to_cpumask_ptr(tmp, 0); ++ const struct cpumask *tmp = cpumask_of_node(0); + + /* Use the local node if we haven't already */ + if (!node_isset(node, *used_node_mask)) { +@@ -2155,8 +2293,8 @@ static int find_next_best_node(int node, + val += (n < node); + + /* Give preference to headless and unused nodes */ +- node_to_cpumask_ptr_next(tmp, n); +- if (!cpus_empty(*tmp)) ++ tmp = cpumask_of_node(n); ++ if (!cpumask_empty(tmp)) + val += PENALTY_FOR_NODE_WITH_CPUS; + + /* Slight preference for less loaded node */ +@@ -2814,12 +2952,27 @@ static inline void free_zone_pagesets(in + struct zone *zone; + + for_each_zone(zone) { +- struct per_cpu_pageset *pset = zone_pcp(zone, cpu); ++ unsigned long flags; ++ struct per_cpu_pageset *pset; ++ ++ /* ++ * On PREEMPT_RT the allocator is preemptible, therefore ++ * kstopmachine can preempt a process in the middle of an ++ * allocation, freeing the pset underneath such a process ++ * isn't a good idea. 
++ * ++ * Take the per-cpu pcp lock to allow the task to complete ++ * before we free it. New tasks will be held off by the ++ * cpu_online() check in get_cpu_var_locked(). ++ */ ++ __lock_cpu_pcp(&flags, cpu); ++ pset = zone_pcp(zone, cpu); ++ zone_pcp(zone, cpu) = NULL; ++ unlock_cpu_pcp(flags, cpu); + + /* Free per_cpu_pageset if it is slab allocated */ + if (pset != &boot_pageset[cpu]) + kfree(pset); +- zone_pcp(zone, cpu) = NULL; + } + } + +Index: linux-2.6-tip/mm/percpu.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/mm/percpu.c +@@ -0,0 +1,1326 @@ ++/* ++ * linux/mm/percpu.c - percpu memory allocator ++ * ++ * Copyright (C) 2009 SUSE Linux Products GmbH ++ * Copyright (C) 2009 Tejun Heo ++ * ++ * This file is released under the GPLv2. ++ * ++ * This is percpu allocator which can handle both static and dynamic ++ * areas. Percpu areas are allocated in chunks in vmalloc area. Each ++ * chunk is consisted of num_possible_cpus() units and the first chunk ++ * is used for static percpu variables in the kernel image (special ++ * boot time alloc/init handling necessary as these areas need to be ++ * brought up before allocation services are running). Unit grows as ++ * necessary and all units grow or shrink in unison. When a chunk is ++ * filled up, another chunk is allocated. ie. in vmalloc area ++ * ++ * c0 c1 c2 ++ * ------------------- ------------------- ------------ ++ * | u0 | u1 | u2 | u3 | | u0 | u1 | u2 | u3 | | u0 | u1 | u ++ * ------------------- ...... ------------------- .... ------------ ++ * ++ * Allocation is done in offset-size areas of single unit space. Ie, ++ * an area of 512 bytes at 6k in c1 occupies 512 bytes at 6k of c1:u0, ++ * c1:u1, c1:u2 and c1:u3. Percpu access can be done by configuring ++ * percpu base registers UNIT_SIZE apart. ++ * ++ * There are usually many small percpu allocations many of them as ++ * small as 4 bytes. The allocator organizes chunks into lists ++ * according to free size and tries to allocate from the fullest one. ++ * Each chunk keeps the maximum contiguous area size hint which is ++ * guaranteed to be eqaul to or larger than the maximum contiguous ++ * area in the chunk. This helps the allocator not to iterate the ++ * chunk maps unnecessarily. ++ * ++ * Allocation state in each chunk is kept using an array of integers ++ * on chunk->map. A positive value in the map represents a free ++ * region and negative allocated. Allocation inside a chunk is done ++ * by scanning this map sequentially and serving the first matching ++ * entry. This is mostly copied from the percpu_modalloc() allocator. ++ * Chunks are also linked into a rb tree to ease address to chunk ++ * mapping during free. ++ * ++ * To use this allocator, arch code should do the followings. 
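
The header comment above lays out chunks holding one equally sized unit per possible CPU, so CPU c's copy of an area allocated at offset off sits at chunk_base + c * unit_size + off. A tiny worked sketch of that address arithmetic, with calloc() playing the role of the vmalloc-backed chunk and the sizes chosen only for the example:

#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS   4
#define UNIT_SIZE 4096    /* bytes reserved per CPU in one chunk */

/* CPU 'cpu' sees the object allocated at 'offset' here. */
static void *per_cpu_ptr(void *chunk_base, int cpu, size_t offset)
{
    return (char *)chunk_base + cpu * UNIT_SIZE + offset;
}

int main(void)
{
    void *chunk = calloc(NR_CPUS, UNIT_SIZE);
    size_t offset = 512;   /* pretend the allocator handed out this area */
    int cpu;

    if (!chunk)
        return 1;

    /* each CPU's copy is UNIT_SIZE apart from the next one */
    for (cpu = 0; cpu < NR_CPUS; cpu++)
        *(long *)per_cpu_ptr(chunk, cpu, offset) = cpu * 10;

    for (cpu = 0; cpu < NR_CPUS; cpu++)
        printf("cpu%d copy at +%zu bytes: %ld\n", cpu,
               cpu * (size_t)UNIT_SIZE + offset,
               *(long *)per_cpu_ptr(chunk, cpu, offset));

    free(chunk);
    return 0;
}
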
++ * ++ * - define CONFIG_HAVE_DYNAMIC_PER_CPU_AREA ++ * ++ * - define __addr_to_pcpu_ptr() and __pcpu_ptr_to_addr() to translate ++ * regular address to percpu pointer and back if they need to be ++ * different from the default ++ * ++ * - use pcpu_setup_first_chunk() during percpu area initialization to ++ * setup the first chunk containing the kernel static percpu area ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++#define PCPU_SLOT_BASE_SHIFT 5 /* 1-31 shares the same slot */ ++#define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ ++ ++/* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ ++#ifndef __addr_to_pcpu_ptr ++#define __addr_to_pcpu_ptr(addr) \ ++ (void *)((unsigned long)(addr) - (unsigned long)pcpu_base_addr \ ++ + (unsigned long)__per_cpu_start) ++#endif ++#ifndef __pcpu_ptr_to_addr ++#define __pcpu_ptr_to_addr(ptr) \ ++ (void *)((unsigned long)(ptr) + (unsigned long)pcpu_base_addr \ ++ - (unsigned long)__per_cpu_start) ++#endif ++ ++struct pcpu_chunk { ++ struct list_head list; /* linked to pcpu_slot lists */ ++ struct rb_node rb_node; /* key is chunk->vm->addr */ ++ int free_size; /* free bytes in the chunk */ ++ int contig_hint; /* max contiguous size hint */ ++ struct vm_struct *vm; /* mapped vmalloc region */ ++ int map_used; /* # of map entries used */ ++ int map_alloc; /* # of map entries allocated */ ++ int *map; /* allocation map */ ++ bool immutable; /* no [de]population allowed */ ++ struct page **page; /* points to page array */ ++ struct page *page_ar[]; /* #cpus * UNIT_PAGES */ ++}; ++ ++static int pcpu_unit_pages __read_mostly; ++static int pcpu_unit_size __read_mostly; ++static int pcpu_chunk_size __read_mostly; ++static int pcpu_nr_slots __read_mostly; ++static size_t pcpu_chunk_struct_size __read_mostly; ++ ++/* the address of the first chunk which starts with the kernel static area */ ++void *pcpu_base_addr __read_mostly; ++EXPORT_SYMBOL_GPL(pcpu_base_addr); ++ ++/* optional reserved chunk, only accessible for reserved allocations */ ++static struct pcpu_chunk *pcpu_reserved_chunk; ++/* offset limit of the reserved chunk */ ++static int pcpu_reserved_chunk_limit; ++ ++/* ++ * Synchronization rules. ++ * ++ * There are two locks - pcpu_alloc_mutex and pcpu_lock. The former ++ * protects allocation/reclaim paths, chunks and chunk->page arrays. ++ * The latter is a spinlock and protects the index data structures - ++ * chunk slots, rbtree, chunks and area maps in chunks. ++ * ++ * During allocation, pcpu_alloc_mutex is kept locked all the time and ++ * pcpu_lock is grabbed and released as necessary. All actual memory ++ * allocations are done using GFP_KERNEL with pcpu_lock released. ++ * ++ * Free path accesses and alters only the index data structures, so it ++ * can be safely called from atomic context. When memory needs to be ++ * returned to the system, free path schedules reclaim_work which ++ * grabs both pcpu_alloc_mutex and pcpu_lock, unlinks chunks to be ++ * reclaimed, release both locks and frees the chunks. Note that it's ++ * necessary to grab both locks to remove a chunk from circulation as ++ * allocation path might be referencing the chunk with only ++ * pcpu_alloc_mutex locked. 
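The header comment above describes a chunk as one unit per possible CPU laid out back to back, with __addr_to_pcpu_ptr()/__pcpu_ptr_to_addr() translating between canonical percpu pointers and unit addresses. The stand-alone C sketch below only models that addressing scheme (base + cpu * unit_size + offset); the CPU count, unit size and offset are made-up demo values, not anything taken from this patch.

/*
 * Toy model of the chunk/unit addressing described above: a chunk holds
 * one unit per CPU, and an allocation at offset `off' occupies that same
 * offset inside every CPU's unit.
 * Build: cc -o pcpu-layout pcpu-layout.c && ./pcpu-layout
 */
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS    4            /* assumed CPU count for the demo */
#define UNIT_SIZE  (16 * 1024)  /* assumed unit size: 16K per CPU  */

static char *chunk_base;        /* plays the role of chunk->vm->addr */

/* address of @off inside @cpu's unit: base + cpu * UNIT_SIZE + off */
static void *pcpu_addr(unsigned int cpu, size_t off)
{
        return chunk_base + (size_t)cpu * UNIT_SIZE + off;
}

int main(void)
{
        size_t off = 512;       /* pretend the allocator returned offset 512 */
        unsigned int cpu;

        chunk_base = calloc(NR_CPUS, UNIT_SIZE);
        if (!chunk_base)
                return 1;

        /* each CPU sees its own copy of the area at the same offset */
        for (cpu = 0; cpu < NR_CPUS; cpu++)
                sprintf(pcpu_addr(cpu, off), "cpu%u's private data", cpu);

        for (cpu = 0; cpu < NR_CPUS; cpu++)
                printf("cpu %u, offset %zu -> \"%s\"\n",
                       cpu, off, (char *)pcpu_addr(cpu, off));

        free(chunk_base);
        return 0;
}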
++ */ ++static DEFINE_MUTEX(pcpu_alloc_mutex); /* protects whole alloc and reclaim */ ++static DEFINE_SPINLOCK(pcpu_lock); /* protects index data structures */ ++ ++static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ ++static struct rb_root pcpu_addr_root = RB_ROOT; /* chunks by address */ ++ ++/* reclaim work to release fully free chunks, scheduled from free path */ ++static void pcpu_reclaim(struct work_struct *work); ++static DECLARE_WORK(pcpu_reclaim_work, pcpu_reclaim); ++ ++static int __pcpu_size_to_slot(int size) ++{ ++ int highbit = fls(size); /* size is in bytes */ ++ return max(highbit - PCPU_SLOT_BASE_SHIFT + 2, 1); ++} ++ ++static int pcpu_size_to_slot(int size) ++{ ++ if (size == pcpu_unit_size) ++ return pcpu_nr_slots - 1; ++ return __pcpu_size_to_slot(size); ++} ++ ++static int pcpu_chunk_slot(const struct pcpu_chunk *chunk) ++{ ++ if (chunk->free_size < sizeof(int) || chunk->contig_hint < sizeof(int)) ++ return 0; ++ ++ return pcpu_size_to_slot(chunk->free_size); ++} ++ ++static int pcpu_page_idx(unsigned int cpu, int page_idx) ++{ ++ return cpu * pcpu_unit_pages + page_idx; ++} ++ ++static struct page **pcpu_chunk_pagep(struct pcpu_chunk *chunk, ++ unsigned int cpu, int page_idx) ++{ ++ return &chunk->page[pcpu_page_idx(cpu, page_idx)]; ++} ++ ++static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk, ++ unsigned int cpu, int page_idx) ++{ ++ return (unsigned long)chunk->vm->addr + ++ (pcpu_page_idx(cpu, page_idx) << PAGE_SHIFT); ++} ++ ++static bool pcpu_chunk_page_occupied(struct pcpu_chunk *chunk, ++ int page_idx) ++{ ++ return *pcpu_chunk_pagep(chunk, 0, page_idx) != NULL; ++} ++ ++/** ++ * pcpu_mem_alloc - allocate memory ++ * @size: bytes to allocate ++ * ++ * Allocate @size bytes. If @size is smaller than PAGE_SIZE, ++ * kzalloc() is used; otherwise, vmalloc() is used. The returned ++ * memory is always zeroed. ++ * ++ * CONTEXT: ++ * Does GFP_KERNEL allocation. ++ * ++ * RETURNS: ++ * Pointer to the allocated area on success, NULL on failure. ++ */ ++static void *pcpu_mem_alloc(size_t size) ++{ ++ if (size <= PAGE_SIZE) ++ return kzalloc(size, GFP_KERNEL); ++ else { ++ void *ptr = vmalloc(size); ++ if (ptr) ++ memset(ptr, 0, size); ++ return ptr; ++ } ++} ++ ++/** ++ * pcpu_mem_free - free memory ++ * @ptr: memory to free ++ * @size: size of the area ++ * ++ * Free @ptr. @ptr should have been allocated using pcpu_mem_alloc(). ++ */ ++static void pcpu_mem_free(void *ptr, size_t size) ++{ ++ if (size <= PAGE_SIZE) ++ kfree(ptr); ++ else ++ vfree(ptr); ++} ++ ++/** ++ * pcpu_chunk_relocate - put chunk in the appropriate chunk slot ++ * @chunk: chunk of interest ++ * @oslot: the previous slot it was on ++ * ++ * This function is called after an allocation or free changed @chunk. ++ * New slot according to the changed state is determined and @chunk is ++ * moved to the slot. Note that the reserved chunk is never put on ++ * chunk slots. ++ * ++ * CONTEXT: ++ * pcpu_lock. 
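As a rough illustration of the free-size bucketing done by __pcpu_size_to_slot() and pcpu_size_to_slot() above, the sketch below reproduces the fls()-based calculation in user space. The kernel's fls() is approximated with GCC's __builtin_clz() and the 64K unit size is a made-up example, so treat this as a model rather than a copy of the kernel code.

/*
 * Stand-alone model of the free-size -> list-slot mapping: sizes that
 * share the same highest set bit (offset by PCPU_SLOT_BASE_SHIFT) land
 * in the same slot, and a completely free chunk goes to the last slot.
 */
#include <stdio.h>

#define PCPU_SLOT_BASE_SHIFT    5       /* 1-31 bytes share slot 1, as above */

static int unit_size = 64 * 1024;       /* assumed pcpu_unit_size */
static int nr_slots;

/* user-space stand-in for the kernel's fls(): index of highest set bit */
static int fls_model(int x)
{
        return x ? 32 - __builtin_clz((unsigned int)x) : 0;
}

static int __size_to_slot(int size)
{
        int slot = fls_model(size) - PCPU_SLOT_BASE_SHIFT + 2;

        return slot > 1 ? slot : 1;
}

static int size_to_slot(int size)
{
        if (size == unit_size)          /* fully free chunk: last slot */
                return nr_slots - 1;
        return __size_to_slot(size);
}

int main(void)
{
        int sizes[] = { 4, 16, 31, 32, 100, 4096, 64 * 1024 };
        unsigned int i;

        nr_slots = __size_to_slot(unit_size) + 2;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                printf("free_size %6d -> slot %d (of %d)\n",
                       sizes[i], size_to_slot(sizes[i]), nr_slots);
        return 0;
}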
++ */ ++static void pcpu_chunk_relocate(struct pcpu_chunk *chunk, int oslot) ++{ ++ int nslot = pcpu_chunk_slot(chunk); ++ ++ if (chunk != pcpu_reserved_chunk && oslot != nslot) { ++ if (oslot < nslot) ++ list_move(&chunk->list, &pcpu_slot[nslot]); ++ else ++ list_move_tail(&chunk->list, &pcpu_slot[nslot]); ++ } ++} ++ ++static struct rb_node **pcpu_chunk_rb_search(void *addr, ++ struct rb_node **parentp) ++{ ++ struct rb_node **p = &pcpu_addr_root.rb_node; ++ struct rb_node *parent = NULL; ++ struct pcpu_chunk *chunk; ++ ++ while (*p) { ++ parent = *p; ++ chunk = rb_entry(parent, struct pcpu_chunk, rb_node); ++ ++ if (addr < chunk->vm->addr) ++ p = &(*p)->rb_left; ++ else if (addr > chunk->vm->addr) ++ p = &(*p)->rb_right; ++ else ++ break; ++ } ++ ++ if (parentp) ++ *parentp = parent; ++ return p; ++} ++ ++/** ++ * pcpu_chunk_addr_search - search for chunk containing specified address ++ * @addr: address to search for ++ * ++ * Look for chunk which might contain @addr. More specifically, it ++ * searchs for the chunk with the highest start address which isn't ++ * beyond @addr. ++ * ++ * CONTEXT: ++ * pcpu_lock. ++ * ++ * RETURNS: ++ * The address of the found chunk. ++ */ ++static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) ++{ ++ struct rb_node *n, *parent; ++ struct pcpu_chunk *chunk; ++ ++ /* is it in the reserved chunk? */ ++ if (pcpu_reserved_chunk) { ++ void *start = pcpu_reserved_chunk->vm->addr; ++ ++ if (addr >= start && addr < start + pcpu_reserved_chunk_limit) ++ return pcpu_reserved_chunk; ++ } ++ ++ /* nah... search the regular ones */ ++ n = *pcpu_chunk_rb_search(addr, &parent); ++ if (!n) { ++ /* no exactly matching chunk, the parent is the closest */ ++ n = parent; ++ BUG_ON(!n); ++ } ++ chunk = rb_entry(n, struct pcpu_chunk, rb_node); ++ ++ if (addr < chunk->vm->addr) { ++ /* the parent was the next one, look for the previous one */ ++ n = rb_prev(n); ++ BUG_ON(!n); ++ chunk = rb_entry(n, struct pcpu_chunk, rb_node); ++ } ++ ++ return chunk; ++} ++ ++/** ++ * pcpu_chunk_addr_insert - insert chunk into address rb tree ++ * @new: chunk to insert ++ * ++ * Insert @new into address rb tree. ++ * ++ * CONTEXT: ++ * pcpu_lock. ++ */ ++static void pcpu_chunk_addr_insert(struct pcpu_chunk *new) ++{ ++ struct rb_node **p, *parent; ++ ++ p = pcpu_chunk_rb_search(new->vm->addr, &parent); ++ BUG_ON(*p); ++ rb_link_node(&new->rb_node, parent, p); ++ rb_insert_color(&new->rb_node, &pcpu_addr_root); ++} ++ ++/** ++ * pcpu_extend_area_map - extend area map for allocation ++ * @chunk: target chunk ++ * ++ * Extend area map of @chunk so that it can accomodate an allocation. ++ * A single allocation can split an area into three areas, so this ++ * function makes sure that @chunk->map has at least two extra slots. ++ * ++ * CONTEXT: ++ * pcpu_alloc_mutex, pcpu_lock. pcpu_lock is released and reacquired ++ * if area map is extended. ++ * ++ * RETURNS: ++ * 0 if noop, 1 if successfully extended, -errno on failure. ++ */ ++static int pcpu_extend_area_map(struct pcpu_chunk *chunk) ++{ ++ int new_alloc; ++ int *new; ++ size_t size; ++ ++ /* has enough? */ ++ if (chunk->map_alloc >= chunk->map_used + 2) ++ return 0; ++ ++ spin_unlock_irq(&pcpu_lock); ++ ++ new_alloc = PCPU_DFL_MAP_ALLOC; ++ while (new_alloc < chunk->map_used + 2) ++ new_alloc *= 2; ++ ++ new = pcpu_mem_alloc(new_alloc * sizeof(new[0])); ++ if (!new) { ++ spin_lock_irq(&pcpu_lock); ++ return -ENOMEM; ++ } ++ ++ /* ++ * Acquire pcpu_lock and switch to new area map. 
Only free ++ * could have happened inbetween, so map_used couldn't have ++ * grown. ++ */ ++ spin_lock_irq(&pcpu_lock); ++ BUG_ON(new_alloc < chunk->map_used + 2); ++ ++ size = chunk->map_alloc * sizeof(chunk->map[0]); ++ memcpy(new, chunk->map, size); ++ ++ /* ++ * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is ++ * one of the first chunks and still using static map. ++ */ ++ if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC) ++ pcpu_mem_free(chunk->map, size); ++ ++ chunk->map_alloc = new_alloc; ++ chunk->map = new; ++ return 0; ++} ++ ++/** ++ * pcpu_split_block - split a map block ++ * @chunk: chunk of interest ++ * @i: index of map block to split ++ * @head: head size in bytes (can be 0) ++ * @tail: tail size in bytes (can be 0) ++ * ++ * Split the @i'th map block into two or three blocks. If @head is ++ * non-zero, @head bytes block is inserted before block @i moving it ++ * to @i+1 and reducing its size by @head bytes. ++ * ++ * If @tail is non-zero, the target block, which can be @i or @i+1 ++ * depending on @head, is reduced by @tail bytes and @tail byte block ++ * is inserted after the target block. ++ * ++ * @chunk->map must have enough free slots to accomodate the split. ++ * ++ * CONTEXT: ++ * pcpu_lock. ++ */ ++static void pcpu_split_block(struct pcpu_chunk *chunk, int i, ++ int head, int tail) ++{ ++ int nr_extra = !!head + !!tail; ++ ++ BUG_ON(chunk->map_alloc < chunk->map_used + nr_extra); ++ ++ /* insert new subblocks */ ++ memmove(&chunk->map[i + nr_extra], &chunk->map[i], ++ sizeof(chunk->map[0]) * (chunk->map_used - i)); ++ chunk->map_used += nr_extra; ++ ++ if (head) { ++ chunk->map[i + 1] = chunk->map[i] - head; ++ chunk->map[i++] = head; ++ } ++ if (tail) { ++ chunk->map[i++] -= tail; ++ chunk->map[i] = tail; ++ } ++} ++ ++/** ++ * pcpu_alloc_area - allocate area from a pcpu_chunk ++ * @chunk: chunk of interest ++ * @size: wanted size in bytes ++ * @align: wanted align ++ * ++ * Try to allocate @size bytes area aligned at @align from @chunk. ++ * Note that this function only allocates the offset. It doesn't ++ * populate or map the area. ++ * ++ * @chunk->map must have at least two free slots. ++ * ++ * CONTEXT: ++ * pcpu_lock. ++ * ++ * RETURNS: ++ * Allocated offset in @chunk on success, -1 if no matching area is ++ * found. ++ */ ++static int pcpu_alloc_area(struct pcpu_chunk *chunk, int size, int align) ++{ ++ int oslot = pcpu_chunk_slot(chunk); ++ int max_contig = 0; ++ int i, off; ++ ++ for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) { ++ bool is_last = i + 1 == chunk->map_used; ++ int head, tail; ++ ++ /* extra for alignment requirement */ ++ head = ALIGN(off, align) - off; ++ BUG_ON(i == 0 && head != 0); ++ ++ if (chunk->map[i] < 0) ++ continue; ++ if (chunk->map[i] < head + size) { ++ max_contig = max(chunk->map[i], max_contig); ++ continue; ++ } ++ ++ /* ++ * If head is small or the previous block is free, ++ * merge'em. Note that 'small' is defined as smaller ++ * than sizeof(int), which is very small but isn't too ++ * uncommon for percpu allocations. 
++ */ ++ if (head && (head < sizeof(int) || chunk->map[i - 1] > 0)) { ++ if (chunk->map[i - 1] > 0) ++ chunk->map[i - 1] += head; ++ else { ++ chunk->map[i - 1] -= head; ++ chunk->free_size -= head; ++ } ++ chunk->map[i] -= head; ++ off += head; ++ head = 0; ++ } ++ ++ /* if tail is small, just keep it around */ ++ tail = chunk->map[i] - head - size; ++ if (tail < sizeof(int)) ++ tail = 0; ++ ++ /* split if warranted */ ++ if (head || tail) { ++ pcpu_split_block(chunk, i, head, tail); ++ if (head) { ++ i++; ++ off += head; ++ max_contig = max(chunk->map[i - 1], max_contig); ++ } ++ if (tail) ++ max_contig = max(chunk->map[i + 1], max_contig); ++ } ++ ++ /* update hint and mark allocated */ ++ if (is_last) ++ chunk->contig_hint = max_contig; /* fully scanned */ ++ else ++ chunk->contig_hint = max(chunk->contig_hint, ++ max_contig); ++ ++ chunk->free_size -= chunk->map[i]; ++ chunk->map[i] = -chunk->map[i]; ++ ++ pcpu_chunk_relocate(chunk, oslot); ++ return off; ++ } ++ ++ chunk->contig_hint = max_contig; /* fully scanned */ ++ pcpu_chunk_relocate(chunk, oslot); ++ ++ /* tell the upper layer that this chunk has no matching area */ ++ return -1; ++} ++ ++/** ++ * pcpu_free_area - free area to a pcpu_chunk ++ * @chunk: chunk of interest ++ * @freeme: offset of area to free ++ * ++ * Free area starting from @freeme to @chunk. Note that this function ++ * only modifies the allocation map. It doesn't depopulate or unmap ++ * the area. ++ * ++ * CONTEXT: ++ * pcpu_lock. ++ */ ++static void pcpu_free_area(struct pcpu_chunk *chunk, int freeme) ++{ ++ int oslot = pcpu_chunk_slot(chunk); ++ int i, off; ++ ++ for (i = 0, off = 0; i < chunk->map_used; off += abs(chunk->map[i++])) ++ if (off == freeme) ++ break; ++ BUG_ON(off != freeme); ++ BUG_ON(chunk->map[i] > 0); ++ ++ chunk->map[i] = -chunk->map[i]; ++ chunk->free_size += chunk->map[i]; ++ ++ /* merge with previous? */ ++ if (i > 0 && chunk->map[i - 1] >= 0) { ++ chunk->map[i - 1] += chunk->map[i]; ++ chunk->map_used--; ++ memmove(&chunk->map[i], &chunk->map[i + 1], ++ (chunk->map_used - i) * sizeof(chunk->map[0])); ++ i--; ++ } ++ /* merge with next? */ ++ if (i + 1 < chunk->map_used && chunk->map[i + 1] >= 0) { ++ chunk->map[i] += chunk->map[i + 1]; ++ chunk->map_used--; ++ memmove(&chunk->map[i + 1], &chunk->map[i + 2], ++ (chunk->map_used - (i + 1)) * sizeof(chunk->map[0])); ++ } ++ ++ chunk->contig_hint = max(chunk->map[i], chunk->contig_hint); ++ pcpu_chunk_relocate(chunk, oslot); ++} ++ ++/** ++ * pcpu_unmap - unmap pages out of a pcpu_chunk ++ * @chunk: chunk of interest ++ * @page_start: page index of the first page to unmap ++ * @page_end: page index of the last page to unmap + 1 ++ * @flush: whether to flush cache and tlb or not ++ * ++ * For each cpu, unmap pages [@page_start,@page_end) out of @chunk. ++ * If @flush is true, vcache is flushed before unmapping and tlb ++ * after. ++ */ ++static void pcpu_unmap(struct pcpu_chunk *chunk, int page_start, int page_end, ++ bool flush) ++{ ++ unsigned int last = num_possible_cpus() - 1; ++ unsigned int cpu; ++ ++ /* unmap must not be done on immutable chunk */ ++ WARN_ON(chunk->immutable); ++ ++ /* ++ * Each flushing trial can be very expensive, issue flush on ++ * the whole region at once rather than doing it for each cpu. ++ * This could be an overkill but is more scalable. 
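The area map handled by pcpu_alloc_area() and pcpu_free_area() above is an array of signed sizes: a positive entry is a free run, a negative entry is an allocated run, and freeing merges the entry with adjacent free neighbours. Below is a deliberately minimal user-space model of that encoding, first-fit with a simple tail split and merge-on-free only; alignment handling, the contiguity hint and slot relocation from the real code are left out.

/*
 * Minimal model of the chunk->map encoding: map[i] > 0 is a free area
 * of that many bytes, map[i] < 0 is an allocated area of -map[i] bytes.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define UNIT_SIZE       1024            /* assumed tiny unit for the demo */
#define MAP_MAX         16

static int map[MAP_MAX] = { UNIT_SIZE };        /* one big free area */
static int map_used = 1;

/* return offset of an allocated area of @size bytes, or -1 */
static int alloc_area(int size)
{
        int i, off = 0;

        for (i = 0; i < map_used; off += abs(map[i++])) {
                if (map[i] < 0 || map[i] < size)
                        continue;
                if (map[i] > size) {            /* split off the tail */
                        memmove(&map[i + 1], &map[i],
                                (map_used - i) * sizeof(map[0]));
                        map_used++;
                        map[i + 1] -= size;
                }
                map[i] = -size;                 /* mark allocated */
                return off;
        }
        return -1;
}

/* free the area starting at @freeme and merge with free neighbours */
static void free_area(int freeme)
{
        int i, off = 0;

        for (i = 0; i < map_used && off != freeme; off += abs(map[i++]))
                ;
        map[i] = -map[i];                       /* mark free again */

        if (i + 1 < map_used && map[i + 1] > 0) {       /* merge with next */
                map[i] += map[i + 1];
                memmove(&map[i + 1], &map[i + 2],
                        (map_used - i - 2) * sizeof(map[0]));
                map_used--;
        }
        if (i > 0 && map[i - 1] > 0) {                  /* merge with prev */
                map[i - 1] += map[i];
                memmove(&map[i], &map[i + 1],
                        (map_used - i - 1) * sizeof(map[0]));
                map_used--;
        }
}

int main(void)
{
        int a = alloc_area(128), b = alloc_area(64);

        printf("a at %d, b at %d, entries in map: %d\n", a, b, map_used);
        free_area(a);
        free_area(b);
        printf("after freeing both, entries in map: %d (one free area)\n",
               map_used);
        return 0;
}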
++ */ ++ if (flush) ++ flush_cache_vunmap(pcpu_chunk_addr(chunk, 0, page_start), ++ pcpu_chunk_addr(chunk, last, page_end)); ++ ++ for_each_possible_cpu(cpu) ++ unmap_kernel_range_noflush( ++ pcpu_chunk_addr(chunk, cpu, page_start), ++ (page_end - page_start) << PAGE_SHIFT); ++ ++ /* ditto as flush_cache_vunmap() */ ++ if (flush) ++ flush_tlb_kernel_range(pcpu_chunk_addr(chunk, 0, page_start), ++ pcpu_chunk_addr(chunk, last, page_end)); ++} ++ ++/** ++ * pcpu_depopulate_chunk - depopulate and unmap an area of a pcpu_chunk ++ * @chunk: chunk to depopulate ++ * @off: offset to the area to depopulate ++ * @size: size of the area to depopulate in bytes ++ * @flush: whether to flush cache and tlb or not ++ * ++ * For each cpu, depopulate and unmap pages [@page_start,@page_end) ++ * from @chunk. If @flush is true, vcache is flushed before unmapping ++ * and tlb after. ++ * ++ * CONTEXT: ++ * pcpu_alloc_mutex. ++ */ ++static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size, ++ bool flush) ++{ ++ int page_start = PFN_DOWN(off); ++ int page_end = PFN_UP(off + size); ++ int unmap_start = -1; ++ int uninitialized_var(unmap_end); ++ unsigned int cpu; ++ int i; ++ ++ for (i = page_start; i < page_end; i++) { ++ for_each_possible_cpu(cpu) { ++ struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); ++ ++ if (!*pagep) ++ continue; ++ ++ __free_page(*pagep); ++ ++ /* ++ * If it's partial depopulation, it might get ++ * populated or depopulated again. Mark the ++ * page gone. ++ */ ++ *pagep = NULL; ++ ++ unmap_start = unmap_start < 0 ? i : unmap_start; ++ unmap_end = i + 1; ++ } ++ } ++ ++ if (unmap_start >= 0) ++ pcpu_unmap(chunk, unmap_start, unmap_end, flush); ++} ++ ++/** ++ * pcpu_map - map pages into a pcpu_chunk ++ * @chunk: chunk of interest ++ * @page_start: page index of the first page to map ++ * @page_end: page index of the last page to map + 1 ++ * ++ * For each cpu, map pages [@page_start,@page_end) into @chunk. ++ * vcache is flushed afterwards. ++ */ ++static int pcpu_map(struct pcpu_chunk *chunk, int page_start, int page_end) ++{ ++ unsigned int last = num_possible_cpus() - 1; ++ unsigned int cpu; ++ int err; ++ ++ /* map must not be done on immutable chunk */ ++ WARN_ON(chunk->immutable); ++ ++ for_each_possible_cpu(cpu) { ++ err = map_kernel_range_noflush( ++ pcpu_chunk_addr(chunk, cpu, page_start), ++ (page_end - page_start) << PAGE_SHIFT, ++ PAGE_KERNEL, ++ pcpu_chunk_pagep(chunk, cpu, page_start)); ++ if (err < 0) ++ return err; ++ } ++ ++ /* flush at once, please read comments in pcpu_unmap() */ ++ flush_cache_vmap(pcpu_chunk_addr(chunk, 0, page_start), ++ pcpu_chunk_addr(chunk, last, page_end)); ++ return 0; ++} ++ ++/** ++ * pcpu_populate_chunk - populate and map an area of a pcpu_chunk ++ * @chunk: chunk of interest ++ * @off: offset to the area to populate ++ * @size: size of the area to populate in bytes ++ * ++ * For each cpu, populate and map pages [@page_start,@page_end) into ++ * @chunk. The area is cleared on return. ++ * ++ * CONTEXT: ++ * pcpu_alloc_mutex, does GFP_KERNEL allocation. 
++ */ ++static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size) ++{ ++ const gfp_t alloc_mask = GFP_KERNEL | __GFP_HIGHMEM | __GFP_COLD; ++ int page_start = PFN_DOWN(off); ++ int page_end = PFN_UP(off + size); ++ int map_start = -1; ++ int uninitialized_var(map_end); ++ unsigned int cpu; ++ int i; ++ ++ for (i = page_start; i < page_end; i++) { ++ if (pcpu_chunk_page_occupied(chunk, i)) { ++ if (map_start >= 0) { ++ if (pcpu_map(chunk, map_start, map_end)) ++ goto err; ++ map_start = -1; ++ } ++ continue; ++ } ++ ++ map_start = map_start < 0 ? i : map_start; ++ map_end = i + 1; ++ ++ for_each_possible_cpu(cpu) { ++ struct page **pagep = pcpu_chunk_pagep(chunk, cpu, i); ++ ++ *pagep = alloc_pages_node(cpu_to_node(cpu), ++ alloc_mask, 0); ++ if (!*pagep) ++ goto err; ++ } ++ } ++ ++ if (map_start >= 0 && pcpu_map(chunk, map_start, map_end)) ++ goto err; ++ ++ for_each_possible_cpu(cpu) ++ memset(chunk->vm->addr + cpu * pcpu_unit_size + off, 0, ++ size); ++ ++ return 0; ++err: ++ /* likely under heavy memory pressure, give memory back */ ++ pcpu_depopulate_chunk(chunk, off, size, true); ++ return -ENOMEM; ++} ++ ++static void free_pcpu_chunk(struct pcpu_chunk *chunk) ++{ ++ if (!chunk) ++ return; ++ if (chunk->vm) ++ free_vm_area(chunk->vm); ++ pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0])); ++ kfree(chunk); ++} ++ ++static struct pcpu_chunk *alloc_pcpu_chunk(void) ++{ ++ struct pcpu_chunk *chunk; ++ ++ chunk = kzalloc(pcpu_chunk_struct_size, GFP_KERNEL); ++ if (!chunk) ++ return NULL; ++ ++ chunk->map = pcpu_mem_alloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); ++ chunk->map_alloc = PCPU_DFL_MAP_ALLOC; ++ chunk->map[chunk->map_used++] = pcpu_unit_size; ++ chunk->page = chunk->page_ar; ++ ++ chunk->vm = get_vm_area(pcpu_chunk_size, GFP_KERNEL); ++ if (!chunk->vm) { ++ free_pcpu_chunk(chunk); ++ return NULL; ++ } ++ ++ INIT_LIST_HEAD(&chunk->list); ++ chunk->free_size = pcpu_unit_size; ++ chunk->contig_hint = pcpu_unit_size; ++ ++ return chunk; ++} ++ ++/** ++ * pcpu_alloc - the percpu allocator ++ * @size: size of area to allocate in bytes ++ * @align: alignment of area (max PAGE_SIZE) ++ * @reserved: allocate from the reserved chunk if available ++ * ++ * Allocate percpu area of @size bytes aligned at @align. ++ * ++ * CONTEXT: ++ * Does GFP_KERNEL allocation. ++ * ++ * RETURNS: ++ * Percpu pointer to the allocated area on success, NULL on failure. 
++ */ ++static void *pcpu_alloc(size_t size, size_t align, bool reserved) ++{ ++ struct pcpu_chunk *chunk; ++ int slot, off; ++ ++ if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) { ++ WARN(true, "illegal size (%zu) or align (%zu) for " ++ "percpu allocation\n", size, align); ++ return NULL; ++ } ++ ++ mutex_lock(&pcpu_alloc_mutex); ++ spin_lock_irq(&pcpu_lock); ++ ++ /* serve reserved allocations from the reserved chunk if available */ ++ if (reserved && pcpu_reserved_chunk) { ++ chunk = pcpu_reserved_chunk; ++ if (size > chunk->contig_hint || ++ pcpu_extend_area_map(chunk) < 0) ++ goto fail_unlock; ++ off = pcpu_alloc_area(chunk, size, align); ++ if (off >= 0) ++ goto area_found; ++ goto fail_unlock; ++ } ++ ++restart: ++ /* search through normal chunks */ ++ for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) { ++ list_for_each_entry(chunk, &pcpu_slot[slot], list) { ++ if (size > chunk->contig_hint) ++ continue; ++ ++ switch (pcpu_extend_area_map(chunk)) { ++ case 0: ++ break; ++ case 1: ++ goto restart; /* pcpu_lock dropped, restart */ ++ default: ++ goto fail_unlock; ++ } ++ ++ off = pcpu_alloc_area(chunk, size, align); ++ if (off >= 0) ++ goto area_found; ++ } ++ } ++ ++ /* hmmm... no space left, create a new chunk */ ++ spin_unlock_irq(&pcpu_lock); ++ ++ chunk = alloc_pcpu_chunk(); ++ if (!chunk) ++ goto fail_unlock_mutex; ++ ++ spin_lock_irq(&pcpu_lock); ++ pcpu_chunk_relocate(chunk, -1); ++ pcpu_chunk_addr_insert(chunk); ++ goto restart; ++ ++area_found: ++ spin_unlock_irq(&pcpu_lock); ++ ++ /* populate, map and clear the area */ ++ if (pcpu_populate_chunk(chunk, off, size)) { ++ spin_lock_irq(&pcpu_lock); ++ pcpu_free_area(chunk, off); ++ goto fail_unlock; ++ } ++ ++ mutex_unlock(&pcpu_alloc_mutex); ++ ++ return __addr_to_pcpu_ptr(chunk->vm->addr + off); ++ ++fail_unlock: ++ spin_unlock_irq(&pcpu_lock); ++fail_unlock_mutex: ++ mutex_unlock(&pcpu_alloc_mutex); ++ return NULL; ++} ++ ++/** ++ * __alloc_percpu - allocate dynamic percpu area ++ * @size: size of area to allocate in bytes ++ * @align: alignment of area (max PAGE_SIZE) ++ * ++ * Allocate percpu area of @size bytes aligned at @align. Might ++ * sleep. Might trigger writeouts. ++ * ++ * CONTEXT: ++ * Does GFP_KERNEL allocation. ++ * ++ * RETURNS: ++ * Percpu pointer to the allocated area on success, NULL on failure. ++ */ ++void *__alloc_percpu(size_t size, size_t align) ++{ ++ return pcpu_alloc(size, align, false); ++} ++EXPORT_SYMBOL_GPL(__alloc_percpu); ++ ++/** ++ * __alloc_reserved_percpu - allocate reserved percpu area ++ * @size: size of area to allocate in bytes ++ * @align: alignment of area (max PAGE_SIZE) ++ * ++ * Allocate percpu area of @size bytes aligned at @align from reserved ++ * percpu area if arch has set it up; otherwise, allocation is served ++ * from the same dynamic area. Might sleep. Might trigger writeouts. ++ * ++ * CONTEXT: ++ * Does GFP_KERNEL allocation. ++ * ++ * RETURNS: ++ * Percpu pointer to the allocated area on success, NULL on failure. ++ */ ++void *__alloc_reserved_percpu(size_t size, size_t align) ++{ ++ return pcpu_alloc(size, align, true); ++} ++ ++/** ++ * pcpu_reclaim - reclaim fully free chunks, workqueue function ++ * @work: unused ++ * ++ * Reclaim all fully free chunks except for the first one. ++ * ++ * CONTEXT: ++ * workqueue context. 
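For orientation, a caller of the interface above might use __alloc_percpu() together with free_percpu() (defined further down in this file) along the lines of the hypothetical kernel-style fragment below. It is a sketch only and has not been compiled; per_cpu_ptr() and for_each_possible_cpu() are the usual kernel helpers assumed here, not something defined in this file.

/*
 * Hypothetical usage sketch (not part of the patch, not compiled):
 * allocate one 'struct foo' per possible CPU, touch each CPU's copy,
 * then release the area.  free_percpu() accepts NULL, as the
 * implementation in this file shows.
 */
struct foo {
        unsigned long hits;
};

static struct foo *foo_stats;

static int foo_stats_init(void)
{
        unsigned int cpu;

        foo_stats = __alloc_percpu(sizeof(struct foo),
                                   __alignof__(struct foo));
        if (!foo_stats)
                return -ENOMEM;

        for_each_possible_cpu(cpu)
                per_cpu_ptr(foo_stats, cpu)->hits = 0;

        return 0;
}

static void foo_stats_exit(void)
{
        free_percpu(foo_stats);
}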
++ */ ++static void pcpu_reclaim(struct work_struct *work) ++{ ++ LIST_HEAD(todo); ++ struct list_head *head = &pcpu_slot[pcpu_nr_slots - 1]; ++ struct pcpu_chunk *chunk, *next; ++ ++ mutex_lock(&pcpu_alloc_mutex); ++ spin_lock_irq(&pcpu_lock); ++ ++ list_for_each_entry_safe(chunk, next, head, list) { ++ WARN_ON(chunk->immutable); ++ ++ /* spare the first one */ ++ if (chunk == list_first_entry(head, struct pcpu_chunk, list)) ++ continue; ++ ++ rb_erase(&chunk->rb_node, &pcpu_addr_root); ++ list_move(&chunk->list, &todo); ++ } ++ ++ spin_unlock_irq(&pcpu_lock); ++ mutex_unlock(&pcpu_alloc_mutex); ++ ++ list_for_each_entry_safe(chunk, next, &todo, list) { ++ pcpu_depopulate_chunk(chunk, 0, pcpu_unit_size, false); ++ free_pcpu_chunk(chunk); ++ } ++} ++ ++/** ++ * free_percpu - free percpu area ++ * @ptr: pointer to area to free ++ * ++ * Free percpu area @ptr. ++ * ++ * CONTEXT: ++ * Can be called from atomic context. ++ */ ++void free_percpu(void *ptr) ++{ ++ void *addr = __pcpu_ptr_to_addr(ptr); ++ struct pcpu_chunk *chunk; ++ unsigned long flags; ++ int off; ++ ++ if (!ptr) ++ return; ++ ++ spin_lock_irqsave(&pcpu_lock, flags); ++ ++ chunk = pcpu_chunk_addr_search(addr); ++ off = addr - chunk->vm->addr; ++ ++ pcpu_free_area(chunk, off); ++ ++ /* if there are more than one fully free chunks, wake up grim reaper */ ++ if (chunk->free_size == pcpu_unit_size) { ++ struct pcpu_chunk *pos; ++ ++ list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list) ++ if (pos != chunk) { ++ schedule_work(&pcpu_reclaim_work); ++ break; ++ } ++ } ++ ++ spin_unlock_irqrestore(&pcpu_lock, flags); ++} ++EXPORT_SYMBOL_GPL(free_percpu); ++ ++/** ++ * pcpu_setup_first_chunk - initialize the first percpu chunk ++ * @get_page_fn: callback to fetch page pointer ++ * @static_size: the size of static percpu area in bytes ++ * @reserved_size: the size of reserved percpu area in bytes ++ * @dyn_size: free size for dynamic allocation in bytes, -1 for auto ++ * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto ++ * @base_addr: mapped address, NULL for auto ++ * @populate_pte_fn: callback to allocate pagetable, NULL if unnecessary ++ * ++ * Initialize the first percpu chunk which contains the kernel static ++ * perpcu area. This function is to be called from arch percpu area ++ * setup path. The first two parameters are mandatory. The rest are ++ * optional. ++ * ++ * @get_page_fn() should return pointer to percpu page given cpu ++ * number and page number. It should at least return enough pages to ++ * cover the static area. The returned pages for static area should ++ * have been initialized with valid data. If @unit_size is specified, ++ * it can also return pages after the static area. NULL return ++ * indicates end of pages for the cpu. Note that @get_page_fn() must ++ * return the same number of pages for all cpus. ++ * ++ * @reserved_size, if non-zero, specifies the amount of bytes to ++ * reserve after the static area in the first chunk. This reserves ++ * the first chunk such that it's available only through reserved ++ * percpu allocation. This is primarily used to serve module percpu ++ * static areas on architectures where the addressing model has ++ * limited offset range for symbol relocations to guarantee module ++ * percpu symbols fall inside the relocatable range. ++ * ++ * @dyn_size, if non-negative, determines the number of bytes ++ * available for dynamic allocation in the first chunk. 
Specifying ++ * non-negative value makes percpu leave alone the area beyond ++ * @static_size + @reserved_size + @dyn_size. ++ * ++ * @unit_size, if non-negative, specifies unit size and must be ++ * aligned to PAGE_SIZE and equal to or larger than @static_size + ++ * @reserved_size + if non-negative, @dyn_size. ++ * ++ * Non-null @base_addr means that the caller already allocated virtual ++ * region for the first chunk and mapped it. percpu must not mess ++ * with the chunk. Note that @base_addr with 0 @unit_size or non-NULL ++ * @populate_pte_fn doesn't make any sense. ++ * ++ * @populate_pte_fn is used to populate the pagetable. NULL means the ++ * caller already populated the pagetable. ++ * ++ * If the first chunk ends up with both reserved and dynamic areas, it ++ * is served by two chunks - one to serve the core static and reserved ++ * areas and the other for the dynamic area. They share the same vm ++ * and page map but uses different area allocation map to stay away ++ * from each other. The latter chunk is circulated in the chunk slots ++ * and available for dynamic allocation like any other chunks. ++ * ++ * RETURNS: ++ * The determined pcpu_unit_size which can be used to initialize ++ * percpu access. ++ */ ++size_t __init pcpu_setup_first_chunk(pcpu_get_page_fn_t get_page_fn, ++ size_t static_size, size_t reserved_size, ++ ssize_t dyn_size, ssize_t unit_size, ++ void *base_addr, ++ pcpu_populate_pte_fn_t populate_pte_fn) ++{ ++ static struct vm_struct first_vm; ++ static int smap[2], dmap[2]; ++ size_t size_sum = static_size + reserved_size + ++ (dyn_size >= 0 ? dyn_size : 0); ++ struct pcpu_chunk *schunk, *dchunk = NULL; ++ unsigned int cpu; ++ int nr_pages; ++ int err, i; ++ ++ /* santiy checks */ ++ BUILD_BUG_ON(ARRAY_SIZE(smap) >= PCPU_DFL_MAP_ALLOC || ++ ARRAY_SIZE(dmap) >= PCPU_DFL_MAP_ALLOC); ++ BUG_ON(!static_size); ++ if (unit_size >= 0) { ++ BUG_ON(unit_size < size_sum); ++ BUG_ON(unit_size & ~PAGE_MASK); ++ BUG_ON(unit_size < PCPU_MIN_UNIT_SIZE); ++ } else ++ BUG_ON(base_addr); ++ BUG_ON(base_addr && populate_pte_fn); ++ ++ if (unit_size >= 0) ++ pcpu_unit_pages = unit_size >> PAGE_SHIFT; ++ else ++ pcpu_unit_pages = max_t(int, PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT, ++ PFN_UP(size_sum)); ++ ++ pcpu_unit_size = pcpu_unit_pages << PAGE_SHIFT; ++ pcpu_chunk_size = num_possible_cpus() * pcpu_unit_size; ++ pcpu_chunk_struct_size = sizeof(struct pcpu_chunk) ++ + num_possible_cpus() * pcpu_unit_pages * sizeof(struct page *); ++ ++ if (dyn_size < 0) ++ dyn_size = pcpu_unit_size - static_size - reserved_size; ++ ++ /* ++ * Allocate chunk slots. The additional last slot is for ++ * empty chunks. ++ */ ++ pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2; ++ pcpu_slot = alloc_bootmem(pcpu_nr_slots * sizeof(pcpu_slot[0])); ++ for (i = 0; i < pcpu_nr_slots; i++) ++ INIT_LIST_HEAD(&pcpu_slot[i]); ++ ++ /* ++ * Initialize static chunk. If reserved_size is zero, the ++ * static chunk covers static area + dynamic allocation area ++ * in the first chunk. If reserved_size is not zero, it ++ * covers static area + reserved area (mostly used for module ++ * static percpu allocation). 
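To make the sizing done in pcpu_setup_first_chunk() above concrete, the small stand-alone calculation below follows the same formula: the unit covers the page-aligned sum of static, reserved and dynamic sizes but never less than the minimum unit size, and the chunk is one unit per possible CPU. The page size, minimum unit size and example inputs are assumptions of this sketch, not values taken from the patch.

/*
 * Back-of-the-envelope model of the first-chunk sizing done above.
 * PAGE_SIZE and PCPU_MIN_UNIT_SIZE are assumed example values
 * (4K pages, 64K minimum unit).
 */
#include <stdio.h>

#define PAGE_SHIFT              12
#define PAGE_SIZE               (1UL << PAGE_SHIFT)
#define PFN_UP(x)               (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define PCPU_MIN_UNIT_SIZE      (64UL * 1024)   /* assumed */

int main(void)
{
        unsigned long static_size = 300 * 1024; /* example static percpu data */
        unsigned long reserved_size = 8 * 1024; /* example module reserve */
        unsigned long dyn_size = 20 * 1024;     /* example dynamic area */
        unsigned long num_possible_cpus = 4;    /* example CPU count */

        unsigned long size_sum = static_size + reserved_size + dyn_size;
        unsigned long min_pages = PCPU_MIN_UNIT_SIZE >> PAGE_SHIFT;
        unsigned long unit_pages = PFN_UP(size_sum);

        if (unit_pages < min_pages)     /* max(min unit, PFN_UP(size_sum)) */
                unit_pages = min_pages;

        printf("unit:  %lu pages (%lu bytes)\n",
               unit_pages, unit_pages << PAGE_SHIFT);
        printf("chunk: %lu bytes for %lu possible CPUs\n",
               num_possible_cpus * (unit_pages << PAGE_SHIFT),
               num_possible_cpus);
        return 0;
}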
++ */ ++ schunk = alloc_bootmem(pcpu_chunk_struct_size); ++ INIT_LIST_HEAD(&schunk->list); ++ schunk->vm = &first_vm; ++ schunk->map = smap; ++ schunk->map_alloc = ARRAY_SIZE(smap); ++ schunk->page = schunk->page_ar; ++ ++ if (reserved_size) { ++ schunk->free_size = reserved_size; ++ pcpu_reserved_chunk = schunk; /* not for dynamic alloc */ ++ } else { ++ schunk->free_size = dyn_size; ++ dyn_size = 0; /* dynamic area covered */ ++ } ++ schunk->contig_hint = schunk->free_size; ++ ++ schunk->map[schunk->map_used++] = -static_size; ++ if (schunk->free_size) ++ schunk->map[schunk->map_used++] = schunk->free_size; ++ ++ pcpu_reserved_chunk_limit = static_size + schunk->free_size; ++ ++ /* init dynamic chunk if necessary */ ++ if (dyn_size) { ++ dchunk = alloc_bootmem(sizeof(struct pcpu_chunk)); ++ INIT_LIST_HEAD(&dchunk->list); ++ dchunk->vm = &first_vm; ++ dchunk->map = dmap; ++ dchunk->map_alloc = ARRAY_SIZE(dmap); ++ dchunk->page = schunk->page_ar; /* share page map with schunk */ ++ ++ dchunk->contig_hint = dchunk->free_size = dyn_size; ++ dchunk->map[dchunk->map_used++] = -pcpu_reserved_chunk_limit; ++ dchunk->map[dchunk->map_used++] = dchunk->free_size; ++ } ++ ++ /* allocate vm address */ ++ first_vm.flags = VM_ALLOC; ++ first_vm.size = pcpu_chunk_size; ++ ++ if (!base_addr) ++ vm_area_register_early(&first_vm, PAGE_SIZE); ++ else { ++ /* ++ * Pages already mapped. No need to remap into ++ * vmalloc area. In this case the first chunks can't ++ * be mapped or unmapped by percpu and are marked ++ * immutable. ++ */ ++ first_vm.addr = base_addr; ++ schunk->immutable = true; ++ if (dchunk) ++ dchunk->immutable = true; ++ } ++ ++ /* assign pages */ ++ nr_pages = -1; ++ for_each_possible_cpu(cpu) { ++ for (i = 0; i < pcpu_unit_pages; i++) { ++ struct page *page = get_page_fn(cpu, i); ++ ++ if (!page) ++ break; ++ *pcpu_chunk_pagep(schunk, cpu, i) = page; ++ } ++ ++ BUG_ON(i < PFN_UP(static_size)); ++ ++ if (nr_pages < 0) ++ nr_pages = i; ++ else ++ BUG_ON(nr_pages != i); ++ } ++ ++ /* map them */ ++ if (populate_pte_fn) { ++ for_each_possible_cpu(cpu) ++ for (i = 0; i < nr_pages; i++) ++ populate_pte_fn(pcpu_chunk_addr(schunk, ++ cpu, i)); ++ ++ err = pcpu_map(schunk, 0, nr_pages); ++ if (err) ++ panic("failed to setup static percpu area, err=%d\n", ++ err); ++ } ++ ++ /* link the first chunk in */ ++ if (!dchunk) { ++ pcpu_chunk_relocate(schunk, -1); ++ pcpu_chunk_addr_insert(schunk); ++ } else { ++ pcpu_chunk_relocate(dchunk, -1); ++ pcpu_chunk_addr_insert(dchunk); ++ } ++ ++ /* we're done */ ++ pcpu_base_addr = (void *)pcpu_chunk_addr(schunk, 0, 0); ++ return pcpu_unit_size; ++} ++ ++/* ++ * Embedding first chunk setup helper. ++ */ ++static void *pcpue_ptr __initdata; ++static size_t pcpue_size __initdata; ++static size_t pcpue_unit_size __initdata; ++ ++static struct page * __init pcpue_get_page(unsigned int cpu, int pageno) ++{ ++ size_t off = (size_t)pageno << PAGE_SHIFT; ++ ++ if (off >= pcpue_size) ++ return NULL; ++ ++ return virt_to_page(pcpue_ptr + cpu * pcpue_unit_size + off); ++} ++ ++/** ++ * pcpu_embed_first_chunk - embed the first percpu chunk into bootmem ++ * @static_size: the size of static percpu area in bytes ++ * @reserved_size: the size of reserved percpu area in bytes ++ * @dyn_size: free size for dynamic allocation in bytes, -1 for auto ++ * @unit_size: unit size in bytes, must be multiple of PAGE_SIZE, -1 for auto ++ * ++ * This is a helper to ease setting up embedded first percpu chunk and ++ * can be called where pcpu_setup_first_chunk() is expected. 
++ * ++ * If this function is used to setup the first chunk, it is allocated ++ * as a contiguous area using bootmem allocator and used as-is without ++ * being mapped into vmalloc area. This enables the first chunk to ++ * piggy back on the linear physical mapping which often uses larger ++ * page size. ++ * ++ * When @dyn_size is positive, dynamic area might be larger than ++ * specified to fill page alignment. Also, when @dyn_size is auto, ++ * @dyn_size does not fill the whole first chunk but only what's ++ * necessary for page alignment after static and reserved areas. ++ * ++ * If the needed size is smaller than the minimum or specified unit ++ * size, the leftover is returned to the bootmem allocator. ++ * ++ * RETURNS: ++ * The determined pcpu_unit_size which can be used to initialize ++ * percpu access on success, -errno on failure. ++ */ ++ssize_t __init pcpu_embed_first_chunk(size_t static_size, size_t reserved_size, ++ ssize_t dyn_size, ssize_t unit_size) ++{ ++ unsigned int cpu; ++ ++ /* determine parameters and allocate */ ++ pcpue_size = PFN_ALIGN(static_size + reserved_size + ++ (dyn_size >= 0 ? dyn_size : 0)); ++ if (dyn_size != 0) ++ dyn_size = pcpue_size - static_size - reserved_size; ++ ++ if (unit_size >= 0) { ++ BUG_ON(unit_size < pcpue_size); ++ pcpue_unit_size = unit_size; ++ } else ++ pcpue_unit_size = max_t(size_t, pcpue_size, PCPU_MIN_UNIT_SIZE); ++ ++ pcpue_ptr = __alloc_bootmem_nopanic( ++ num_possible_cpus() * pcpue_unit_size, ++ PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); ++ if (!pcpue_ptr) ++ return -ENOMEM; ++ ++ /* return the leftover and copy */ ++ for_each_possible_cpu(cpu) { ++ void *ptr = pcpue_ptr + cpu * pcpue_unit_size; ++ ++ free_bootmem(__pa(ptr + pcpue_size), ++ pcpue_unit_size - pcpue_size); ++ memcpy(ptr, __per_cpu_load, static_size); ++ } ++ ++ /* we're ready, commit */ ++ pr_info("PERCPU: Embedded %zu pages at %p, static data %zu bytes\n", ++ pcpue_size >> PAGE_SHIFT, pcpue_ptr, static_size); ++ ++ return pcpu_setup_first_chunk(pcpue_get_page, static_size, ++ reserved_size, dyn_size, ++ pcpue_unit_size, pcpue_ptr, NULL); ++} +Index: linux-2.6-tip/mm/quicklist.c +=================================================================== +--- linux-2.6-tip.orig/mm/quicklist.c ++++ linux-2.6-tip/mm/quicklist.c +@@ -19,7 +19,7 @@ + #include + #include + +-DEFINE_PER_CPU(struct quicklist, quicklist)[CONFIG_NR_QUICK]; ++DEFINE_PER_CPU_LOCKED(struct quicklist, quicklist)[CONFIG_NR_QUICK]; + + #define FRACTION_OF_NODE_MEM 16 + +@@ -29,7 +29,7 @@ static unsigned long max_pages(unsigned + int node = numa_node_id(); + struct zone *zones = NODE_DATA(node)->node_zones; + int num_cpus_on_node; +- node_to_cpumask_ptr(cpumask_on_node, node); ++ const struct cpumask *cpumask_on_node = cpumask_of_node(node); + + node_free_pages = + #ifdef CONFIG_ZONE_DMA +@@ -66,17 +66,14 @@ void quicklist_trim(int nr, void (*dtor) + { + long pages_to_free; + struct quicklist *q; ++ int cpu; + +- q = &get_cpu_var(quicklist)[nr]; ++ q = &get_cpu_var_locked(quicklist, &cpu)[nr]; + if (q->nr_pages > min_pages) { + pages_to_free = min_pages_to_free(q, min_pages, max_free); + + while (pages_to_free > 0) { +- /* +- * We pass a gfp_t of 0 to quicklist_alloc here +- * because we will never call into the page allocator. 
+- */ +- void *p = quicklist_alloc(nr, 0, NULL); ++ void *p = __quicklist_alloc(q); + + if (dtor) + dtor(p); +@@ -84,7 +81,7 @@ void quicklist_trim(int nr, void (*dtor) + pages_to_free--; + } + } +- put_cpu_var(quicklist); ++ put_cpu_var_locked(quicklist, cpu); + } + + unsigned long quicklist_total_size(void) +@@ -94,7 +91,7 @@ unsigned long quicklist_total_size(void) + struct quicklist *ql, *q; + + for_each_online_cpu(cpu) { +- ql = per_cpu(quicklist, cpu); ++ ql = per_cpu_var_locked(quicklist, cpu); + for (q = ql; q < ql + CONFIG_NR_QUICK; q++) + count += q->nr_pages; + } +Index: linux-2.6-tip/mm/slab.c +=================================================================== +--- linux-2.6-tip.orig/mm/slab.c ++++ linux-2.6-tip/mm/slab.c +@@ -102,6 +102,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -112,12 +113,145 @@ + #include + #include + #include ++#include + + #include + #include + #include + + /* ++ * On !PREEMPT_RT, raw irq flags are used as a per-CPU locking ++ * mechanism. ++ * ++ * On PREEMPT_RT, we use per-CPU locks for this. That's why the ++ * calling convention is changed slightly: a new 'flags' argument ++ * is passed to 'irq disable/enable' - the PREEMPT_RT code stores ++ * the CPU number of the lock there. ++ */ ++#ifndef CONFIG_PREEMPT_RT ++ ++# define slab_irq_disable(cpu) \ ++ do { local_irq_disable(); (cpu) = smp_processor_id(); } while (0) ++# define slab_irq_enable(cpu) local_irq_enable() ++ ++static inline void slab_irq_disable_this_rt(int cpu) ++{ ++} ++ ++static inline void slab_irq_enable_rt(int cpu) ++{ ++} ++ ++# define slab_irq_save(flags, cpu) \ ++ do { local_irq_save(flags); (cpu) = smp_processor_id(); } while (0) ++# define slab_irq_restore(flags, cpu) local_irq_restore(flags) ++ ++/* ++ * In the __GFP_WAIT case we enable/disable interrupts on !PREEMPT_RT, ++ * which has no per-CPU locking effect since we are holding the cache ++ * lock in that case already. ++ */ ++static void slab_irq_enable_GFP_WAIT(gfp_t flags, int *cpu) ++{ ++ if (flags & __GFP_WAIT) ++ local_irq_enable(); ++} ++ ++static void slab_irq_disable_GFP_WAIT(gfp_t flags, int *cpu) ++{ ++ if (flags & __GFP_WAIT) ++ local_irq_disable(); ++} ++ ++# define slab_spin_lock_irq(lock, cpu) \ ++ do { spin_lock_irq(lock); (cpu) = smp_processor_id(); } while (0) ++# define slab_spin_unlock_irq(lock, cpu) spin_unlock_irq(lock) ++ ++# define slab_spin_lock_irqsave(lock, flags, cpu) \ ++ do { spin_lock_irqsave(lock, flags); (cpu) = smp_processor_id(); } while (0) ++# define slab_spin_unlock_irqrestore(lock, flags, cpu) \ ++ do { spin_unlock_irqrestore(lock, flags); } while (0) ++ ++#else /* CONFIG_PREEMPT_RT */ ++ ++/* ++ * Instead of serializing the per-cpu state by disabling interrupts we do so ++ * by a lock. This keeps the code preemptable - albeit at the cost of remote ++ * memory access when the task does get migrated away. 
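The comment above sums up the PREEMPT_RT approach: keep the section preemptible by taking a per-CPU lock instead of disabling interrupts, accepting an occasional remote access after migration. A rough user-space model of such a "per-CPU locked" variable, in the spirit of get_cpu_var_locked()/put_cpu_var_locked() but using POSIX threads purely for illustration, could look like this (build with -pthread):

/*
 * Toy model of a per-CPU variable protected by a per-CPU lock rather
 * than by disabling interrupts: get() picks a slot and takes that
 * slot's lock, put() releases it.  Purely illustrative; the kernel's
 * get_cpu_var_locked() derives the CPU number differently.
 */
#include <pthread.h>
#include <stdio.h>

#define NR_CPUS         4
#define NR_THREADS      8
#define ITERATIONS      100000

struct percpu_locked {
        pthread_mutex_t lock;   /* stands in for the per-CPU lock */
        long count;             /* stands in for the per-CPU data */
};

static struct percpu_locked pcp[NR_CPUS];

/* "get_cpu_var_locked": pick a slot and lock it, returning the slot */
static int get_locked(unsigned long hint)
{
        int cpu = hint % NR_CPUS;

        pthread_mutex_lock(&pcp[cpu].lock);
        return cpu;
}

/* "put_cpu_var_locked": drop the slot's lock */
static void put_locked(int cpu)
{
        pthread_mutex_unlock(&pcp[cpu].lock);
}

static void *worker(void *arg)
{
        unsigned long hint = (unsigned long)arg;
        int i;

        for (i = 0; i < ITERATIONS; i++) {
                int cpu = get_locked(hint + i);

                pcp[cpu].count++;       /* safely update "per-CPU" state */
                put_locked(cpu);
        }
        return NULL;
}

int main(void)
{
        pthread_t threads[NR_THREADS];
        long total = 0;
        int i;

        for (i = 0; i < NR_CPUS; i++)
                pthread_mutex_init(&pcp[i].lock, NULL);
        for (i = 0; i < NR_THREADS; i++)
                pthread_create(&threads[i], NULL, worker,
                               (void *)(unsigned long)i);
        for (i = 0; i < NR_THREADS; i++)
                pthread_join(threads[i], NULL);
        for (i = 0; i < NR_CPUS; i++)
                total += pcp[i].count;

        printf("total updates: %ld (expected %d)\n",
               total, NR_THREADS * ITERATIONS);
        return 0;
}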
++ */ ++DEFINE_PER_CPU_LOCKED(struct list_head, slab) = { 0, }; ++ ++static void _slab_irq_disable(int *cpu) ++{ ++ (void)get_cpu_var_locked(slab, cpu); ++} ++ ++#define slab_irq_disable(cpu) _slab_irq_disable(&(cpu)) ++ ++static inline void slab_irq_enable(int cpu) ++{ ++ LIST_HEAD(list); ++ ++ list_splice_init(&__get_cpu_var_locked(slab, cpu), &list); ++ put_cpu_var_locked(slab, cpu); ++ ++ while (!list_empty(&list)) { ++ struct page *page = list_first_entry(&list, struct page, lru); ++ list_del(&page->lru); ++ __free_pages(page, page->index); ++ } ++} ++ ++static inline void slab_irq_disable_this_rt(int cpu) ++{ ++ spin_lock(&__get_cpu_lock(slab, cpu)); ++} ++ ++static inline void slab_irq_enable_rt(int cpu) ++{ ++ LIST_HEAD(list); ++ ++ list_splice_init(&__get_cpu_var_locked(slab, cpu), &list); ++ spin_unlock(&__get_cpu_lock(slab, cpu)); ++ ++ while (!list_empty(&list)) { ++ struct page *page = list_first_entry(&list, struct page, lru); ++ list_del(&page->lru); ++ __free_pages(page, page->index); ++ } ++} ++ ++# define slab_irq_save(flags, cpu) \ ++ do { slab_irq_disable(cpu); (void) (flags); } while (0) ++# define slab_irq_restore(flags, cpu) \ ++ do { slab_irq_enable(cpu); (void) (flags); } while (0) ++ ++/* ++ * On PREEMPT_RT we have to drop the locks unconditionally to avoid lock ++ * recursion on the cache_grow()->alloc_slabmgmt() path. ++ */ ++static void slab_irq_enable_GFP_WAIT(gfp_t flags, int *cpu) ++{ ++ slab_irq_enable(*cpu); ++} ++ ++static void slab_irq_disable_GFP_WAIT(gfp_t flags, int *cpu) ++{ ++ slab_irq_disable(*cpu); ++} ++ ++# define slab_spin_lock_irq(lock, cpu) \ ++ do { slab_irq_disable(cpu); spin_lock(lock); } while (0) ++# define slab_spin_unlock_irq(lock, cpu) \ ++ do { spin_unlock(lock); slab_irq_enable(cpu); } while (0) ++# define slab_spin_lock_irqsave(lock, flags, cpu) \ ++ do { slab_irq_disable(cpu); spin_lock_irqsave(lock, flags); } while (0) ++# define slab_spin_unlock_irqrestore(lock, flags, cpu) \ ++ do { spin_unlock_irqrestore(lock, flags); slab_irq_enable(cpu); } while (0) ++ ++#endif /* CONFIG_PREEMPT_RT */ ++ ++/* + * DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON. + * 0 for faster, smaller code (especially in the critical paths). + * +@@ -177,13 +311,13 @@ + SLAB_STORE_USER | \ + SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ + SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ +- SLAB_DEBUG_OBJECTS) ++ SLAB_DEBUG_OBJECTS | SLAB_NOTRACK) + #else + # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ + SLAB_CACHE_DMA | \ + SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ + SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ +- SLAB_DEBUG_OBJECTS) ++ SLAB_DEBUG_OBJECTS | SLAB_NOTRACK) + #endif + + /* +@@ -313,7 +447,7 @@ struct kmem_list3 __initdata initkmem_li + static int drain_freelist(struct kmem_cache *cache, + struct kmem_list3 *l3, int tofree); + static void free_block(struct kmem_cache *cachep, void **objpp, int len, +- int node); ++ int node, int *this_cpu); + static int enable_cpucache(struct kmem_cache *cachep); + static void cache_reap(struct work_struct *unused); + +@@ -372,87 +506,6 @@ static void kmem_list3_init(struct kmem_ + MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ + } while (0) + +-/* +- * struct kmem_cache +- * +- * manages a cache. +- */ +- +-struct kmem_cache { +-/* 1) per-cpu data, touched during every alloc/free */ +- struct array_cache *array[NR_CPUS]; +-/* 2) Cache tunables. 
Protected by cache_chain_mutex */ +- unsigned int batchcount; +- unsigned int limit; +- unsigned int shared; +- +- unsigned int buffer_size; +- u32 reciprocal_buffer_size; +-/* 3) touched by every alloc & free from the backend */ +- +- unsigned int flags; /* constant flags */ +- unsigned int num; /* # of objs per slab */ +- +-/* 4) cache_grow/shrink */ +- /* order of pgs per slab (2^n) */ +- unsigned int gfporder; +- +- /* force GFP flags, e.g. GFP_DMA */ +- gfp_t gfpflags; +- +- size_t colour; /* cache colouring range */ +- unsigned int colour_off; /* colour offset */ +- struct kmem_cache *slabp_cache; +- unsigned int slab_size; +- unsigned int dflags; /* dynamic flags */ +- +- /* constructor func */ +- void (*ctor)(void *obj); +- +-/* 5) cache creation/removal */ +- const char *name; +- struct list_head next; +- +-/* 6) statistics */ +-#if STATS +- unsigned long num_active; +- unsigned long num_allocations; +- unsigned long high_mark; +- unsigned long grown; +- unsigned long reaped; +- unsigned long errors; +- unsigned long max_freeable; +- unsigned long node_allocs; +- unsigned long node_frees; +- unsigned long node_overflow; +- atomic_t allochit; +- atomic_t allocmiss; +- atomic_t freehit; +- atomic_t freemiss; +-#endif +-#if DEBUG +- /* +- * If debugging is enabled, then the allocator can add additional +- * fields and/or padding to every object. buffer_size contains the total +- * object size including these internal fields, the following two +- * variables contain the offset to the user object and its size. +- */ +- int obj_offset; +- int obj_size; +-#endif +- /* +- * We put nodelists[] at the end of kmem_cache, because we want to size +- * this array to nr_node_ids slots instead of MAX_NUMNODES +- * (see kmem_cache_init()) +- * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache +- * is statically defined, so we reserve the max number of nodes. +- */ +- struct kmem_list3 *nodelists[MAX_NUMNODES]; +- /* +- * Do not add fields after nodelists[] +- */ +-}; +- + #define CFLGS_OFF_SLAB (0x80000000UL) + #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) + +@@ -568,6 +621,14 @@ static void **dbg_userword(struct kmem_c + + #endif + ++#ifdef CONFIG_KMEMTRACE ++size_t slab_buffer_size(struct kmem_cache *cachep) ++{ ++ return cachep->buffer_size; ++} ++EXPORT_SYMBOL(slab_buffer_size); ++#endif ++ + /* + * Do not go above this order unless 0 objects fit into the slab. 
+ */ +@@ -756,9 +817,10 @@ int slab_is_available(void) + + static DEFINE_PER_CPU(struct delayed_work, reap_work); + +-static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) ++static inline struct array_cache * ++cpu_cache_get(struct kmem_cache *cachep, int this_cpu) + { +- return cachep->array[smp_processor_id()]; ++ return cachep->array[this_cpu]; + } + + static inline struct kmem_cache *__find_general_cachep(size_t size, +@@ -992,7 +1054,7 @@ static int transfer_objects(struct array + #ifndef CONFIG_NUMA + + #define drain_alien_cache(cachep, alien) do { } while (0) +-#define reap_alien(cachep, l3) do { } while (0) ++#define reap_alien(cachep, l3, this_cpu) 0 + + static inline struct array_cache **alloc_alien_cache(int node, int limit) + { +@@ -1003,27 +1065,29 @@ static inline void free_alien_cache(stru + { + } + +-static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) ++static inline int ++cache_free_alien(struct kmem_cache *cachep, void *objp, int *this_cpu) + { + return 0; + } + + static inline void *alternate_node_alloc(struct kmem_cache *cachep, +- gfp_t flags) ++ gfp_t flags, int *this_cpu) + { + return NULL; + } + + static inline void *____cache_alloc_node(struct kmem_cache *cachep, +- gfp_t flags, int nodeid) ++ gfp_t flags, int nodeid, int *this_cpu) + { + return NULL; + } + + #else /* CONFIG_NUMA */ + +-static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); +-static void *alternate_node_alloc(struct kmem_cache *, gfp_t); ++static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, ++ int nodeid, int *this_cpu); ++static void *alternate_node_alloc(struct kmem_cache *, gfp_t, int *); + + static struct array_cache **alloc_alien_cache(int node, int limit) + { +@@ -1064,7 +1128,8 @@ static void free_alien_cache(struct arra + } + + static void __drain_alien_cache(struct kmem_cache *cachep, +- struct array_cache *ac, int node) ++ struct array_cache *ac, int node, ++ int *this_cpu) + { + struct kmem_list3 *rl3 = cachep->nodelists[node]; + +@@ -1078,7 +1143,7 @@ static void __drain_alien_cache(struct k + if (rl3->shared) + transfer_objects(rl3->shared, ac, ac->limit); + +- free_block(cachep, ac->entry, ac->avail, node); ++ free_block(cachep, ac->entry, ac->avail, node, this_cpu); + ac->avail = 0; + spin_unlock(&rl3->list_lock); + } +@@ -1087,38 +1152,42 @@ static void __drain_alien_cache(struct k + /* + * Called from cache_reap() to regularly drain alien caches round robin. 
+ */ +-static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) ++static int ++reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3, int *this_cpu) + { +- int node = __get_cpu_var(reap_node); ++ int node = per_cpu(reap_node, *this_cpu); + + if (l3->alien) { + struct array_cache *ac = l3->alien[node]; + + if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { +- __drain_alien_cache(cachep, ac, node); ++ __drain_alien_cache(cachep, ac, node, this_cpu); + spin_unlock_irq(&ac->lock); ++ return 1; + } + } ++ return 0; + } + + static void drain_alien_cache(struct kmem_cache *cachep, + struct array_cache **alien) + { +- int i = 0; ++ int i = 0, this_cpu; + struct array_cache *ac; + unsigned long flags; + + for_each_online_node(i) { + ac = alien[i]; + if (ac) { +- spin_lock_irqsave(&ac->lock, flags); +- __drain_alien_cache(cachep, ac, i); +- spin_unlock_irqrestore(&ac->lock, flags); ++ slab_spin_lock_irqsave(&ac->lock, flags, this_cpu); ++ __drain_alien_cache(cachep, ac, i, &this_cpu); ++ slab_spin_unlock_irqrestore(&ac->lock, flags, this_cpu); + } + } + } + +-static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) ++static inline int ++cache_free_alien(struct kmem_cache *cachep, void *objp, int *this_cpu) + { + struct slab *slabp = virt_to_slab(objp); + int nodeid = slabp->nodeid; +@@ -1126,7 +1195,7 @@ static inline int cache_free_alien(struc + struct array_cache *alien = NULL; + int node; + +- node = numa_node_id(); ++ node = cpu_to_node(*this_cpu); + + /* + * Make sure we are not freeing a object from another node to the array +@@ -1142,30 +1211,31 @@ static inline int cache_free_alien(struc + spin_lock(&alien->lock); + if (unlikely(alien->avail == alien->limit)) { + STATS_INC_ACOVERFLOW(cachep); +- __drain_alien_cache(cachep, alien, nodeid); ++ __drain_alien_cache(cachep, alien, nodeid, this_cpu); + } + alien->entry[alien->avail++] = objp; + spin_unlock(&alien->lock); + } else { + spin_lock(&(cachep->nodelists[nodeid])->list_lock); +- free_block(cachep, &objp, 1, nodeid); ++ free_block(cachep, &objp, 1, nodeid, this_cpu); + spin_unlock(&(cachep->nodelists[nodeid])->list_lock); + } + return 1; + } + #endif + +-static void __cpuinit cpuup_canceled(long cpu) ++static void __cpuinit cpuup_canceled(int cpu) + { + struct kmem_cache *cachep; + struct kmem_list3 *l3 = NULL; + int node = cpu_to_node(cpu); +- node_to_cpumask_ptr(mask, node); ++ const struct cpumask *mask = cpumask_of_node(node); + + list_for_each_entry(cachep, &cache_chain, next) { + struct array_cache *nc; + struct array_cache *shared; + struct array_cache **alien; ++ int orig_cpu = cpu; + + /* cpu is dead; no one can alloc from it. 
*/ + nc = cachep->array[cpu]; +@@ -1180,7 +1250,8 @@ static void __cpuinit cpuup_canceled(lon + /* Free limit for this kmem_list3 */ + l3->free_limit -= cachep->batchcount; + if (nc) +- free_block(cachep, nc->entry, nc->avail, node); ++ free_block(cachep, nc->entry, nc->avail, node, ++ &cpu); + + if (!cpus_empty(*mask)) { + spin_unlock_irq(&l3->list_lock); +@@ -1190,7 +1261,7 @@ static void __cpuinit cpuup_canceled(lon + shared = l3->shared; + if (shared) { + free_block(cachep, shared->entry, +- shared->avail, node); ++ shared->avail, node, &cpu); + l3->shared = NULL; + } + +@@ -1206,6 +1277,7 @@ static void __cpuinit cpuup_canceled(lon + } + free_array_cache: + kfree(nc); ++ BUG_ON(cpu != orig_cpu); + } + /* + * In the previous loop, all the objects were freed to +@@ -1220,7 +1292,7 @@ free_array_cache: + } + } + +-static int __cpuinit cpuup_prepare(long cpu) ++static int __cpuinit cpuup_prepare(int cpu) + { + struct kmem_cache *cachep; + struct kmem_list3 *l3 = NULL; +@@ -1328,10 +1400,19 @@ static int __cpuinit cpuup_callback(stru + long cpu = (long)hcpu; + int err = 0; + ++ + switch (action) { + case CPU_UP_PREPARE: + case CPU_UP_PREPARE_FROZEN: + mutex_lock(&cache_chain_mutex); ++ /* ++ * lock/unlock cycle to push any holders away -- no new ones ++ * can come in due to the cpu still being offline. ++ * ++ * XXX -- weird case anyway, can it happen? ++ */ ++ slab_irq_disable_this_rt(cpu); ++ slab_irq_enable_rt(cpu); + err = cpuup_prepare(cpu); + mutex_unlock(&cache_chain_mutex); + break; +@@ -1371,10 +1452,14 @@ static int __cpuinit cpuup_callback(stru + case CPU_UP_CANCELED: + case CPU_UP_CANCELED_FROZEN: + mutex_lock(&cache_chain_mutex); ++ slab_irq_disable_this_rt(cpu); + cpuup_canceled(cpu); ++ slab_irq_enable_rt(cpu); + mutex_unlock(&cache_chain_mutex); + break; + } ++ ++ + return err ? 
NOTIFY_BAD : NOTIFY_OK; + } + +@@ -1389,11 +1474,13 @@ static void init_list(struct kmem_cache + int nodeid) + { + struct kmem_list3 *ptr; ++ int this_cpu; + + ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid); + BUG_ON(!ptr); + +- local_irq_disable(); ++ WARN_ON(spin_is_locked(&list->list_lock)); ++ slab_irq_disable(this_cpu); + memcpy(ptr, list, sizeof(struct kmem_list3)); + /* + * Do not assume that spinlocks can be initialized via memcpy: +@@ -1402,7 +1489,7 @@ static void init_list(struct kmem_cache + + MAKE_ALL_LISTS(cachep, ptr, nodeid); + cachep->nodelists[nodeid] = ptr; +- local_irq_enable(); ++ slab_irq_enable(this_cpu); + } + + /* +@@ -1434,6 +1521,12 @@ void __init kmem_cache_init(void) + int order; + int node; + ++#ifdef CONFIG_PREEMPT_RT ++ for_each_possible_cpu(i) { ++ INIT_LIST_HEAD(&__get_cpu_var_locked(slab, i)); ++ } ++#endif ++ + if (num_possible_nodes() == 1) { + use_alien_caches = 0; + numa_platform = 0; +@@ -1565,36 +1658,34 @@ void __init kmem_cache_init(void) + /* 4) Replace the bootstrap head arrays */ + { + struct array_cache *ptr; ++ int this_cpu; + + ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); + +- local_irq_disable(); +- BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); +- memcpy(ptr, cpu_cache_get(&cache_cache), +- sizeof(struct arraycache_init)); ++ slab_irq_disable(this_cpu); ++ BUG_ON(cpu_cache_get(&cache_cache, this_cpu) != &initarray_cache.cache); ++ memcpy(ptr, cpu_cache_get(&cache_cache, this_cpu), ++ sizeof(struct arraycache_init)); + /* + * Do not assume that spinlocks can be initialized via memcpy: + */ + spin_lock_init(&ptr->lock); +- +- cache_cache.array[smp_processor_id()] = ptr; +- local_irq_enable(); ++ cache_cache.array[this_cpu] = ptr; ++ slab_irq_enable(this_cpu); + + ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); + +- local_irq_disable(); +- BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) +- != &initarray_generic.cache); +- memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), +- sizeof(struct arraycache_init)); ++ slab_irq_disable(this_cpu); ++ BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep, this_cpu) ++ != &initarray_generic.cache); ++ memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep, this_cpu), ++ sizeof(struct arraycache_init)); + /* + * Do not assume that spinlocks can be initialized via memcpy: + */ + spin_lock_init(&ptr->lock); +- +- malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = +- ptr; +- local_irq_enable(); ++ malloc_sizes[INDEX_AC].cs_cachep->array[this_cpu] = ptr; ++ slab_irq_enable(this_cpu); + } + /* 5) Replace the bootstrap kmem_list3's */ + { +@@ -1680,7 +1771,7 @@ static void *kmem_getpages(struct kmem_c + if (cachep->flags & SLAB_RECLAIM_ACCOUNT) + flags |= __GFP_RECLAIMABLE; + +- page = alloc_pages_node(nodeid, flags, cachep->gfporder); ++ page = alloc_pages_node(nodeid, flags & ~__GFP_NOTRACK, cachep->gfporder); + if (!page) + return NULL; + +@@ -1693,24 +1784,39 @@ static void *kmem_getpages(struct kmem_c + NR_SLAB_UNRECLAIMABLE, nr_pages); + for (i = 0; i < nr_pages; i++) + __SetPageSlab(page + i); ++ ++ if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { ++ kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid); ++ ++ if (cachep->ctor) ++ kmemcheck_mark_uninitialized_pages(page, nr_pages); ++ else ++ kmemcheck_mark_unallocated_pages(page, nr_pages); ++ } ++ + return page_address(page); + } + + /* + * Interface to system's page release. 
+ */ +-static void kmem_freepages(struct kmem_cache *cachep, void *addr) ++static void kmem_freepages(struct kmem_cache *cachep, void *addr, int cpu) + { + unsigned long i = (1 << cachep->gfporder); +- struct page *page = virt_to_page(addr); ++ struct page *page, *basepage = virt_to_page(addr); + const unsigned long nr_freed = i; + ++ page = basepage; ++ ++ kmemcheck_free_shadow(page, cachep->gfporder); ++ + if (cachep->flags & SLAB_RECLAIM_ACCOUNT) + sub_zone_page_state(page_zone(page), + NR_SLAB_RECLAIMABLE, nr_freed); + else + sub_zone_page_state(page_zone(page), + NR_SLAB_UNRECLAIMABLE, nr_freed); ++ + while (i--) { + BUG_ON(!PageSlab(page)); + __ClearPageSlab(page); +@@ -1718,6 +1824,13 @@ static void kmem_freepages(struct kmem_c + } + if (current->reclaim_state) + current->reclaim_state->reclaimed_slab += nr_freed; ++ ++#ifdef CONFIG_PREEMPT_RT ++ if (cpu >= 0) { ++ basepage->index = cachep->gfporder; ++ list_add(&basepage->lru, &__get_cpu_var_locked(slab, cpu)); ++ } else ++#endif + free_pages((unsigned long)addr, cachep->gfporder); + } + +@@ -1726,7 +1839,7 @@ static void kmem_rcu_free(struct rcu_hea + struct slab_rcu *slab_rcu = (struct slab_rcu *)head; + struct kmem_cache *cachep = slab_rcu->cachep; + +- kmem_freepages(cachep, slab_rcu->addr); ++ kmem_freepages(cachep, slab_rcu->addr, -1); + if (OFF_SLAB(cachep)) + kmem_cache_free(cachep->slabp_cache, slab_rcu); + } +@@ -1746,7 +1859,7 @@ static void store_stackinfo(struct kmem_ + + *addr++ = 0x12345678; + *addr++ = caller; +- *addr++ = smp_processor_id(); ++ *addr++ = raw_smp_processor_id(); + size -= 3 * sizeof(unsigned long); + { + unsigned long *sptr = &caller; +@@ -1936,6 +2049,10 @@ static void slab_destroy_debugcheck(stru + } + #endif + ++static void ++__cache_free(struct kmem_cache *cachep, void *objp, int *this_cpu); ++ ++ + /** + * slab_destroy - destroy and release all objects in a slab + * @cachep: cache pointer being destroyed +@@ -1945,7 +2062,8 @@ static void slab_destroy_debugcheck(stru + * Before calling the slab must have been unlinked from the cache. The + * cache-lock is not held/needed. 
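
Editorial sketch (not part of the patch): kmem_freepages() above, when given a valid CPU index on PREEMPT_RT, does not hand the pages back to the page allocator immediately. It parks the compound page on the per-CPU locked list that kmem_cache_init() initialises, stashing the order in page->index, and leaves the actual free for later; the RCU path and other callers pass -1 and free right away. The drain side is outside this hunk, so the following is only a hypothetical illustration of what it could look like, using just the fields the patch itself stores (page->index for the order, page->lru for the linkage):

/* hypothetical drain of one CPU's deferred page list; it would have to
 * run under that CPU's slab lock (or after the CPU has been quiesced) */
static void drain_deferred_slab_pages(int cpu)
{
        struct list_head *head = &__get_cpu_var_locked(slab, cpu);
        struct page *page, *tmp;

        list_for_each_entry_safe(page, tmp, head, lru) {
                list_del(&page->lru);
                /* kmem_freepages() stashed the order in page->index */
                __free_pages(page, page->index);
        }
}
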
+ */ +-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) ++static void ++slab_destroy(struct kmem_cache *cachep, struct slab *slabp, int *this_cpu) + { + void *addr = slabp->s_mem - slabp->colouroff; + +@@ -1958,9 +2076,13 @@ static void slab_destroy(struct kmem_cac + slab_rcu->addr = addr; + call_rcu(&slab_rcu->head, kmem_rcu_free); + } else { +- kmem_freepages(cachep, addr); +- if (OFF_SLAB(cachep)) +- kmem_cache_free(cachep->slabp_cache, slabp); ++ kmem_freepages(cachep, addr, *this_cpu); ++ if (OFF_SLAB(cachep)) { ++ if (this_cpu) ++ __cache_free(cachep->slabp_cache, slabp, this_cpu); ++ else ++ kmem_cache_free(cachep->slabp_cache, slabp); ++ } + } + } + +@@ -2057,6 +2179,8 @@ static size_t calculate_slab_order(struc + + static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) + { ++ int this_cpu; ++ + if (g_cpucache_up == FULL) + return enable_cpucache(cachep); + +@@ -2100,10 +2224,12 @@ static int __init_refok setup_cpu_cache( + jiffies + REAPTIMEOUT_LIST3 + + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; + +- cpu_cache_get(cachep)->avail = 0; +- cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; +- cpu_cache_get(cachep)->batchcount = 1; +- cpu_cache_get(cachep)->touched = 0; ++ this_cpu = raw_smp_processor_id(); ++ ++ cpu_cache_get(cachep, this_cpu)->avail = 0; ++ cpu_cache_get(cachep, this_cpu)->limit = BOOT_CPUCACHE_ENTRIES; ++ cpu_cache_get(cachep, this_cpu)->batchcount = 1; ++ cpu_cache_get(cachep, this_cpu)->touched = 0; + cachep->batchcount = 1; + cachep->limit = BOOT_CPUCACHE_ENTRIES; + return 0; +@@ -2394,19 +2520,19 @@ EXPORT_SYMBOL(kmem_cache_create); + #if DEBUG + static void check_irq_off(void) + { ++/* ++ * On PREEMPT_RT we use locks to protect the per-CPU lists, ++ * and keep interrupts enabled. ++ */ ++#ifndef CONFIG_PREEMPT_RT + BUG_ON(!irqs_disabled()); ++#endif + } + + static void check_irq_on(void) + { ++#ifndef CONFIG_PREEMPT_RT + BUG_ON(irqs_disabled()); +-} +- +-static void check_spinlock_acquired(struct kmem_cache *cachep) +-{ +-#ifdef CONFIG_SMP +- check_irq_off(); +- assert_spin_locked(&cachep->nodelists[numa_node_id()]->list_lock); + #endif + } + +@@ -2421,34 +2547,67 @@ static void check_spinlock_acquired_node + #else + #define check_irq_off() do { } while(0) + #define check_irq_on() do { } while(0) +-#define check_spinlock_acquired(x) do { } while(0) + #define check_spinlock_acquired_node(x, y) do { } while(0) + #endif + +-static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, ++static int drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, + struct array_cache *ac, + int force, int node); + +-static void do_drain(void *arg) ++static void __do_drain(void *arg, int this_cpu) + { + struct kmem_cache *cachep = arg; ++ int node = cpu_to_node(this_cpu); + struct array_cache *ac; +- int node = numa_node_id(); + + check_irq_off(); +- ac = cpu_cache_get(cachep); ++ ac = cpu_cache_get(cachep, this_cpu); + spin_lock(&cachep->nodelists[node]->list_lock); +- free_block(cachep, ac->entry, ac->avail, node); ++ free_block(cachep, ac->entry, ac->avail, node, &this_cpu); + spin_unlock(&cachep->nodelists[node]->list_lock); + ac->avail = 0; + } + ++#ifdef CONFIG_PREEMPT_RT ++static void do_drain(void *arg, int this_cpu) ++{ ++ __do_drain(arg, this_cpu); ++} ++#else ++static void do_drain(void *arg) ++{ ++ __do_drain(arg, smp_processor_id()); ++} ++#endif ++ ++#ifdef CONFIG_PREEMPT_RT ++/* ++ * execute func() for all CPUs. 
On PREEMPT_RT we dont actually have ++ * to run on the remote CPUs - we only have to take their CPU-locks. ++ * (This is a rare operation, so cacheline bouncing is not an issue.) ++ */ ++static void ++slab_on_each_cpu(void (*func)(void *arg, int this_cpu), void *arg) ++{ ++ unsigned int i; ++ ++ check_irq_on(); ++ for_each_online_cpu(i) { ++ spin_lock(&__get_cpu_lock(slab, i)); ++ func(arg, i); ++ spin_unlock(&__get_cpu_lock(slab, i)); ++ } ++} ++#else ++# define slab_on_each_cpu(func, cachep) on_each_cpu(func, cachep, 1) ++#endif ++ + static void drain_cpu_caches(struct kmem_cache *cachep) + { + struct kmem_list3 *l3; + int node; + +- on_each_cpu(do_drain, cachep, 1); ++ slab_on_each_cpu(do_drain, cachep); + check_irq_on(); + for_each_online_node(node) { + l3 = cachep->nodelists[node]; +@@ -2473,16 +2632,16 @@ static int drain_freelist(struct kmem_ca + struct kmem_list3 *l3, int tofree) + { + struct list_head *p; +- int nr_freed; ++ int nr_freed, this_cpu; + struct slab *slabp; + + nr_freed = 0; + while (nr_freed < tofree && !list_empty(&l3->slabs_free)) { + +- spin_lock_irq(&l3->list_lock); ++ slab_spin_lock_irq(&l3->list_lock, this_cpu); + p = l3->slabs_free.prev; + if (p == &l3->slabs_free) { +- spin_unlock_irq(&l3->list_lock); ++ slab_spin_unlock_irq(&l3->list_lock, this_cpu); + goto out; + } + +@@ -2491,13 +2650,9 @@ static int drain_freelist(struct kmem_ca + BUG_ON(slabp->inuse); + #endif + list_del(&slabp->list); +- /* +- * Safe to drop the lock. The slab is no longer linked +- * to the cache. +- */ + l3->free_objects -= cache->num; +- spin_unlock_irq(&l3->list_lock); +- slab_destroy(cache, slabp); ++ slab_destroy(cache, slabp, &this_cpu); ++ slab_spin_unlock_irq(&l3->list_lock, this_cpu); + nr_freed++; + } + out: +@@ -2753,8 +2908,8 @@ static void slab_map_pages(struct kmem_c + * Grow (by 1) the number of slabs within a cache. This is called by + * kmem_cache_alloc() when there are no active objs left in a cache. 
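
Editorial sketch (not part of the patch): slab_on_each_cpu() and the __do_drain()/do_drain() pair above establish the pattern for any per-CPU callback in this file. The worker takes an explicit CPU argument, and a thin wrapper adapts it to the calling convention of the configuration: on_each_cpu() (IPI, callback runs on every CPU with IRQs off) without PREEMPT_RT, versus a local walk that takes each CPU's slab lock with PREEMPT_RT. A new callback would follow the same shape; the names below are hypothetical:

static void __do_touch(void *arg, int this_cpu)
{
        struct kmem_cache *cachep = arg;

        /* safe: either IRQs are off on this_cpu (!PREEMPT_RT) or
         * this_cpu's slab lock is held by slab_on_each_cpu() (PREEMPT_RT) */
        cpu_cache_get(cachep, this_cpu)->touched = 0;
}

#ifdef CONFIG_PREEMPT_RT
static void do_touch(void *arg, int this_cpu)
{
        __do_touch(arg, this_cpu);
}
#else
static void do_touch(void *arg)
{
        __do_touch(arg, smp_processor_id());
}
#endif

/* caller side, identical in both configurations:
 *      slab_on_each_cpu(do_touch, cachep);
 */
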
+ */ +-static int cache_grow(struct kmem_cache *cachep, +- gfp_t flags, int nodeid, void *objp) ++static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid, ++ void *objp, int *this_cpu) + { + struct slab *slabp; + size_t offset; +@@ -2782,8 +2937,7 @@ static int cache_grow(struct kmem_cache + + offset *= cachep->colour_off; + +- if (local_flags & __GFP_WAIT) +- local_irq_enable(); ++ slab_irq_enable_GFP_WAIT(local_flags, this_cpu); + + /* + * The test for missing atomic flag is performed here, rather than +@@ -2812,8 +2966,8 @@ static int cache_grow(struct kmem_cache + + cache_init_objs(cachep, slabp); + +- if (local_flags & __GFP_WAIT) +- local_irq_disable(); ++ slab_irq_disable_GFP_WAIT(local_flags, this_cpu); ++ + check_irq_off(); + spin_lock(&l3->list_lock); + +@@ -2824,10 +2978,9 @@ static int cache_grow(struct kmem_cache + spin_unlock(&l3->list_lock); + return 1; + opps1: +- kmem_freepages(cachep, objp); ++ kmem_freepages(cachep, objp, -1); + failed: +- if (local_flags & __GFP_WAIT) +- local_irq_disable(); ++ slab_irq_disable_GFP_WAIT(local_flags, this_cpu); + return 0; + } + +@@ -2949,7 +3102,8 @@ bad: + #define check_slabp(x,y) do { } while(0) + #endif + +-static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) ++static void * ++cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags, int *this_cpu) + { + int batchcount; + struct kmem_list3 *l3; +@@ -2959,7 +3113,7 @@ static void *cache_alloc_refill(struct k + retry: + check_irq_off(); + node = numa_node_id(); +- ac = cpu_cache_get(cachep); ++ ac = cpu_cache_get(cachep, *this_cpu); + batchcount = ac->batchcount; + if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { + /* +@@ -2969,7 +3123,7 @@ retry: + */ + batchcount = BATCHREFILL_LIMIT; + } +- l3 = cachep->nodelists[node]; ++ l3 = cachep->nodelists[cpu_to_node(*this_cpu)]; + + BUG_ON(ac->avail > 0 || !l3); + spin_lock(&l3->list_lock); +@@ -2992,7 +3146,7 @@ retry: + + slabp = list_entry(entry, struct slab, list); + check_slabp(cachep, slabp); +- check_spinlock_acquired(cachep); ++ check_spinlock_acquired_node(cachep, cpu_to_node(*this_cpu)); + + /* + * The slab was either on partial or free list so +@@ -3006,8 +3160,9 @@ retry: + STATS_INC_ACTIVE(cachep); + STATS_SET_HIGH(cachep); + +- ac->entry[ac->avail++] = slab_get_obj(cachep, slabp, +- node); ++ ac->entry[ac->avail++] = ++ slab_get_obj(cachep, slabp, ++ cpu_to_node(*this_cpu)); + } + check_slabp(cachep, slabp); + +@@ -3026,10 +3181,10 @@ alloc_done: + + if (unlikely(!ac->avail)) { + int x; +- x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL); ++ x = cache_grow(cachep, flags | GFP_THISNODE, cpu_to_node(*this_cpu), NULL, this_cpu); + + /* cache_grow can reenable interrupts, then ac could change. */ +- ac = cpu_cache_get(cachep); ++ ac = cpu_cache_get(cachep, *this_cpu); + if (!x && ac->avail == 0) /* no objects in sight? 
abort */ + return NULL; + +@@ -3116,21 +3271,22 @@ static bool slab_should_failslab(struct + return should_failslab(obj_size(cachep), flags); + } + +-static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) ++static inline void * ++____cache_alloc(struct kmem_cache *cachep, gfp_t flags, int *this_cpu) + { + void *objp; + struct array_cache *ac; + + check_irq_off(); + +- ac = cpu_cache_get(cachep); ++ ac = cpu_cache_get(cachep, *this_cpu); + if (likely(ac->avail)) { + STATS_INC_ALLOCHIT(cachep); + ac->touched = 1; + objp = ac->entry[--ac->avail]; + } else { + STATS_INC_ALLOCMISS(cachep); +- objp = cache_alloc_refill(cachep, flags); ++ objp = cache_alloc_refill(cachep, flags, this_cpu); + } + return objp; + } +@@ -3142,7 +3298,8 @@ static inline void *____cache_alloc(stru + * If we are in_interrupt, then process context, including cpusets and + * mempolicy, may not apply and should not be used for allocation policy. + */ +-static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) ++static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags, ++ int *this_cpu) + { + int nid_alloc, nid_here; + +@@ -3154,7 +3311,7 @@ static void *alternate_node_alloc(struct + else if (current->mempolicy) + nid_alloc = slab_node(current->mempolicy); + if (nid_alloc != nid_here) +- return ____cache_alloc_node(cachep, flags, nid_alloc); ++ return ____cache_alloc_node(cachep, flags, nid_alloc, this_cpu); + return NULL; + } + +@@ -3166,7 +3323,7 @@ static void *alternate_node_alloc(struct + * allocator to do its reclaim / fallback magic. We then insert the + * slab into the proper nodelist and then allocate from it. + */ +-static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags) ++static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags, int *this_cpu) + { + struct zonelist *zonelist; + gfp_t local_flags; +@@ -3194,7 +3351,8 @@ retry: + cache->nodelists[nid] && + cache->nodelists[nid]->free_objects) { + obj = ____cache_alloc_node(cache, +- flags | GFP_THISNODE, nid); ++ flags | GFP_THISNODE, nid, ++ this_cpu); + if (obj) + break; + } +@@ -3207,20 +3365,21 @@ retry: + * We may trigger various forms of reclaim on the allowed + * set and go into memory reserves if necessary. 
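
Editorial sketch (not part of the patch): slab_irq_enable_GFP_WAIT()/slab_irq_disable_GFP_WAIT(), used in cache_grow() above and again in fallback_alloc() below, replace the open-coded "if (local_flags & __GFP_WAIT) local_irq_enable();" pairs: the section may only be left around the call into the page allocator when the allocation is allowed to sleep. The non-RT shape follows directly from the code being replaced; the RT side is assumed to drop and retake the per-CPU slab lock instead, which is presumably why the callers re-fetch their array_cache afterwards:

/* assumed shape of the helpers (the real ones live earlier in the patch) */
#ifndef CONFIG_PREEMPT_RT
# define slab_irq_enable_GFP_WAIT(flags, this_cpu)                      \
        do { if ((flags) & __GFP_WAIT) local_irq_enable(); } while (0)
# define slab_irq_disable_GFP_WAIT(flags, this_cpu)                     \
        do { if ((flags) & __GFP_WAIT) local_irq_disable(); } while (0)
#else
/* on PREEMPT_RT: assumed to release/reacquire the CPU's slab lock,
 * refreshing *this_cpu since the task may land on another CPU */
#endif
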
+ */ +- if (local_flags & __GFP_WAIT) +- local_irq_enable(); ++ slab_irq_enable_GFP_WAIT(local_flags, this_cpu); ++ + kmem_flagcheck(cache, flags); + obj = kmem_getpages(cache, local_flags, -1); +- if (local_flags & __GFP_WAIT) +- local_irq_disable(); ++ ++ slab_irq_disable_GFP_WAIT(local_flags, this_cpu); ++ + if (obj) { + /* + * Insert into the appropriate per node queues + */ + nid = page_to_nid(virt_to_page(obj)); +- if (cache_grow(cache, flags, nid, obj)) { ++ if (cache_grow(cache, flags, nid, obj, this_cpu)) { + obj = ____cache_alloc_node(cache, +- flags | GFP_THISNODE, nid); ++ flags | GFP_THISNODE, nid, this_cpu); + if (!obj) + /* + * Another processor may allocate the +@@ -3241,7 +3400,7 @@ retry: + * A interface to enable slab creation on nodeid + */ + static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, +- int nodeid) ++ int nodeid, int *this_cpu) + { + struct list_head *entry; + struct slab *slabp; +@@ -3289,11 +3448,11 @@ retry: + + must_grow: + spin_unlock(&l3->list_lock); +- x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL); ++ x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL, this_cpu); + if (x) + goto retry; + +- return fallback_alloc(cachep, flags); ++ return fallback_alloc(cachep, flags, this_cpu); + + done: + return obj; +@@ -3316,40 +3475,47 @@ __cache_alloc_node(struct kmem_cache *ca + void *caller) + { + unsigned long save_flags; ++ int this_cpu; + void *ptr; + ++ lockdep_trace_alloc(flags); ++ + if (slab_should_failslab(cachep, flags)) + return NULL; + + cache_alloc_debugcheck_before(cachep, flags); +- local_irq_save(save_flags); ++ ++ slab_irq_save(save_flags, this_cpu); + + if (unlikely(nodeid == -1)) +- nodeid = numa_node_id(); ++ nodeid = cpu_to_node(this_cpu); + + if (unlikely(!cachep->nodelists[nodeid])) { + /* Node not bootstrapped yet */ +- ptr = fallback_alloc(cachep, flags); ++ ptr = fallback_alloc(cachep, flags, &this_cpu); + goto out; + } + +- if (nodeid == numa_node_id()) { ++ if (nodeid == cpu_to_node(this_cpu)) { + /* + * Use the locally cached objects if possible. + * However ____cache_alloc does not allow fallback + * to other nodes. It may fail while we still have + * objects on other nodes available. + */ +- ptr = ____cache_alloc(cachep, flags); ++ ptr = ____cache_alloc(cachep, flags, &this_cpu); + if (ptr) + goto out; + } + /* ___cache_alloc_node can fall back to other nodes */ +- ptr = ____cache_alloc_node(cachep, flags, nodeid); ++ ptr = ____cache_alloc_node(cachep, flags, nodeid, &this_cpu); + out: +- local_irq_restore(save_flags); ++ slab_irq_restore(save_flags, this_cpu); + ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); + ++ if (likely(ptr)) ++ kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep)); ++ + if (unlikely((flags & __GFP_ZERO) && ptr)) + memset(ptr, 0, obj_size(cachep)); + +@@ -3357,33 +3523,33 @@ __cache_alloc_node(struct kmem_cache *ca + } + + static __always_inline void * +-__do_cache_alloc(struct kmem_cache *cache, gfp_t flags) ++__do_cache_alloc(struct kmem_cache *cache, gfp_t flags, int *this_cpu) + { + void *objp; + + if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) { +- objp = alternate_node_alloc(cache, flags); ++ objp = alternate_node_alloc(cache, flags, this_cpu); + if (objp) + goto out; + } +- objp = ____cache_alloc(cache, flags); + ++ objp = ____cache_alloc(cache, flags, this_cpu); + /* + * We may just have run out of memory on the local node. 
+ * ____cache_alloc_node() knows how to locate memory on other nodes + */ +- if (!objp) +- objp = ____cache_alloc_node(cache, flags, numa_node_id()); +- ++ if (!objp) ++ objp = ____cache_alloc_node(cache, flags, ++ cpu_to_node(*this_cpu), this_cpu); + out: + return objp; + } + #else + + static __always_inline void * +-__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags) ++__do_cache_alloc(struct kmem_cache *cachep, gfp_t flags, int *this_cpu) + { +- return ____cache_alloc(cachep, flags); ++ return ____cache_alloc(cachep, flags, this_cpu); + } + + #endif /* CONFIG_NUMA */ +@@ -3392,18 +3558,24 @@ static __always_inline void * + __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) + { + unsigned long save_flags; ++ int this_cpu; + void *objp; + ++ lockdep_trace_alloc(flags); ++ + if (slab_should_failslab(cachep, flags)) + return NULL; + + cache_alloc_debugcheck_before(cachep, flags); +- local_irq_save(save_flags); +- objp = __do_cache_alloc(cachep, flags); +- local_irq_restore(save_flags); ++ slab_irq_save(save_flags, this_cpu); ++ objp = __do_cache_alloc(cachep, flags, &this_cpu); ++ slab_irq_restore(save_flags, this_cpu); + objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); + prefetchw(objp); + ++ if (likely(objp)) ++ kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep)); ++ + if (unlikely((flags & __GFP_ZERO) && objp)) + memset(objp, 0, obj_size(cachep)); + +@@ -3414,7 +3586,7 @@ __cache_alloc(struct kmem_cache *cachep, + * Caller needs to acquire correct kmem_list's list_lock + */ + static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, +- int node) ++ int node, int *this_cpu) + { + int i; + struct kmem_list3 *l3; +@@ -3443,7 +3615,7 @@ static void free_block(struct kmem_cache + * a different cache, refer to comments before + * alloc_slabmgmt. + */ +- slab_destroy(cachep, slabp); ++ slab_destroy(cachep, slabp, this_cpu); + } else { + list_add(&slabp->list, &l3->slabs_free); + } +@@ -3457,11 +3629,12 @@ static void free_block(struct kmem_cache + } + } + +-static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) ++static void ++cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac, int *this_cpu) + { + int batchcount; + struct kmem_list3 *l3; +- int node = numa_node_id(); ++ int node = cpu_to_node(*this_cpu); + + batchcount = ac->batchcount; + #if DEBUG +@@ -3483,7 +3656,7 @@ static void cache_flusharray(struct kmem + } + } + +- free_block(cachep, ac->entry, batchcount, node); ++ free_block(cachep, ac->entry, batchcount, node, this_cpu); + free_done: + #if STATS + { +@@ -3512,13 +3685,15 @@ free_done: + * Release an obj back to its cache. If the obj has a constructed state, it must + * be in this state _before_ it is released. Called with disabled ints. + */ +-static inline void __cache_free(struct kmem_cache *cachep, void *objp) ++static void __cache_free(struct kmem_cache *cachep, void *objp, int *this_cpu) + { +- struct array_cache *ac = cpu_cache_get(cachep); ++ struct array_cache *ac = cpu_cache_get(cachep, *this_cpu); + + check_irq_off(); + objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); + ++ kmemcheck_slab_free(cachep, objp, obj_size(cachep)); ++ + /* + * Skip calling cache_free_alien() when the platform is not numa. + * This will avoid cache misses that happen while accessing slabp (which +@@ -3526,7 +3701,7 @@ static inline void __cache_free(struct k + * variable to skip the call, which is mostly likely to be present in + * the cache. 
+ */ +- if (numa_platform && cache_free_alien(cachep, objp)) ++ if (numa_platform && cache_free_alien(cachep, objp, this_cpu)) + return; + + if (likely(ac->avail < ac->limit)) { +@@ -3535,7 +3710,7 @@ static inline void __cache_free(struct k + return; + } else { + STATS_INC_FREEMISS(cachep); +- cache_flusharray(cachep, ac); ++ cache_flusharray(cachep, ac, this_cpu); + ac->entry[ac->avail++] = objp; + } + } +@@ -3550,10 +3725,23 @@ static inline void __cache_free(struct k + */ + void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) + { +- return __cache_alloc(cachep, flags, __builtin_return_address(0)); ++ void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0)); ++ ++ trace_kmem_cache_alloc(_RET_IP_, ret, ++ obj_size(cachep), cachep->buffer_size, flags); ++ ++ return ret; + } + EXPORT_SYMBOL(kmem_cache_alloc); + ++#ifdef CONFIG_KMEMTRACE ++void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags) ++{ ++ return __cache_alloc(cachep, flags, __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(kmem_cache_alloc_notrace); ++#endif ++ + /** + * kmem_ptr_validate - check if an untrusted pointer might be a slab entry. + * @cachep: the cache we're checking against +@@ -3598,23 +3786,46 @@ out: + #ifdef CONFIG_NUMA + void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) + { +- return __cache_alloc_node(cachep, flags, nodeid, +- __builtin_return_address(0)); ++ void *ret = __cache_alloc_node(cachep, flags, nodeid, ++ __builtin_return_address(0)); ++ ++ trace_kmem_cache_alloc_node(_RET_IP_, ret, ++ obj_size(cachep), cachep->buffer_size, ++ flags, nodeid); ++ ++ return ret; + } + EXPORT_SYMBOL(kmem_cache_alloc_node); + ++#ifdef CONFIG_KMEMTRACE ++void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep, ++ gfp_t flags, ++ int nodeid) ++{ ++ return __cache_alloc_node(cachep, flags, nodeid, ++ __builtin_return_address(0)); ++} ++EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); ++#endif ++ + static __always_inline void * + __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller) + { + struct kmem_cache *cachep; ++ void *ret; + + cachep = kmem_find_general_cachep(size, flags); + if (unlikely(ZERO_OR_NULL_PTR(cachep))) + return cachep; +- return kmem_cache_alloc_node(cachep, flags, node); ++ ret = kmem_cache_alloc_node_notrace(cachep, flags, node); ++ ++ trace_kmalloc_node((unsigned long) caller, ret, ++ size, cachep->buffer_size, flags, node); ++ ++ return ret; + } + +-#ifdef CONFIG_DEBUG_SLAB ++#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE) + void *__kmalloc_node(size_t size, gfp_t flags, int node) + { + return __do_kmalloc_node(size, flags, node, +@@ -3647,6 +3858,7 @@ static __always_inline void *__do_kmallo + void *caller) + { + struct kmem_cache *cachep; ++ void *ret; + + /* If you want to save a few bytes .text space: replace + * __ with kmem_. 
+@@ -3656,11 +3868,16 @@ static __always_inline void *__do_kmallo + cachep = __find_general_cachep(size, flags); + if (unlikely(ZERO_OR_NULL_PTR(cachep))) + return cachep; +- return __cache_alloc(cachep, flags, caller); ++ ret = __cache_alloc(cachep, flags, caller); ++ ++ trace_kmalloc((unsigned long) caller, ret, ++ size, cachep->buffer_size, flags); ++ ++ return ret; + } + + +-#ifdef CONFIG_DEBUG_SLAB ++#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE) + void *__kmalloc(size_t size, gfp_t flags) + { + return __do_kmalloc(size, flags, __builtin_return_address(0)); +@@ -3692,13 +3909,16 @@ EXPORT_SYMBOL(__kmalloc); + void kmem_cache_free(struct kmem_cache *cachep, void *objp) + { + unsigned long flags; ++ int this_cpu; + +- local_irq_save(flags); ++ slab_irq_save(flags, this_cpu); + debug_check_no_locks_freed(objp, obj_size(cachep)); + if (!(cachep->flags & SLAB_DEBUG_OBJECTS)) + debug_check_no_obj_freed(objp, obj_size(cachep)); +- __cache_free(cachep, objp); +- local_irq_restore(flags); ++ __cache_free(cachep, objp, &this_cpu); ++ slab_irq_restore(flags, this_cpu); ++ ++ trace_kmem_cache_free(_RET_IP_, objp); + } + EXPORT_SYMBOL(kmem_cache_free); + +@@ -3715,16 +3935,19 @@ void kfree(const void *objp) + { + struct kmem_cache *c; + unsigned long flags; ++ int this_cpu; ++ ++ trace_kfree(_RET_IP_, objp); + + if (unlikely(ZERO_OR_NULL_PTR(objp))) + return; +- local_irq_save(flags); ++ slab_irq_save(flags, this_cpu); + kfree_debugcheck(objp); + c = virt_to_cache(objp); + debug_check_no_locks_freed(objp, obj_size(c)); + debug_check_no_obj_freed(objp, obj_size(c)); +- __cache_free(c, (void *)objp); +- local_irq_restore(flags); ++ __cache_free(c, (void *)objp, &this_cpu); ++ slab_irq_restore(flags, this_cpu); + } + EXPORT_SYMBOL(kfree); + +@@ -3745,7 +3968,7 @@ EXPORT_SYMBOL_GPL(kmem_cache_name); + */ + static int alloc_kmemlist(struct kmem_cache *cachep) + { +- int node; ++ int node, this_cpu; + struct kmem_list3 *l3; + struct array_cache *new_shared; + struct array_cache **new_alien = NULL; +@@ -3773,11 +3996,11 @@ static int alloc_kmemlist(struct kmem_ca + if (l3) { + struct array_cache *shared = l3->shared; + +- spin_lock_irq(&l3->list_lock); ++ slab_spin_lock_irq(&l3->list_lock, this_cpu); + + if (shared) + free_block(cachep, shared->entry, +- shared->avail, node); ++ shared->avail, node, &this_cpu); + + l3->shared = new_shared; + if (!l3->alien) { +@@ -3786,7 +4009,7 @@ static int alloc_kmemlist(struct kmem_ca + } + l3->free_limit = (1 + nr_cpus_node(node)) * + cachep->batchcount + cachep->num; +- spin_unlock_irq(&l3->list_lock); ++ slab_spin_unlock_irq(&l3->list_lock, this_cpu); + kfree(shared); + free_alien_cache(new_alien); + continue; +@@ -3833,42 +4056,50 @@ struct ccupdate_struct { + struct array_cache *new[NR_CPUS]; + }; + +-static void do_ccupdate_local(void *info) ++static void __do_ccupdate_local(void *info, int this_cpu) + { + struct ccupdate_struct *new = info; + struct array_cache *old; + + check_irq_off(); +- old = cpu_cache_get(new->cachep); ++ old = cpu_cache_get(new->cachep, this_cpu); ++ ++ new->cachep->array[this_cpu] = new->new[this_cpu]; ++ new->new[this_cpu] = old; ++} + +- new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()]; +- new->new[smp_processor_id()] = old; ++#ifdef CONFIG_PREEMPT_RT ++static void do_ccupdate_local(void *arg, int this_cpu) ++{ ++ __do_ccupdate_local(arg, this_cpu); + } ++#else ++static void do_ccupdate_local(void *arg) ++{ ++ __do_ccupdate_local(arg, smp_processor_id()); ++} ++#endif + + /* Always called with the 
cache_chain_mutex held */ + static int do_tune_cpucache(struct kmem_cache *cachep, int limit, + int batchcount, int shared) + { +- struct ccupdate_struct *new; +- int i; +- +- new = kzalloc(sizeof(*new), GFP_KERNEL); +- if (!new) +- return -ENOMEM; ++ struct ccupdate_struct new; ++ int i, this_cpu; + ++ memset(&new.new, 0, sizeof(new.new)); + for_each_online_cpu(i) { +- new->new[i] = alloc_arraycache(cpu_to_node(i), limit, ++ new.new[i] = alloc_arraycache(cpu_to_node(i), limit, + batchcount); +- if (!new->new[i]) { ++ if (!new.new[i]) { + for (i--; i >= 0; i--) +- kfree(new->new[i]); +- kfree(new); ++ kfree(new.new[i]); + return -ENOMEM; + } + } +- new->cachep = cachep; ++ new.cachep = cachep; + +- on_each_cpu(do_ccupdate_local, (void *)new, 1); ++ slab_on_each_cpu(do_ccupdate_local, (void *)&new); + + check_irq_on(); + cachep->batchcount = batchcount; +@@ -3876,15 +4107,15 @@ static int do_tune_cpucache(struct kmem_ + cachep->shared = shared; + + for_each_online_cpu(i) { +- struct array_cache *ccold = new->new[i]; ++ struct array_cache *ccold = new.new[i]; + if (!ccold) + continue; +- spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); +- free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i)); +- spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock); ++ slab_spin_lock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock, this_cpu); ++ free_block(cachep, ccold->entry, ccold->avail, cpu_to_node(i), &this_cpu); ++ slab_spin_unlock_irq(&cachep->nodelists[cpu_to_node(i)]->list_lock, this_cpu); + kfree(ccold); + } +- kfree(new); ++ + return alloc_kmemlist(cachep); + } + +@@ -3946,29 +4177,31 @@ static int enable_cpucache(struct kmem_c + * Drain an array if it contains any elements taking the l3 lock only if + * necessary. Note that the l3 listlock also protects the array_cache + * if drain_array() is used on the shared array. ++ * returns non-zero if some work is done + */ +-void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, +- struct array_cache *ac, int force, int node) ++int drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, ++ struct array_cache *ac, int force, int node) + { +- int tofree; ++ int tofree, this_cpu; + + if (!ac || !ac->avail) +- return; ++ return 0; + if (ac->touched && !force) { + ac->touched = 0; + } else { +- spin_lock_irq(&l3->list_lock); ++ slab_spin_lock_irq(&l3->list_lock, this_cpu); + if (ac->avail) { + tofree = force ? ac->avail : (ac->limit + 4) / 5; + if (tofree > ac->avail) + tofree = (ac->avail + 1) / 2; +- free_block(cachep, ac->entry, tofree, node); ++ free_block(cachep, ac->entry, tofree, node, &this_cpu); + ac->avail -= tofree; + memmove(ac->entry, &(ac->entry[tofree]), + sizeof(void *) * ac->avail); + } +- spin_unlock_irq(&l3->list_lock); ++ slab_spin_unlock_irq(&l3->list_lock, this_cpu); + } ++ return 1; + } + + /** +@@ -3985,11 +4218,12 @@ void drain_array(struct kmem_cache *cach + */ + static void cache_reap(struct work_struct *w) + { ++ int this_cpu = raw_smp_processor_id(), node = cpu_to_node(this_cpu); + struct kmem_cache *searchp; + struct kmem_list3 *l3; +- int node = numa_node_id(); + struct delayed_work *work = + container_of(w, struct delayed_work, work); ++ int work_done = 0; + + if (!mutex_trylock(&cache_chain_mutex)) + /* Give up. Setup the next iteration. 
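
Editorial note (not part of the patch): drain_array() now returns whether it actually freed anything, and cache_reap() sums those results into work_done; the payoff is the rescheduling at the end of the function, in the next hunk, which doubles the reap interval after a pass that found nothing to do. Assuming REAPTIMEOUT_CPUC keeps its mainline value of 2*HZ, the arithmetic is:

/*   (1 + !work_done) * REAPTIMEOUT_CPUC
 *
 *   work_done != 0:  (1 + 0) * 2*HZ  -> next pass in ~2 seconds
 *   work_done == 0:  (1 + 1) * 2*HZ  -> idle caches revisited in ~4 seconds
 */
schedule_delayed_work(work,
        round_jiffies_relative((1 + !work_done) * REAPTIMEOUT_CPUC));
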
*/ +@@ -4005,9 +4239,12 @@ static void cache_reap(struct work_struc + */ + l3 = searchp->nodelists[node]; + +- reap_alien(searchp, l3); ++ work_done += reap_alien(searchp, l3, &this_cpu); + +- drain_array(searchp, l3, cpu_cache_get(searchp), 0, node); ++ node = cpu_to_node(this_cpu); ++ ++ work_done += drain_array(searchp, l3, ++ cpu_cache_get(searchp, this_cpu), 0, node); + + /* + * These are racy checks but it does not matter +@@ -4018,7 +4255,7 @@ static void cache_reap(struct work_struc + + l3->next_reap = jiffies + REAPTIMEOUT_LIST3; + +- drain_array(searchp, l3, l3->shared, 0, node); ++ work_done += drain_array(searchp, l3, l3->shared, 0, node); + + if (l3->free_touched) + l3->free_touched = 0; +@@ -4037,7 +4274,8 @@ next: + next_reap_node(); + out: + /* Set up the next iteration */ +- schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC)); ++ schedule_delayed_work(work, ++ round_jiffies_relative((1+!work_done) * REAPTIMEOUT_CPUC)); + } + + #ifdef CONFIG_SLABINFO +@@ -4096,7 +4334,7 @@ static int s_show(struct seq_file *m, vo + unsigned long num_slabs, free_objects = 0, shared_avail = 0; + const char *name; + char *error = NULL; +- int node; ++ int this_cpu, node; + struct kmem_list3 *l3; + + active_objs = 0; +@@ -4107,7 +4345,7 @@ static int s_show(struct seq_file *m, vo + continue; + + check_irq_on(); +- spin_lock_irq(&l3->list_lock); ++ slab_spin_lock_irq(&l3->list_lock, this_cpu); + + list_for_each_entry(slabp, &l3->slabs_full, list) { + if (slabp->inuse != cachep->num && !error) +@@ -4132,7 +4370,7 @@ static int s_show(struct seq_file *m, vo + if (l3->shared) + shared_avail += l3->shared->avail; + +- spin_unlock_irq(&l3->list_lock); ++ slab_spin_unlock_irq(&l3->list_lock, this_cpu); + } + num_slabs += active_slabs; + num_objs = num_slabs * cachep->num; +@@ -4341,7 +4579,7 @@ static int leaks_show(struct seq_file *m + struct kmem_list3 *l3; + const char *name; + unsigned long *n = m->private; +- int node; ++ int node, this_cpu; + int i; + + if (!(cachep->flags & SLAB_STORE_USER)) +@@ -4359,13 +4597,13 @@ static int leaks_show(struct seq_file *m + continue; + + check_irq_on(); +- spin_lock_irq(&l3->list_lock); ++ slab_spin_lock_irq(&l3->list_lock, this_cpu); + + list_for_each_entry(slabp, &l3->slabs_full, list) + handle_slab(n, cachep, slabp); + list_for_each_entry(slabp, &l3->slabs_partial, list) + handle_slab(n, cachep, slabp); +- spin_unlock_irq(&l3->list_lock); ++ slab_spin_unlock_irq(&l3->list_lock, this_cpu); + } + name = cachep->name; + if (n[0] == n[1]) { +Index: linux-2.6-tip/mm/slob.c +=================================================================== +--- linux-2.6-tip.orig/mm/slob.c ++++ linux-2.6-tip/mm/slob.c +@@ -65,6 +65,7 @@ + #include + #include + #include ++#include + #include + + /* +@@ -463,27 +464,38 @@ void *__kmalloc_node(size_t size, gfp_t + { + unsigned int *m; + int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN); ++ void *ret; ++ ++ lockdep_trace_alloc(gfp); + + if (size < PAGE_SIZE - align) { + if (!size) + return ZERO_SIZE_PTR; + + m = slob_alloc(size + align, gfp, align, node); ++ + if (!m) + return NULL; + *m = size; +- return (void *)m + align; ++ ret = (void *)m + align; ++ ++ trace_kmalloc_node(_RET_IP_, ret, ++ size, size + align, gfp, node); + } else { +- void *ret; ++ unsigned int order = get_order(size); + +- ret = slob_new_page(gfp | __GFP_COMP, get_order(size), node); ++ ret = slob_new_page(gfp | __GFP_COMP, order, node); + if (ret) { + struct page *page; + page = virt_to_page(ret); + page->private = size; + } +- 
return ret; ++ ++ trace_kmalloc_node(_RET_IP_, ret, ++ size, PAGE_SIZE << order, gfp, node); + } ++ ++ return ret; + } + EXPORT_SYMBOL(__kmalloc_node); + +@@ -491,6 +503,8 @@ void kfree(const void *block) + { + struct slob_page *sp; + ++ trace_kfree(_RET_IP_, block); ++ + if (unlikely(ZERO_OR_NULL_PTR(block))) + return; + +@@ -570,10 +584,17 @@ void *kmem_cache_alloc_node(struct kmem_ + { + void *b; + +- if (c->size < PAGE_SIZE) ++ if (c->size < PAGE_SIZE) { + b = slob_alloc(c->size, flags, c->align, node); +- else ++ trace_kmem_cache_alloc_node(_RET_IP_, b, c->size, ++ SLOB_UNITS(c->size) * SLOB_UNIT, ++ flags, node); ++ } else { + b = slob_new_page(flags, get_order(c->size), node); ++ trace_kmem_cache_alloc_node(_RET_IP_, b, c->size, ++ PAGE_SIZE << get_order(c->size), ++ flags, node); ++ } + + if (c->ctor) + c->ctor(b); +@@ -609,6 +630,8 @@ void kmem_cache_free(struct kmem_cache * + } else { + __kmem_cache_free(b, c->size); + } ++ ++ trace_kmem_cache_free(_RET_IP_, b); + } + EXPORT_SYMBOL(kmem_cache_free); + +Index: linux-2.6-tip/mm/slub.c +=================================================================== +--- linux-2.6-tip.orig/mm/slub.c ++++ linux-2.6-tip/mm/slub.c +@@ -17,6 +17,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -25,6 +26,7 @@ + #include + #include + #include ++#include + #include + + /* +@@ -145,7 +147,7 @@ + SLAB_TRACE | SLAB_DESTROY_BY_RCU) + + #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ +- SLAB_CACHE_DMA) ++ SLAB_CACHE_DMA | SLAB_NOTRACK) + + #ifndef ARCH_KMALLOC_MINALIGN + #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) +@@ -1069,6 +1071,8 @@ static inline struct page *alloc_slab_pa + { + int order = oo_order(oo); + ++ flags |= __GFP_NOTRACK; ++ + if (node == -1) + return alloc_pages(flags, order); + else +@@ -1096,6 +1100,24 @@ static struct page *allocate_slab(struct + + stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK); + } ++ ++ if (kmemcheck_enabled ++ && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) ++ { ++ int pages = 1 << oo_order(oo); ++ ++ kmemcheck_alloc_shadow(page, oo_order(oo), flags, node); ++ ++ /* ++ * Objects from caches that have a constructor don't get ++ * cleared when they're allocated, so we need to do it here. ++ */ ++ if (s->ctor) ++ kmemcheck_mark_uninitialized_pages(page, pages); ++ else ++ kmemcheck_mark_unallocated_pages(page, pages); ++ } ++ + page->objects = oo_objects(oo); + mod_zone_page_state(page_zone(page), + (s->flags & SLAB_RECLAIM_ACCOUNT) ? +@@ -1169,6 +1191,8 @@ static void __free_slab(struct kmem_cach + __ClearPageSlubDebug(page); + } + ++ kmemcheck_free_shadow(page, compound_order(page)); ++ + mod_zone_page_state(page_zone(page), + (s->flags & SLAB_RECLAIM_ACCOUNT) ? 
+ NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, +@@ -1599,6 +1623,7 @@ static __always_inline void *slab_alloc( + unsigned long flags; + unsigned int objsize; + ++ lockdep_trace_alloc(gfpflags); + might_sleep_if(gfpflags & __GFP_WAIT); + + if (should_failslab(s->objsize, gfpflags)) +@@ -1621,23 +1646,51 @@ static __always_inline void *slab_alloc( + if (unlikely((gfpflags & __GFP_ZERO) && object)) + memset(object, 0, objsize); + ++ kmemcheck_slab_alloc(s, gfpflags, object, c->objsize); + return object; + } + + void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags) + { +- return slab_alloc(s, gfpflags, -1, _RET_IP_); ++ void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_); ++ ++ trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags); ++ ++ return ret; + } + EXPORT_SYMBOL(kmem_cache_alloc); + ++#ifdef CONFIG_KMEMTRACE ++void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags) ++{ ++ return slab_alloc(s, gfpflags, -1, _RET_IP_); ++} ++EXPORT_SYMBOL(kmem_cache_alloc_notrace); ++#endif ++ + #ifdef CONFIG_NUMA + void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node) + { +- return slab_alloc(s, gfpflags, node, _RET_IP_); ++ void *ret = slab_alloc(s, gfpflags, node, _RET_IP_); ++ ++ trace_kmem_cache_alloc_node(_RET_IP_, ret, ++ s->objsize, s->size, gfpflags, node); ++ ++ return ret; + } + EXPORT_SYMBOL(kmem_cache_alloc_node); + #endif + ++#ifdef CONFIG_KMEMTRACE ++void *kmem_cache_alloc_node_notrace(struct kmem_cache *s, ++ gfp_t gfpflags, ++ int node) ++{ ++ return slab_alloc(s, gfpflags, node, _RET_IP_); ++} ++EXPORT_SYMBOL(kmem_cache_alloc_node_notrace); ++#endif ++ + /* + * Slow patch handling. This may still be called frequently since objects + * have a longer lifetime than the cpu slabs in most processing loads. 
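
Editorial sketch (not part of the patch): the tracing hunks in slab.c and slub.c follow one convention. The exported entry point calls the internal allocator and then fires the matching tracepoint with the call site, the returned object, the object size, the cache's allocated size and the gfp flags; a *_notrace sibling, built under CONFIG_KMEMTRACE (and presumably mapped back to the traced version in the headers otherwise), skips the event so that kmalloc(), which emits trace_kmalloc()/trace_kmalloc_node() with the size the caller actually requested, does not report the same allocation twice. The pairing, collected in one place for orientation (mirroring the SLUB hunk above):

void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
{
        void *ret = slab_alloc(s, gfpflags, -1, _RET_IP_);

        /* call site, object, object size, cache entry size, gfp flags */
        trace_kmem_cache_alloc(_RET_IP_, ret, s->objsize, s->size, gfpflags);

        return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);

#ifdef CONFIG_KMEMTRACE
/* identical allocation path, no event: kmalloc() uses this variant and
 * emits trace_kmalloc() itself with the caller's requested size */
void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
{
        return slab_alloc(s, gfpflags, -1, _RET_IP_);
}
EXPORT_SYMBOL(kmem_cache_alloc_notrace);
#endif
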
+@@ -1725,6 +1778,7 @@ static __always_inline void slab_free(st + + local_irq_save(flags); + c = get_cpu_slab(s, smp_processor_id()); ++ kmemcheck_slab_free(s, object, c->objsize); + debug_check_no_locks_freed(object, c->objsize); + if (!(s->flags & SLAB_DEBUG_OBJECTS)) + debug_check_no_obj_freed(object, s->objsize); +@@ -1745,6 +1799,8 @@ void kmem_cache_free(struct kmem_cache * + page = virt_to_head_page(x); + + slab_free(s, page, x, _RET_IP_); ++ ++ trace_kmem_cache_free(_RET_IP_, x); + } + EXPORT_SYMBOL(kmem_cache_free); + +@@ -2478,7 +2534,7 @@ EXPORT_SYMBOL(kmem_cache_destroy); + * Kmalloc subsystem + *******************************************************************/ + +-struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned; ++struct kmem_cache kmalloc_caches[SLUB_PAGE_SHIFT] __cacheline_aligned; + EXPORT_SYMBOL(kmalloc_caches); + + static int __init setup_slub_min_order(char *str) +@@ -2540,7 +2596,7 @@ panic: + } + + #ifdef CONFIG_ZONE_DMA +-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1]; ++static struct kmem_cache *kmalloc_caches_dma[SLUB_PAGE_SHIFT]; + + static void sysfs_add_func(struct work_struct *w) + { +@@ -2586,7 +2642,8 @@ static noinline struct kmem_cache *dma_k + + if (!s || !text || !kmem_cache_open(s, flags, text, + realsize, ARCH_KMALLOC_MINALIGN, +- SLAB_CACHE_DMA|__SYSFS_ADD_DEFERRED, NULL)) { ++ SLAB_CACHE_DMA|SLAB_NOTRACK|__SYSFS_ADD_DEFERRED, ++ NULL)) { + kfree(s); + kfree(text); + goto unlock_out; +@@ -2660,8 +2717,9 @@ static struct kmem_cache *get_slab(size_ + void *__kmalloc(size_t size, gfp_t flags) + { + struct kmem_cache *s; ++ void *ret; + +- if (unlikely(size > PAGE_SIZE)) ++ if (unlikely(size > SLUB_MAX_SIZE)) + return kmalloc_large(size, flags); + + s = get_slab(size, flags); +@@ -2669,15 +2727,20 @@ void *__kmalloc(size_t size, gfp_t flags + if (unlikely(ZERO_OR_NULL_PTR(s))) + return s; + +- return slab_alloc(s, flags, -1, _RET_IP_); ++ ret = slab_alloc(s, flags, -1, _RET_IP_); ++ ++ trace_kmalloc(_RET_IP_, ret, size, s->size, flags); ++ ++ return ret; + } + EXPORT_SYMBOL(__kmalloc); + + static void *kmalloc_large_node(size_t size, gfp_t flags, int node) + { +- struct page *page = alloc_pages_node(node, flags | __GFP_COMP, +- get_order(size)); ++ struct page *page; + ++ flags |= __GFP_COMP | __GFP_NOTRACK; ++ page = alloc_pages_node(node, flags, get_order(size)); + if (page) + return page_address(page); + else +@@ -2688,16 +2751,28 @@ static void *kmalloc_large_node(size_t s + void *__kmalloc_node(size_t size, gfp_t flags, int node) + { + struct kmem_cache *s; ++ void *ret; ++ ++ if (unlikely(size > SLUB_MAX_SIZE)) { ++ ret = kmalloc_large_node(size, flags, node); + +- if (unlikely(size > PAGE_SIZE)) +- return kmalloc_large_node(size, flags, node); ++ trace_kmalloc_node(_RET_IP_, ret, ++ size, PAGE_SIZE << get_order(size), ++ flags, node); ++ ++ return ret; ++ } + + s = get_slab(size, flags); + + if (unlikely(ZERO_OR_NULL_PTR(s))) + return s; + +- return slab_alloc(s, flags, node, _RET_IP_); ++ ret = slab_alloc(s, flags, node, _RET_IP_); ++ ++ trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node); ++ ++ return ret; + } + EXPORT_SYMBOL(__kmalloc_node); + #endif +@@ -2746,6 +2821,8 @@ void kfree(const void *x) + struct page *page; + void *object = (void *)x; + ++ trace_kfree(_RET_IP_, x); ++ + if (unlikely(ZERO_OR_NULL_PTR(x))) + return; + +@@ -2989,7 +3066,7 @@ void __init kmem_cache_init(void) + caches++; + } + +- for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) { ++ for (i = KMALLOC_SHIFT_LOW; i < 
SLUB_PAGE_SHIFT; i++) { + create_kmalloc_cache(&kmalloc_caches[i], + "kmalloc", 1 << i, GFP_KERNEL); + caches++; +@@ -3026,7 +3103,7 @@ void __init kmem_cache_init(void) + slab_state = UP; + + /* Provide the correct kmalloc names now that the caches are up */ +- for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) ++ for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) + kmalloc_caches[i]. name = + kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); + +@@ -3225,8 +3302,9 @@ static struct notifier_block __cpuinitda + void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller) + { + struct kmem_cache *s; ++ void *ret; + +- if (unlikely(size > PAGE_SIZE)) ++ if (unlikely(size > SLUB_MAX_SIZE)) + return kmalloc_large(size, gfpflags); + + s = get_slab(size, gfpflags); +@@ -3234,15 +3312,21 @@ void *__kmalloc_track_caller(size_t size + if (unlikely(ZERO_OR_NULL_PTR(s))) + return s; + +- return slab_alloc(s, gfpflags, -1, caller); ++ ret = slab_alloc(s, gfpflags, -1, caller); ++ ++ /* Honor the call site pointer we recieved. */ ++ trace_kmalloc(caller, ret, size, s->size, gfpflags); ++ ++ return ret; + } + + void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags, + int node, unsigned long caller) + { + struct kmem_cache *s; ++ void *ret; + +- if (unlikely(size > PAGE_SIZE)) ++ if (unlikely(size > SLUB_MAX_SIZE)) + return kmalloc_large_node(size, gfpflags, node); + + s = get_slab(size, gfpflags); +@@ -3250,7 +3334,12 @@ void *__kmalloc_node_track_caller(size_t + if (unlikely(ZERO_OR_NULL_PTR(s))) + return s; + +- return slab_alloc(s, gfpflags, node, caller); ++ ret = slab_alloc(s, gfpflags, node, caller); ++ ++ /* Honor the call site pointer we recieved. */ ++ trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node); ++ ++ return ret; + } + + #ifdef CONFIG_SLUB_DEBUG +@@ -4305,6 +4394,8 @@ static char *create_unique_id(struct kme + *p++ = 'a'; + if (s->flags & SLAB_DEBUG_FREE) + *p++ = 'F'; ++ if (!(s->flags & SLAB_NOTRACK)) ++ *p++ = 't'; + if (p != name + 1) + *p++ = '-'; + p += sprintf(p, "%07d", s->size); +Index: linux-2.6-tip/mm/swapfile.c +=================================================================== +--- linux-2.6-tip.orig/mm/swapfile.c ++++ linux-2.6-tip/mm/swapfile.c +@@ -585,13 +585,14 @@ int free_swap_and_cache(swp_entry_t entr + p = swap_info_get(entry); + if (p) { + if (swap_entry_free(p, entry) == 1) { ++ spin_unlock(&swap_lock); + page = find_get_page(&swapper_space, entry.val); + if (page && !trylock_page(page)) { + page_cache_release(page); + page = NULL; + } +- } +- spin_unlock(&swap_lock); ++ } else ++ spin_unlock(&swap_lock); + } + if (page) { + /* +@@ -1649,7 +1650,7 @@ SYSCALL_DEFINE2(swapon, const char __use + union swap_header *swap_header = NULL; + unsigned int nr_good_pages = 0; + int nr_extents = 0; +- sector_t span; ++ sector_t uninitialized_var(span); + unsigned long maxpages = 1; + unsigned long swapfilepages; + unsigned short *swap_map = NULL; +Index: linux-2.6-tip/mm/util.c +=================================================================== +--- linux-2.6-tip.orig/mm/util.c ++++ linux-2.6-tip/mm/util.c +@@ -4,6 +4,7 @@ + #include + #include + #include ++#include + #include + + /** +@@ -206,3 +207,18 @@ int __attribute__((weak)) get_user_pages + return ret; + } + EXPORT_SYMBOL_GPL(get_user_pages_fast); ++ ++/* Tracepoints definitions. 
*/ ++DEFINE_TRACE(kmalloc); ++DEFINE_TRACE(kmem_cache_alloc); ++DEFINE_TRACE(kmalloc_node); ++DEFINE_TRACE(kmem_cache_alloc_node); ++DEFINE_TRACE(kfree); ++DEFINE_TRACE(kmem_cache_free); ++ ++EXPORT_TRACEPOINT_SYMBOL(kmalloc); ++EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); ++EXPORT_TRACEPOINT_SYMBOL(kmalloc_node); ++EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node); ++EXPORT_TRACEPOINT_SYMBOL(kfree); ++EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free); +Index: linux-2.6-tip/mm/vmalloc.c +=================================================================== +--- linux-2.6-tip.orig/mm/vmalloc.c ++++ linux-2.6-tip/mm/vmalloc.c +@@ -24,6 +24,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -152,8 +153,8 @@ static int vmap_pud_range(pgd_t *pgd, un + * + * Ie. pte at addr+N*PAGE_SIZE shall point to pfn corresponding to pages[N] + */ +-static int vmap_page_range(unsigned long start, unsigned long end, +- pgprot_t prot, struct page **pages) ++static int vmap_page_range_noflush(unsigned long start, unsigned long end, ++ pgprot_t prot, struct page **pages) + { + pgd_t *pgd; + unsigned long next; +@@ -169,13 +170,22 @@ static int vmap_page_range(unsigned long + if (err) + break; + } while (pgd++, addr = next, addr != end); +- flush_cache_vmap(start, end); + + if (unlikely(err)) + return err; + return nr; + } + ++static int vmap_page_range(unsigned long start, unsigned long end, ++ pgprot_t prot, struct page **pages) ++{ ++ int ret; ++ ++ ret = vmap_page_range_noflush(start, end, prot, pages); ++ flush_cache_vmap(start, end); ++ return ret; ++} ++ + static inline int is_vmalloc_or_module_addr(const void *x) + { + /* +@@ -990,6 +1000,32 @@ void *vm_map_ram(struct page **pages, un + } + EXPORT_SYMBOL(vm_map_ram); + ++/** ++ * vm_area_register_early - register vmap area early during boot ++ * @vm: vm_struct to register ++ * @align: requested alignment ++ * ++ * This function is used to register kernel vm area before ++ * vmalloc_init() is called. @vm->size and @vm->flags should contain ++ * proper values on entry and other fields should be zero. On return, ++ * vm->addr contains the allocated address. ++ * ++ * DO NOT USE THIS FUNCTION UNLESS YOU KNOW WHAT YOU'RE DOING. ++ */ ++void __init vm_area_register_early(struct vm_struct *vm, size_t align) ++{ ++ static size_t vm_init_off __initdata; ++ unsigned long addr; ++ ++ addr = ALIGN(VMALLOC_START + vm_init_off, align); ++ vm_init_off = PFN_ALIGN(addr + vm->size) - VMALLOC_START; ++ ++ vm->addr = (void *)addr; ++ ++ vm->next = vmlist; ++ vmlist = vm; ++} ++ + void __init vmalloc_init(void) + { + struct vmap_area *va; +@@ -1017,6 +1053,58 @@ void __init vmalloc_init(void) + vmap_initialized = true; + } + ++/** ++ * map_kernel_range_noflush - map kernel VM area with the specified pages ++ * @addr: start of the VM area to map ++ * @size: size of the VM area to map ++ * @prot: page protection flags to use ++ * @pages: pages to map ++ * ++ * Map PFN_UP(@size) pages at @addr. The VM area @addr and @size ++ * specify should have been allocated using get_vm_area() and its ++ * friends. ++ * ++ * NOTE: ++ * This function does NOT do any cache flushing. The caller is ++ * responsible for calling flush_cache_vmap() on to-be-mapped areas ++ * before calling this function. ++ * ++ * RETURNS: ++ * The number of pages mapped on success, -errno on failure. 
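
Editorial sketch (not part of the patch): vm_area_register_early() lets early boot code, and only early boot code as its kerneldoc warns, claim part of the vmalloc address space before vmalloc_init() has run, by carving addresses out of VMALLOC_START and chaining the caller's vm_struct onto vmlist directly. A hypothetical early caller, following the contract stated in the kerneldoc above (names and surrounding context are illustrative assumptions):

/* static, so the fields the kerneldoc wants zeroed start out zero */
static struct vm_struct early_vm;

void __init reserve_early_vm_area(unsigned long size)
{
        early_vm.flags = VM_ALLOC;
        early_vm.size  = size;

        vm_area_register_early(&early_vm, PAGE_SIZE);

        /* early_vm.addr now points into the vmalloc area; actual pages can
         * be installed later with map_kernel_range_noflush() (next hunk),
         * followed by flush_cache_vmap() on the mapped range */
}
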
++ */ ++int map_kernel_range_noflush(unsigned long addr, unsigned long size, ++ pgprot_t prot, struct page **pages) ++{ ++ return vmap_page_range_noflush(addr, addr + size, prot, pages); ++} ++ ++/** ++ * unmap_kernel_range_noflush - unmap kernel VM area ++ * @addr: start of the VM area to unmap ++ * @size: size of the VM area to unmap ++ * ++ * Unmap PFN_UP(@size) pages at @addr. The VM area @addr and @size ++ * specify should have been allocated using get_vm_area() and its ++ * friends. ++ * ++ * NOTE: ++ * This function does NOT do any cache flushing. The caller is ++ * responsible for calling flush_cache_vunmap() on to-be-mapped areas ++ * before calling this function and flush_tlb_kernel_range() after. ++ */ ++void unmap_kernel_range_noflush(unsigned long addr, unsigned long size) ++{ ++ vunmap_page_range(addr, addr + size); ++} ++ ++/** ++ * unmap_kernel_range - unmap kernel VM area and flush cache and TLB ++ * @addr: start of the VM area to unmap ++ * @size: size of the VM area to unmap ++ * ++ * Similar to unmap_kernel_range_noflush() but flushes vcache before ++ * the unmapping and tlb after. ++ */ + void unmap_kernel_range(unsigned long addr, unsigned long size) + { + unsigned long end = addr + size; +@@ -1267,6 +1355,7 @@ EXPORT_SYMBOL(vfree); + void vunmap(const void *addr) + { + BUG_ON(in_interrupt()); ++ might_sleep(); + __vunmap(addr, 0); + } + EXPORT_SYMBOL(vunmap); +@@ -1286,6 +1375,8 @@ void *vmap(struct page **pages, unsigned + { + struct vm_struct *area; + ++ might_sleep(); ++ + if (count > num_physpages) + return NULL; + +Index: linux-2.6-tip/mm/vmscan.c +=================================================================== +--- linux-2.6-tip.orig/mm/vmscan.c ++++ linux-2.6-tip/mm/vmscan.c +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + #include /* for try_to_release_page(), + buffer_heads_over_limit */ + #include +@@ -1125,7 +1126,7 @@ static unsigned long shrink_inactive_lis + } + + nr_reclaimed += nr_freed; +- local_irq_disable(); ++ local_irq_disable_nort(); + if (current_is_kswapd()) { + __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan); + __count_vm_events(KSWAPD_STEAL, nr_freed); +@@ -1166,9 +1167,14 @@ static unsigned long shrink_inactive_lis + } + } + } while (nr_scanned < max_scan); ++ /* ++ * Non-PREEMPT_RT relies on IRQs-off protecting the page_states ++ * per-CPU data. PREEMPT_RT has that data protected even in ++ * __mod_page_state(), so no need to keep IRQs disabled. 
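
Editorial sketch (not part of the patch): the _nort ("no RT") variants used in shrink_inactive_list() in the vmscan.c hunk below keep an IRQ-disable that is only needed by the !PREEMPT_RT locking model: they compile to the ordinary operation on non-RT kernels and to a no-op on PREEMPT_RT, where the per-CPU statistics are protected by other means, as the accompanying comment explains. Their assumed shape, defined elsewhere in the -rt series and shown here only for orientation:

#ifdef CONFIG_PREEMPT_RT
# define local_irq_disable_nort()       do { } while (0)
# define local_irq_enable_nort()        do { } while (0)
#else
# define local_irq_disable_nort()       local_irq_disable()
# define local_irq_enable_nort()        local_irq_enable()
#endif
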
++ */ + spin_unlock(&zone->lru_lock); + done: +- local_irq_enable(); ++ local_irq_enable_nort(); + pagevec_release(&pvec); + return nr_reclaimed; + } +@@ -1963,7 +1969,9 @@ static int kswapd(void *p) + struct reclaim_state reclaim_state = { + .reclaimed_slab = 0, + }; +- node_to_cpumask_ptr(cpumask, pgdat->node_id); ++ const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id); ++ ++ lockdep_set_current_reclaim_state(GFP_KERNEL); + + if (!cpumask_empty(cpumask)) + set_cpus_allowed_ptr(tsk, cpumask); +@@ -2198,7 +2206,9 @@ static int __devinit cpu_callback(struct + if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) { + for_each_node_state(nid, N_HIGH_MEMORY) { + pg_data_t *pgdat = NODE_DATA(nid); +- node_to_cpumask_ptr(mask, pgdat->node_id); ++ const struct cpumask *mask; ++ ++ mask = cpumask_of_node(pgdat->node_id); + + if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids) + /* One of our CPUs online: restore mask */ +Index: linux-2.6-tip/net/9p/Kconfig +=================================================================== +--- linux-2.6-tip.orig/net/9p/Kconfig ++++ linux-2.6-tip/net/9p/Kconfig +@@ -4,6 +4,8 @@ + + menuconfig NET_9P + depends on NET && EXPERIMENTAL ++ # build breakage ++ depends on 0 + tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" + help + If you say Y here, you will get experimental support for +Index: linux-2.6-tip/net/core/skbuff.c +=================================================================== +--- linux-2.6-tip.orig/net/core/skbuff.c ++++ linux-2.6-tip/net/core/skbuff.c +@@ -39,6 +39,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -197,6 +198,8 @@ struct sk_buff *__alloc_skb(unsigned int + skb->data = data; + skb_reset_tail_pointer(skb); + skb->end = skb->tail + size; ++ kmemcheck_annotate_bitfield(skb->flags1); ++ kmemcheck_annotate_bitfield(skb->flags2); + /* make sure we initialize shinfo sequentially */ + shinfo = skb_shinfo(skb); + atomic_set(&shinfo->dataref, 1); +@@ -211,6 +214,8 @@ struct sk_buff *__alloc_skb(unsigned int + struct sk_buff *child = skb + 1; + atomic_t *fclone_ref = (atomic_t *) (child + 1); + ++ kmemcheck_annotate_bitfield(child->flags1); ++ kmemcheck_annotate_bitfield(child->flags2); + skb->fclone = SKB_FCLONE_ORIG; + atomic_set(fclone_ref, 1); + +@@ -240,7 +245,7 @@ nodata: + struct sk_buff *__netdev_alloc_skb(struct net_device *dev, + unsigned int length, gfp_t gfp_mask) + { +- int node = dev->dev.parent ? 
dev_to_node(dev->dev.parent) : -1; ++ int node = dev_to_node(&dev->dev); + struct sk_buff *skb; + + skb = __alloc_skb(length + NET_SKB_PAD, gfp_mask, 0, node); +@@ -378,7 +383,7 @@ static void skb_release_head_state(struc + secpath_put(skb->sp); + #endif + if (skb->destructor) { +- WARN_ON(in_irq()); ++// WARN_ON(in_irq()); + skb->destructor(skb); + } + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +@@ -600,6 +605,9 @@ struct sk_buff *skb_clone(struct sk_buff + n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); + if (!n) + return NULL; ++ ++ kmemcheck_annotate_bitfield(n->flags1); ++ kmemcheck_annotate_bitfield(n->flags2); + n->fclone = SKB_FCLONE_UNAVAILABLE; + } + +Index: linux-2.6-tip/net/core/sock.c +=================================================================== +--- linux-2.6-tip.orig/net/core/sock.c ++++ linux-2.6-tip/net/core/sock.c +@@ -894,6 +894,8 @@ static struct sock *sk_prot_alloc(struct + sk = kmalloc(prot->obj_size, priority); + + if (sk != NULL) { ++ kmemcheck_annotate_bitfield(sk->flags); ++ + if (security_sk_alloc(sk, family, priority)) + goto out_free; + +@@ -1947,8 +1949,9 @@ static DECLARE_BITMAP(proto_inuse_idx, P + #ifdef CONFIG_NET_NS + void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) + { +- int cpu = smp_processor_id(); ++ int cpu = get_cpu(); + per_cpu_ptr(net->core.inuse, cpu)->val[prot->inuse_idx] += val; ++ put_cpu(); + } + EXPORT_SYMBOL_GPL(sock_prot_inuse_add); + +@@ -1994,7 +1997,9 @@ static DEFINE_PER_CPU(struct prot_inuse, + + void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) + { +- __get_cpu_var(prot_inuse).val[prot->inuse_idx] += val; ++ int cpu = get_cpu(); ++ per_cpu(prot_inuse, cpu).val[prot->inuse_idx] += val; ++ put_cpu(); + } + EXPORT_SYMBOL_GPL(sock_prot_inuse_add); + +Index: linux-2.6-tip/net/ipv4/af_inet.c +=================================================================== +--- linux-2.6-tip.orig/net/ipv4/af_inet.c ++++ linux-2.6-tip/net/ipv4/af_inet.c +@@ -1375,10 +1375,10 @@ EXPORT_SYMBOL_GPL(snmp_fold_field); + int snmp_mib_init(void *ptr[2], size_t mibsize) + { + BUG_ON(ptr == NULL); +- ptr[0] = __alloc_percpu(mibsize); ++ ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); + if (!ptr[0]) + goto err0; +- ptr[1] = __alloc_percpu(mibsize); ++ ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); + if (!ptr[1]) + goto err1; + return 0; +Index: linux-2.6-tip/net/ipv4/inet_timewait_sock.c +=================================================================== +--- linux-2.6-tip.orig/net/ipv4/inet_timewait_sock.c ++++ linux-2.6-tip/net/ipv4/inet_timewait_sock.c +@@ -9,6 +9,7 @@ + */ + + #include ++#include + #include + #include + #include +@@ -117,6 +118,8 @@ struct inet_timewait_sock *inet_twsk_all + if (tw != NULL) { + const struct inet_sock *inet = inet_sk(sk); + ++ kmemcheck_annotate_bitfield(tw->flags); ++ + /* Give us an identity. 
*/ + tw->tw_daddr = inet->daddr; + tw->tw_rcv_saddr = inet->rcv_saddr; +Index: linux-2.6-tip/net/ipv4/route.c +=================================================================== +--- linux-2.6-tip.orig/net/ipv4/route.c ++++ linux-2.6-tip/net/ipv4/route.c +@@ -204,13 +204,13 @@ struct rt_hash_bucket { + }; + + #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || \ +- defined(CONFIG_PROVE_LOCKING) ++ defined(CONFIG_PROVE_LOCKING) || defined(CONFIG_PREEMPT_RT) + /* + * Instead of using one spinlock for each rt_hash_bucket, we use a table of spinlocks + * The size of this table is a power of two and depends on the number of CPUS. + * (on lockdep we have a quite big spinlock_t, so keep the size down there) + */ +-#ifdef CONFIG_LOCKDEP ++#if defined(CONFIG_LOCKDEP) || defined(CONFIG_PREEMPT_RT) + # define RT_HASH_LOCK_SZ 256 + #else + # if NR_CPUS >= 32 +@@ -242,7 +242,7 @@ static __init void rt_hash_lock_init(voi + spin_lock_init(&rt_hash_locks[i]); + } + #else +-# define rt_hash_lock_addr(slot) NULL ++# define rt_hash_lock_addr(slot) ((spinlock_t *)NULL) + + static inline void rt_hash_lock_init(void) + { +@@ -3356,7 +3356,7 @@ int __init ip_rt_init(void) + int rc = 0; + + #ifdef CONFIG_NET_CLS_ROUTE +- ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct)); ++ ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); + if (!ip_rt_acct) + panic("IP: failed to allocate ip_rt_acct\n"); + #endif +Index: linux-2.6-tip/net/netfilter/ipvs/ip_vs_ctl.c +=================================================================== +--- linux-2.6-tip.orig/net/netfilter/ipvs/ip_vs_ctl.c ++++ linux-2.6-tip/net/netfilter/ipvs/ip_vs_ctl.c +@@ -2315,6 +2315,7 @@ __ip_vs_get_dest_entries(const struct ip + static inline void + __ip_vs_get_timeouts(struct ip_vs_timeout_user *u) + { ++ memset(u, 0, sizeof(*u)); + #ifdef CONFIG_IP_VS_PROTO_TCP + u->tcp_timeout = + ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; +Index: linux-2.6-tip/net/netfilter/nf_conntrack_ftp.c +=================================================================== +--- linux-2.6-tip.orig/net/netfilter/nf_conntrack_ftp.c ++++ linux-2.6-tip/net/netfilter/nf_conntrack_ftp.c +@@ -588,3 +588,4 @@ static int __init nf_conntrack_ftp_init( + + module_init(nf_conntrack_ftp_init); + module_exit(nf_conntrack_ftp_fini); ++ +Index: linux-2.6-tip/net/netfilter/nf_conntrack_proto_sctp.c +=================================================================== +--- linux-2.6-tip.orig/net/netfilter/nf_conntrack_proto_sctp.c ++++ linux-2.6-tip/net/netfilter/nf_conntrack_proto_sctp.c +@@ -373,6 +373,9 @@ static int sctp_packet(struct nf_conn *c + } + write_unlock_bh(&sctp_lock); + ++ if (new_state == SCTP_CONNTRACK_MAX) ++ goto out; ++ + nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]); + + if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED && +Index: linux-2.6-tip/net/packet/af_packet.c +=================================================================== +--- linux-2.6-tip.orig/net/packet/af_packet.c ++++ linux-2.6-tip/net/packet/af_packet.c +@@ -711,7 +711,7 @@ static int tpacket_rcv(struct sk_buff *s + hdrlen = sizeof(*h.h2); + break; + default: +- BUG(); ++ panic("AF_PACKET: bad tp->version"); + } + + sll = h.raw + TPACKET_ALIGN(hdrlen); +Index: linux-2.6-tip/net/rfkill/rfkill.c +=================================================================== +--- linux-2.6-tip.orig/net/rfkill/rfkill.c ++++ linux-2.6-tip/net/rfkill/rfkill.c +@@ -387,6 +387,7 @@ static const char *rfkill_get_type_str(e + return 
"wwan"; + default: + BUG(); ++ return NULL; + } + } + +Index: linux-2.6-tip/net/sunrpc/svc.c +=================================================================== +--- linux-2.6-tip.orig/net/sunrpc/svc.c ++++ linux-2.6-tip/net/sunrpc/svc.c +@@ -317,8 +317,7 @@ svc_pool_map_set_cpumask(struct task_str + } + case SVC_POOL_PERNODE: + { +- node_to_cpumask_ptr(nodecpumask, node); +- set_cpus_allowed_ptr(task, nodecpumask); ++ set_cpus_allowed_ptr(task, cpumask_of_node(node)); + break; + } + } +Index: linux-2.6-tip/net/sunrpc/svcauth_unix.c +=================================================================== +--- linux-2.6-tip.orig/net/sunrpc/svcauth_unix.c ++++ linux-2.6-tip/net/sunrpc/svcauth_unix.c +@@ -682,7 +682,7 @@ svcauth_unix_set_client(struct svc_rqst + sin6 = svc_addr_in6(rqstp); + break; + default: +- BUG(); ++ panic("svcauth_unix_set_client: bad address family!"); + } + + rqstp->rq_client = NULL; +@@ -863,3 +863,4 @@ struct auth_ops svcauth_unix = { + .set_client = svcauth_unix_set_client, + }; + ++ +Index: linux-2.6-tip/samples/tracepoints/tp-samples-trace.h +=================================================================== +--- linux-2.6-tip.orig/samples/tracepoints/tp-samples-trace.h ++++ linux-2.6-tip/samples/tracepoints/tp-samples-trace.h +@@ -5,9 +5,9 @@ + #include + + DECLARE_TRACE(subsys_event, +- TPPROTO(struct inode *inode, struct file *file), +- TPARGS(inode, file)); ++ TP_PROTO(struct inode *inode, struct file *file), ++ TP_ARGS(inode, file)); + DECLARE_TRACE(subsys_eventb, +- TPPROTO(void), +- TPARGS()); ++ TP_PROTO(void), ++ TP_ARGS()); + #endif +Index: linux-2.6-tip/samples/tracepoints/tracepoint-sample.c +=================================================================== +--- linux-2.6-tip.orig/samples/tracepoints/tracepoint-sample.c ++++ linux-2.6-tip/samples/tracepoints/tracepoint-sample.c +@@ -1,6 +1,6 @@ + /* tracepoint-sample.c + * +- * Executes a tracepoint when /proc/tracepoint-example is opened. ++ * Executes a tracepoint when /proc/tracepoint-sample is opened. 
+ * + * (C) Copyright 2007 Mathieu Desnoyers + * +@@ -16,7 +16,7 @@ + DEFINE_TRACE(subsys_event); + DEFINE_TRACE(subsys_eventb); + +-struct proc_dir_entry *pentry_example; ++struct proc_dir_entry *pentry_sample; + + static int my_open(struct inode *inode, struct file *file) + { +@@ -32,25 +32,25 @@ static struct file_operations mark_ops = + .open = my_open, + }; + +-static int __init example_init(void) ++static int __init sample_init(void) + { +- printk(KERN_ALERT "example init\n"); +- pentry_example = proc_create("tracepoint-example", 0444, NULL, ++ printk(KERN_ALERT "sample init\n"); ++ pentry_sample = proc_create("tracepoint-sample", 0444, NULL, + &mark_ops); +- if (!pentry_example) ++ if (!pentry_sample) + return -EPERM; + return 0; + } + +-static void __exit example_exit(void) ++static void __exit sample_exit(void) + { +- printk(KERN_ALERT "example exit\n"); +- remove_proc_entry("tracepoint-example", NULL); ++ printk(KERN_ALERT "sample exit\n"); ++ remove_proc_entry("tracepoint-sample", NULL); + } + +-module_init(example_init) +-module_exit(example_exit) ++module_init(sample_init) ++module_exit(sample_exit) + + MODULE_LICENSE("GPL"); + MODULE_AUTHOR("Mathieu Desnoyers"); +-MODULE_DESCRIPTION("Tracepoint example"); ++MODULE_DESCRIPTION("Tracepoint sample"); +Index: linux-2.6-tip/scripts/Makefile.build +=================================================================== +--- linux-2.6-tip.orig/scripts/Makefile.build ++++ linux-2.6-tip/scripts/Makefile.build +@@ -112,13 +112,13 @@ endif + # --------------------------------------------------------------------------- + + # Default is built-in, unless we know otherwise +-modkern_cflags := $(CFLAGS_KERNEL) ++modkern_cflags = $(if $(part-of-module), $(CFLAGS_MODULE), $(CFLAGS_KERNEL)) + quiet_modtag := $(empty) $(empty) + +-$(real-objs-m) : modkern_cflags := $(CFLAGS_MODULE) +-$(real-objs-m:.o=.i) : modkern_cflags := $(CFLAGS_MODULE) +-$(real-objs-m:.o=.s) : modkern_cflags := $(CFLAGS_MODULE) +-$(real-objs-m:.o=.lst): modkern_cflags := $(CFLAGS_MODULE) ++$(real-objs-m) : part-of-module := y ++$(real-objs-m:.o=.i) : part-of-module := y ++$(real-objs-m:.o=.s) : part-of-module := y ++$(real-objs-m:.o=.lst): part-of-module := y + + $(real-objs-m) : quiet_modtag := [M] + $(real-objs-m:.o=.i) : quiet_modtag := [M] +@@ -205,7 +205,8 @@ endif + ifdef CONFIG_FTRACE_MCOUNT_RECORD + cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ + "$(if $(CONFIG_64BIT),64,32)" \ +- "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" "$(@)"; ++ "$(OBJDUMP)" "$(OBJCOPY)" "$(CC)" "$(LD)" "$(NM)" "$(RM)" "$(MV)" \ ++ "$(if $(part-of-module),1,0)" "$(@)"; + endif + + define rule_cc_o_c +Index: linux-2.6-tip/scripts/Makefile.lib +=================================================================== +--- linux-2.6-tip.orig/scripts/Makefile.lib ++++ linux-2.6-tip/scripts/Makefile.lib +@@ -186,3 +186,17 @@ quiet_cmd_gzip = GZIP $@ + cmd_gzip = gzip -f -9 < $< > $@ + + ++# Bzip2 ++# --------------------------------------------------------------------------- ++ ++# Bzip2 does not include size in file... 
so we have to fake that ++size_append=$(CONFIG_SHELL) $(srctree)/scripts/bin_size ++ ++quiet_cmd_bzip2 = BZIP2 $@ ++cmd_bzip2 = (bzip2 -9 < $< && $(size_append) $<) > $@ || (rm -f $@ ; false) ++ ++# Lzma ++# --------------------------------------------------------------------------- ++ ++quiet_cmd_lzma = LZMA $@ ++cmd_lzma = (lzma -9 -c $< && $(size_append) $<) >$@ || (rm -f $@ ; false) +Index: linux-2.6-tip/scripts/bin_size +=================================================================== +--- /dev/null ++++ linux-2.6-tip/scripts/bin_size +@@ -0,0 +1,10 @@ ++#!/bin/sh ++ ++if [ $# = 0 ] ; then ++ echo Usage: $0 file ++fi ++ ++size_dec=`stat -c "%s" $1` ++size_hex_echo_string=`printf "%08x" $size_dec | ++ sed 's/\(..\)\(..\)\(..\)\(..\)/\\\\x\4\\\\x\3\\\\x\2\\\\x\1/g'` ++/bin/echo -ne $size_hex_echo_string +Index: linux-2.6-tip/scripts/gcc-x86_32-has-stack-protector.sh +=================================================================== +--- /dev/null ++++ linux-2.6-tip/scripts/gcc-x86_32-has-stack-protector.sh +@@ -0,0 +1,8 @@ ++#!/bin/sh ++ ++echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs" ++if [ "$?" -eq "0" ] ; then ++ echo y ++else ++ echo n ++fi +Index: linux-2.6-tip/scripts/gcc-x86_64-has-stack-protector.sh +=================================================================== +--- linux-2.6-tip.orig/scripts/gcc-x86_64-has-stack-protector.sh ++++ linux-2.6-tip/scripts/gcc-x86_64-has-stack-protector.sh +@@ -1,6 +1,8 @@ + #!/bin/sh + +-echo "int foo(void) { char X[200]; return 3; }" | $1 -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs" ++echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs" + if [ "$?" -eq "0" ] ; then +- echo $2 ++ echo y ++else ++ echo n + fi +Index: linux-2.6-tip/scripts/gen_initramfs_list.sh +=================================================================== +--- linux-2.6-tip.orig/scripts/gen_initramfs_list.sh ++++ linux-2.6-tip/scripts/gen_initramfs_list.sh +@@ -5,7 +5,7 @@ + # Released under the terms of the GNU GPL + # + # Generate a cpio packed initramfs. It uses gen_init_cpio to generate +-# the cpio archive, and gzip to pack it. ++# the cpio archive, and then compresses it. + # The script may also be used to generate the inputfile used for gen_init_cpio + # This script assumes that gen_init_cpio is located in usr/ directory + +@@ -16,8 +16,8 @@ usage() { + cat << EOF + Usage: + $0 [-o ] [-u ] [-g ] {-d | } ... +- -o Create gzipped initramfs file named using +- gen_init_cpio and gzip ++ -o Create compressed initramfs file named using ++ gen_init_cpio and compressor depending on the extension + -u User ID to map to user ID 0 (root). + is only meaningful if is a + directory. "squash" forces all files to uid 0. 
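
The bzip2 and lzma rules added to scripts/Makefile.lib above pipe the compressed stream through scripts/bin_size, which appends the size of the uncompressed cpio archive as four little-endian bytes; as the Makefile comment notes, those formats (unlike gzip) do not carry the original length, so the build fakes it with this footer. Below is a minimal user-space sketch of what the footer encodes; the reader program, its default file name and the assumption that the footer is simply the last four bytes of the image are illustrative only and not part of the patch.

/* Sketch: decode the 4-byte little-endian size footer that scripts/bin_size
 * appends to a bzip2/lzma-compressed initramfs image (illustrative only). */
#include <stdio.h>
#include <stdint.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "usr/initramfs_data.cpio.bz2";
	unsigned char footer[4];
	uint32_t size;
	FILE *f = fopen(path, "rb");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fseek(f, -4L, SEEK_END) != 0 || fread(footer, 1, 4, f) != 4) {
		perror(path);
		fclose(f);
		return 1;
	}
	fclose(f);

	/* bin_size emits the least significant byte first */
	size = (uint32_t)footer[0] | ((uint32_t)footer[1] << 8) |
	       ((uint32_t)footer[2] << 16) | ((uint32_t)footer[3] << 24);
	printf("uncompressed cpio size: %u bytes\n", (unsigned int)size);
	return 0;
}
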
+@@ -225,6 +225,7 @@ cpio_list= + output="/dev/stdout" + output_file="" + is_cpio_compressed= ++compr="gzip -9 -f" + + arg="$1" + case "$arg" in +@@ -233,11 +234,15 @@ case "$arg" in + echo "deps_initramfs := \\" + shift + ;; +- "-o") # generate gzipped cpio image named $1 ++ "-o") # generate compressed cpio image named $1 + shift + output_file="$1" + cpio_list="$(mktemp ${TMPDIR:-/tmp}/cpiolist.XXXXXX)" + output=${cpio_list} ++ echo "$output_file" | grep -q "\.gz$" && compr="gzip -9 -f" ++ echo "$output_file" | grep -q "\.bz2$" && compr="bzip2 -9 -f" ++ echo "$output_file" | grep -q "\.lzma$" && compr="lzma -9 -f" ++ echo "$output_file" | grep -q "\.cpio$" && compr="cat" + shift + ;; + esac +@@ -274,7 +279,7 @@ while [ $# -gt 0 ]; do + esac + done + +-# If output_file is set we will generate cpio archive and gzip it ++# If output_file is set we will generate cpio archive and compress it + # we are carefull to delete tmp files + if [ ! -z ${output_file} ]; then + if [ -z ${cpio_file} ]; then +@@ -287,7 +292,8 @@ if [ ! -z ${output_file} ]; then + if [ "${is_cpio_compressed}" = "compressed" ]; then + cat ${cpio_tfile} > ${output_file} + else +- cat ${cpio_tfile} | gzip -f -9 - > ${output_file} ++ (cat ${cpio_tfile} | ${compr} - > ${output_file}) \ ++ || (rm -f ${output_file} ; false) + fi + [ -z ${cpio_file} ] && rm ${cpio_tfile} + fi +Index: linux-2.6-tip/scripts/headers_check.pl +=================================================================== +--- linux-2.6-tip.orig/scripts/headers_check.pl ++++ linux-2.6-tip/scripts/headers_check.pl +@@ -38,7 +38,7 @@ foreach my $file (@files) { + &check_asm_types(); + &check_sizetypes(); + &check_prototypes(); +- &check_config(); ++ # Dropped for now. Too much noise &check_config(); + } + close FH; + } +Index: linux-2.6-tip/scripts/kallsyms.c +=================================================================== +--- linux-2.6-tip.orig/scripts/kallsyms.c ++++ linux-2.6-tip/scripts/kallsyms.c +@@ -500,6 +500,51 @@ static void optimize_token_table(void) + optimize_result(); + } + ++/* guess for "linker script provide" symbol */ ++static int may_be_linker_script_provide_symbol(const struct sym_entry *se) ++{ ++ const char *symbol = (char *)se->sym + 1; ++ int len = se->len - 1; ++ ++ if (len < 8) ++ return 0; ++ ++ if (symbol[0] != '_' || symbol[1] != '_') ++ return 0; ++ ++ /* __start_XXXXX */ ++ if (!memcmp(symbol + 2, "start_", 6)) ++ return 1; ++ ++ /* __stop_XXXXX */ ++ if (!memcmp(symbol + 2, "stop_", 5)) ++ return 1; ++ ++ /* __end_XXXXX */ ++ if (!memcmp(symbol + 2, "end_", 4)) ++ return 1; ++ ++ /* __XXXXX_start */ ++ if (!memcmp(symbol + len - 6, "_start", 6)) ++ return 1; ++ ++ /* __XXXXX_end */ ++ if (!memcmp(symbol + len - 4, "_end", 4)) ++ return 1; ++ ++ return 0; ++} ++ ++static int prefix_underscores_count(const char *str) ++{ ++ const char *tail = str; ++ ++ while (*tail != '_') ++ tail++; ++ ++ return tail - str; ++} ++ + static int compare_symbols(const void *a, const void *b) + { + const struct sym_entry *sa; +@@ -521,6 +566,18 @@ static int compare_symbols(const void *a + if (wa != wb) + return wa - wb; + ++ /* sort by "linker script provide" type */ ++ wa = may_be_linker_script_provide_symbol(sa); ++ wb = may_be_linker_script_provide_symbol(sb); ++ if (wa != wb) ++ return wa - wb; ++ ++ /* sort by the number of prefix underscores */ ++ wa = prefix_underscores_count((const char *)sa->sym + 1); ++ wb = prefix_underscores_count((const char *)sb->sym + 1); ++ if (wa != wb) ++ return wa - wb; ++ + /* sort by initial order, so that other 
symbols are left undisturbed */ + return sa->start_pos - sb->start_pos; + } +Index: linux-2.6-tip/scripts/mod/modpost.c +=================================================================== +--- linux-2.6-tip.orig/scripts/mod/modpost.c ++++ linux-2.6-tip/scripts/mod/modpost.c +@@ -415,8 +415,9 @@ static int parse_elf(struct elf_info *in + const char *secstrings + = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + const char *secname; ++ int nobits = sechdrs[i].sh_type == SHT_NOBITS; + +- if (sechdrs[i].sh_offset > info->size) { ++ if (!nobits && sechdrs[i].sh_offset > info->size) { + fatal("%s is truncated. sechdrs[i].sh_offset=%lu > " + "sizeof(*hrd)=%zu\n", filename, + (unsigned long)sechdrs[i].sh_offset, +@@ -425,6 +426,8 @@ static int parse_elf(struct elf_info *in + } + secname = secstrings + sechdrs[i].sh_name; + if (strcmp(secname, ".modinfo") == 0) { ++ if (nobits) ++ fatal("%s has NOBITS .modinfo\n", filename); + info->modinfo = (void *)hdr + sechdrs[i].sh_offset; + info->modinfo_len = sechdrs[i].sh_size; + } else if (strcmp(secname, "__ksymtab") == 0) +Index: linux-2.6-tip/scripts/recordmcount.pl +=================================================================== +--- linux-2.6-tip.orig/scripts/recordmcount.pl ++++ linux-2.6-tip/scripts/recordmcount.pl +@@ -100,14 +100,19 @@ $P =~ s@.*/@@g; + + my $V = '0.1'; + +-if ($#ARGV < 6) { +- print "usage: $P arch objdump objcopy cc ld nm rm mv inputfile\n"; ++if ($#ARGV < 7) { ++ print "usage: $P arch bits objdump objcopy cc ld nm rm mv is_module inputfile\n"; + print "version: $V\n"; + exit(1); + } + + my ($arch, $bits, $objdump, $objcopy, $cc, +- $ld, $nm, $rm, $mv, $inputfile) = @ARGV; ++ $ld, $nm, $rm, $mv, $is_module, $inputfile) = @ARGV; ++ ++# This file refers to mcount and shouldn't be ftraced, so lets' ignore it ++if ($inputfile eq "kernel/trace/ftrace.o") { ++ exit(0); ++} + + # Acceptable sections to record. + my %text_sections = ( +@@ -201,6 +206,13 @@ if ($arch eq "x86_64") { + $alignment = 2; + $section_type = '%progbits'; + ++} elsif ($arch eq "ia64") { ++ $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$"; ++ $type = "data8"; ++ ++ if ($is_module eq "0") { ++ $cc .= " -mconstant-gp"; ++ } + } else { + die "Arch $arch is not supported with CONFIG_FTRACE_MCOUNT_RECORD"; + } +@@ -263,7 +275,6 @@ if (!$found_version) { + "\tDisabling local function references.\n"; + } + +- + # + # Step 1: find all the local (static functions) and weak symbols. + # 't' is local, 'w/W' is weak (we never use a weak function) +@@ -331,13 +342,16 @@ sub update_funcs + # + # Step 2: find the sections and mcount call sites + # +-open(IN, "$objdump -dr $inputfile|") || die "error running $objdump"; ++open(IN, "$objdump -hdr $inputfile|") || die "error running $objdump"; + + my $text; + ++my $read_headers = 1; ++ + while () { + # is it a section? + if (/$section_regex/) { ++ $read_headers = 0; + + # Only record text sections that we know are safe + if (defined($text_sections{$1})) { +@@ -371,6 +385,19 @@ while () { + $ref_func = $text; + } + } ++ } elsif ($read_headers && /$mcount_section/) { ++ # ++ # Somehow the make process can execute this script on an ++ # object twice. If it does, we would duplicate the mcount ++ # section and it will cause the function tracer self test ++ # to fail. Check if the mcount section exists, and if it does, ++ # warn and exit. ++ # ++ print STDERR "ERROR: $mcount_section already in $inputfile\n" . ++ "\tThis may be an indication that your build is corrupted.\n" . ++ "\tDelete $inputfile and try again. 
If the same object file\n" . ++ "\tstill causes an issue, then disable CONFIG_DYNAMIC_FTRACE.\n"; ++ exit(-1); + } + + # is this a call site to mcount? If so, record it to print later +Index: linux-2.6-tip/security/capability.c +=================================================================== +--- linux-2.6-tip.orig/security/capability.c ++++ linux-2.6-tip/security/capability.c +@@ -11,6 +11,7 @@ + */ + + #include ++#include + + static int cap_acct(struct file *file) + { +@@ -680,6 +681,9 @@ static int cap_socket_getpeersec_dgram(s + + static int cap_sk_alloc_security(struct sock *sk, int family, gfp_t priority) + { ++#ifdef CONFIG_SECURITY_NETWORK ++ sk->sk_security = NULL; ++#endif + return 0; + } + +Index: linux-2.6-tip/security/keys/keyctl.c +=================================================================== +--- linux-2.6-tip.orig/security/keys/keyctl.c ++++ linux-2.6-tip/security/keys/keyctl.c +@@ -896,7 +896,7 @@ long keyctl_instantiate_key(key_serial_t + { + const struct cred *cred = current_cred(); + struct request_key_auth *rka; +- struct key *instkey, *dest_keyring; ++ struct key *instkey, *uninitialized_var(dest_keyring); + void *payload; + long ret; + bool vm = false; +@@ -974,7 +974,7 @@ long keyctl_negate_key(key_serial_t id, + { + const struct cred *cred = current_cred(); + struct request_key_auth *rka; +- struct key *instkey, *dest_keyring; ++ struct key *instkey, *uninitialized_var(dest_keyring); + long ret; + + kenter("%d,%u,%d", id, timeout, ringid); +Index: linux-2.6-tip/security/selinux/netnode.c +=================================================================== +--- linux-2.6-tip.orig/security/selinux/netnode.c ++++ linux-2.6-tip/security/selinux/netnode.c +@@ -140,6 +140,7 @@ static struct sel_netnode *sel_netnode_f + break; + default: + BUG(); ++ return NULL; + } + + list_for_each_entry_rcu(node, &sel_netnode_hash[idx].list, list) +Index: linux-2.6-tip/sound/drivers/Kconfig +=================================================================== +--- linux-2.6-tip.orig/sound/drivers/Kconfig ++++ linux-2.6-tip/sound/drivers/Kconfig +@@ -33,7 +33,7 @@ if SND_DRIVERS + + config SND_PCSP + tristate "PC-Speaker support (READ HELP!)" +- depends on PCSPKR_PLATFORM && X86_PC && HIGH_RES_TIMERS ++ depends on PCSPKR_PLATFORM && X86 && HIGH_RES_TIMERS + depends on INPUT + depends on EXPERIMENTAL + select SND_PCM +@@ -91,6 +91,8 @@ config SND_VIRMIDI + + config SND_MTPAV + tristate "MOTU MidiTimePiece AV multiport MIDI" ++ # sometimes crashes ++ depends on 0 + select SND_RAWMIDI + help + To use a MOTU MidiTimePiece AV multiport MIDI adapter +Index: linux-2.6-tip/sound/isa/sb/sb8.c +=================================================================== +--- linux-2.6-tip.orig/sound/isa/sb/sb8.c ++++ linux-2.6-tip/sound/isa/sb/sb8.c +@@ -101,7 +101,7 @@ static int __devinit snd_sb8_probe(struc + struct snd_card *card; + struct snd_sb8 *acard; + struct snd_opl3 *opl3; +- int err; ++ int uninitialized_var(err); + + card = snd_card_new(index[dev], id[dev], THIS_MODULE, + sizeof(struct snd_sb8)); +Index: linux-2.6-tip/sound/oss/ad1848.c +=================================================================== +--- linux-2.6-tip.orig/sound/oss/ad1848.c ++++ linux-2.6-tip/sound/oss/ad1848.c +@@ -2879,7 +2879,7 @@ static struct isapnp_device_id id_table[ + {0} + }; + +-MODULE_DEVICE_TABLE(isapnp, id_table); ++MODULE_STATIC_DEVICE_TABLE(isapnp, id_table); + + static struct pnp_dev *activate_dev(char *devname, char *resname, struct pnp_dev *dev) + { +Index: 
linux-2.6-tip/sound/pci/pcxhr/pcxhr.c +=================================================================== +--- linux-2.6-tip.orig/sound/pci/pcxhr/pcxhr.c ++++ linux-2.6-tip/sound/pci/pcxhr/pcxhr.c +@@ -224,7 +224,7 @@ static int pcxhr_pll_freq_register(unsig + static int pcxhr_get_clock_reg(struct pcxhr_mgr *mgr, unsigned int rate, + unsigned int *reg, unsigned int *freq) + { +- unsigned int val, realfreq, pllreg; ++ unsigned int val, realfreq, uninitialized_var(pllreg); + struct pcxhr_rmh rmh; + int err; + +@@ -298,7 +298,9 @@ static int pcxhr_sub_set_clock(struct pc + unsigned int rate, + int *changed) + { +- unsigned int val, realfreq, speed; ++ unsigned int uninitialized_var(val), ++ uninitialized_var(realfreq), ++ speed; + struct pcxhr_rmh rmh; + int err; + +@@ -681,7 +683,7 @@ static void pcxhr_trigger_tasklet(unsign + { + unsigned long flags; + int i, j, err; +- struct pcxhr_pipe *pipe; ++ struct pcxhr_pipe *uninitialized_var(pipe); + struct snd_pcxhr *chip; + struct pcxhr_mgr *mgr = (struct pcxhr_mgr*)(arg); + int capture_mask = 0; +Index: linux-2.6-tip/sound/pci/pcxhr/pcxhr_mixer.c +=================================================================== +--- linux-2.6-tip.orig/sound/pci/pcxhr/pcxhr_mixer.c ++++ linux-2.6-tip/sound/pci/pcxhr/pcxhr_mixer.c +@@ -936,7 +936,7 @@ static int pcxhr_iec958_get(struct snd_k + struct snd_ctl_elem_value *ucontrol) + { + struct snd_pcxhr *chip = snd_kcontrol_chip(kcontrol); +- unsigned char aes_bits; ++ unsigned char uninitialized_var(aes_bits); + int i, err; + + mutex_lock(&chip->mgr->mixer_mutex); +@@ -1264,3 +1264,4 @@ int pcxhr_create_mixer(struct pcxhr_mgr + + return 0; + } ++ +Index: linux-2.6-tip/sound/pci/via82xx.c +=================================================================== +--- linux-2.6-tip.orig/sound/pci/via82xx.c ++++ linux-2.6-tip/sound/pci/via82xx.c +@@ -2428,7 +2428,7 @@ static int __devinit snd_via82xx_probe(s + const struct pci_device_id *pci_id) + { + struct snd_card *card; +- struct via82xx *chip; ++ struct via82xx *uninitialized_var(chip); + int chip_type = 0, card_type; + unsigned int i; + int err; +Index: linux-2.6-tip/sound/pci/via82xx_modem.c +=================================================================== +--- linux-2.6-tip.orig/sound/pci/via82xx_modem.c ++++ linux-2.6-tip/sound/pci/via82xx_modem.c +@@ -1162,7 +1162,7 @@ static int __devinit snd_via82xx_probe(s + const struct pci_device_id *pci_id) + { + struct snd_card *card; +- struct via82xx_modem *chip; ++ struct via82xx_modem *uninitialized_var(chip); + int chip_type = 0, card_type; + unsigned int i; + int err; +Index: linux-2.6-tip/sound/pci/vx222/vx222.c +=================================================================== +--- linux-2.6-tip.orig/sound/pci/vx222/vx222.c ++++ linux-2.6-tip/sound/pci/vx222/vx222.c +@@ -194,7 +194,7 @@ static int __devinit snd_vx222_probe(str + static int dev; + struct snd_card *card; + struct snd_vx_hardware *hw; +- struct snd_vx222 *vx; ++ struct snd_vx222 *uninitialized_var(vx); + int err; + + if (dev >= SNDRV_CARDS) +Index: linux-2.6-tip/usr/Kconfig +=================================================================== +--- linux-2.6-tip.orig/usr/Kconfig ++++ linux-2.6-tip/usr/Kconfig +@@ -44,3 +44,92 @@ config INITRAMFS_ROOT_GID + owned by group root in the initial ramdisk image. + + If you are not sure, leave it set to "0". 
++ ++config RD_GZIP ++ bool "Initial ramdisk compressed using gzip" ++ default y ++ depends on BLK_DEV_INITRD=y ++ select DECOMPRESS_GZIP ++ help ++ Support loading of a gzip encoded initial ramdisk or cpio buffer. ++ If unsure, say Y. ++ ++config RD_BZIP2 ++ bool "Initial ramdisk compressed using bzip2" ++ default n ++ depends on BLK_DEV_INITRD=y ++ select DECOMPRESS_BZIP2 ++ help ++ Support loading of a bzip2 encoded initial ramdisk or cpio buffer ++ If unsure, say N. ++ ++config RD_LZMA ++ bool "Initial ramdisk compressed using lzma" ++ default n ++ depends on BLK_DEV_INITRD=y ++ select DECOMPRESS_LZMA ++ help ++ Support loading of a lzma encoded initial ramdisk or cpio buffer ++ If unsure, say N. ++ ++choice ++ prompt "Built-in initramfs compression mode" ++ help ++ This setting is only meaningful if the INITRAMFS_SOURCE is ++ set. It decides by which algorithm the INITRAMFS_SOURCE will ++ be compressed. ++ Several compression algorithms are available, which differ ++ in efficiency, compression and decompression speed. ++ Compression speed is only relevant when building a kernel. ++ Decompression speed is relevant at each boot. ++ ++ If you have any problems with bzip2 or lzma compressed ++ initramfs, mail me (Alain Knaff) . ++ ++ High compression options are mostly useful for users who ++ are low on disk space (embedded systems), but for whom ram ++ size matters less. ++ ++ If in doubt, select 'gzip' ++ ++config INITRAMFS_COMPRESSION_NONE ++ bool "None" ++ help ++ Do not compress the built-in initramfs at all. This may ++ sound wasteful in space, but, you should be aware that the ++ built-in initramfs will be compressed at a later stage ++ anyways along with the rest of the kernel, on those ++ architectures that support this. ++ However, not compressing the initramfs may lead to slightly ++ higher memory consumption during a short time at boot, while ++ both the cpio image and the unpacked filesystem image will ++ be present in memory simultaneously ++ ++config INITRAMFS_COMPRESSION_GZIP ++ bool "Gzip" ++ depends on RD_GZIP ++ help ++ The old and tried gzip compression. Its compression ratio is ++ the poorest among the 3 choices; however its speed (both ++ compression and decompression) is the fastest. ++ ++config INITRAMFS_COMPRESSION_BZIP2 ++ bool "Bzip2" ++ depends on RD_BZIP2 ++ help ++ Its compression ratio and speed is intermediate. ++ Decompression speed is slowest among the three. The initramfs ++ size is about 10% smaller with bzip2, in comparison to gzip. ++ Bzip2 uses a large amount of memory. For modern kernels you ++ will need at least 8MB RAM or more for booting. ++ ++config INITRAMFS_COMPRESSION_LZMA ++ bool "LZMA" ++ depends on RD_LZMA ++ help ++ The most recent compression algorithm. ++ Its ratio is best, decompression speed is between the other ++ two. Compression is slowest. The initramfs size is about 33% ++ smaller with LZMA in comparison to gzip. 
++ ++endchoice +Index: linux-2.6-tip/usr/Makefile +=================================================================== +--- linux-2.6-tip.orig/usr/Makefile ++++ linux-2.6-tip/usr/Makefile +@@ -6,13 +6,25 @@ klibcdirs:; + PHONY += klibcdirs + + ++# No compression ++suffix_$(CONFIG_INITRAMFS_COMPRESSION_NONE) = ++ ++# Gzip, but no bzip2 ++suffix_$(CONFIG_INITRAMFS_COMPRESSION_GZIP) = .gz ++ ++# Bzip2 ++suffix_$(CONFIG_INITRAMFS_COMPRESSION_BZIP2) = .bz2 ++ ++# Lzma ++suffix_$(CONFIG_INITRAMFS_COMPRESSION_LZMA) = .lzma ++ + # Generate builtin.o based on initramfs_data.o +-obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data.o ++obj-$(CONFIG_BLK_DEV_INITRD) := initramfs_data$(suffix_y).o + +-# initramfs_data.o contains the initramfs_data.cpio.gz image. ++# initramfs_data.o contains the compressed initramfs_data.cpio image. + # The image is included using .incbin, a dependency which is not + # tracked automatically. +-$(obj)/initramfs_data.o: $(obj)/initramfs_data.cpio.gz FORCE ++$(obj)/initramfs_data$(suffix_y).o: $(obj)/initramfs_data.cpio$(suffix_y) FORCE + + ##### + # Generate the initramfs cpio archive +@@ -25,28 +37,28 @@ ramfs-args := \ + $(if $(CONFIG_INITRAMFS_ROOT_UID), -u $(CONFIG_INITRAMFS_ROOT_UID)) \ + $(if $(CONFIG_INITRAMFS_ROOT_GID), -g $(CONFIG_INITRAMFS_ROOT_GID)) + +-# .initramfs_data.cpio.gz.d is used to identify all files included ++# .initramfs_data.cpio.d is used to identify all files included + # in initramfs and to detect if any files are added/removed. + # Removed files are identified by directory timestamp being updated + # The dependency list is generated by gen_initramfs.sh -l +-ifneq ($(wildcard $(obj)/.initramfs_data.cpio.gz.d),) +- include $(obj)/.initramfs_data.cpio.gz.d ++ifneq ($(wildcard $(obj)/.initramfs_data.cpio.d),) ++ include $(obj)/.initramfs_data.cpio.d + endif + + quiet_cmd_initfs = GEN $@ + cmd_initfs = $(initramfs) -o $@ $(ramfs-args) $(ramfs-input) + +-targets := initramfs_data.cpio.gz ++targets := initramfs_data.cpio.gz initramfs_data.cpio.bz2 initramfs_data.cpio.lzma initramfs_data.cpio + # do not try to update files included in initramfs + $(deps_initramfs): ; + + $(deps_initramfs): klibcdirs +-# We rebuild initramfs_data.cpio.gz if: +-# 1) Any included file is newer then initramfs_data.cpio.gz ++# We rebuild initramfs_data.cpio if: ++# 1) Any included file is newer then initramfs_data.cpio + # 2) There are changes in which files are included (added or deleted) +-# 3) If gen_init_cpio are newer than initramfs_data.cpio.gz ++# 3) If gen_init_cpio are newer than initramfs_data.cpio + # 4) arguments to gen_initramfs.sh changes +-$(obj)/initramfs_data.cpio.gz: $(obj)/gen_init_cpio $(deps_initramfs) klibcdirs +- $(Q)$(initramfs) -l $(ramfs-input) > $(obj)/.initramfs_data.cpio.gz.d ++$(obj)/initramfs_data.cpio$(suffix_y): $(obj)/gen_init_cpio $(deps_initramfs) klibcdirs ++ $(Q)$(initramfs) -l $(ramfs-input) > $(obj)/.initramfs_data.cpio.d + $(call if_changed,initfs) + +Index: linux-2.6-tip/usr/initramfs_data.S +=================================================================== +--- linux-2.6-tip.orig/usr/initramfs_data.S ++++ linux-2.6-tip/usr/initramfs_data.S +@@ -26,5 +26,5 @@ SECTIONS + */ + + .section .init.ramfs,"a" +-.incbin "usr/initramfs_data.cpio.gz" ++.incbin "usr/initramfs_data.cpio" + +Index: linux-2.6-tip/usr/initramfs_data.bz2.S +=================================================================== +--- /dev/null ++++ linux-2.6-tip/usr/initramfs_data.bz2.S +@@ -0,0 +1,29 @@ ++/* ++ initramfs_data includes the compressed binary that is the ++ 
filesystem used for early user space. ++ Note: Older versions of "as" (prior to binutils 2.11.90.0.23 ++ released on 2001-07-14) dit not support .incbin. ++ If you are forced to use older binutils than that then the ++ following trick can be applied to create the resulting binary: ++ ++ ++ ld -m elf_i386 --format binary --oformat elf32-i386 -r \ ++ -T initramfs_data.scr initramfs_data.cpio.gz -o initramfs_data.o ++ ld -m elf_i386 -r -o built-in.o initramfs_data.o ++ ++ initramfs_data.scr looks like this: ++SECTIONS ++{ ++ .init.ramfs : { *(.data) } ++} ++ ++ The above example is for i386 - the parameters vary from architectures. ++ Eventually look up LDFLAGS_BLOB in an older version of the ++ arch/$(ARCH)/Makefile to see the flags used before .incbin was introduced. ++ ++ Using .incbin has the advantage over ld that the correct flags are set ++ in the ELF header, as required by certain architectures. ++*/ ++ ++.section .init.ramfs,"a" ++.incbin "usr/initramfs_data.cpio.bz2" +Index: linux-2.6-tip/usr/initramfs_data.gz.S +=================================================================== +--- /dev/null ++++ linux-2.6-tip/usr/initramfs_data.gz.S +@@ -0,0 +1,29 @@ ++/* ++ initramfs_data includes the compressed binary that is the ++ filesystem used for early user space. ++ Note: Older versions of "as" (prior to binutils 2.11.90.0.23 ++ released on 2001-07-14) dit not support .incbin. ++ If you are forced to use older binutils than that then the ++ following trick can be applied to create the resulting binary: ++ ++ ++ ld -m elf_i386 --format binary --oformat elf32-i386 -r \ ++ -T initramfs_data.scr initramfs_data.cpio.gz -o initramfs_data.o ++ ld -m elf_i386 -r -o built-in.o initramfs_data.o ++ ++ initramfs_data.scr looks like this: ++SECTIONS ++{ ++ .init.ramfs : { *(.data) } ++} ++ ++ The above example is for i386 - the parameters vary from architectures. ++ Eventually look up LDFLAGS_BLOB in an older version of the ++ arch/$(ARCH)/Makefile to see the flags used before .incbin was introduced. ++ ++ Using .incbin has the advantage over ld that the correct flags are set ++ in the ELF header, as required by certain architectures. ++*/ ++ ++.section .init.ramfs,"a" ++.incbin "usr/initramfs_data.cpio.gz" +Index: linux-2.6-tip/usr/initramfs_data.lzma.S +=================================================================== +--- /dev/null ++++ linux-2.6-tip/usr/initramfs_data.lzma.S +@@ -0,0 +1,29 @@ ++/* ++ initramfs_data includes the compressed binary that is the ++ filesystem used for early user space. ++ Note: Older versions of "as" (prior to binutils 2.11.90.0.23 ++ released on 2001-07-14) dit not support .incbin. ++ If you are forced to use older binutils than that then the ++ following trick can be applied to create the resulting binary: ++ ++ ++ ld -m elf_i386 --format binary --oformat elf32-i386 -r \ ++ -T initramfs_data.scr initramfs_data.cpio.gz -o initramfs_data.o ++ ld -m elf_i386 -r -o built-in.o initramfs_data.o ++ ++ initramfs_data.scr looks like this: ++SECTIONS ++{ ++ .init.ramfs : { *(.data) } ++} ++ ++ The above example is for i386 - the parameters vary from architectures. ++ Eventually look up LDFLAGS_BLOB in an older version of the ++ arch/$(ARCH)/Makefile to see the flags used before .incbin was introduced. ++ ++ Using .incbin has the advantage over ld that the correct flags are set ++ in the ELF header, as required by certain architectures. 
++*/ ++ ++.section .init.ramfs,"a" ++.incbin "usr/initramfs_data.cpio.lzma" +Index: linux-2.6-tip/scripts/Kbuild.include +=================================================================== +--- linux-2.6-tip.orig/scripts/Kbuild.include ++++ linux-2.6-tip/scripts/Kbuild.include +@@ -98,8 +98,9 @@ as-option = $(call try-run,\ + # as-instr + # Usage: cflags-y += $(call as-instr,instr,option1,option2) + +-as-instr = $(call try-run,\ +- echo -e "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -xassembler -o "$$TMP" -,$(2),$(3)) ++as-instr = $(call try-run, \ ++ echo -e "$(1)" > "$$TMP"; \ ++ $(CC) $(KBUILD_AFLAGS) -c -xassembler -o /dev/null "$$TMP",$(2),$(3)) + + # cc-option + # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586) +Index: linux-2.6-tip/arch/mn10300/Kconfig +=================================================================== +--- linux-2.6-tip.orig/arch/mn10300/Kconfig ++++ linux-2.6-tip/arch/mn10300/Kconfig +@@ -186,6 +186,17 @@ config PREEMPT + Say Y here if you are building a kernel for a desktop, embedded + or real-time system. Say N if you are unsure. + ++config PREEMPT_BKL ++ bool "Preempt The Big Kernel Lock" ++ depends on PREEMPT ++ default y ++ help ++ This option reduces the latency of the kernel by making the ++ big kernel lock preemptible. ++ ++ Say Y here if you are building a kernel for a desktop system. ++ Say N if you are unsure. ++ + config MN10300_CURRENT_IN_E2 + bool "Hold current task address in E2 register" + default y +Index: linux-2.6-tip/kernel/posix-timers.c +=================================================================== +--- linux-2.6-tip.orig/kernel/posix-timers.c ++++ linux-2.6-tip/kernel/posix-timers.c +@@ -420,6 +420,7 @@ static enum hrtimer_restart posix_timer_ + static struct pid *good_sigevent(sigevent_t * event) + { + struct task_struct *rtn = current->group_leader; ++ int sig = event->sigev_signo; + + if ((event->sigev_notify & SIGEV_THREAD_ID ) && + (!(rtn = find_task_by_vpid(event->sigev_notify_thread_id)) || +@@ -428,7 +429,8 @@ static struct pid *good_sigevent(sigeven + return NULL; + + if (((event->sigev_notify & ~SIGEV_THREAD_ID) != SIGEV_NONE) && +- ((event->sigev_signo <= 0) || (event->sigev_signo > SIGRTMAX))) ++ (sig <= 0 || sig > SIGRTMAX || sig_kernel_only(sig) || ++ sig_kernel_coredump(sig))) + return NULL; + + return task_pid(rtn); +@@ -787,6 +789,7 @@ retry: + + unlock_timer(timr, flag); + if (error == TIMER_RETRY) { ++ hrtimer_wait_for_timer(&timr->it.real.timer); + rtn = NULL; // We already got the old time... + goto retry; + } +@@ -825,6 +828,7 @@ retry_delete: + + if (timer_delete_hook(timer) == TIMER_RETRY) { + unlock_timer(timer, flags); ++ hrtimer_wait_for_timer(&timer->it.real.timer); + goto retry_delete; + } + +@@ -854,6 +858,7 @@ retry_delete: + + if (timer_delete_hook(timer) == TIMER_RETRY) { + unlock_timer(timer, flags); ++ hrtimer_wait_for_timer(&timer->it.real.timer); + goto retry_delete; + } + list_del(&timer->list); +Index: linux-2.6-tip/include/linux/srcu.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/srcu.h ++++ linux-2.6-tip/include/linux/srcu.h +@@ -27,6 +27,8 @@ + #ifndef _LINUX_SRCU_H + #define _LINUX_SRCU_H + ++#include ++ + struct srcu_struct_array { + int c[2]; + }; +@@ -50,4 +52,24 @@ void srcu_read_unlock(struct srcu_struct + void synchronize_srcu(struct srcu_struct *sp); + long srcu_batches_completed(struct srcu_struct *sp); + ++/* ++ * fully compatible with srcu, but optimized for writers. 
++ */ ++ ++struct qrcu_struct { ++ int completed; ++ atomic_t ctr[2]; ++ wait_queue_head_t wq; ++ struct mutex mutex; ++}; ++ ++int init_qrcu_struct(struct qrcu_struct *qp); ++int qrcu_read_lock(struct qrcu_struct *qp); ++void qrcu_read_unlock(struct qrcu_struct *qp, int idx); ++void synchronize_qrcu(struct qrcu_struct *qp); ++ ++static inline void cleanup_qrcu_struct(struct qrcu_struct *qp) ++{ ++} ++ + #endif +Index: linux-2.6-tip/kernel/srcu.c +=================================================================== +--- linux-2.6-tip.orig/kernel/srcu.c ++++ linux-2.6-tip/kernel/srcu.c +@@ -255,3 +255,89 @@ EXPORT_SYMBOL_GPL(srcu_read_lock); + EXPORT_SYMBOL_GPL(srcu_read_unlock); + EXPORT_SYMBOL_GPL(synchronize_srcu); + EXPORT_SYMBOL_GPL(srcu_batches_completed); ++ ++int init_qrcu_struct(struct qrcu_struct *qp) ++{ ++ qp->completed = 0; ++ atomic_set(qp->ctr + 0, 1); ++ atomic_set(qp->ctr + 1, 0); ++ init_waitqueue_head(&qp->wq); ++ mutex_init(&qp->mutex); ++ ++ return 0; ++} ++ ++int qrcu_read_lock(struct qrcu_struct *qp) ++{ ++ for (;;) { ++ int idx = qp->completed & 0x1; ++ if (likely(atomic_inc_not_zero(qp->ctr + idx))) ++ return idx; ++ } ++} ++ ++void qrcu_read_unlock(struct qrcu_struct *qp, int idx) ++{ ++ if (atomic_dec_and_test(qp->ctr + idx)) ++ wake_up(&qp->wq); ++} ++ ++void synchronize_qrcu(struct qrcu_struct *qp) ++{ ++ int idx; ++ ++ smp_mb(); /* Force preceding change to happen before fastpath check. */ ++ ++ /* ++ * Fastpath: If the two counters sum to "1" at a given point in ++ * time, there are no readers. However, it takes two separate ++ * loads to sample both counters, which won't occur simultaneously. ++ * So we might race with a counter switch, so that we might see ++ * ctr[0]==0, then the counter might switch, then we might see ++ * ctr[1]==1 (unbeknownst to us because there is a reader still ++ * there). So we do a read memory barrier and recheck. If the ++ * same race happens again, there must have been a second counter ++ * switch. This second counter switch could not have happened ++ * until all preceding readers finished, so if the condition ++ * is true both times, we may safely proceed. ++ * ++ * This relies critically on the atomic increment and atomic ++ * decrement being seen as executing in order. ++ */ ++ ++ if (atomic_read(&qp->ctr[0]) + atomic_read(&qp->ctr[1]) <= 1) { ++ smp_rmb(); /* Keep two checks independent. */ ++ if (atomic_read(&qp->ctr[0]) + atomic_read(&qp->ctr[1]) <= 1) ++ goto out; ++ } ++ ++ mutex_lock(&qp->mutex); ++ ++ idx = qp->completed & 0x1; ++ if (atomic_read(qp->ctr + idx) == 1) ++ goto out_unlock; ++ ++ atomic_inc(qp->ctr + (idx ^ 0x1)); ++ ++ /* ++ * Prevent subsequent decrement from being seen before previous ++ * increment -- such an inversion could cause the fastpath ++ * above to falsely conclude that there were no readers. Also, ++ * reduce the likelihood that qrcu_read_lock() will loop. ++ */ ++ ++ smp_mb__after_atomic_inc(); ++ qp->completed++; ++ ++ atomic_dec(qp->ctr + idx); ++ __wait_event(qp->wq, !atomic_read(qp->ctr + idx)); ++out_unlock: ++ mutex_unlock(&qp->mutex); ++out: ++ smp_mb(); /* force subsequent free after qrcu_read_unlock(). 
*/ ++} ++ ++EXPORT_SYMBOL_GPL(init_qrcu_struct); ++EXPORT_SYMBOL_GPL(qrcu_read_lock); ++EXPORT_SYMBOL_GPL(qrcu_read_unlock); ++EXPORT_SYMBOL_GPL(synchronize_qrcu); +Index: linux-2.6-tip/drivers/net/sungem.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/sungem.c ++++ linux-2.6-tip/drivers/net/sungem.c +@@ -1032,12 +1032,8 @@ static int gem_start_xmit(struct sk_buff + (csum_stuff_off << 21)); + } + +- local_irq_save(flags); +- if (!spin_trylock(&gp->tx_lock)) { +- /* Tell upper layer to requeue */ +- local_irq_restore(flags); +- return NETDEV_TX_LOCKED; +- } ++ spin_lock_irqsave(&gp->tx_lock, flags); ++ + /* We raced with gem_do_stop() */ + if (!gp->running) { + spin_unlock_irqrestore(&gp->tx_lock, flags); +Index: linux-2.6-tip/arch/x86/kernel/tsc_sync.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/tsc_sync.c ++++ linux-2.6-tip/arch/x86/kernel/tsc_sync.c +@@ -33,7 +33,7 @@ static __cpuinitdata atomic_t stop_count + * we want to have the fastest, inlined, non-debug version + * of a critical section, to be able to prove TSC time-warps: + */ +-static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED; ++static __cpuinitdata __raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED; + static __cpuinitdata cycles_t last_tsc; + static __cpuinitdata cycles_t max_warp; + static __cpuinitdata int nr_warps; +@@ -103,6 +103,7 @@ static __cpuinit void check_tsc_warp(voi + */ + void __cpuinit check_tsc_sync_source(int cpu) + { ++ unsigned long flags; + int cpus = 2; + + /* +@@ -129,8 +130,11 @@ void __cpuinit check_tsc_sync_source(int + /* + * Wait for the target to arrive: + */ ++ local_save_flags(flags); ++ local_irq_enable(); + while (atomic_read(&start_count) != cpus-1) + cpu_relax(); ++ local_irq_restore(flags); + /* + * Trigger the target to continue into the measurement too: + */ +Index: linux-2.6-tip/drivers/input/keyboard/atkbd.c +=================================================================== +--- linux-2.6-tip.orig/drivers/input/keyboard/atkbd.c ++++ linux-2.6-tip/drivers/input/keyboard/atkbd.c +@@ -1556,8 +1556,23 @@ static struct dmi_system_id atkbd_dmi_qu + { } + }; + ++static int __read_mostly noatkbd; ++ ++static int __init noatkbd_setup(char *str) ++{ ++ noatkbd = 1; ++ printk(KERN_INFO "debug: not setting up AT keyboard.\n"); ++ ++ return 1; ++} ++ ++__setup("noatkbd", noatkbd_setup); ++ + static int __init atkbd_init(void) + { ++ if (noatkbd) ++ return 0; ++ + dmi_check_system(atkbd_dmi_quirk_table); + + return serio_register_driver(&atkbd_drv); +Index: linux-2.6-tip/drivers/input/mouse/psmouse-base.c +=================================================================== +--- linux-2.6-tip.orig/drivers/input/mouse/psmouse-base.c ++++ linux-2.6-tip/drivers/input/mouse/psmouse-base.c +@@ -1645,10 +1645,25 @@ static int psmouse_get_maxproto(char *bu + return sprintf(buffer, "%s\n", psmouse_protocol_by_type(type)->name); + } + ++static int __read_mostly nopsmouse; ++ ++static int __init nopsmouse_setup(char *str) ++{ ++ nopsmouse = 1; ++ printk(KERN_INFO "debug: not setting up psmouse.\n"); ++ ++ return 1; ++} ++ ++__setup("nopsmouse", nopsmouse_setup); ++ + static int __init psmouse_init(void) + { + int err; + ++ if (nopsmouse) ++ return 0; ++ + kpsmoused_wq = create_singlethread_workqueue("kpsmoused"); + if (!kpsmoused_wq) { + printk(KERN_ERR "psmouse: failed to create kpsmoused workqueue\n"); +Index: linux-2.6-tip/kernel/rtmutex-debug.h 
+=================================================================== +--- linux-2.6-tip.orig/kernel/rtmutex-debug.h ++++ linux-2.6-tip/kernel/rtmutex-debug.h +@@ -17,17 +17,17 @@ extern void debug_rt_mutex_free_waiter(s + extern void debug_rt_mutex_init(struct rt_mutex *lock, const char *name); + extern void debug_rt_mutex_lock(struct rt_mutex *lock); + extern void debug_rt_mutex_unlock(struct rt_mutex *lock); +-extern void debug_rt_mutex_proxy_lock(struct rt_mutex *lock, +- struct task_struct *powner); ++extern void ++debug_rt_mutex_proxy_lock(struct rt_mutex *lock, struct task_struct *powner); + extern void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock); + extern void debug_rt_mutex_deadlock(int detect, struct rt_mutex_waiter *waiter, + struct rt_mutex *lock); + extern void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter); +-# define debug_rt_mutex_reset_waiter(w) \ ++# define debug_rt_mutex_reset_waiter(w) \ + do { (w)->deadlock_lock = NULL; } while (0) + +-static inline int debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, +- int detect) ++static inline int ++debug_rt_mutex_detect_deadlock(struct rt_mutex_waiter *waiter, int detect) + { +- return (waiter != NULL); ++ return waiter != NULL; + } +Index: linux-2.6-tip/drivers/net/8139too.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/8139too.c ++++ linux-2.6-tip/drivers/net/8139too.c +@@ -2209,7 +2209,11 @@ static irqreturn_t rtl8139_interrupt (in + */ + static void rtl8139_poll_controller(struct net_device *dev) + { +- disable_irq(dev->irq); ++ /* ++ * use _nosync() variant - might be used by netconsole ++ * from atomic contexts: ++ */ ++ disable_irq_nosync(dev->irq); + rtl8139_interrupt(dev->irq, dev); + enable_irq(dev->irq); + } +Index: linux-2.6-tip/drivers/pci/msi.c +=================================================================== +--- linux-2.6-tip.orig/drivers/pci/msi.c ++++ linux-2.6-tip/drivers/pci/msi.c +@@ -323,6 +323,10 @@ static void __pci_restore_msi_state(stru + return; + + entry = get_irq_msi(dev->irq); ++ if (!entry) { ++ WARN_ON(1); ++ return; ++ } + pos = entry->msi_attrib.pos; + + pci_intx_for_msi(dev, 0); +Index: linux-2.6-tip/drivers/block/floppy.c +=================================================================== +--- linux-2.6-tip.orig/drivers/block/floppy.c ++++ linux-2.6-tip/drivers/block/floppy.c +@@ -4148,6 +4148,28 @@ static void floppy_device_release(struct + { + } + ++static int floppy_suspend(struct platform_device *dev, pm_message_t state) ++{ ++ floppy_release_irq_and_dma(); ++ ++ return 0; ++} ++ ++static int floppy_resume(struct platform_device *dev) ++{ ++ floppy_grab_irq_and_dma(); ++ ++ return 0; ++} ++ ++static struct platform_driver floppy_driver = { ++ .suspend = floppy_suspend, ++ .resume = floppy_resume, ++ .driver = { ++ .name = "floppy", ++ }, ++}; ++ + static struct platform_device floppy_device[N_DRIVE]; + + static struct kobject *floppy_find(dev_t dev, int *part, void *data) +@@ -4196,10 +4218,14 @@ static int __init floppy_init(void) + if (err) + goto out_put_disk; + ++ err = platform_driver_register(&floppy_driver); ++ if (err) ++ goto out_unreg_blkdev; ++ + floppy_queue = blk_init_queue(do_fd_request, &floppy_lock); + if (!floppy_queue) { + err = -ENOMEM; +- goto out_unreg_blkdev; ++ goto out_unreg_driver; + } + blk_queue_max_sectors(floppy_queue, 64); + +@@ -4346,6 +4372,8 @@ out_flush_work: + out_unreg_region: + blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); + 
blk_cleanup_queue(floppy_queue); ++out_unreg_driver: ++ platform_driver_unregister(&floppy_driver); + out_unreg_blkdev: + unregister_blkdev(FLOPPY_MAJOR, "fd"); + out_put_disk: +@@ -4567,6 +4595,7 @@ static void __exit floppy_module_exit(vo + blk_unregister_region(MKDEV(FLOPPY_MAJOR, 0), 256); + unregister_blkdev(FLOPPY_MAJOR, "fd"); + ++ platform_driver_unregister(&floppy_driver); + for (drive = 0; drive < N_DRIVE; drive++) { + del_timer_sync(&motor_off_timer[drive]); + +Index: linux-2.6-tip/net/core/flow.c +=================================================================== +--- linux-2.6-tip.orig/net/core/flow.c ++++ linux-2.6-tip/net/core/flow.c +@@ -39,9 +39,10 @@ atomic_t flow_cache_genid = ATOMIC_INIT( + + static u32 flow_hash_shift; + #define flow_hash_size (1 << flow_hash_shift) +-static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL }; + +-#define flow_table(cpu) (per_cpu(flow_tables, cpu)) ++static DEFINE_PER_CPU_LOCKED(struct flow_cache_entry **, flow_tables); ++ ++#define flow_table(cpu) (per_cpu_var_locked(flow_tables, cpu)) + + static struct kmem_cache *flow_cachep __read_mostly; + +@@ -168,24 +169,24 @@ static int flow_key_compare(struct flowi + void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, + flow_resolve_t resolver) + { +- struct flow_cache_entry *fle, **head; ++ struct flow_cache_entry **table, *fle, **head = NULL /* shut up GCC */; + unsigned int hash; + int cpu; + + local_bh_disable(); +- cpu = smp_processor_id(); ++ table = get_cpu_var_locked(flow_tables, &cpu); + + fle = NULL; + /* Packet really early in init? Making flow_cache_init a + * pre-smp initcall would solve this. --RR */ +- if (!flow_table(cpu)) ++ if (!table) + goto nocache; + + if (flow_hash_rnd_recalc(cpu)) + flow_new_hash_rnd(cpu); + hash = flow_hash_code(key, cpu); + +- head = &flow_table(cpu)[hash]; ++ head = &table[hash]; + for (fle = *head; fle; fle = fle->next) { + if (fle->family == family && + fle->dir == dir && +@@ -195,6 +196,7 @@ void *flow_cache_lookup(struct net *net, + + if (ret) + atomic_inc(fle->object_ref); ++ put_cpu_var_locked(flow_tables, cpu); + local_bh_enable(); + + return ret; +@@ -220,6 +222,8 @@ void *flow_cache_lookup(struct net *net, + } + + nocache: ++ put_cpu_var_locked(flow_tables, cpu); ++ + { + int err; + void *obj; +@@ -249,14 +253,15 @@ nocache: + static void flow_cache_flush_tasklet(unsigned long data) + { + struct flow_flush_info *info = (void *)data; ++ struct flow_cache_entry **table; + int i; + int cpu; + +- cpu = smp_processor_id(); ++ table = get_cpu_var_locked(flow_tables, &cpu); + for (i = 0; i < flow_hash_size; i++) { + struct flow_cache_entry *fle; + +- fle = flow_table(cpu)[i]; ++ fle = table[i]; + for (; fle; fle = fle->next) { + unsigned genid = atomic_read(&flow_cache_genid); + +@@ -267,6 +272,7 @@ static void flow_cache_flush_tasklet(uns + atomic_dec(fle->object_ref); + } + } ++ put_cpu_var_locked(flow_tables, cpu); + + if (atomic_dec_and_test(&info->cpuleft)) + complete(&info->completion); +Index: linux-2.6-tip/fs/nfs/iostat.h +=================================================================== +--- linux-2.6-tip.orig/fs/nfs/iostat.h ++++ linux-2.6-tip/fs/nfs/iostat.h +@@ -28,7 +28,7 @@ static inline void nfs_inc_server_stats( + cpu = get_cpu(); + iostats = per_cpu_ptr(server->io_stats, cpu); + iostats->events[stat]++; +- put_cpu_no_resched(); ++ put_cpu(); + } + + static inline void nfs_inc_stats(const struct inode *inode, +@@ -47,7 +47,7 @@ static inline void nfs_add_server_stats( + cpu = get_cpu(); + 
iostats = per_cpu_ptr(server->io_stats, cpu); + iostats->bytes[stat] += addend; +- put_cpu_no_resched(); ++ put_cpu(); + } + + static inline void nfs_add_stats(const struct inode *inode, +Index: linux-2.6-tip/drivers/net/loopback.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/loopback.c ++++ linux-2.6-tip/drivers/net/loopback.c +@@ -76,13 +76,13 @@ static int loopback_xmit(struct sk_buff + + skb->protocol = eth_type_trans(skb,dev); + +- /* it's OK to use per_cpu_ptr() because BHs are off */ + pcpu_lstats = dev->ml_priv; +- lb_stats = per_cpu_ptr(pcpu_lstats, smp_processor_id()); ++ lb_stats = per_cpu_ptr(pcpu_lstats, get_cpu()); + lb_stats->bytes += skb->len; + lb_stats->packets++; ++ put_cpu(); + +- netif_rx(skb); ++ netif_rx_ni(skb); + + return 0; + } +Index: linux-2.6-tip/include/asm-generic/cmpxchg-local.h +=================================================================== +--- linux-2.6-tip.orig/include/asm-generic/cmpxchg-local.h ++++ linux-2.6-tip/include/asm-generic/cmpxchg-local.h +@@ -20,7 +20,7 @@ static inline unsigned long __cmpxchg_lo + if (size == 8 && sizeof(unsigned long) != 8) + wrong_size_cmpxchg(ptr); + +- local_irq_save(flags); ++ raw_local_irq_save(flags); + switch (size) { + case 1: prev = *(u8 *)ptr; + if (prev == old) +@@ -41,7 +41,7 @@ static inline unsigned long __cmpxchg_lo + default: + wrong_size_cmpxchg(ptr); + } +- local_irq_restore(flags); ++ raw_local_irq_restore(flags); + return prev; + } + +@@ -54,11 +54,11 @@ static inline u64 __cmpxchg64_local_gene + u64 prev; + unsigned long flags; + +- local_irq_save(flags); ++ raw_local_irq_save(flags); + prev = *(u64 *)ptr; + if (prev == old) + *(u64 *)ptr = new; +- local_irq_restore(flags); ++ raw_local_irq_restore(flags); + return prev; + } + +Index: linux-2.6-tip/kernel/Kconfig.preempt +=================================================================== +--- linux-2.6-tip.orig/kernel/Kconfig.preempt ++++ linux-2.6-tip/kernel/Kconfig.preempt +@@ -1,14 +1,13 @@ +- + choice +- prompt "Preemption Model" +- default PREEMPT_NONE ++ prompt "Preemption Mode" ++ default PREEMPT_RT + + config PREEMPT_NONE + bool "No Forced Preemption (Server)" + help +- This is the traditional Linux preemption model, geared towards ++ This is the traditional Linux preemption model geared towards + throughput. It will still provide good latencies most of the +- time, but there are no guarantees and occasional longer delays ++ time but there are no guarantees and occasional long delays + are possible. + + Select this option if you are building a kernel for a server or +@@ -21,7 +20,7 @@ config PREEMPT_VOLUNTARY + help + This option reduces the latency of the kernel by adding more + "explicit preemption points" to the kernel code. These new +- preemption points have been selected to reduce the maximum ++ preemption points have been selected to minimize the maximum + latency of rescheduling, providing faster application reactions, + at the cost of slightly lower throughput. + +@@ -33,22 +32,91 @@ config PREEMPT_VOLUNTARY + + Select this if you are building a kernel for a desktop system. + +-config PREEMPT ++config PREEMPT_DESKTOP + bool "Preemptible Kernel (Low-Latency Desktop)" + help + This option reduces the latency of the kernel by making +- all kernel code (that is not executing in a critical section) ++ all kernel code that is not executing in a critical section + preemptible. 
This allows reaction to interactive events by + permitting a low priority process to be preempted involuntarily + even if it is in kernel mode executing a system call and would +- otherwise not be about to reach a natural preemption point. +- This allows applications to run more 'smoothly' even when the +- system is under load, at the cost of slightly lower throughput +- and a slight runtime overhead to kernel code. ++ otherwise not about to reach a preemption point. This allows ++ applications to run more 'smoothly' even when the system is ++ under load, at the cost of slighly lower throughput and a ++ slight runtime overhead to kernel code. ++ ++ (According to profiles, when this mode is selected then even ++ during kernel-intense workloads the system is in an immediately ++ preemptible state more than 50% of the time.) + + Select this if you are building a kernel for a desktop or + embedded system with latency requirements in the milliseconds + range. + ++config PREEMPT_RT ++ bool "Complete Preemption (Real-Time)" ++ select PREEMPT_SOFTIRQS ++ select PREEMPT_HARDIRQS ++ select PREEMPT_RCU ++ select RT_MUTEXES ++ help ++ This option further reduces the scheduling latency of the ++ kernel by replacing almost every spinlock used by the kernel ++ with preemptible mutexes and thus making all but the most ++ critical kernel code involuntarily preemptible. The remaining ++ handful of lowlevel non-preemptible codepaths are short and ++ have a deterministic latency of a couple of tens of ++ microseconds (depending on the hardware). This also allows ++ applications to run more 'smoothly' even when the system is ++ under load, at the cost of lower throughput and runtime ++ overhead to kernel code. ++ ++ (According to profiles, when this mode is selected then even ++ during kernel-intense workloads the system is in an immediately ++ preemptible state more than 95% of the time.) ++ ++ Select this if you are building a kernel for a desktop, ++ embedded or real-time system with guaranteed latency ++ requirements of 100 usecs or lower. ++ + endchoice + ++config PREEMPT ++ bool ++ default y ++ depends on PREEMPT_DESKTOP || PREEMPT_RT ++ ++config PREEMPT_SOFTIRQS ++ bool "Thread Softirqs" ++ default n ++# depends on PREEMPT ++ help ++ This option reduces the latency of the kernel by 'threading' ++ soft interrupts. This means that all softirqs will execute ++ in softirqd's context. While this helps latency, it can also ++ reduce performance. ++ ++ The threading of softirqs can also be controlled via ++ /proc/sys/kernel/softirq_preemption runtime flag and the ++ sofirq-preempt=0/1 boot-time option. ++ ++ Say N if you are unsure. ++ ++config PREEMPT_HARDIRQS ++ bool "Thread Hardirqs" ++ default n ++ depends on GENERIC_HARDIRQS_NO__DO_IRQ ++ select PREEMPT_SOFTIRQS ++ help ++ This option reduces the latency of the kernel by 'threading' ++ hardirqs. This means that all (or selected) hardirqs will run ++ in their own kernel thread context. While this helps latency, ++ this feature can also reduce performance. ++ ++ The threading of hardirqs can also be controlled via the ++ /proc/sys/kernel/hardirq_preemption runtime flag and the ++ hardirq-preempt=0/1 boot-time option. Per-irq threading can ++ be enabled/disable via the /proc/irq///threaded ++ runtime flags. ++ ++ Say N if you are unsure. 
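
A pattern that recurs in the networking and NFS hunks above (net/core/sock.c, fs/nfs/iostat.h, drivers/net/loopback.c) is replacing a bare smp_processor_id() or a put_cpu_no_resched() with a get_cpu()/put_cpu() pair, so that preemption stays disabled for exactly as long as the per-CPU data is in use; with the preemption models added here, a task could otherwise migrate between reading the CPU number and touching the per-CPU slot. The following is a minimal sketch of that pattern only; the my_stats structure and my_stats_inc() helper are invented for illustration and do not appear in the patch.

#include <linux/percpu.h>
#include <linux/smp.h>

/* Invented per-CPU counter used only to illustrate the pattern. */
struct my_stats {
	unsigned long packets;
};

static DEFINE_PER_CPU(struct my_stats, my_stats);

static void my_stats_inc(void)
{
	int cpu = get_cpu();			/* disables preemption */

	per_cpu(my_stats, cpu).packets++;	/* safe: we cannot migrate here */
	put_cpu();				/* re-enables preemption */
}

The net/core/flow.c hunk applies the same idea with the -rt-specific get_cpu_var_locked()/put_cpu_var_locked() helpers, which additionally take a per-CPU lock rather than relying on preemption being disabled.
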
+Index: linux-2.6-tip/kernel/irq/autoprobe.c +=================================================================== +--- linux-2.6-tip.orig/kernel/irq/autoprobe.c ++++ linux-2.6-tip/kernel/irq/autoprobe.c +@@ -7,6 +7,7 @@ + */ + + #include ++#include + #include + #include + #include +Index: linux-2.6-tip/include/linux/hrtimer.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/hrtimer.h ++++ linux-2.6-tip/include/linux/hrtimer.h +@@ -105,6 +105,7 @@ struct hrtimer { + struct hrtimer_clock_base *base; + unsigned long state; + struct list_head cb_entry; ++ int irqsafe; + #ifdef CONFIG_TIMER_STATS + int start_pid; + void *start_site; +@@ -140,6 +141,7 @@ struct hrtimer_clock_base { + struct hrtimer_cpu_base *cpu_base; + clockid_t index; + struct rb_root active; ++ struct list_head expired; + struct rb_node *first; + ktime_t resolution; + ktime_t (*get_time)(void); +@@ -166,13 +168,16 @@ struct hrtimer_clock_base { + * @nr_events: Total number of timer interrupt events + */ + struct hrtimer_cpu_base { +- spinlock_t lock; ++ raw_spinlock_t lock; + struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; + #ifdef CONFIG_HIGH_RES_TIMERS + ktime_t expires_next; + int hres_active; + unsigned long nr_events; + #endif ++#ifdef CONFIG_PREEMPT_SOFTIRQS ++ wait_queue_head_t wait; ++#endif + }; + + static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time) +@@ -360,6 +365,13 @@ static inline int hrtimer_restart(struct + return hrtimer_start_expires(timer, HRTIMER_MODE_ABS); + } + ++/* Softirq preemption could deadlock timer removal */ ++#ifdef CONFIG_PREEMPT_SOFTIRQS ++ extern void hrtimer_wait_for_timer(const struct hrtimer *timer); ++#else ++# define hrtimer_wait_for_timer(timer) do { cpu_relax(); } while (0) ++#endif ++ + /* Query timers: */ + extern ktime_t hrtimer_get_remaining(const struct hrtimer *timer); + extern int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp); +Index: linux-2.6-tip/kernel/hrtimer.c +=================================================================== +--- linux-2.6-tip.orig/kernel/hrtimer.c ++++ linux-2.6-tip/kernel/hrtimer.c +@@ -476,9 +476,9 @@ static inline int hrtimer_is_hres_enable + /* + * Is the high resolution mode active ? + */ +-static inline int hrtimer_hres_active(void) ++static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) + { +- return __get_cpu_var(hrtimer_bases).hres_active; ++ return cpu_base->hres_active; + } + + /* +@@ -538,8 +538,7 @@ static int hrtimer_reprogram(struct hrti + * When the callback is running, we do not reprogram the clock event + * device. The timer callback is either running on a different CPU or + * the callback is executed in the hrtimer_interrupt context. The +- * reprogramming is handled either by the softirq, which called the +- * callback or at the end of the hrtimer_interrupt. ++ * reprogramming is handled at the end of the hrtimer_interrupt. 
+ */ + if (hrtimer_callback_running(timer)) + return 0; +@@ -573,11 +572,12 @@ static int hrtimer_reprogram(struct hrti + */ + static void retrigger_next_event(void *arg) + { +- struct hrtimer_cpu_base *base; ++ struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases); ++ + struct timespec realtime_offset; + unsigned long seq; + +- if (!hrtimer_hres_active()) ++ if (!hrtimer_hres_active(base)) + return; + + do { +@@ -587,8 +587,6 @@ static void retrigger_next_event(void *a + -wall_to_monotonic.tv_nsec); + } while (read_seqretry(&xtime_lock, seq)); + +- base = &__get_cpu_var(hrtimer_bases); +- + /* Adjust CLOCK_REALTIME offset */ + spin_lock(&base->lock); + base->clock_base[CLOCK_REALTIME].offset = +@@ -643,6 +641,8 @@ static inline void hrtimer_init_timer_hr + { + } + ++static void __run_hrtimer(struct hrtimer *timer); ++static int hrtimer_rt_defer(struct hrtimer *timer); + + /* + * When High resolution timers are active, try to reprogram. Note, that in case +@@ -654,7 +654,27 @@ static inline int hrtimer_enqueue_reprog + struct hrtimer_clock_base *base, + int wakeup) + { ++#ifdef CONFIG_PREEMPT_RT ++again: ++#endif + if (base->cpu_base->hres_active && hrtimer_reprogram(timer, base)) { ++#ifdef CONFIG_PREEMPT_RT ++ /* ++ * Move softirq based timers away from the rbtree in ++ * case it expired already. Otherwise we would have a ++ * stale base->first entry until the softirq runs. ++ */ ++ if (!hrtimer_rt_defer(timer)) { ++ __run_hrtimer(timer); ++ /* ++ * __run_hrtimer might have requeued timer and ++ * it could be base->first again. ++ */ ++ if (base->first == &timer->node) ++ goto again; ++ return 1; ++ } ++#endif + if (wakeup) { + spin_unlock(&base->cpu_base->lock); + raise_softirq_irqoff(HRTIMER_SOFTIRQ); +@@ -671,10 +691,8 @@ static inline int hrtimer_enqueue_reprog + /* + * Switch to high resolution mode + */ +-static int hrtimer_switch_to_hres(void) ++static int hrtimer_switch_to_hres(struct hrtimer_cpu_base *base) + { +- int cpu = smp_processor_id(); +- struct hrtimer_cpu_base *base = &per_cpu(hrtimer_bases, cpu); + unsigned long flags; + + if (base->hres_active) +@@ -685,7 +703,7 @@ static int hrtimer_switch_to_hres(void) + if (tick_init_highres()) { + local_irq_restore(flags); + printk(KERN_WARNING "Could not switch to high resolution " +- "mode on CPU %d\n", cpu); ++ "mode on CPU %d\n", raw_smp_processor_id()); + return 0; + } + base->hres_active = 1; +@@ -697,16 +715,20 @@ static int hrtimer_switch_to_hres(void) + /* "Retrigger" the interrupt to get things going */ + retrigger_next_event(NULL); + local_irq_restore(flags); +- printk(KERN_DEBUG "Switched to high resolution mode on CPU %d\n", +- smp_processor_id()); + return 1; + } + + #else + +-static inline int hrtimer_hres_active(void) { return 0; } ++static inline int hrtimer_hres_active(struct hrtimer_cpu_base *base) ++{ ++ return 0; ++} + static inline int hrtimer_is_hres_enabled(void) { return 0; } +-static inline int hrtimer_switch_to_hres(void) { return 0; } ++static inline int hrtimer_switch_to_hres(struct hrtimer_cpu_base *base) ++{ ++ return 0; ++} + static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { } + static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, + struct hrtimer_clock_base *base, +@@ -714,6 +736,13 @@ static inline int hrtimer_enqueue_reprog + { + return 0; + } ++ ++static inline int hrtimer_reprogram(struct hrtimer *timer, ++ struct hrtimer_clock_base *base) ++{ ++ return 0; ++} ++ + static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } + static 
inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } + +@@ -836,6 +865,32 @@ static int enqueue_hrtimer(struct hrtime + return leftmost; + } + ++#ifdef CONFIG_PREEMPT_SOFTIRQS ++# define wake_up_timer_waiters(b) wake_up(&(b)->wait) ++ ++/** ++ * hrtimer_wait_for_timer - Wait for a running timer ++ * ++ * @timer: timer to wait for ++ * ++ * The function waits in case the timers callback function is ++ * currently executed on the waitqueue of the timer base. The ++ * waitqueue is woken up after the timer callback function has ++ * finished execution. ++ */ ++void hrtimer_wait_for_timer(const struct hrtimer *timer) ++{ ++ struct hrtimer_clock_base *base = timer->base; ++ ++ if (base && base->cpu_base && !timer->irqsafe) ++ wait_event(base->cpu_base->wait, ++ !(timer->state & HRTIMER_STATE_CALLBACK)); ++} ++ ++#else ++# define wake_up_timer_waiters(b) do { } while (0) ++#endif ++ + /* + * __remove_hrtimer - internal function to remove a timer + * +@@ -851,6 +906,11 @@ static void __remove_hrtimer(struct hrti + unsigned long newstate, int reprogram) + { + if (timer->state & HRTIMER_STATE_ENQUEUED) { ++ ++ if (unlikely(!list_empty(&timer->cb_entry))) { ++ list_del_init(&timer->cb_entry); ++ goto out; ++ } + /* + * Remove the timer from the rbtree and replace the + * first entry pointer if necessary. +@@ -858,11 +918,12 @@ static void __remove_hrtimer(struct hrti + if (base->first == &timer->node) { + base->first = rb_next(&timer->node); + /* Reprogram the clock event device. if enabled */ +- if (reprogram && hrtimer_hres_active()) ++ if (reprogram && hrtimer_hres_active(base->cpu_base)) + hrtimer_force_reprogram(base->cpu_base); + } + rb_erase(&timer->node, &base->active); + } ++out: + timer->state = newstate; + } + +@@ -1022,7 +1083,7 @@ int hrtimer_cancel(struct hrtimer *timer + + if (ret >= 0) + return ret; +- cpu_relax(); ++ hrtimer_wait_for_timer(timer); + } + } + EXPORT_SYMBOL_GPL(hrtimer_cancel); +@@ -1062,7 +1123,7 @@ ktime_t hrtimer_get_next_event(void) + + spin_lock_irqsave(&cpu_base->lock, flags); + +- if (!hrtimer_hres_active()) { ++ if (!hrtimer_hres_active(cpu_base)) { + for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++, base++) { + struct hrtimer *timer; + +@@ -1176,6 +1237,114 @@ static void __run_hrtimer(struct hrtimer + timer->state &= ~HRTIMER_STATE_CALLBACK; + } + ++#ifdef CONFIG_PREEMPT_RT ++ ++static void hrtimer_rt_reprogram(int restart, struct hrtimer *timer, ++ struct hrtimer_clock_base *base) ++{ ++ /* ++ * Note, we clear the callback flag before we requeue the ++ * timer otherwise we trigger the callback_running() check ++ * in hrtimer_reprogram(). ++ */ ++ timer->state &= ~HRTIMER_STATE_CALLBACK; ++ ++ if (restart != HRTIMER_NORESTART) { ++ BUG_ON(hrtimer_active(timer)); ++ /* ++ * Enqueue the timer, if it's the leftmost timer then ++ * we need to reprogram it. ++ */ ++ if (!enqueue_hrtimer(timer, base)) ++ return; ++ ++ if (hrtimer_reprogram(timer, base)) ++ goto requeue; ++ ++ } else if (hrtimer_active(timer)) { ++ /* ++ * If the timer was rearmed on another CPU, reprogram ++ * the event device. ++ */ ++ if (base->first == &timer->node && ++ hrtimer_reprogram(timer, base)) ++ goto requeue; ++ } ++ return; ++ ++requeue: ++ /* ++ * Timer is expired. Thus move it from tree to pending list ++ * again. ++ */ ++ __remove_hrtimer(timer, base, timer->state, 0); ++ list_add_tail(&timer->cb_entry, &base->expired); ++} ++ ++/* ++ * The changes in mainline which removed the callback modes from ++ * hrtimer are not yet working with -rt. 
The non wakeup_process() ++ * based callbacks which involve sleeping locks need to be treated ++ * seperately. ++ */ ++static void hrtimer_rt_run_pending(void) ++{ ++ enum hrtimer_restart (*fn)(struct hrtimer *); ++ struct hrtimer_cpu_base *cpu_base; ++ struct hrtimer_clock_base *base; ++ struct hrtimer *timer; ++ int index, restart; ++ ++ local_irq_disable(); ++ cpu_base = &per_cpu(hrtimer_bases, smp_processor_id()); ++ ++ spin_lock(&cpu_base->lock); ++ ++ for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { ++ base = &cpu_base->clock_base[index]; ++ ++ while (!list_empty(&base->expired)) { ++ timer = list_first_entry(&base->expired, ++ struct hrtimer, cb_entry); ++ ++ /* ++ * Same as the above __run_hrtimer function ++ * just we run with interrupts enabled. ++ */ ++ debug_hrtimer_deactivate(timer); ++ __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); ++ timer_stats_account_hrtimer(timer); ++ fn = timer->function; ++ ++ spin_unlock_irq(&cpu_base->lock); ++ restart = fn(timer); ++ spin_lock_irq(&cpu_base->lock); ++ ++ hrtimer_rt_reprogram(restart, timer, base); ++ } ++ } ++ spin_unlock_irq(&cpu_base->lock); ++ ++ wake_up_timer_waiters(cpu_base); ++} ++ ++static int hrtimer_rt_defer(struct hrtimer *timer) ++{ ++ if (timer->irqsafe) ++ return 0; ++ ++ __remove_hrtimer(timer, timer->base, timer->state, 0); ++ list_add_tail(&timer->cb_entry, &timer->base->expired); ++ return 1; ++} ++ ++#else ++ ++static inline void hrtimer_rt_run_pending(void) { } ++static inline int hrtimer_rt_defer(struct hrtimer *timer) { return 0; } ++ ++#endif ++ + #ifdef CONFIG_HIGH_RES_TIMERS + + static int force_clock_reprogram; +@@ -1211,7 +1380,7 @@ void hrtimer_interrupt(struct clock_even + struct hrtimer_clock_base *base; + ktime_t expires_next, now; + int nr_retries = 0; +- int i; ++ int i, raise = 0; + + BUG_ON(!cpu_base->hres_active); + cpu_base->nr_events++; +@@ -1264,7 +1433,10 @@ void hrtimer_interrupt(struct clock_even + break; + } + +- __run_hrtimer(timer); ++ if (!hrtimer_rt_defer(timer)) ++ __run_hrtimer(timer); ++ else ++ raise = 1; + } + spin_unlock(&cpu_base->lock); + base++; +@@ -1277,6 +1449,9 @@ void hrtimer_interrupt(struct clock_even + if (tick_program_event(expires_next, force_clock_reprogram)) + goto retry; + } ++ ++ if (raise) ++ raise_softirq_irqoff(HRTIMER_SOFTIRQ); + } + + /* +@@ -1285,9 +1460,11 @@ void hrtimer_interrupt(struct clock_even + */ + static void __hrtimer_peek_ahead_timers(void) + { ++ struct hrtimer_cpu_base *cpu_base; + struct tick_device *td; + +- if (!hrtimer_hres_active()) ++ cpu_base = &__get_cpu_var(hrtimer_bases); ++ if (!hrtimer_hres_active(cpu_base)) + return; + + td = &__get_cpu_var(tick_cpu_device); +@@ -1313,17 +1490,17 @@ void hrtimer_peek_ahead_timers(void) + local_irq_restore(flags); + } + +-static void run_hrtimer_softirq(struct softirq_action *h) +-{ +- hrtimer_peek_ahead_timers(); +-} +- + #else /* CONFIG_HIGH_RES_TIMERS */ + + static inline void __hrtimer_peek_ahead_timers(void) { } + + #endif /* !CONFIG_HIGH_RES_TIMERS */ + ++static void run_hrtimer_softirq(struct softirq_action *h) ++{ ++ hrtimer_rt_run_pending(); ++} ++ + /* + * Called from timer softirq every jiffy, expire hrtimers: + * +@@ -1333,7 +1510,9 @@ static inline void __hrtimer_peek_ahead_ + */ + void hrtimer_run_pending(void) + { +- if (hrtimer_hres_active()) ++ struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); ++ ++ if (hrtimer_hres_active(cpu_base)) + return; + + /* +@@ -1345,7 +1524,7 @@ void hrtimer_run_pending(void) + * deadlock vs. xtime_lock. 
+ */ + if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) +- hrtimer_switch_to_hres(); ++ hrtimer_switch_to_hres(cpu_base); + } + + /* +@@ -1354,11 +1533,12 @@ void hrtimer_run_pending(void) + void hrtimer_run_queues(void) + { + struct rb_node *node; +- struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); ++ struct hrtimer_cpu_base *cpu_base; + struct hrtimer_clock_base *base; +- int index, gettime = 1; ++ int index, gettime = 1, raise = 0; + +- if (hrtimer_hres_active()) ++ cpu_base = &per_cpu(hrtimer_bases, raw_smp_processor_id()); ++ if (hrtimer_hres_active(cpu_base)) + return; + + for (index = 0; index < HRTIMER_MAX_CLOCK_BASES; index++) { +@@ -1382,10 +1562,16 @@ void hrtimer_run_queues(void) + hrtimer_get_expires_tv64(timer)) + break; + +- __run_hrtimer(timer); ++ if (!hrtimer_rt_defer(timer)) ++ __run_hrtimer(timer); ++ else ++ raise = 1; + } + spin_unlock(&cpu_base->lock); + } ++ ++ if (raise) ++ raise_softirq_irqoff(HRTIMER_SOFTIRQ); + } + + /* +@@ -1407,6 +1593,7 @@ static enum hrtimer_restart hrtimer_wake + void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task) + { + sl->timer.function = hrtimer_wakeup; ++ sl->timer.irqsafe = 1; + sl->task = task; + } + +@@ -1541,10 +1728,15 @@ static void __cpuinit init_hrtimers_cpu( + + spin_lock_init(&cpu_base->lock); + +- for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) ++ for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { + cpu_base->clock_base[i].cpu_base = cpu_base; ++ INIT_LIST_HEAD(&cpu_base->clock_base[i].expired); ++ } + + hrtimer_init_hres(cpu_base); ++#ifdef CONFIG_PREEMPT_RT ++ init_waitqueue_head(&cpu_base->wait); ++#endif + } + + #ifdef CONFIG_HOTPLUG_CPU +@@ -1657,9 +1849,7 @@ void __init hrtimers_init(void) + hrtimer_cpu_notify(&hrtimers_nb, (unsigned long)CPU_UP_PREPARE, + (void *)(long)smp_processor_id()); + register_cpu_notifier(&hrtimers_nb); +-#ifdef CONFIG_HIGH_RES_TIMERS + open_softirq(HRTIMER_SOFTIRQ, run_hrtimer_softirq); +-#endif + } + + /** +Index: linux-2.6-tip/kernel/itimer.c +=================================================================== +--- linux-2.6-tip.orig/kernel/itimer.c ++++ linux-2.6-tip/kernel/itimer.c +@@ -161,6 +161,7 @@ again: + /* We are sharing ->siglock with it_real_fn() */ + if (hrtimer_try_to_cancel(timer) < 0) { + spin_unlock_irq(&tsk->sighand->siglock); ++ hrtimer_wait_for_timer(&tsk->signal->real_timer); + goto again; + } + expires = timeval_to_ktime(value->it_value); +Index: linux-2.6-tip/include/linux/bottom_half.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/bottom_half.h ++++ linux-2.6-tip/include/linux/bottom_half.h +@@ -1,9 +1,17 @@ + #ifndef _LINUX_BH_H + #define _LINUX_BH_H + ++#ifdef CONFIG_PREEMPT_HARDIRQS ++# define local_bh_disable() do { } while (0) ++# define __local_bh_disable(ip) do { } while (0) ++# define _local_bh_enable() do { } while (0) ++# define local_bh_enable() do { } while (0) ++# define local_bh_enable_ip(ip) do { } while (0) ++#else + extern void local_bh_disable(void); + extern void _local_bh_enable(void); + extern void local_bh_enable(void); + extern void local_bh_enable_ip(unsigned long ip); ++#endif + + #endif /* _LINUX_BH_H */ +Index: linux-2.6-tip/include/linux/preempt.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/preempt.h ++++ linux-2.6-tip/include/linux/preempt.h +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + + #if defined(CONFIG_DEBUG_PREEMPT) || 
defined(CONFIG_PREEMPT_TRACER) + extern void add_preempt_count(int val); +@@ -21,11 +22,12 @@ + #define inc_preempt_count() add_preempt_count(1) + #define dec_preempt_count() sub_preempt_count(1) + +-#define preempt_count() (current_thread_info()->preempt_count) ++#define preempt_count() (current_thread_info()->preempt_count) + + #ifdef CONFIG_PREEMPT + + asmlinkage void preempt_schedule(void); ++asmlinkage void preempt_schedule_irq(void); + + #define preempt_disable() \ + do { \ +@@ -33,12 +35,19 @@ do { \ + barrier(); \ + } while (0) + +-#define preempt_enable_no_resched() \ ++#define __preempt_enable_no_resched() \ + do { \ + barrier(); \ + dec_preempt_count(); \ + } while (0) + ++ ++#ifdef CONFIG_DEBUG_PREEMPT ++extern void notrace preempt_enable_no_resched(void); ++#else ++# define preempt_enable_no_resched() __preempt_enable_no_resched() ++#endif ++ + #define preempt_check_resched() \ + do { \ + if (unlikely(test_thread_flag(TIF_NEED_RESCHED))) \ +@@ -47,7 +56,7 @@ do { \ + + #define preempt_enable() \ + do { \ +- preempt_enable_no_resched(); \ ++ __preempt_enable_no_resched(); \ + barrier(); \ + preempt_check_resched(); \ + } while (0) +@@ -84,6 +93,7 @@ do { \ + + #define preempt_disable() do { } while (0) + #define preempt_enable_no_resched() do { } while (0) ++#define __preempt_enable_no_resched() do { } while (0) + #define preempt_enable() do { } while (0) + #define preempt_check_resched() do { } while (0) + +@@ -91,6 +101,8 @@ do { \ + #define preempt_enable_no_resched_notrace() do { } while (0) + #define preempt_enable_notrace() do { } while (0) + ++#define preempt_schedule_irq() do { } while (0) ++ + #endif + + #ifdef CONFIG_PREEMPT_NOTIFIERS +Index: linux-2.6-tip/net/ipv4/tcp.c +=================================================================== +--- linux-2.6-tip.orig/net/ipv4/tcp.c ++++ linux-2.6-tip/net/ipv4/tcp.c +@@ -1323,11 +1323,11 @@ int tcp_recvmsg(struct kiocb *iocb, stru + (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && + !sysctl_tcp_low_latency && + dma_find_channel(DMA_MEMCPY)) { +- preempt_enable_no_resched(); ++ preempt_enable(); + tp->ucopy.pinned_list = + dma_pin_iovec_pages(msg->msg_iov, len); + } else { +- preempt_enable_no_resched(); ++ preempt_enable(); + } + } + #endif +Index: linux-2.6-tip/arch/x86/include/asm/rwsem.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/rwsem.h ++++ linux-2.6-tip/arch/x86/include/asm/rwsem.h +@@ -44,14 +44,14 @@ + + struct rwsem_waiter; + +-extern asmregparm struct rw_semaphore * +- rwsem_down_read_failed(struct rw_semaphore *sem); +-extern asmregparm struct rw_semaphore * +- rwsem_down_write_failed(struct rw_semaphore *sem); +-extern asmregparm struct rw_semaphore * +- rwsem_wake(struct rw_semaphore *); +-extern asmregparm struct rw_semaphore * +- rwsem_downgrade_wake(struct rw_semaphore *sem); ++extern asmregparm struct compat_rw_semaphore * ++ rwsem_down_read_failed(struct compat_rw_semaphore *sem); ++extern asmregparm struct compat_rw_semaphore * ++ rwsem_down_write_failed(struct compat_rw_semaphore *sem); ++extern asmregparm struct compat_rw_semaphore * ++ rwsem_wake(struct compat_rw_semaphore *); ++extern asmregparm struct compat_rw_semaphore * ++ rwsem_downgrade_wake(struct compat_rw_semaphore *sem); + + /* + * the semaphore definition +@@ -64,7 +64,7 @@ extern asmregparm struct rw_semaphore * + #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS + #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) + +-struct rw_semaphore 
{ ++struct compat_rw_semaphore { + signed long count; + spinlock_t wait_lock; + struct list_head wait_list; +@@ -86,23 +86,23 @@ struct rw_semaphore { + LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) \ + } + +-#define DECLARE_RWSEM(name) \ +- struct rw_semaphore name = __RWSEM_INITIALIZER(name) ++#define COMPAT_DECLARE_RWSEM(name) \ ++ struct compat_rw_semaphore name = __RWSEM_INITIALIZER(name) + +-extern void __init_rwsem(struct rw_semaphore *sem, const char *name, ++extern void __compat_init_rwsem(struct compat_rw_semaphore *sem, const char *name, + struct lock_class_key *key); + +-#define init_rwsem(sem) \ ++#define compat_init_rwsem(sem) \ + do { \ + static struct lock_class_key __key; \ + \ +- __init_rwsem((sem), #sem, &__key); \ ++ __compat_init_rwsem((sem), #sem, &__key); \ + } while (0) + + /* + * lock for reading + */ +-static inline void __down_read(struct rw_semaphore *sem) ++static inline void __down_read(struct compat_rw_semaphore *sem) + { + asm volatile("# beginning down_read\n\t" + LOCK_PREFIX " incl (%%eax)\n\t" +@@ -119,7 +119,7 @@ static inline void __down_read(struct rw + /* + * trylock for reading -- returns 1 if successful, 0 if contention + */ +-static inline int __down_read_trylock(struct rw_semaphore *sem) ++static inline int __down_read_trylock(struct compat_rw_semaphore *sem) + { + __s32 result, tmp; + asm volatile("# beginning __down_read_trylock\n\t" +@@ -141,7 +141,8 @@ static inline int __down_read_trylock(st + /* + * lock for writing + */ +-static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) ++static inline void ++__down_write_nested(struct compat_rw_semaphore *sem, int subclass) + { + int tmp; + +@@ -160,7 +161,7 @@ static inline void __down_write_nested(s + : "memory", "cc"); + } + +-static inline void __down_write(struct rw_semaphore *sem) ++static inline void __down_write(struct compat_rw_semaphore *sem) + { + __down_write_nested(sem, 0); + } +@@ -168,7 +169,7 @@ static inline void __down_write(struct r + /* + * trylock for writing -- returns 1 if successful, 0 if contention + */ +-static inline int __down_write_trylock(struct rw_semaphore *sem) ++static inline int __down_write_trylock(struct compat_rw_semaphore *sem) + { + signed long ret = cmpxchg(&sem->count, + RWSEM_UNLOCKED_VALUE, +@@ -181,7 +182,7 @@ static inline int __down_write_trylock(s + /* + * unlock after reading + */ +-static inline void __up_read(struct rw_semaphore *sem) ++static inline void __up_read(struct compat_rw_semaphore *sem) + { + __s32 tmp = -RWSEM_ACTIVE_READ_BIAS; + asm volatile("# beginning __up_read\n\t" +@@ -199,7 +200,7 @@ static inline void __up_read(struct rw_s + /* + * unlock after writing + */ +-static inline void __up_write(struct rw_semaphore *sem) ++static inline void __up_write(struct compat_rw_semaphore *sem) + { + asm volatile("# beginning __up_write\n\t" + " movl %2,%%edx\n\t" +@@ -218,7 +219,7 @@ static inline void __up_write(struct rw_ + /* + * downgrade write lock to read lock + */ +-static inline void __downgrade_write(struct rw_semaphore *sem) ++static inline void __downgrade_write(struct compat_rw_semaphore *sem) + { + asm volatile("# beginning __downgrade_write\n\t" + LOCK_PREFIX " addl %2,(%%eax)\n\t" +@@ -235,7 +236,7 @@ static inline void __downgrade_write(str + /* + * implement atomic add functionality + */ +-static inline void rwsem_atomic_add(int delta, struct rw_semaphore *sem) ++static inline void rwsem_atomic_add(int delta, struct compat_rw_semaphore *sem) + { + asm volatile(LOCK_PREFIX "addl %1,%0" + : 
"+m" (sem->count) +@@ -245,7 +246,7 @@ static inline void rwsem_atomic_add(int + /* + * implement exchange and add functionality + */ +-static inline int rwsem_atomic_update(int delta, struct rw_semaphore *sem) ++static inline int rwsem_atomic_update(int delta, struct compat_rw_semaphore *sem) + { + int tmp = delta; + +@@ -256,7 +257,7 @@ static inline int rwsem_atomic_update(in + return tmp + delta; + } + +-static inline int rwsem_is_locked(struct rw_semaphore *sem) ++static inline int compat_rwsem_is_locked(struct compat_rw_semaphore *sem) + { + return (sem->count != 0); + } +Index: linux-2.6-tip/arch/x86/include/asm/spinlock_types.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/spinlock_types.h ++++ linux-2.6-tip/arch/x86/include/asm/spinlock_types.h +@@ -7,13 +7,13 @@ + + typedef struct raw_spinlock { + unsigned int slock; +-} raw_spinlock_t; ++} __raw_spinlock_t; + + #define __RAW_SPIN_LOCK_UNLOCKED { 0 } + + typedef struct { + unsigned int lock; +-} raw_rwlock_t; ++} __raw_rwlock_t; + + #define __RAW_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } + +Index: linux-2.6-tip/arch/x86/kernel/vsyscall_64.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/vsyscall_64.c ++++ linux-2.6-tip/arch/x86/kernel/vsyscall_64.c +@@ -59,7 +59,7 @@ int __vgetcpu_mode __section_vgetcpu_mod + + struct vsyscall_gtod_data __vsyscall_gtod_data __section_vsyscall_gtod_data = + { +- .lock = SEQLOCK_UNLOCKED, ++ .lock = __RAW_SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock), + .sysctl_enabled = 1, + }; + +@@ -78,14 +78,40 @@ void update_vsyscall(struct timespec *wa + unsigned long flags; + + write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); ++ ++ if (likely(vsyscall_gtod_data.sysctl_enabled == 2)) { ++ struct timespec tmp = *(wall_time); ++ cycle_t (*vread)(void); ++ cycle_t now; ++ ++ vread = vsyscall_gtod_data.clock.vread; ++ if (likely(vread)) ++ now = vread(); ++ else ++ now = clock->read(); ++ ++ /* calculate interval: */ ++ now = (now - clock->cycle_last) & clock->mask; ++ /* convert to nsecs: */ ++ tmp.tv_nsec += ( now * clock->mult) >> clock->shift; ++ ++ while (tmp.tv_nsec >= NSEC_PER_SEC) { ++ tmp.tv_sec += 1; ++ tmp.tv_nsec -= NSEC_PER_SEC; ++ } ++ ++ vsyscall_gtod_data.wall_time_sec = tmp.tv_sec; ++ vsyscall_gtod_data.wall_time_nsec = tmp.tv_nsec; ++ } else { ++ vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; ++ vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; ++ } + /* copy vsyscall data */ + vsyscall_gtod_data.clock.vread = clock->vread; + vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; + vsyscall_gtod_data.clock.mask = clock->mask; + vsyscall_gtod_data.clock.mult = clock->mult; + vsyscall_gtod_data.clock.shift = clock->shift; +- vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; +- vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; + vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; + write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); + } +@@ -123,6 +149,26 @@ static __always_inline void do_vgettimeo + unsigned seq; + unsigned long mult, shift, nsec; + cycle_t (*vread)(void); ++ ++ if (likely(__vsyscall_gtod_data.sysctl_enabled == 2)) { ++ struct timeval tmp; ++ ++ do { ++ barrier(); ++ tv->tv_sec = __vsyscall_gtod_data.wall_time_sec; ++ tv->tv_usec = __vsyscall_gtod_data.wall_time_nsec; ++ barrier(); ++ tmp.tv_sec = __vsyscall_gtod_data.wall_time_sec; ++ tmp.tv_usec = __vsyscall_gtod_data.wall_time_nsec; ++ ++ } while (tmp.tv_usec != 
tv->tv_usec || ++ tmp.tv_sec != tv->tv_sec); ++ ++ tv->tv_usec /= NSEC_PER_MSEC; ++ tv->tv_usec *= USEC_PER_MSEC; ++ return; ++ } ++ + do { + seq = read_seqbegin(&__vsyscall_gtod_data.lock); + +@@ -138,7 +184,6 @@ static __always_inline void do_vgettimeo + * does not cause time warps: + */ + rdtsc_barrier(); +- now = vread(); + rdtsc_barrier(); + + base = __vsyscall_gtod_data.clock.cycle_last; +@@ -150,6 +195,7 @@ static __always_inline void do_vgettimeo + nsec = __vsyscall_gtod_data.wall_time_nsec; + } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); + ++ now = vread(); + /* calculate interval: */ + cycle_delta = (now - base) & mask; + /* convert to nsecs: */ +Index: linux-2.6-tip/drivers/input/ff-memless.c +=================================================================== +--- linux-2.6-tip.orig/drivers/input/ff-memless.c ++++ linux-2.6-tip/drivers/input/ff-memless.c +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + #include + #include + +Index: linux-2.6-tip/fs/proc/array.c +=================================================================== +--- linux-2.6-tip.orig/fs/proc/array.c ++++ linux-2.6-tip/fs/proc/array.c +@@ -134,12 +134,13 @@ static inline void task_name(struct seq_ + */ + static const char *task_state_array[] = { + "R (running)", /* 0 */ +- "S (sleeping)", /* 1 */ +- "D (disk sleep)", /* 2 */ +- "T (stopped)", /* 4 */ +- "T (tracing stop)", /* 8 */ +- "Z (zombie)", /* 16 */ +- "X (dead)" /* 32 */ ++ "M (running-mutex)", /* 1 */ ++ "S (sleeping)", /* 2 */ ++ "D (disk sleep)", /* 4 */ ++ "T (stopped)", /* 8 */ ++ "T (tracing stop)", /* 16 */ ++ "Z (zombie)", /* 32 */ ++ "X (dead)" /* 64 */ + }; + + static inline const char *get_task_state(struct task_struct *tsk) +@@ -321,6 +322,19 @@ static inline void task_context_switch_c + p->nivcsw); + } + ++#define get_blocked_on(t) (-1) ++ ++static inline void show_blocked_on(struct seq_file *m, struct task_struct *p) ++{ ++ pid_t pid = get_blocked_on(p); ++ ++ if (pid < 0) ++ return; ++ ++ seq_printf(m, "BlckOn: %d\n", pid); ++} ++ ++ + int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, + struct pid *pid, struct task_struct *task) + { +@@ -340,6 +354,7 @@ int proc_pid_status(struct seq_file *m, + task_show_regs(m, task); + #endif + task_context_switch_counts(m, task); ++ show_blocked_on(m, task); + return 0; + } + +Index: linux-2.6-tip/include/linux/bit_spinlock.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/bit_spinlock.h ++++ linux-2.6-tip/include/linux/bit_spinlock.h +@@ -1,6 +1,8 @@ + #ifndef __LINUX_BIT_SPINLOCK_H + #define __LINUX_BIT_SPINLOCK_H + ++#if 0 ++ + /* + * bit-based spin_lock() + * +@@ -91,5 +93,7 @@ static inline int bit_spin_is_locked(int + #endif + } + ++#endif ++ + #endif /* __LINUX_BIT_SPINLOCK_H */ + +Index: linux-2.6-tip/include/linux/pickop.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/linux/pickop.h +@@ -0,0 +1,32 @@ ++#ifndef _LINUX_PICKOP_H ++#define _LINUX_PICKOP_H ++ ++#undef PICK_TYPE_EQUAL ++#define PICK_TYPE_EQUAL(var, type) \ ++ __builtin_types_compatible_p(typeof(var), type) ++ ++extern int __bad_func_type(void); ++ ++#define PICK_FUNCTION(type1, type2, func1, func2, arg0, ...) 
\ ++do { \ ++ if (PICK_TYPE_EQUAL((arg0), type1)) \ ++ func1((type1)(arg0), ##__VA_ARGS__); \ ++ else if (PICK_TYPE_EQUAL((arg0), type2)) \ ++ func2((type2)(arg0), ##__VA_ARGS__); \ ++ else __bad_func_type(); \ ++} while (0) ++ ++#define PICK_FUNCTION_RET(type1, type2, func1, func2, arg0, ...) \ ++({ \ ++ unsigned long __ret; \ ++ \ ++ if (PICK_TYPE_EQUAL((arg0), type1)) \ ++ __ret = func1((type1)(arg0), ##__VA_ARGS__); \ ++ else if (PICK_TYPE_EQUAL((arg0), type2)) \ ++ __ret = func2((type2)(arg0), ##__VA_ARGS__); \ ++ else __ret = __bad_func_type(); \ ++ \ ++ __ret; \ ++}) ++ ++#endif /* _LINUX_PICKOP_H */ +Index: linux-2.6-tip/include/linux/rt_lock.h +=================================================================== +--- /dev/null ++++ linux-2.6-tip/include/linux/rt_lock.h +@@ -0,0 +1,274 @@ ++#ifndef __LINUX_RT_LOCK_H ++#define __LINUX_RT_LOCK_H ++ ++/* ++ * Real-Time Preemption Support ++ * ++ * started by Ingo Molnar: ++ * ++ * Copyright (C) 2004, 2005 Red Hat, Inc., Ingo Molnar ++ * ++ * This file contains the main data structure definitions. ++ */ ++#include ++#include ++#include ++ ++#ifdef CONFIG_PREEMPT_RT ++# define preempt_rt 1 ++/* ++ * spinlocks - an RT mutex plus lock-break field: ++ */ ++typedef struct { ++ struct rt_mutex lock; ++ unsigned int break_lock; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++} spinlock_t; ++ ++#ifdef CONFIG_DEBUG_RT_MUTEXES ++# define __RT_SPIN_INITIALIZER(name) \ ++ { .wait_lock = _RAW_SPIN_LOCK_UNLOCKED(name.wait_lock), \ ++ .save_state = 1, \ ++ .file = __FILE__, \ ++ .line = __LINE__, } ++#else ++# define __RT_SPIN_INITIALIZER(name) \ ++ { .wait_lock = _RAW_SPIN_LOCK_UNLOCKED(name.wait_lock) } ++#endif ++ ++#define __SPIN_LOCK_UNLOCKED(name) (spinlock_t) \ ++ { .lock = __RT_SPIN_INITIALIZER(name), \ ++ SPIN_DEP_MAP_INIT(name) } ++ ++#else /* !PREEMPT_RT */ ++ ++typedef raw_spinlock_t spinlock_t; ++ ++#define __SPIN_LOCK_UNLOCKED _RAW_SPIN_LOCK_UNLOCKED ++ ++#endif ++ ++#define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(spin_old_style) ++ ++ ++#define __DEFINE_SPINLOCK(name) \ ++ spinlock_t name = __SPIN_LOCK_UNLOCKED(name) ++ ++#define DEFINE_SPINLOCK(name) \ ++ spinlock_t name __cacheline_aligned_in_smp = __SPIN_LOCK_UNLOCKED(name) ++ ++#ifdef CONFIG_PREEMPT_RT ++ ++/* ++ * RW-semaphores are a spinlock plus a reader-depth count. ++ * ++ * Note that the semantics are different from the usual ++ * Linux rw-sems, in PREEMPT_RT mode we do not allow ++ * multiple readers to hold the lock at once, we only allow ++ * a read-lock owner to read-lock recursively. 
This is ++ * better for latency, makes the implementation inherently ++ * fair and makes it simpler as well: ++ */ ++struct rw_semaphore { ++ struct rt_mutex lock; ++ int read_depth; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++}; ++ ++/* ++ * rwlocks - an RW semaphore plus lock-break field: ++ */ ++typedef struct { ++ struct rt_mutex lock; ++ int read_depth; ++ unsigned int break_lock; ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ struct lockdep_map dep_map; ++#endif ++} rwlock_t; ++ ++#define __RW_LOCK_UNLOCKED(name) (rwlock_t) \ ++ { .lock = __RT_SPIN_INITIALIZER(name), \ ++ RW_DEP_MAP_INIT(name) } ++#else /* !PREEMPT_RT */ ++ ++typedef raw_rwlock_t rwlock_t; ++ ++#define __RW_LOCK_UNLOCKED _RAW_RW_LOCK_UNLOCKED ++ ++#endif ++ ++#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(rw_old_style) ++ ++ ++#define DEFINE_RWLOCK(name) \ ++ rwlock_t name __cacheline_aligned_in_smp = __RW_LOCK_UNLOCKED(name) ++ ++#ifdef CONFIG_PREEMPT_RT ++ ++/* ++ * Semaphores - a spinlock plus the semaphore count: ++ */ ++struct semaphore { ++ atomic_t count; ++ struct rt_mutex lock; ++}; ++ ++#define DECLARE_MUTEX(name) \ ++struct semaphore name = \ ++ { .count = { 1 }, .lock = __RT_MUTEX_INITIALIZER(name.lock) } ++ ++extern void ++__sema_init(struct semaphore *sem, int val, char *name, char *file, int line); ++ ++#define rt_sema_init(sem, val) \ ++ __sema_init(sem, val, #sem, __FILE__, __LINE__) ++ ++extern void ++__init_MUTEX(struct semaphore *sem, char *name, char *file, int line); ++#define rt_init_MUTEX(sem) \ ++ __init_MUTEX(sem, #sem, __FILE__, __LINE__) ++ ++extern void there_is_no_init_MUTEX_LOCKED_for_RT_semaphores(void); ++ ++/* ++ * No locked initialization for RT semaphores ++ */ ++#define rt_init_MUTEX_LOCKED(sem) \ ++ there_is_no_init_MUTEX_LOCKED_for_RT_semaphores() ++extern void rt_down(struct semaphore *sem); ++extern int rt_down_interruptible(struct semaphore *sem); ++extern int rt_down_timeout(struct semaphore *sem, long jiffies); ++extern int rt_down_trylock(struct semaphore *sem); ++extern void rt_up(struct semaphore *sem); ++ ++#define rt_sem_is_locked(s) rt_mutex_is_locked(&(s)->lock) ++#define rt_sema_count(s) atomic_read(&(s)->count) ++ ++extern int __bad_func_type(void); ++ ++#include ++ ++/* ++ * PICK_SEM_OP() is a small redirector to allow less typing of the lock ++ * types struct compat_semaphore, struct semaphore, at the front of the ++ * PICK_FUNCTION macro. ++ */ ++#define PICK_SEM_OP(...) PICK_FUNCTION(struct compat_semaphore *, \ ++ struct semaphore *, ##__VA_ARGS__) ++#define PICK_SEM_OP_RET(...) 
PICK_FUNCTION_RET(struct compat_semaphore *,\ ++ struct semaphore *, ##__VA_ARGS__) ++ ++#define sema_init(sem, val) \ ++ PICK_SEM_OP(compat_sema_init, rt_sema_init, sem, val) ++ ++#define init_MUTEX(sem) PICK_SEM_OP(compat_init_MUTEX, rt_init_MUTEX, sem) ++ ++#define init_MUTEX_LOCKED(sem) \ ++ PICK_SEM_OP(compat_init_MUTEX_LOCKED, rt_init_MUTEX_LOCKED, sem) ++ ++#define down(sem) PICK_SEM_OP(compat_down, rt_down, sem) ++ ++#define down_timeout(sem, jiff) \ ++ PICK_SEM_OP_RET(compat_down_timeout, rt_down_timeout, sem, jiff) ++ ++#define down_interruptible(sem) \ ++ PICK_SEM_OP_RET(compat_down_interruptible, rt_down_interruptible, sem) ++ ++#define down_trylock(sem) \ ++ PICK_SEM_OP_RET(compat_down_trylock, rt_down_trylock, sem) ++ ++#define up(sem) PICK_SEM_OP(compat_up, rt_up, sem) ++ ++/* ++ * rwsems: ++ */ ++ ++#define __RWSEM_INITIALIZER(name) \ ++ { .lock = __RT_MUTEX_INITIALIZER(name.lock), \ ++ RW_DEP_MAP_INIT(name) } ++ ++#define DECLARE_RWSEM(lockname) \ ++ struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname) ++ ++extern void __rt_rwsem_init(struct rw_semaphore *rwsem, char *name, ++ struct lock_class_key *key); ++ ++# define rt_init_rwsem(sem) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ __rt_rwsem_init((sem), #sem, &__key); \ ++} while (0) ++ ++extern void __dont_do_this_in_rt(struct rw_semaphore *rwsem); ++ ++#define rt_down_read_non_owner(rwsem) __dont_do_this_in_rt(rwsem) ++#define rt_up_read_non_owner(rwsem) __dont_do_this_in_rt(rwsem) ++ ++extern void rt_down_write(struct rw_semaphore *rwsem); ++extern void ++rt_down_read_nested(struct rw_semaphore *rwsem, int subclass); ++extern void ++rt_down_write_nested(struct rw_semaphore *rwsem, int subclass); ++extern void rt_down_read(struct rw_semaphore *rwsem); ++extern int rt_down_write_trylock(struct rw_semaphore *rwsem); ++extern int rt_down_read_trylock(struct rw_semaphore *rwsem); ++extern void rt_up_read(struct rw_semaphore *rwsem); ++extern void rt_up_write(struct rw_semaphore *rwsem); ++extern void rt_downgrade_write(struct rw_semaphore *rwsem); ++ ++# define rt_rwsem_is_locked(rws) (rt_mutex_is_locked(&(rws)->lock)) ++ ++#define PICK_RWSEM_OP(...) PICK_FUNCTION(struct compat_rw_semaphore *, \ ++ struct rw_semaphore *, ##__VA_ARGS__) ++#define PICK_RWSEM_OP_RET(...) 
PICK_FUNCTION_RET(struct compat_rw_semaphore *,\ ++ struct rw_semaphore *, ##__VA_ARGS__) ++ ++#define init_rwsem(rwsem) PICK_RWSEM_OP(compat_init_rwsem, rt_init_rwsem, rwsem) ++ ++#define down_read(rwsem) PICK_RWSEM_OP(compat_down_read, rt_down_read, rwsem) ++ ++#define down_read_non_owner(rwsem) \ ++ PICK_RWSEM_OP(compat_down_read_non_owner, rt_down_read_non_owner, rwsem) ++ ++#define down_read_trylock(rwsem) \ ++ PICK_RWSEM_OP_RET(compat_down_read_trylock, rt_down_read_trylock, rwsem) ++ ++#define down_write(rwsem) PICK_RWSEM_OP(compat_down_write, rt_down_write, rwsem) ++ ++#define down_read_nested(rwsem, subclass) \ ++ PICK_RWSEM_OP(compat_down_read_nested, rt_down_read_nested, \ ++ rwsem, subclass) ++ ++#define down_write_nested(rwsem, subclass) \ ++ PICK_RWSEM_OP(compat_down_write_nested, rt_down_write_nested, \ ++ rwsem, subclass) ++ ++#define down_write_trylock(rwsem) \ ++ PICK_RWSEM_OP_RET(compat_down_write_trylock, rt_down_write_trylock,\ ++ rwsem) ++ ++#define up_read(rwsem) PICK_RWSEM_OP(compat_up_read, rt_up_read, rwsem) ++ ++#define up_read_non_owner(rwsem) \ ++ PICK_RWSEM_OP(compat_up_read_non_owner, rt_up_read_non_owner, rwsem) ++ ++#define up_write(rwsem) PICK_RWSEM_OP(compat_up_write, rt_up_write, rwsem) ++ ++#define downgrade_write(rwsem) \ ++ PICK_RWSEM_OP(compat_downgrade_write, rt_downgrade_write, rwsem) ++ ++#define rwsem_is_locked(rwsem) \ ++ PICK_RWSEM_OP_RET(compat_rwsem_is_locked, rt_rwsem_is_locked, rwsem) ++ ++#else ++# define preempt_rt 0 ++#endif /* CONFIG_PREEMPT_RT */ ++ ++#endif ++ +Index: linux-2.6-tip/include/linux/rtmutex.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/rtmutex.h ++++ linux-2.6-tip/include/linux/rtmutex.h +@@ -24,7 +24,7 @@ + * @owner: the mutex owner + */ + struct rt_mutex { +- spinlock_t wait_lock; ++ raw_spinlock_t wait_lock; + struct plist_head wait_list; + struct task_struct *owner; + #ifdef CONFIG_DEBUG_RT_MUTEXES +@@ -63,8 +63,8 @@ struct hrtimer_sleeper; + #endif + + #define __RT_MUTEX_INITIALIZER(mutexname) \ +- { .wait_lock = __SPIN_LOCK_UNLOCKED(mutexname.wait_lock) \ +- , .wait_list = PLIST_HEAD_INIT(mutexname.wait_list, mutexname.wait_lock) \ ++ { .wait_lock = RAW_SPIN_LOCK_UNLOCKED(mutexname) \ ++ , .wait_list = PLIST_HEAD_INIT(mutexname.wait_list, &mutexname.wait_lock) \ + , .owner = NULL \ + __DEBUG_RT_MUTEX_INITIALIZER(mutexname)} + +@@ -88,6 +88,8 @@ extern void rt_mutex_destroy(struct rt_m + extern void rt_mutex_lock(struct rt_mutex *lock); + extern int rt_mutex_lock_interruptible(struct rt_mutex *lock, + int detect_deadlock); ++extern int rt_mutex_lock_killable(struct rt_mutex *lock, ++ int detect_deadlock); + extern int rt_mutex_timed_lock(struct rt_mutex *lock, + struct hrtimer_sleeper *timeout, + int detect_deadlock); +@@ -98,7 +100,7 @@ extern void rt_mutex_unlock(struct rt_mu + + #ifdef CONFIG_RT_MUTEXES + # define INIT_RT_MUTEXES(tsk) \ +- .pi_waiters = PLIST_HEAD_INIT(tsk.pi_waiters, tsk.pi_lock), \ ++ .pi_waiters = PLIST_HEAD_INIT(tsk.pi_waiters, &tsk.pi_lock), \ + INIT_RT_MUTEX_DEBUG(tsk) + #else + # define INIT_RT_MUTEXES(tsk) +Index: linux-2.6-tip/include/linux/rwsem-spinlock.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/rwsem-spinlock.h ++++ linux-2.6-tip/include/linux/rwsem-spinlock.h +@@ -28,7 +28,7 @@ struct rwsem_waiter; + * - if activity is -1 then there is one active writer + * - if wait_list is not empty, then there are processes waiting for the semaphore + */ +-struct 
rw_semaphore { ++struct compat_rw_semaphore { + __s32 activity; + spinlock_t wait_lock; + struct list_head wait_list; +@@ -43,33 +43,32 @@ struct rw_semaphore { + # define __RWSEM_DEP_MAP_INIT(lockname) + #endif + +-#define __RWSEM_INITIALIZER(name) \ +-{ 0, __SPIN_LOCK_UNLOCKED(name.wait_lock), LIST_HEAD_INIT((name).wait_list) \ +- __RWSEM_DEP_MAP_INIT(name) } ++#define __COMPAT_RWSEM_INITIALIZER(name) \ ++{ 0, SPIN_LOCK_UNLOCKED, LIST_HEAD_INIT((name).wait_list) __RWSEM_DEP_MAP_INIT(name) } + +-#define DECLARE_RWSEM(name) \ +- struct rw_semaphore name = __RWSEM_INITIALIZER(name) ++#define COMPAT_DECLARE_RWSEM(name) \ ++ struct compat_rw_semaphore name = __COMPAT_RWSEM_INITIALIZER(name) + +-extern void __init_rwsem(struct rw_semaphore *sem, const char *name, ++extern void __compat_init_rwsem(struct compat_rw_semaphore *sem, const char *name, + struct lock_class_key *key); + +-#define init_rwsem(sem) \ ++#define compat_init_rwsem(sem) \ + do { \ + static struct lock_class_key __key; \ + \ +- __init_rwsem((sem), #sem, &__key); \ ++ __compat_init_rwsem((sem), #sem, &__key); \ + } while (0) + +-extern void __down_read(struct rw_semaphore *sem); +-extern int __down_read_trylock(struct rw_semaphore *sem); +-extern void __down_write(struct rw_semaphore *sem); +-extern void __down_write_nested(struct rw_semaphore *sem, int subclass); +-extern int __down_write_trylock(struct rw_semaphore *sem); +-extern void __up_read(struct rw_semaphore *sem); +-extern void __up_write(struct rw_semaphore *sem); +-extern void __downgrade_write(struct rw_semaphore *sem); ++extern void __down_read(struct compat_rw_semaphore *sem); ++extern int __down_read_trylock(struct compat_rw_semaphore *sem); ++extern void __down_write(struct compat_rw_semaphore *sem); ++extern void __down_write_nested(struct compat_rw_semaphore *sem, int subclass); ++extern int __down_write_trylock(struct compat_rw_semaphore *sem); ++extern void __up_read(struct compat_rw_semaphore *sem); ++extern void __up_write(struct compat_rw_semaphore *sem); ++extern void __downgrade_write(struct compat_rw_semaphore *sem); + +-static inline int rwsem_is_locked(struct rw_semaphore *sem) ++static inline int compat_rwsem_is_locked(struct compat_rw_semaphore *sem) + { + return (sem->activity != 0); + } +Index: linux-2.6-tip/include/linux/rwsem.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/rwsem.h ++++ linux-2.6-tip/include/linux/rwsem.h +@@ -9,53 +9,68 @@ + + #include + ++#ifdef CONFIG_PREEMPT_RT ++# include ++#endif ++ + #include + #include + #include + #include + +-struct rw_semaphore; ++#ifndef CONFIG_PREEMPT_RT ++/* ++ * On !PREEMPT_RT all rw-semaphores are compat: ++ */ ++#define compat_rw_semaphore rw_semaphore ++#endif ++ ++struct compat_rw_semaphore; + + #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK +-#include /* use a generic implementation */ ++# include /* use a generic implementation */ ++# ifndef CONFIG_PREEMPT_RT ++# define __RWSEM_INITIALIZER __COMPAT_RWSEM_INITIALIZER ++# define DECLARE_RWSEM COMPAT_DECLARE_RWSEM ++# endif + #else +-#include /* use an arch-specific implementation */ ++# include /* use an arch-specific implementation */ + #endif + + /* + * lock for reading + */ +-extern void down_read(struct rw_semaphore *sem); ++extern void compat_down_read(struct compat_rw_semaphore *sem); + + /* + * trylock for reading -- returns 1 if successful, 0 if contention + */ +-extern int down_read_trylock(struct rw_semaphore *sem); ++extern int compat_down_read_trylock(struct compat_rw_semaphore 
*sem); + + /* + * lock for writing + */ +-extern void down_write(struct rw_semaphore *sem); ++extern void compat_down_write(struct compat_rw_semaphore *sem); + + /* + * trylock for writing -- returns 1 if successful, 0 if contention + */ +-extern int down_write_trylock(struct rw_semaphore *sem); ++extern int compat_down_write_trylock(struct compat_rw_semaphore *sem); + + /* + * release a read lock + */ +-extern void up_read(struct rw_semaphore *sem); ++extern void compat_up_read(struct compat_rw_semaphore *sem); + + /* + * release a write lock + */ +-extern void up_write(struct rw_semaphore *sem); ++extern void compat_up_write(struct compat_rw_semaphore *sem); + + /* + * downgrade write lock to read lock + */ +-extern void downgrade_write(struct rw_semaphore *sem); ++extern void compat_downgrade_write(struct compat_rw_semaphore *sem); + + #ifdef CONFIG_DEBUG_LOCK_ALLOC + /* +@@ -71,21 +86,78 @@ extern void downgrade_write(struct rw_se + * lockdep_set_class() at lock initialization time. + * See Documentation/lockdep-design.txt for more details.) + */ +-extern void down_read_nested(struct rw_semaphore *sem, int subclass); +-extern void down_write_nested(struct rw_semaphore *sem, int subclass); ++extern void ++compat_down_read_nested(struct compat_rw_semaphore *sem, int subclass); ++extern void ++compat_down_write_nested(struct compat_rw_semaphore *sem, int subclass); + /* + * Take/release a lock when not the owner will release it. + * + * [ This API should be avoided as much as possible - the + * proper abstraction for this case is completions. ] + */ +-extern void down_read_non_owner(struct rw_semaphore *sem); +-extern void up_read_non_owner(struct rw_semaphore *sem); ++extern void ++compat_down_read_non_owner(struct compat_rw_semaphore *sem); ++extern void ++compat_up_read_non_owner(struct compat_rw_semaphore *sem); + #else +-# define down_read_nested(sem, subclass) down_read(sem) +-# define down_write_nested(sem, subclass) down_write(sem) +-# define down_read_non_owner(sem) down_read(sem) +-# define up_read_non_owner(sem) up_read(sem) ++# define compat_down_read_nested(sem, subclass) compat_down_read(sem) ++# define compat_down_write_nested(sem, subclass) compat_down_write(sem) ++# define compat_down_read_non_owner(sem) compat_down_read(sem) ++# define compat_up_read_non_owner(sem) compat_up_read(sem) + #endif + ++#ifndef CONFIG_PREEMPT_RT ++ ++#define DECLARE_RWSEM COMPAT_DECLARE_RWSEM ++ ++/* ++ * NOTE, lockdep: this has to be a macro, so that separate class-keys ++ * get generated by the compiler, if the same function does multiple ++ * init_rwsem() calls to different rwsems. 
++ */ ++#define init_rwsem(rwsem) compat_init_rwsem(rwsem) ++ ++static inline void down_read(struct compat_rw_semaphore *rwsem) ++{ ++ compat_down_read(rwsem); ++} ++static inline int down_read_trylock(struct compat_rw_semaphore *rwsem) ++{ ++ return compat_down_read_trylock(rwsem); ++} ++static inline void down_write(struct compat_rw_semaphore *rwsem) ++{ ++ compat_down_write(rwsem); ++} ++static inline int down_write_trylock(struct compat_rw_semaphore *rwsem) ++{ ++ return compat_down_write_trylock(rwsem); ++} ++static inline void up_read(struct compat_rw_semaphore *rwsem) ++{ ++ compat_up_read(rwsem); ++} ++static inline void up_write(struct compat_rw_semaphore *rwsem) ++{ ++ compat_up_write(rwsem); ++} ++static inline void downgrade_write(struct compat_rw_semaphore *rwsem) ++{ ++ compat_downgrade_write(rwsem); ++} ++static inline int rwsem_is_locked(struct compat_rw_semaphore *sem) ++{ ++ return compat_rwsem_is_locked(sem); ++} ++# define down_read_nested(sem, subclass) \ ++ compat_down_read_nested(sem, subclass) ++# define down_write_nested(sem, subclass) \ ++ compat_down_write_nested(sem, subclass) ++# define down_read_non_owner(sem) \ ++ compat_down_read_non_owner(sem) ++# define up_read_non_owner(sem) \ ++ compat_up_read_non_owner(sem) ++#endif /* !CONFIG_PREEMPT_RT */ ++ + #endif /* _LINUX_RWSEM_H */ +Index: linux-2.6-tip/include/linux/semaphore.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/semaphore.h ++++ linux-2.6-tip/include/linux/semaphore.h +@@ -9,41 +9,88 @@ + #ifndef __LINUX_SEMAPHORE_H + #define __LINUX_SEMAPHORE_H + +-#include +-#include ++#ifndef CONFIG_PREEMPT_RT ++# define compat_semaphore semaphore ++#endif ++ ++# include ++# include + + /* Please don't access any members of this structure directly */ +-struct semaphore { ++struct compat_semaphore { + spinlock_t lock; + unsigned int count; + struct list_head wait_list; + }; + +-#define __SEMAPHORE_INITIALIZER(name, n) \ ++#define __COMPAT_SEMAPHORE_INITIALIZER(name, n) \ + { \ + .lock = __SPIN_LOCK_UNLOCKED((name).lock), \ + .count = n, \ + .wait_list = LIST_HEAD_INIT((name).wait_list), \ + } + +-#define DECLARE_MUTEX(name) \ +- struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) ++#define __COMPAT_DECLARE_SEMAPHORE_GENERIC(name, count) \ ++ struct compat_semaphore name = __COMPAT_SEMAPHORE_INITIALIZER(name, count) + +-static inline void sema_init(struct semaphore *sem, int val) ++#define COMPAT_DECLARE_MUTEX(name) __COMPAT_DECLARE_SEMAPHORE_GENERIC(name, 1) ++static inline void compat_sema_init(struct compat_semaphore *sem, int val) + { + static struct lock_class_key __key; +- *sem = (struct semaphore) __SEMAPHORE_INITIALIZER(*sem, val); ++ *sem = (struct compat_semaphore) __COMPAT_SEMAPHORE_INITIALIZER(*sem, val); ++ ++ spin_lock_init(&sem->lock); + lockdep_init_map(&sem->lock.dep_map, "semaphore->lock", &__key, 0); + } + +-#define init_MUTEX(sem) sema_init(sem, 1) +-#define init_MUTEX_LOCKED(sem) sema_init(sem, 0) ++#define compat_init_MUTEX(sem) compat_sema_init(sem, 1) ++#define compat_init_MUTEX_LOCKED(sem) compat_sema_init(sem, 0) ++ ++extern void compat_down(struct compat_semaphore *sem); ++extern int __must_check compat_down_interruptible(struct compat_semaphore *sem); ++extern int __must_check compat_down_killable(struct compat_semaphore *sem); ++extern int __must_check compat_down_trylock(struct compat_semaphore *sem); ++extern int __must_check compat_down_timeout(struct compat_semaphore *sem, long jiffies); ++extern void compat_up(struct 
compat_semaphore *sem); ++ ++#ifdef CONFIG_PREEMPT_RT ++# include ++#else ++#define DECLARE_MUTEX COMPAT_DECLARE_MUTEX ++ ++static inline void sema_init(struct compat_semaphore *sem, int val) ++{ ++ compat_sema_init(sem, val); ++} ++static inline void init_MUTEX(struct compat_semaphore *sem) ++{ ++ compat_init_MUTEX(sem); ++} ++static inline void init_MUTEX_LOCKED(struct compat_semaphore *sem) ++{ ++ compat_init_MUTEX_LOCKED(sem); ++} ++static inline void down(struct compat_semaphore *sem) ++{ ++ compat_down(sem); ++} ++static inline int down_interruptible(struct compat_semaphore *sem) ++{ ++ return compat_down_interruptible(sem); ++} ++static inline int down_trylock(struct compat_semaphore *sem) ++{ ++ return compat_down_trylock(sem); ++} ++static inline int down_timeout(struct compat_semaphore *sem, long jiffies) ++{ ++ return compat_down_timeout(sem, jiffies); ++} + +-extern void down(struct semaphore *sem); +-extern int __must_check down_interruptible(struct semaphore *sem); +-extern int __must_check down_killable(struct semaphore *sem); +-extern int __must_check down_trylock(struct semaphore *sem); +-extern int __must_check down_timeout(struct semaphore *sem, long jiffies); +-extern void up(struct semaphore *sem); ++static inline void up(struct compat_semaphore *sem) ++{ ++ compat_up(sem); ++} ++#endif /* CONFIG_PREEMPT_RT */ + + #endif /* __LINUX_SEMAPHORE_H */ +Index: linux-2.6-tip/include/linux/seqlock.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/seqlock.h ++++ linux-2.6-tip/include/linux/seqlock.h +@@ -3,9 +3,11 @@ + /* + * Reader/writer consistent mechanism without starving writers. This type of + * lock for data where the reader wants a consistent set of information +- * and is willing to retry if the information changes. Readers never +- * block but they may have to retry if a writer is in +- * progress. Writers do not wait for readers. ++ * and is willing to retry if the information changes. Readers block ++ * on write contention (and where applicable, pi-boost the writer). ++ * Readers without contention on entry acquire the critical section ++ * without any atomic operations, but they may have to retry if a writer ++ * enters before the critical section ends. Writers do not wait for readers. + * + * This is not as cache friendly as brlock. Also, this will not work + * for data that contains pointers, because any writer could +@@ -24,56 +26,110 @@ + * + * Based on x86_64 vsyscall gettimeofday + * by Keith Owens and Andrea Arcangeli ++ * ++ * Priority inheritance and live-lock avoidance by Gregory Haskins + */ + ++#include + #include + #include + + typedef struct { + unsigned sequence; +- spinlock_t lock; +-} seqlock_t; ++ rwlock_t lock; ++} __seqlock_t; ++ ++typedef struct { ++ unsigned sequence; ++ raw_spinlock_t lock; ++} __raw_seqlock_t; ++ ++#define seqlock_need_resched(seq) lock_need_resched(&(seq)->lock) ++ ++#ifdef CONFIG_PREEMPT_RT ++typedef __seqlock_t seqlock_t; ++#else ++typedef __raw_seqlock_t seqlock_t; ++#endif ++ ++typedef __raw_seqlock_t raw_seqlock_t; + + /* + * These macros triggered gcc-3.x compile-time problems. We think these are + * OK now. Be cautious. 
+ */ +-#define __SEQLOCK_UNLOCKED(lockname) \ +- { 0, __SPIN_LOCK_UNLOCKED(lockname) } ++#define __RAW_SEQLOCK_UNLOCKED(lockname) \ ++ { 0, RAW_SPIN_LOCK_UNLOCKED(lockname) } ++ ++#ifdef CONFIG_PREEMPT_RT ++# define __SEQLOCK_UNLOCKED(lockname) { 0, __RW_LOCK_UNLOCKED(lockname) } ++#else ++# define __SEQLOCK_UNLOCKED(lockname) __RAW_SEQLOCK_UNLOCKED(lockname) ++#endif + + #define SEQLOCK_UNLOCKED \ + __SEQLOCK_UNLOCKED(old_style_seqlock_init) + +-#define seqlock_init(x) \ +- do { \ +- (x)->sequence = 0; \ +- spin_lock_init(&(x)->lock); \ +- } while (0) ++static inline void __raw_seqlock_init(raw_seqlock_t *seqlock) ++{ ++ *seqlock = (raw_seqlock_t) __RAW_SEQLOCK_UNLOCKED(x); ++ spin_lock_init(&seqlock->lock); ++} ++ ++#ifdef CONFIG_PREEMPT_RT ++static inline void __seqlock_init(seqlock_t *seqlock) ++{ ++ *seqlock = (seqlock_t) __SEQLOCK_UNLOCKED(seqlock); ++ rwlock_init(&seqlock->lock); ++} ++#else ++extern void __seqlock_init(seqlock_t *seqlock); ++#endif ++ ++#define seqlock_init(seq) \ ++ PICK_FUNCTION(raw_seqlock_t *, seqlock_t *, \ ++ __raw_seqlock_init, __seqlock_init, seq); + + #define DEFINE_SEQLOCK(x) \ + seqlock_t x = __SEQLOCK_UNLOCKED(x) + ++#define DEFINE_RAW_SEQLOCK(name) \ ++ raw_seqlock_t name __cacheline_aligned_in_smp = \ ++ __RAW_SEQLOCK_UNLOCKED(name) ++ ++ + /* Lock out other writers and update the count. + * Acts like a normal spin_lock/unlock. + * Don't need preempt_disable() because that is in the spin_lock already. + */ +-static inline void write_seqlock(seqlock_t *sl) ++static inline void __write_seqlock(seqlock_t *sl) + { +- spin_lock(&sl->lock); ++ write_lock(&sl->lock); + ++sl->sequence; + smp_wmb(); + } + +-static inline void write_sequnlock(seqlock_t *sl) ++static __always_inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) ++{ ++ unsigned long flags; ++ ++ local_save_flags(flags); ++ __write_seqlock(sl); ++ return flags; ++} ++ ++static inline void __write_sequnlock(seqlock_t *sl) + { + smp_wmb(); + sl->sequence++; +- spin_unlock(&sl->lock); ++ write_unlock(&sl->lock); + } + +-static inline int write_tryseqlock(seqlock_t *sl) ++#define __write_sequnlock_irqrestore(sl, flags) __write_sequnlock(sl) ++ ++static inline int __write_tryseqlock(seqlock_t *sl) + { +- int ret = spin_trylock(&sl->lock); ++ int ret = write_trylock(&sl->lock); + + if (ret) { + ++sl->sequence; +@@ -83,18 +139,25 @@ static inline int write_tryseqlock(seqlo + } + + /* Start of read calculation -- fetch last complete writer token */ +-static __always_inline unsigned read_seqbegin(const seqlock_t *sl) ++static __always_inline unsigned __read_seqbegin(seqlock_t *sl) + { + unsigned ret; + +-repeat: + ret = sl->sequence; + smp_rmb(); + if (unlikely(ret & 1)) { +- cpu_relax(); +- goto repeat; ++ /* ++ * Serialze with the writer which will ensure they are ++ * pi-boosted if necessary and prevent us from starving ++ * them. ++ */ ++ read_lock(&sl->lock); ++ ret = sl->sequence; ++ read_unlock(&sl->lock); + } + ++ BUG_ON(ret & 1); ++ + return ret; + } + +@@ -103,13 +166,192 @@ repeat: + * + * If sequence value changed then writer changed data while in section. 
+ */ +-static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start) ++static inline int __read_seqretry(seqlock_t *sl, unsigned iv) ++{ ++ smp_rmb(); ++ return (sl->sequence != iv); ++} ++ ++static __always_inline void __write_seqlock_raw(raw_seqlock_t *sl) ++{ ++ spin_lock(&sl->lock); ++ ++sl->sequence; ++ smp_wmb(); ++} ++ ++static __always_inline unsigned long ++__write_seqlock_irqsave_raw(raw_seqlock_t *sl) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ __write_seqlock_raw(sl); ++ return flags; ++} ++ ++static __always_inline void __write_seqlock_irq_raw(raw_seqlock_t *sl) ++{ ++ local_irq_disable(); ++ __write_seqlock_raw(sl); ++} ++ ++static __always_inline void __write_seqlock_bh_raw(raw_seqlock_t *sl) ++{ ++ local_bh_disable(); ++ __write_seqlock_raw(sl); ++} ++ ++static __always_inline void __write_sequnlock_raw(raw_seqlock_t *sl) ++{ ++ smp_wmb(); ++ sl->sequence++; ++ spin_unlock(&sl->lock); ++} ++ ++static __always_inline void ++__write_sequnlock_irqrestore_raw(raw_seqlock_t *sl, unsigned long flags) ++{ ++ __write_sequnlock_raw(sl); ++ local_irq_restore(flags); ++ preempt_check_resched(); ++} ++ ++static __always_inline void __write_sequnlock_irq_raw(raw_seqlock_t *sl) ++{ ++ __write_sequnlock_raw(sl); ++ local_irq_enable(); ++ preempt_check_resched(); ++} ++ ++static __always_inline void __write_sequnlock_bh_raw(raw_seqlock_t *sl) ++{ ++ __write_sequnlock_raw(sl); ++ local_bh_enable(); ++} ++ ++static __always_inline int __write_tryseqlock_raw(raw_seqlock_t *sl) ++{ ++ int ret = spin_trylock(&sl->lock); ++ ++ if (ret) { ++ ++sl->sequence; ++ smp_wmb(); ++ } ++ return ret; ++} ++ ++static __always_inline unsigned __read_seqbegin_raw(const raw_seqlock_t *sl) ++{ ++ unsigned ret; ++ ++repeat: ++ ret = sl->sequence; ++ smp_rmb(); ++ if (unlikely(ret & 1)) { ++ cpu_relax(); ++ goto repeat; ++ } ++ ++ return ret; ++} ++ ++static __always_inline int __read_seqretry_raw(const raw_seqlock_t *sl, unsigned start) + { + smp_rmb(); + + return (sl->sequence != start); + } + ++extern int __bad_seqlock_type(void); ++ ++/* ++ * PICK_SEQ_OP() is a small redirector to allow less typing of the lock ++ * types raw_seqlock_t, seqlock_t, at the front of the PICK_FUNCTION ++ * macro. ++ */ ++#define PICK_SEQ_OP(...) \ ++ PICK_FUNCTION(raw_seqlock_t *, seqlock_t *, ##__VA_ARGS__) ++#define PICK_SEQ_OP_RET(...) 
\ ++ PICK_FUNCTION_RET(raw_seqlock_t *, seqlock_t *, ##__VA_ARGS__) ++ ++#define write_seqlock(sl) PICK_SEQ_OP(__write_seqlock_raw, __write_seqlock, sl) ++ ++#define write_sequnlock(sl) \ ++ PICK_SEQ_OP(__write_sequnlock_raw, __write_sequnlock, sl) ++ ++#define write_tryseqlock(sl) \ ++ PICK_SEQ_OP_RET(__write_tryseqlock_raw, __write_tryseqlock, sl) ++ ++#define read_seqbegin(sl) \ ++ PICK_SEQ_OP_RET(__read_seqbegin_raw, __read_seqbegin, sl) ++ ++#define read_seqretry(sl, iv) \ ++ PICK_SEQ_OP_RET(__read_seqretry_raw, __read_seqretry, sl, iv) ++ ++#define write_seqlock_irqsave(lock, flags) \ ++do { \ ++ flags = PICK_SEQ_OP_RET(__write_seqlock_irqsave_raw, \ ++ __write_seqlock_irqsave, lock); \ ++} while (0) ++ ++#define write_seqlock_irq(lock) \ ++ PICK_SEQ_OP(__write_seqlock_irq_raw, __write_seqlock, lock) ++ ++#define write_seqlock_bh(lock) \ ++ PICK_SEQ_OP(__write_seqlock_bh_raw, __write_seqlock, lock) ++ ++#define write_sequnlock_irqrestore(lock, flags) \ ++ PICK_SEQ_OP(__write_sequnlock_irqrestore_raw, \ ++ __write_sequnlock_irqrestore, lock, flags) ++ ++#define write_sequnlock_bh(lock) \ ++ PICK_SEQ_OP(__write_sequnlock_bh_raw, __write_sequnlock, lock) ++ ++#define write_sequnlock_irq(lock) \ ++ PICK_SEQ_OP(__write_sequnlock_irq_raw, __write_sequnlock, lock) ++ ++static __always_inline ++unsigned long __seq_irqsave_raw(raw_seqlock_t *sl) ++{ ++ unsigned long flags; ++ ++ local_irq_save(flags); ++ return flags; ++} ++ ++static __always_inline unsigned long __seq_irqsave(seqlock_t *sl) ++{ ++ unsigned long flags; ++ ++ local_save_flags(flags); ++ return flags; ++} ++ ++#define read_seqbegin_irqsave(lock, flags) \ ++({ \ ++ flags = PICK_SEQ_OP_RET(__seq_irqsave_raw, __seq_irqsave, lock);\ ++ read_seqbegin(lock); \ ++}) ++ ++static __always_inline int ++__read_seqretry_irqrestore(seqlock_t *sl, unsigned iv, unsigned long flags) ++{ ++ return __read_seqretry(sl, iv); ++} ++ ++static __always_inline int ++__read_seqretry_irqrestore_raw(raw_seqlock_t *sl, unsigned iv, ++ unsigned long flags) ++{ ++ int ret = read_seqretry(sl, iv); ++ local_irq_restore(flags); ++ preempt_check_resched(); ++ return ret; ++} ++ ++#define read_seqretry_irqrestore(lock, iv, flags) \ ++ PICK_SEQ_OP_RET(__read_seqretry_irqrestore_raw, \ ++ __read_seqretry_irqrestore, lock, iv, flags) + + /* + * Version using sequence counter only. +@@ -166,32 +408,4 @@ static inline void write_seqcount_end(se + smp_wmb(); + s->sequence++; + } +- +-/* +- * Possible sw/hw IRQ protected versions of the interfaces. 
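The PICK_SEQ_OP() wrappers above keep one calling convention for both seqlock flavours: raw_seqlock_t keeps the classic busy-retry reader, while a PREEMPT_RT seqlock_t reader that observes an odd count briefly takes the rwlock so the writer gets priority-boosted instead of being starved. A minimal usage sketch under those assumptions (the lock and data names below are illustrative only, not part of this patch):

        static DEFINE_RAW_SEQLOCK(sample_lock);         /* hypothetical lock  */
        static unsigned long sample_a, sample_b;        /* data it protects   */

        static void sample_update(unsigned long a, unsigned long b)
        {
                write_seqlock(&sample_lock);            /* ++sequence: now odd    */
                sample_a = a;
                sample_b = b;
                write_sequnlock(&sample_lock);          /* sequence++: even again */
        }

        static unsigned long sample_read(void)
        {
                unsigned long a, b;
                unsigned seq;

                do {
                        seq = read_seqbegin(&sample_lock);
                        a = sample_a;
                        b = sample_b;
                } while (read_seqretry(&sample_lock, seq));  /* retry if a writer hit us */

                return a + b;
        }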
+- */ +-#define write_seqlock_irqsave(lock, flags) \ +- do { local_irq_save(flags); write_seqlock(lock); } while (0) +-#define write_seqlock_irq(lock) \ +- do { local_irq_disable(); write_seqlock(lock); } while (0) +-#define write_seqlock_bh(lock) \ +- do { local_bh_disable(); write_seqlock(lock); } while (0) +- +-#define write_sequnlock_irqrestore(lock, flags) \ +- do { write_sequnlock(lock); local_irq_restore(flags); } while(0) +-#define write_sequnlock_irq(lock) \ +- do { write_sequnlock(lock); local_irq_enable(); } while(0) +-#define write_sequnlock_bh(lock) \ +- do { write_sequnlock(lock); local_bh_enable(); } while(0) +- +-#define read_seqbegin_irqsave(lock, flags) \ +- ({ local_irq_save(flags); read_seqbegin(lock); }) +- +-#define read_seqretry_irqrestore(lock, iv, flags) \ +- ({ \ +- int ret = read_seqretry(lock, iv); \ +- local_irq_restore(flags); \ +- ret; \ +- }) +- + #endif /* __LINUX_SEQLOCK_H */ +Index: linux-2.6-tip/include/linux/spinlock.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/spinlock.h ++++ linux-2.6-tip/include/linux/spinlock.h +@@ -44,6 +44,42 @@ + * builds the _spin_*() APIs. + * + * linux/spinlock.h: builds the final spin_*() APIs. ++ * ++ * ++ * Public types and naming conventions: ++ * ------------------------------------ ++ * spinlock_t: type: sleep-lock ++ * raw_spinlock_t: type: spin-lock (debug) ++ * ++ * spin_lock([raw_]spinlock_t): API: acquire lock, both types ++ * ++ * ++ * Internal types and naming conventions: ++ * ------------------------------------- ++ * __raw_spinlock_t: type: lowlevel spin-lock ++ * ++ * _spin_lock(struct rt_mutex): API: acquire sleep-lock ++ * __spin_lock(raw_spinlock_t): API: acquire spin-lock (highlevel) ++ * _raw_spin_lock(raw_spinlock_t): API: acquire spin-lock (debug) ++ * __raw_spin_lock(__raw_spinlock_t): API: acquire spin-lock (lowlevel) ++ * ++ * ++ * spin_lock(raw_spinlock_t) translates into the following chain of ++ * calls/inlines/macros, if spin-lock debugging is enabled: ++ * ++ * spin_lock() [include/linux/spinlock.h] ++ * -> __spin_lock() [kernel/spinlock.c] ++ * -> _raw_spin_lock() [lib/spinlock_debug.c] ++ * -> __raw_spin_lock() [include/asm/spinlock.h] ++ * ++ * spin_lock(spinlock_t) translates into the following chain of ++ * calls/inlines/macros: ++ * ++ * spin_lock() [include/linux/spinlock.h] ++ * -> _spin_lock() [include/linux/spinlock.h] ++ * -> rt_spin_lock() [kernel/rtmutex.c] ++ * -> rt_spin_lock_fastlock() [kernel/rtmutex.c] ++ * -> rt_spin_lock_slowlock() [kernel/rtmutex.c] + */ + + #include +@@ -52,29 +88,15 @@ + #include + #include + #include ++#include + #include + #include ++#include ++#include + + #include + + /* +- * Must define these before including other files, inline functions need them +- */ +-#define LOCK_SECTION_NAME ".text.lock."KBUILD_BASENAME +- +-#define LOCK_SECTION_START(extra) \ +- ".subsection 1\n\t" \ +- extra \ +- ".ifndef " LOCK_SECTION_NAME "\n\t" \ +- LOCK_SECTION_NAME ":\n\t" \ +- ".endif\n" +- +-#define LOCK_SECTION_END \ +- ".previous\n\t" +- +-#define __lockfunc __attribute__((section(".spinlock.text"))) +- +-/* + * Pull the raw_spinlock_t and raw_rwlock_t definitions: + */ + #include +@@ -90,36 +112,10 @@ extern int __lockfunc generic__raw_read_ + # include + #endif + +-#ifdef CONFIG_DEBUG_SPINLOCK +- extern void __spin_lock_init(spinlock_t *lock, const char *name, +- struct lock_class_key *key); +-# define spin_lock_init(lock) \ +-do { \ +- static struct lock_class_key __key; \ +- \ +- 
__spin_lock_init((lock), #lock, &__key); \ +-} while (0) +- +-#else +-# define spin_lock_init(lock) \ +- do { *(lock) = SPIN_LOCK_UNLOCKED; } while (0) +-#endif +- +-#ifdef CONFIG_DEBUG_SPINLOCK +- extern void __rwlock_init(rwlock_t *lock, const char *name, +- struct lock_class_key *key); +-# define rwlock_init(lock) \ +-do { \ +- static struct lock_class_key __key; \ +- \ +- __rwlock_init((lock), #lock, &__key); \ +-} while (0) +-#else +-# define rwlock_init(lock) \ +- do { *(lock) = RW_LOCK_UNLOCKED; } while (0) +-#endif +- +-#define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock) ++/* ++ * Pull the RT types: ++ */ ++#include + + #ifdef CONFIG_GENERIC_LOCKBREAK + #define spin_is_contended(lock) ((lock)->break_lock) +@@ -132,12 +128,6 @@ do { \ + #endif /*__raw_spin_is_contended*/ + #endif + +-/** +- * spin_unlock_wait - wait until the spinlock gets unlocked +- * @lock: the spinlock in question. +- */ +-#define spin_unlock_wait(lock) __raw_spin_unlock_wait(&(lock)->raw_lock) +- + /* + * Pull the _spin_*()/_read_*()/_write_*() functions/declarations: + */ +@@ -148,16 +138,16 @@ do { \ + #endif + + #ifdef CONFIG_DEBUG_SPINLOCK +- extern void _raw_spin_lock(spinlock_t *lock); +-#define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) +- extern int _raw_spin_trylock(spinlock_t *lock); +- extern void _raw_spin_unlock(spinlock_t *lock); +- extern void _raw_read_lock(rwlock_t *lock); +- extern int _raw_read_trylock(rwlock_t *lock); +- extern void _raw_read_unlock(rwlock_t *lock); +- extern void _raw_write_lock(rwlock_t *lock); +- extern int _raw_write_trylock(rwlock_t *lock); +- extern void _raw_write_unlock(rwlock_t *lock); ++ extern __lockfunc void _raw_spin_lock(raw_spinlock_t *lock); ++# define _raw_spin_lock_flags(lock, flags) _raw_spin_lock(lock) ++ extern __lockfunc int _raw_spin_trylock(raw_spinlock_t *lock); ++ extern __lockfunc void _raw_spin_unlock(raw_spinlock_t *lock); ++ extern __lockfunc void _raw_read_lock(raw_rwlock_t *lock); ++ extern __lockfunc int _raw_read_trylock(raw_rwlock_t *lock); ++ extern __lockfunc void _raw_read_unlock(raw_rwlock_t *lock); ++ extern __lockfunc void _raw_write_lock(raw_rwlock_t *lock); ++ extern __lockfunc int _raw_write_trylock(raw_rwlock_t *lock); ++ extern __lockfunc void _raw_write_unlock(raw_rwlock_t *lock); + #else + # define _raw_spin_lock(lock) __raw_spin_lock(&(lock)->raw_lock) + # define _raw_spin_lock_flags(lock, flags) \ +@@ -172,179 +162,440 @@ do { \ + # define _raw_write_unlock(rwlock) __raw_write_unlock(&(rwlock)->raw_lock) + #endif + +-#define read_can_lock(rwlock) __raw_read_can_lock(&(rwlock)->raw_lock) +-#define write_can_lock(rwlock) __raw_write_can_lock(&(rwlock)->raw_lock) ++extern int __bad_spinlock_type(void); ++extern int __bad_rwlock_type(void); ++ ++extern void ++__rt_spin_lock_init(spinlock_t *lock, char *name, struct lock_class_key *key); ++ ++extern void __lockfunc rt_spin_lock(spinlock_t *lock); ++extern void __lockfunc rt_spin_lock_nested(spinlock_t *lock, int subclass); ++extern void __lockfunc rt_spin_unlock(spinlock_t *lock); ++extern void __lockfunc rt_spin_unlock_wait(spinlock_t *lock); ++extern int __lockfunc ++rt_spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags); ++extern int __lockfunc rt_spin_trylock(spinlock_t *lock); ++extern int _atomic_dec_and_spin_lock(spinlock_t *lock, atomic_t *atomic); ++ ++/* ++ * lockdep-less calls, for derived types like rwlock: ++ * (for trylock they can use rt_mutex_trylock() directly. 
++ */ ++extern void __lockfunc __rt_spin_lock(struct rt_mutex *lock); ++extern void __lockfunc __rt_spin_unlock(struct rt_mutex *lock); ++ ++#ifdef CONFIG_PREEMPT_RT ++# define _spin_lock(l) rt_spin_lock(l) ++# define _spin_lock_nested(l, s) rt_spin_lock_nested(l, s) ++# define _spin_lock_bh(l) rt_spin_lock(l) ++# define _spin_lock_irq(l) rt_spin_lock(l) ++# define _spin_unlock(l) rt_spin_unlock(l) ++# define _spin_unlock_no_resched(l) rt_spin_unlock(l) ++# define _spin_unlock_bh(l) rt_spin_unlock(l) ++# define _spin_unlock_irq(l) rt_spin_unlock(l) ++# define _spin_unlock_irqrestore(l, f) rt_spin_unlock(l) ++static inline unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) ++{ ++ rt_spin_lock(lock); ++ return 0; ++} ++static inline unsigned long __lockfunc ++_spin_lock_irqsave_nested(spinlock_t *lock, int subclass) ++{ ++ rt_spin_lock_nested(lock, subclass); ++ return 0; ++} ++#else ++static inline unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) ++{ ++ return 0; ++} ++static inline unsigned long __lockfunc ++_spin_lock_irqsave_nested(spinlock_t *lock, int subclass) ++{ ++ return 0; ++} ++# define _spin_lock(l) do { } while (0) ++# define _spin_lock_nested(l, s) do { } while (0) ++# define _spin_lock_bh(l) do { } while (0) ++# define _spin_lock_irq(l) do { } while (0) ++# define _spin_unlock(l) do { } while (0) ++# define _spin_unlock_no_resched(l) do { } while (0) ++# define _spin_unlock_bh(l) do { } while (0) ++# define _spin_unlock_irq(l) do { } while (0) ++# define _spin_unlock_irqrestore(l, f) do { } while (0) ++#endif ++ ++#define _spin_lock_init(sl, n, f, l) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ __rt_spin_lock_init(sl, n, &__key); \ ++} while (0) ++ ++# ifdef CONFIG_PREEMPT_RT ++# define _spin_can_lock(l) (!rt_mutex_is_locked(&(l)->lock)) ++# define _spin_is_locked(l) rt_mutex_is_locked(&(l)->lock) ++# define _spin_unlock_wait(l) rt_spin_unlock_wait(l) ++ ++# define _spin_trylock(l) rt_spin_trylock(l) ++# define _spin_trylock_bh(l) rt_spin_trylock(l) ++# define _spin_trylock_irq(l) rt_spin_trylock(l) ++# define _spin_trylock_irqsave(l,f) rt_spin_trylock_irqsave(l, f) ++# else ++ ++ extern int this_should_never_be_called_on_non_rt(spinlock_t *lock); ++# define TSNBCONRT(l) this_should_never_be_called_on_non_rt(l) ++# define _spin_can_lock(l) TSNBCONRT(l) ++# define _spin_is_locked(l) TSNBCONRT(l) ++# define _spin_unlock_wait(l) TSNBCONRT(l) ++ ++# define _spin_trylock(l) TSNBCONRT(l) ++# define _spin_trylock_bh(l) TSNBCONRT(l) ++# define _spin_trylock_irq(l) TSNBCONRT(l) ++# define _spin_trylock_irqsave(l,f) TSNBCONRT(l) ++#endif ++ ++extern void __lockfunc rt_write_lock(rwlock_t *rwlock); ++extern void __lockfunc rt_read_lock(rwlock_t *rwlock); ++extern int __lockfunc rt_write_trylock(rwlock_t *rwlock); ++extern int __lockfunc rt_write_trylock_irqsave(rwlock_t *trylock, ++ unsigned long *flags); ++extern int __lockfunc rt_read_trylock(rwlock_t *rwlock); ++extern void __lockfunc rt_write_unlock(rwlock_t *rwlock); ++extern void __lockfunc rt_read_unlock(rwlock_t *rwlock); ++extern unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock); ++extern unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock); ++extern void ++__rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key); ++ ++#define _rwlock_init(rwl, n, f, l) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ __rt_rwlock_init(rwl, n, &__key); \ ++} while (0) ++ ++#ifdef CONFIG_PREEMPT_RT ++# define rt_read_can_lock(rwl) 
(!rt_mutex_is_locked(&(rwl)->lock)) ++# define rt_write_can_lock(rwl) (!rt_mutex_is_locked(&(rwl)->lock)) ++#else ++ extern int rt_rwlock_can_lock_never_call_on_non_rt(rwlock_t *rwlock); ++# define rt_read_can_lock(rwl) rt_rwlock_can_lock_never_call_on_non_rt(rwl) ++# define rt_write_can_lock(rwl) rt_rwlock_can_lock_never_call_on_non_rt(rwl) ++#endif ++ ++# define _read_can_lock(rwl) rt_read_can_lock(rwl) ++# define _write_can_lock(rwl) rt_write_can_lock(rwl) ++ ++# define _read_trylock(rwl) rt_read_trylock(rwl) ++# define _write_trylock(rwl) rt_write_trylock(rwl) ++# define _write_trylock_irqsave(rwl, flags) \ ++ rt_write_trylock_irqsave(rwl, flags) ++ ++# define _read_lock(rwl) rt_read_lock(rwl) ++# define _write_lock(rwl) rt_write_lock(rwl) ++# define _read_unlock(rwl) rt_read_unlock(rwl) ++# define _write_unlock(rwl) rt_write_unlock(rwl) ++ ++# define _read_lock_bh(rwl) rt_read_lock(rwl) ++# define _write_lock_bh(rwl) rt_write_lock(rwl) ++# define _read_unlock_bh(rwl) rt_read_unlock(rwl) ++# define _write_unlock_bh(rwl) rt_write_unlock(rwl) ++ ++# define _read_lock_irq(rwl) rt_read_lock(rwl) ++# define _write_lock_irq(rwl) rt_write_lock(rwl) ++# define _read_unlock_irq(rwl) rt_read_unlock(rwl) ++# define _write_unlock_irq(rwl) rt_write_unlock(rwl) ++ ++# define _read_lock_irqsave(rwl) rt_read_lock_irqsave(rwl) ++# define _write_lock_irqsave(rwl) rt_write_lock_irqsave(rwl) ++ ++# define _read_unlock_irqrestore(rwl, f) rt_read_unlock(rwl) ++# define _write_unlock_irqrestore(rwl, f) rt_write_unlock(rwl) ++ ++#ifdef CONFIG_DEBUG_SPINLOCK ++ extern void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, ++ struct lock_class_key *key); ++# define _raw_spin_lock_init(lock, name, file, line) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ __raw_spin_lock_init((lock), #lock, &__key); \ ++} while (0) ++ ++#else ++#define __raw_spin_lock_init(lock) \ ++ do { *(lock) = RAW_SPIN_LOCK_UNLOCKED(lock); } while (0) ++# define _raw_spin_lock_init(lock, name, file, line) __raw_spin_lock_init(lock) ++#endif ++ ++/* ++ * PICK_SPIN_OP()/PICK_RW_OP() are simple redirectors for PICK_FUNCTION ++ */ ++#define PICK_SPIN_OP(...) \ ++ PICK_FUNCTION(raw_spinlock_t *, spinlock_t *, ##__VA_ARGS__) ++#define PICK_SPIN_OP_RET(...) \ ++ PICK_FUNCTION_RET(raw_spinlock_t *, spinlock_t *, ##__VA_ARGS__) ++#define PICK_RW_OP(...) PICK_FUNCTION(raw_rwlock_t *, rwlock_t *, ##__VA_ARGS__) ++#define PICK_RW_OP_RET(...) 
\ ++ PICK_FUNCTION_RET(raw_rwlock_t *, rwlock_t *, ##__VA_ARGS__) ++ ++#define spin_lock_init(lock) \ ++ PICK_SPIN_OP(_raw_spin_lock_init, _spin_lock_init, lock, #lock, \ ++ __FILE__, __LINE__) ++ ++#ifdef CONFIG_DEBUG_SPINLOCK ++ extern void __raw_rwlock_init(raw_rwlock_t *lock, const char *name, ++ struct lock_class_key *key); ++# define _raw_rwlock_init(lock, name, file, line) \ ++do { \ ++ static struct lock_class_key __key; \ ++ \ ++ __raw_rwlock_init((lock), #lock, &__key); \ ++} while (0) ++#else ++#define __raw_rwlock_init(lock) \ ++ do { *(lock) = RAW_RW_LOCK_UNLOCKED(lock); } while (0) ++# define _raw_rwlock_init(lock, name, file, line) __raw_rwlock_init(lock) ++#endif ++ ++#define rwlock_init(lock) \ ++ PICK_RW_OP(_raw_rwlock_init, _rwlock_init, lock, #lock, \ ++ __FILE__, __LINE__) ++ ++#define __spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock) ++ ++#define spin_is_locked(lock) \ ++ PICK_SPIN_OP_RET(__spin_is_locked, _spin_is_locked, lock) ++ ++#define __spin_unlock_wait(lock) __raw_spin_unlock_wait(&(lock)->raw_lock) ++ ++#define spin_unlock_wait(lock) \ ++ PICK_SPIN_OP(__spin_unlock_wait, _spin_unlock_wait, lock) + + /* + * Define the various spin_lock and rw_lock methods. Note we define these + * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The various + * methods are defined as nops in the case they are not required. + */ +-#define spin_trylock(lock) __cond_lock(lock, _spin_trylock(lock)) +-#define read_trylock(lock) __cond_lock(lock, _read_trylock(lock)) +-#define write_trylock(lock) __cond_lock(lock, _write_trylock(lock)) ++#define spin_trylock(lock) \ ++ __cond_lock(lock, PICK_SPIN_OP_RET(__spin_trylock, _spin_trylock, lock)) ++ ++#define read_trylock(lock) \ ++ __cond_lock(lock, PICK_RW_OP_RET(__read_trylock, _read_trylock, lock)) + +-#define spin_lock(lock) _spin_lock(lock) ++#define write_trylock(lock) \ ++ __cond_lock(lock, PICK_RW_OP_RET(__write_trylock, _write_trylock, lock)) ++ ++#define write_trylock_irqsave(lock, flags) \ ++ __cond_lock(lock, PICK_RW_OP_RET(__write_trylock_irqsave, \ ++ _write_trylock_irqsave, lock, &flags)) ++ ++#define __spin_can_lock(lock) __raw_spin_can_lock(&(lock)->raw_lock) ++#define __read_can_lock(lock) __raw_read_can_lock(&(lock)->raw_lock) ++#define __write_can_lock(lock) __raw_write_can_lock(&(lock)->raw_lock) ++ ++#define read_can_lock(lock) \ ++ __cond_lock(lock, PICK_RW_OP_RET(__read_can_lock, _read_can_lock, lock)) ++ ++#define write_can_lock(lock) \ ++ __cond_lock(lock, PICK_RW_OP_RET(__write_can_lock, _write_can_lock,\ ++ lock)) ++ ++#define spin_lock(lock) PICK_SPIN_OP(__spin_lock, _spin_lock, lock) + + #ifdef CONFIG_DEBUG_LOCK_ALLOC +-# define spin_lock_nested(lock, subclass) _spin_lock_nested(lock, subclass) +-# define spin_lock_nest_lock(lock, nest_lock) \ +- do { \ +- typecheck(struct lockdep_map *, &(nest_lock)->dep_map);\ +- _spin_lock_nest_lock(lock, &(nest_lock)->dep_map); \ +- } while (0) ++# define spin_lock_nested(lock, subclass) \ ++ PICK_SPIN_OP(__spin_lock_nested, _spin_lock_nested, lock, subclass) + #else +-# define spin_lock_nested(lock, subclass) _spin_lock(lock) +-# define spin_lock_nest_lock(lock, nest_lock) _spin_lock(lock) ++# define spin_lock_nested(lock, subclass) spin_lock(lock) + #endif + +-#define write_lock(lock) _write_lock(lock) +-#define read_lock(lock) _read_lock(lock) ++#define write_lock(lock) PICK_RW_OP(__write_lock, _write_lock, lock) + +-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) ++#define read_lock(lock) PICK_RW_OP(__read_lock, _read_lock, lock) + 
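With spin_lock(), the rwlock operations and the *_init() macros all funnelled through PICK_SPIN_OP()/PICK_RW_OP(), the same source line compiles against either lock flavour and the backend is chosen from the pointer type. A rough caller-side sketch (lock names are invented for illustration; DEFINE_RAW_SPINLOCK comes from the spinlock_types.h changes further down):

        static DEFINE_RAW_SPINLOCK(hw_fifo_lock);  /* true spinning lock              */
        static spinlock_t stats_lock;              /* rt-mutex based under PREEMPT_RT */

        static void locks_example_init(void)
        {
                spin_lock_init(&stats_lock);  /* picks _spin_lock_init() via PICK_SPIN_OP() */
        }

        static void locks_example_use(void)
        {
                unsigned long flags;

                /* raw lock: interrupts really are disabled across the section */
                spin_lock_irqsave(&hw_fifo_lock, flags);
                spin_unlock_irqrestore(&hw_fifo_lock, flags);

                /*
                 * sleeping "spinlock": on PREEMPT_RT this ends up in
                 * rt_spin_lock()/rt_spin_unlock(); the irqsave variant does not
                 * disable interrupts and simply hands back flags == 0.
                 */
                spin_lock_irqsave(&stats_lock, flags);
                spin_unlock_irqrestore(&stats_lock, flags);
        }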
+-#define spin_lock_irqsave(lock, flags) \ +- do { \ +- typecheck(unsigned long, flags); \ +- flags = _spin_lock_irqsave(lock); \ +- } while (0) +-#define read_lock_irqsave(lock, flags) \ +- do { \ +- typecheck(unsigned long, flags); \ +- flags = _read_lock_irqsave(lock); \ +- } while (0) +-#define write_lock_irqsave(lock, flags) \ +- do { \ +- typecheck(unsigned long, flags); \ +- flags = _write_lock_irqsave(lock); \ +- } while (0) ++# define spin_lock_irqsave(lock, flags) \ ++do { \ ++ BUILD_CHECK_IRQ_FLAGS(flags); \ ++ flags = PICK_SPIN_OP_RET(__spin_lock_irqsave, _spin_lock_irqsave, \ ++ lock); \ ++} while (0) + + #ifdef CONFIG_DEBUG_LOCK_ALLOC +-#define spin_lock_irqsave_nested(lock, flags, subclass) \ +- do { \ +- typecheck(unsigned long, flags); \ +- flags = _spin_lock_irqsave_nested(lock, subclass); \ +- } while (0) +-#else +-#define spin_lock_irqsave_nested(lock, flags, subclass) \ +- do { \ +- typecheck(unsigned long, flags); \ +- flags = _spin_lock_irqsave(lock); \ +- } while (0) +-#endif +- +-#else +- +-#define spin_lock_irqsave(lock, flags) \ +- do { \ +- typecheck(unsigned long, flags); \ +- _spin_lock_irqsave(lock, flags); \ +- } while (0) +-#define read_lock_irqsave(lock, flags) \ +- do { \ +- typecheck(unsigned long, flags); \ +- _read_lock_irqsave(lock, flags); \ +- } while (0) +-#define write_lock_irqsave(lock, flags) \ +- do { \ +- typecheck(unsigned long, flags); \ +- _write_lock_irqsave(lock, flags); \ +- } while (0) +-#define spin_lock_irqsave_nested(lock, flags, subclass) \ +- spin_lock_irqsave(lock, flags) +- +-#endif +- +-#define spin_lock_irq(lock) _spin_lock_irq(lock) +-#define spin_lock_bh(lock) _spin_lock_bh(lock) +- +-#define read_lock_irq(lock) _read_lock_irq(lock) +-#define read_lock_bh(lock) _read_lock_bh(lock) +- +-#define write_lock_irq(lock) _write_lock_irq(lock) +-#define write_lock_bh(lock) _write_lock_bh(lock) +- +-/* +- * We inline the unlock functions in the nondebug case: +- */ +-#if defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) || \ +- !defined(CONFIG_SMP) +-# define spin_unlock(lock) _spin_unlock(lock) +-# define read_unlock(lock) _read_unlock(lock) +-# define write_unlock(lock) _write_unlock(lock) +-# define spin_unlock_irq(lock) _spin_unlock_irq(lock) +-# define read_unlock_irq(lock) _read_unlock_irq(lock) +-# define write_unlock_irq(lock) _write_unlock_irq(lock) +-#else +-# define spin_unlock(lock) \ +- do {__raw_spin_unlock(&(lock)->raw_lock); __release(lock); } while (0) +-# define read_unlock(lock) \ +- do {__raw_read_unlock(&(lock)->raw_lock); __release(lock); } while (0) +-# define write_unlock(lock) \ +- do {__raw_write_unlock(&(lock)->raw_lock); __release(lock); } while (0) +-# define spin_unlock_irq(lock) \ +-do { \ +- __raw_spin_unlock(&(lock)->raw_lock); \ +- __release(lock); \ +- local_irq_enable(); \ +-} while (0) +-# define read_unlock_irq(lock) \ +-do { \ +- __raw_read_unlock(&(lock)->raw_lock); \ +- __release(lock); \ +- local_irq_enable(); \ +-} while (0) +-# define write_unlock_irq(lock) \ +-do { \ +- __raw_write_unlock(&(lock)->raw_lock); \ +- __release(lock); \ +- local_irq_enable(); \ +-} while (0) +-#endif +- +-#define spin_unlock_irqrestore(lock, flags) \ +- do { \ +- typecheck(unsigned long, flags); \ +- _spin_unlock_irqrestore(lock, flags); \ +- } while (0) +-#define spin_unlock_bh(lock) _spin_unlock_bh(lock) +- +-#define read_unlock_irqrestore(lock, flags) \ +- do { \ +- typecheck(unsigned long, flags); \ +- _read_unlock_irqrestore(lock, flags); \ +- } while (0) +-#define read_unlock_bh(lock) 
_read_unlock_bh(lock) +- +-#define write_unlock_irqrestore(lock, flags) \ +- do { \ +- typecheck(unsigned long, flags); \ +- _write_unlock_irqrestore(lock, flags); \ +- } while (0) +-#define write_unlock_bh(lock) _write_unlock_bh(lock) +- +-#define spin_trylock_bh(lock) __cond_lock(lock, _spin_trylock_bh(lock)) +- +-#define spin_trylock_irq(lock) \ +-({ \ +- local_irq_disable(); \ +- spin_trylock(lock) ? \ +- 1 : ({ local_irq_enable(); 0; }); \ +-}) ++# define spin_lock_irqsave_nested(lock, flags, subclass) \ ++do { \ ++ BUILD_CHECK_IRQ_FLAGS(flags); \ ++ flags = PICK_SPIN_OP_RET(__spin_lock_irqsave_nested, \ ++ _spin_lock_irqsave_nested, lock, subclass); \ ++} while (0) ++#else ++# define spin_lock_irqsave_nested(lock, flags, subclass) \ ++ spin_lock_irqsave(lock, flags) ++#endif ++ ++# define read_lock_irqsave(lock, flags) \ ++do { \ ++ BUILD_CHECK_IRQ_FLAGS(flags); \ ++ flags = PICK_RW_OP_RET(__read_lock_irqsave, _read_lock_irqsave, lock);\ ++} while (0) ++ ++# define write_lock_irqsave(lock, flags) \ ++do { \ ++ BUILD_CHECK_IRQ_FLAGS(flags); \ ++ flags = PICK_RW_OP_RET(__write_lock_irqsave, _write_lock_irqsave,lock);\ ++} while (0) ++ ++#define spin_lock_irq(lock) PICK_SPIN_OP(__spin_lock_irq, _spin_lock_irq, lock) ++ ++#define spin_lock_bh(lock) PICK_SPIN_OP(__spin_lock_bh, _spin_lock_bh, lock) ++ ++#define read_lock_irq(lock) PICK_RW_OP(__read_lock_irq, _read_lock_irq, lock) ++ ++#define read_lock_bh(lock) PICK_RW_OP(__read_lock_bh, _read_lock_bh, lock) ++ ++#define write_lock_irq(lock) PICK_RW_OP(__write_lock_irq, _write_lock_irq, lock) ++ ++#define write_lock_bh(lock) PICK_RW_OP(__write_lock_bh, _write_lock_bh, lock) ++ ++#define spin_unlock(lock) PICK_SPIN_OP(__spin_unlock, _spin_unlock, lock) ++ ++#define read_unlock(lock) PICK_RW_OP(__read_unlock, _read_unlock, lock) ++ ++#define write_unlock(lock) PICK_RW_OP(__write_unlock, _write_unlock, lock) ++ ++#define spin_unlock_no_resched(lock) \ ++ PICK_SPIN_OP(__spin_unlock_no_resched, _spin_unlock_no_resched, lock) ++ ++#define spin_unlock_irqrestore(lock, flags) \ ++do { \ ++ BUILD_CHECK_IRQ_FLAGS(flags); \ ++ PICK_SPIN_OP(__spin_unlock_irqrestore, _spin_unlock_irqrestore, \ ++ lock, flags); \ ++} while (0) ++ ++#define spin_unlock_irq(lock) \ ++ PICK_SPIN_OP(__spin_unlock_irq, _spin_unlock_irq, lock) ++#define spin_unlock_bh(lock) \ ++ PICK_SPIN_OP(__spin_unlock_bh, _spin_unlock_bh, lock) ++ ++#define read_unlock_irqrestore(lock, flags) \ ++do { \ ++ BUILD_CHECK_IRQ_FLAGS(flags); \ ++ PICK_RW_OP(__read_unlock_irqrestore, _read_unlock_irqrestore, \ ++ lock, flags); \ ++} while (0) ++ ++#define read_unlock_irq(lock) \ ++ PICK_RW_OP(__read_unlock_irq, _read_unlock_irq, lock) ++#define read_unlock_bh(lock) PICK_RW_OP(__read_unlock_bh, _read_unlock_bh, lock) ++ ++#define write_unlock_irqrestore(lock, flags) \ ++do { \ ++ BUILD_CHECK_IRQ_FLAGS(flags); \ ++ PICK_RW_OP(__write_unlock_irqrestore, _write_unlock_irqrestore, \ ++ lock, flags); \ ++} while (0) ++#define write_unlock_irq(lock) \ ++ PICK_RW_OP(__write_unlock_irq, _write_unlock_irq, lock) ++ ++#define write_unlock_bh(lock) \ ++ PICK_RW_OP(__write_unlock_bh, _write_unlock_bh, lock) ++ ++#define spin_trylock_bh(lock) \ ++ __cond_lock(lock, PICK_SPIN_OP_RET(__spin_trylock_bh, _spin_trylock_bh,\ ++ lock)) ++ ++#define spin_trylock_irq(lock) \ ++ __cond_lock(lock, PICK_SPIN_OP_RET(__spin_trylock_irq, \ ++ _spin_trylock_irq, lock)) + + #define spin_trylock_irqsave(lock, flags) \ +-({ \ +- local_irq_save(flags); \ +- spin_trylock(lock) ? 
\ +- 1 : ({ local_irq_restore(flags); 0; }); \ +-}) ++ __cond_lock(lock, PICK_SPIN_OP_RET(__spin_trylock_irqsave, \ ++ _spin_trylock_irqsave, lock, &flags)) + +-#define write_trylock_irqsave(lock, flags) \ +-({ \ +- local_irq_save(flags); \ +- write_trylock(lock) ? \ +- 1 : ({ local_irq_restore(flags); 0; }); \ +-}) ++/* ++ * bit-based spin_lock() ++ * ++ * Don't use this unless you really need to: spin_lock() and spin_unlock() ++ * are significantly faster. ++ */ ++static inline void bit_spin_lock(int bitnum, unsigned long *addr) ++{ ++ /* ++ * Assuming the lock is uncontended, this never enters ++ * the body of the outer loop. If it is contended, then ++ * within the inner loop a non-atomic test is used to ++ * busywait with less bus contention for a good time to ++ * attempt to acquire the lock bit. ++ */ ++#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) ++ while (unlikely(test_and_set_bit_lock(bitnum, addr))) ++ while (test_bit(bitnum, addr)) ++ cpu_relax(); ++#endif ++ __acquire(bitlock); ++} ++ ++/* ++ * Return true if it was acquired ++ */ ++static inline int bit_spin_trylock(int bitnum, unsigned long *addr) ++{ ++#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) ++ if (unlikely(test_and_set_bit_lock(bitnum, addr))) ++ return 0; ++#endif ++ __acquire(bitlock); ++ return 1; ++} ++ ++/* ++ * bit-based spin_unlock(): ++ */ ++static inline void bit_spin_unlock(int bitnum, unsigned long *addr) ++{ ++#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) ++# ifdef CONFIG_DEBUG_SPINLOCK ++ BUG_ON(!test_bit(bitnum, addr)); ++# endif ++ clear_bit_unlock(bitnum, addr); ++#endif ++ __release(bitlock); ++} ++ ++/* ++ * bit-based spin_unlock() - non-atomic version: ++ */ ++static inline void __bit_spin_unlock(int bitnum, unsigned long *addr) ++{ ++#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) ++# ifdef CONFIG_DEBUG_SPINLOCK ++ BUG_ON(!test_bit(bitnum, addr)); ++# endif ++ __clear_bit_unlock(bitnum, addr); ++#endif ++ __release(bitlock); ++} ++ ++/* ++ * Return true if the lock is held. ++ */ ++static inline int bit_spin_is_locked(int bitnum, unsigned long *addr) ++{ ++#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) || defined(CONFIG_PREEMPT) ++ return test_bit(bitnum, addr); ++#else ++ return 1; ++#endif ++} ++ ++/** ++ * __raw_spin_can_lock - would __raw_spin_trylock() succeed? ++ * @lock: the spinlock in question. ++ */ ++#define __raw_spin_can_lock(lock) (!__raw_spin_is_locked(lock)) + + /* + * Pull the atomic_t declaration: +@@ -359,14 +610,25 @@ do { \ + * Decrements @atomic by 1. If the result is 0, returns true and locks + * @lock. Returns false for all other cases. + */ +-extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); +-#define atomic_dec_and_lock(atomic, lock) \ +- __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) ++/* "lock on reference count zero" */ ++#ifndef ATOMIC_DEC_AND_LOCK ++# include ++ extern int __atomic_dec_and_spin_lock(raw_spinlock_t *lock, atomic_t *atomic); ++#endif ++ ++#define atomic_dec_and_lock(atomic, lock) \ ++ __cond_lock(lock, PICK_SPIN_OP_RET(__atomic_dec_and_spin_lock, \ ++ _atomic_dec_and_spin_lock, lock, atomic)) + + /** + * spin_can_lock - would spin_trylock() succeed? + * @lock: the spinlock in question. 
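bit_spin_lock() above trades speed for space: the lock lives in a single bit of a word the caller already owns, which is why the comment steers normal code towards a real spinlock. A minimal sketch of the intended pattern (the structure, field and bit number are hypothetical):

        #define SAMPLE_LOCK_BIT 0                       /* hypothetical bit number */

        struct sample_obj {
                unsigned long   state;                  /* bit 0 doubles as the lock */
                int             count;
        };

        static void sample_obj_inc(struct sample_obj *obj)
        {
                bit_spin_lock(SAMPLE_LOCK_BIT, &obj->state);
                obj->count++;
                bit_spin_unlock(SAMPLE_LOCK_BIT, &obj->state);
        }

        static int sample_obj_inc_if_free(struct sample_obj *obj)
        {
                if (!bit_spin_trylock(SAMPLE_LOCK_BIT, &obj->state))
                        return 0;                       /* bit already held elsewhere */
                obj->count++;
                bit_spin_unlock(SAMPLE_LOCK_BIT, &obj->state);
                return 1;
        }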
+ */ +-#define spin_can_lock(lock) (!spin_is_locked(lock)) ++#define spin_can_lock(lock) \ ++ __cond_lock(lock, PICK_SPIN_OP_RET(__spin_can_lock, _spin_can_lock,\ ++ lock)) ++ ++/* FIXME: porting hack! */ ++#define spin_lock_nest_lock(lock, nest_lock) spin_lock_nested(lock, 0) + + #endif /* __LINUX_SPINLOCK_H */ +Index: linux-2.6-tip/include/linux/spinlock_api_smp.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/spinlock_api_smp.h ++++ linux-2.6-tip/include/linux/spinlock_api_smp.h +@@ -19,45 +19,60 @@ int in_lock_functions(unsigned long addr + + #define assert_spin_locked(x) BUG_ON(!spin_is_locked(x)) + +-void __lockfunc _spin_lock(spinlock_t *lock) __acquires(lock); +-void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) ++void __lockfunc __spin_lock_nest_lock(raw_spinlock_t *lock, struct lockdep_map *map) + __acquires(lock); +-void __lockfunc _spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *map) +- __acquires(lock); +-void __lockfunc _read_lock(rwlock_t *lock) __acquires(lock); +-void __lockfunc _write_lock(rwlock_t *lock) __acquires(lock); +-void __lockfunc _spin_lock_bh(spinlock_t *lock) __acquires(lock); +-void __lockfunc _read_lock_bh(rwlock_t *lock) __acquires(lock); +-void __lockfunc _write_lock_bh(rwlock_t *lock) __acquires(lock); +-void __lockfunc _spin_lock_irq(spinlock_t *lock) __acquires(lock); +-void __lockfunc _read_lock_irq(rwlock_t *lock) __acquires(lock); +-void __lockfunc _write_lock_irq(rwlock_t *lock) __acquires(lock); +-unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) +- __acquires(lock); +-unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass) +- __acquires(lock); +-unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) +- __acquires(lock); +-unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) +- __acquires(lock); +-int __lockfunc _spin_trylock(spinlock_t *lock); +-int __lockfunc _read_trylock(rwlock_t *lock); +-int __lockfunc _write_trylock(rwlock_t *lock); +-int __lockfunc _spin_trylock_bh(spinlock_t *lock); +-void __lockfunc _spin_unlock(spinlock_t *lock) __releases(lock); +-void __lockfunc _read_unlock(rwlock_t *lock) __releases(lock); +-void __lockfunc _write_unlock(rwlock_t *lock) __releases(lock); +-void __lockfunc _spin_unlock_bh(spinlock_t *lock) __releases(lock); +-void __lockfunc _read_unlock_bh(rwlock_t *lock) __releases(lock); +-void __lockfunc _write_unlock_bh(rwlock_t *lock) __releases(lock); +-void __lockfunc _spin_unlock_irq(spinlock_t *lock) __releases(lock); +-void __lockfunc _read_unlock_irq(rwlock_t *lock) __releases(lock); +-void __lockfunc _write_unlock_irq(rwlock_t *lock) __releases(lock); +-void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) +- __releases(lock); +-void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) +- __releases(lock); +-void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) +- __releases(lock); ++#define ACQUIRE_SPIN __acquires(lock) ++#define ACQUIRE_RW __acquires(lock) ++#define RELEASE_SPIN __releases(lock) ++#define RELEASE_RW __releases(lock) ++ ++void __lockfunc __spin_lock(raw_spinlock_t *lock) ACQUIRE_SPIN; ++void __lockfunc __spin_lock_nested(raw_spinlock_t *lock, int subclass) ++ ACQUIRE_SPIN; ++void __lockfunc __read_lock(raw_rwlock_t *lock) ACQUIRE_RW; ++void __lockfunc __write_lock(raw_rwlock_t *lock) ACQUIRE_RW; ++void __lockfunc __spin_lock_bh(raw_spinlock_t *lock) ACQUIRE_SPIN; ++void __lockfunc 
__read_lock_bh(raw_rwlock_t *lock) ACQUIRE_RW; ++void __lockfunc __write_lock_bh(raw_rwlock_t *lock) ACQUIRE_RW; ++void __lockfunc __spin_lock_irq(raw_spinlock_t *lock) ACQUIRE_SPIN; ++void __lockfunc __read_lock_irq(raw_rwlock_t *lock) ACQUIRE_RW; ++void __lockfunc __write_lock_irq(raw_rwlock_t *lock) ACQUIRE_RW; ++unsigned long __lockfunc __spin_lock_irqsave(raw_spinlock_t *lock) ++ ACQUIRE_SPIN; ++unsigned long __lockfunc ++__spin_lock_irqsave_nested(raw_spinlock_t *lock, int subclass) ACQUIRE_SPIN; ++unsigned long __lockfunc __read_lock_irqsave(raw_rwlock_t *lock) ++ ACQUIRE_RW; ++unsigned long __lockfunc __write_lock_irqsave(raw_rwlock_t *lock) ++ ACQUIRE_RW; ++int __lockfunc __spin_trylock(raw_spinlock_t *lock); ++int __lockfunc ++__spin_trylock_irqsave(raw_spinlock_t *lock, unsigned long *flags); ++int __lockfunc __read_trylock(raw_rwlock_t *lock); ++int __lockfunc __write_trylock(raw_rwlock_t *lock); ++int __lockfunc ++__write_trylock_irqsave(raw_rwlock_t *lock, unsigned long *flags); ++int __lockfunc __spin_trylock_bh(raw_spinlock_t *lock); ++int __lockfunc __spin_trylock_irq(raw_spinlock_t *lock); ++void __lockfunc __spin_unlock(raw_spinlock_t *lock) RELEASE_SPIN; ++void __lockfunc __spin_unlock_no_resched(raw_spinlock_t *lock) ++ RELEASE_SPIN; ++void __lockfunc __read_unlock(raw_rwlock_t *lock) RELEASE_RW; ++void __lockfunc __write_unlock(raw_rwlock_t *lock) RELEASE_RW; ++void __lockfunc __spin_unlock_bh(raw_spinlock_t *lock) RELEASE_SPIN; ++void __lockfunc __read_unlock_bh(raw_rwlock_t *lock) RELEASE_RW; ++void __lockfunc __write_unlock_bh(raw_rwlock_t *lock) RELEASE_RW; ++void __lockfunc __spin_unlock_irq(raw_spinlock_t *lock) RELEASE_SPIN; ++void __lockfunc __read_unlock_irq(raw_rwlock_t *lock) RELEASE_RW; ++void __lockfunc __write_unlock_irq(raw_rwlock_t *lock) RELEASE_RW; ++void __lockfunc ++__spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags) ++ RELEASE_SPIN; ++void __lockfunc ++__read_unlock_irqrestore(raw_rwlock_t *lock, unsigned long flags) ++ RELEASE_RW; ++void ++__lockfunc __write_unlock_irqrestore(raw_rwlock_t *lock, unsigned long flags) ++ RELEASE_RW; + + #endif /* __LINUX_SPINLOCK_API_SMP_H */ +Index: linux-2.6-tip/include/linux/spinlock_api_up.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/spinlock_api_up.h ++++ linux-2.6-tip/include/linux/spinlock_api_up.h +@@ -33,12 +33,22 @@ + #define __LOCK_IRQ(lock) \ + do { local_irq_disable(); __LOCK(lock); } while (0) + +-#define __LOCK_IRQSAVE(lock, flags) \ +- do { local_irq_save(flags); __LOCK(lock); } while (0) ++#define __LOCK_IRQSAVE(lock) \ ++ ({ unsigned long __flags; local_irq_save(__flags); __LOCK(lock); __flags; }) ++ ++#define __TRYLOCK_IRQSAVE(lock, flags) \ ++ ({ local_irq_save(*(flags)); __LOCK(lock); 1; }) ++ ++#define __spin_trylock_irqsave(lock, flags) __TRYLOCK_IRQSAVE(lock, flags) ++ ++#define __write_trylock_irqsave(lock, flags) __TRYLOCK_IRQSAVE(lock, flags) + + #define __UNLOCK(lock) \ + do { preempt_enable(); __release(lock); (void)(lock); } while (0) + ++#define __UNLOCK_NO_RESCHED(lock) \ ++ do { __preempt_enable_no_resched(); __release(lock); (void)(lock); } while (0) ++ + #define __UNLOCK_BH(lock) \ + do { preempt_enable_no_resched(); local_bh_enable(); __release(lock); (void)(lock); } while (0) + +@@ -48,34 +58,36 @@ + #define __UNLOCK_IRQRESTORE(lock, flags) \ + do { local_irq_restore(flags); __UNLOCK(lock); } while (0) + +-#define _spin_lock(lock) __LOCK(lock) +-#define _spin_lock_nested(lock, subclass) __LOCK(lock) 
+-#define _read_lock(lock) __LOCK(lock) +-#define _write_lock(lock) __LOCK(lock) +-#define _spin_lock_bh(lock) __LOCK_BH(lock) +-#define _read_lock_bh(lock) __LOCK_BH(lock) +-#define _write_lock_bh(lock) __LOCK_BH(lock) +-#define _spin_lock_irq(lock) __LOCK_IRQ(lock) +-#define _read_lock_irq(lock) __LOCK_IRQ(lock) +-#define _write_lock_irq(lock) __LOCK_IRQ(lock) +-#define _spin_lock_irqsave(lock, flags) __LOCK_IRQSAVE(lock, flags) +-#define _read_lock_irqsave(lock, flags) __LOCK_IRQSAVE(lock, flags) +-#define _write_lock_irqsave(lock, flags) __LOCK_IRQSAVE(lock, flags) +-#define _spin_trylock(lock) ({ __LOCK(lock); 1; }) +-#define _read_trylock(lock) ({ __LOCK(lock); 1; }) +-#define _write_trylock(lock) ({ __LOCK(lock); 1; }) +-#define _spin_trylock_bh(lock) ({ __LOCK_BH(lock); 1; }) +-#define _spin_unlock(lock) __UNLOCK(lock) +-#define _read_unlock(lock) __UNLOCK(lock) +-#define _write_unlock(lock) __UNLOCK(lock) +-#define _spin_unlock_bh(lock) __UNLOCK_BH(lock) +-#define _write_unlock_bh(lock) __UNLOCK_BH(lock) +-#define _read_unlock_bh(lock) __UNLOCK_BH(lock) +-#define _spin_unlock_irq(lock) __UNLOCK_IRQ(lock) +-#define _read_unlock_irq(lock) __UNLOCK_IRQ(lock) +-#define _write_unlock_irq(lock) __UNLOCK_IRQ(lock) +-#define _spin_unlock_irqrestore(lock, flags) __UNLOCK_IRQRESTORE(lock, flags) +-#define _read_unlock_irqrestore(lock, flags) __UNLOCK_IRQRESTORE(lock, flags) +-#define _write_unlock_irqrestore(lock, flags) __UNLOCK_IRQRESTORE(lock, flags) ++#define __spin_lock(lock) __LOCK(lock) ++#define __spin_lock_nested(lock, subclass) __LOCK(lock) ++#define __read_lock(lock) __LOCK(lock) ++#define __write_lock(lock) __LOCK(lock) ++#define __spin_lock_bh(lock) __LOCK_BH(lock) ++#define __read_lock_bh(lock) __LOCK_BH(lock) ++#define __write_lock_bh(lock) __LOCK_BH(lock) ++#define __spin_lock_irq(lock) __LOCK_IRQ(lock) ++#define __read_lock_irq(lock) __LOCK_IRQ(lock) ++#define __write_lock_irq(lock) __LOCK_IRQ(lock) ++#define __spin_lock_irqsave(lock) __LOCK_IRQSAVE(lock) ++#define __read_lock_irqsave(lock) __LOCK_IRQSAVE(lock) ++#define __write_lock_irqsave(lock) __LOCK_IRQSAVE(lock) ++#define __spin_trylock(lock) ({ __LOCK(lock); 1; }) ++#define __read_trylock(lock) ({ __LOCK(lock); 1; }) ++#define __write_trylock(lock) ({ __LOCK(lock); 1; }) ++#define __spin_trylock_bh(lock) ({ __LOCK_BH(lock); 1; }) ++#define __spin_trylock_irq(lock) ({ __LOCK_IRQ(lock); 1; }) ++#define __spin_unlock(lock) __UNLOCK(lock) ++#define __spin_unlock_no_resched(lock) __UNLOCK_NO_RESCHED(lock) ++#define __read_unlock(lock) __UNLOCK(lock) ++#define __write_unlock(lock) __UNLOCK(lock) ++#define __spin_unlock_bh(lock) __UNLOCK_BH(lock) ++#define __write_unlock_bh(lock) __UNLOCK_BH(lock) ++#define __read_unlock_bh(lock) __UNLOCK_BH(lock) ++#define __spin_unlock_irq(lock) __UNLOCK_IRQ(lock) ++#define __read_unlock_irq(lock) __UNLOCK_IRQ(lock) ++#define __write_unlock_irq(lock) __UNLOCK_IRQ(lock) ++#define __spin_unlock_irqrestore(lock, flags) __UNLOCK_IRQRESTORE(lock, flags) ++#define __read_unlock_irqrestore(lock, flags) __UNLOCK_IRQRESTORE(lock, flags) ++#define __write_unlock_irqrestore(lock, flags) __UNLOCK_IRQRESTORE(lock, flags) + + #endif /* __LINUX_SPINLOCK_API_UP_H */ +Index: linux-2.6-tip/include/linux/spinlock_types.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/spinlock_types.h ++++ linux-2.6-tip/include/linux/spinlock_types.h +@@ -15,10 +15,27 @@ + # include + #endif + ++/* ++ * Must define these before including other files, inline 
functions need them ++ */ ++#define LOCK_SECTION_NAME ".text.lock."KBUILD_BASENAME ++ ++#define LOCK_SECTION_START(extra) \ ++ ".subsection 1\n\t" \ ++ extra \ ++ ".ifndef " LOCK_SECTION_NAME "\n\t" \ ++ LOCK_SECTION_NAME ":\n\t" \ ++ ".endif\n" ++ ++#define LOCK_SECTION_END \ ++ ".previous\n\t" ++ ++#define __lockfunc __attribute__((section(".spinlock.text"))) ++ + #include + + typedef struct { +- raw_spinlock_t raw_lock; ++ __raw_spinlock_t raw_lock; + #ifdef CONFIG_GENERIC_LOCKBREAK + unsigned int break_lock; + #endif +@@ -29,12 +46,12 @@ typedef struct { + #ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; + #endif +-} spinlock_t; ++} raw_spinlock_t; + + #define SPINLOCK_MAGIC 0xdead4ead + + typedef struct { +- raw_rwlock_t raw_lock; ++ __raw_rwlock_t raw_lock; + #ifdef CONFIG_GENERIC_LOCKBREAK + unsigned int break_lock; + #endif +@@ -45,7 +62,7 @@ typedef struct { + #ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; + #endif +-} rwlock_t; ++} raw_rwlock_t; + + #define RWLOCK_MAGIC 0xdeaf1eed + +@@ -64,24 +81,24 @@ typedef struct { + #endif + + #ifdef CONFIG_DEBUG_SPINLOCK +-# define __SPIN_LOCK_UNLOCKED(lockname) \ +- (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ ++# define _RAW_SPIN_LOCK_UNLOCKED(lockname) \ ++ { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ + .magic = SPINLOCK_MAGIC, \ + .owner = SPINLOCK_OWNER_INIT, \ + .owner_cpu = -1, \ + SPIN_DEP_MAP_INIT(lockname) } +-#define __RW_LOCK_UNLOCKED(lockname) \ +- (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ ++#define _RAW_RW_LOCK_UNLOCKED(lockname) \ ++ { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ + .magic = RWLOCK_MAGIC, \ + .owner = SPINLOCK_OWNER_INIT, \ + .owner_cpu = -1, \ + RW_DEP_MAP_INIT(lockname) } + #else +-# define __SPIN_LOCK_UNLOCKED(lockname) \ +- (spinlock_t) { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ ++# define _RAW_SPIN_LOCK_UNLOCKED(lockname) \ ++ { .raw_lock = __RAW_SPIN_LOCK_UNLOCKED, \ + SPIN_DEP_MAP_INIT(lockname) } +-#define __RW_LOCK_UNLOCKED(lockname) \ +- (rwlock_t) { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ ++# define _RAW_RW_LOCK_UNLOCKED(lockname) \ ++ { .raw_lock = __RAW_RW_LOCK_UNLOCKED, \ + RW_DEP_MAP_INIT(lockname) } + #endif + +@@ -91,10 +108,22 @@ typedef struct { + * Please use DEFINE_SPINLOCK()/DEFINE_RWLOCK() or + * __SPIN_LOCK_UNLOCKED()/__RW_LOCK_UNLOCKED() as appropriate. 
+ */ +-#define SPIN_LOCK_UNLOCKED __SPIN_LOCK_UNLOCKED(old_style_spin_init) +-#define RW_LOCK_UNLOCKED __RW_LOCK_UNLOCKED(old_style_rw_init) + +-#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) +-#define DEFINE_RWLOCK(x) rwlock_t x = __RW_LOCK_UNLOCKED(x) ++# define RAW_SPIN_LOCK_UNLOCKED(lockname) \ ++ (raw_spinlock_t) _RAW_SPIN_LOCK_UNLOCKED(lockname) ++ ++# define RAW_RW_LOCK_UNLOCKED(lockname) \ ++ (raw_rwlock_t) _RAW_RW_LOCK_UNLOCKED(lockname) ++ ++#define DEFINE_RAW_SPINLOCK(name) \ ++ raw_spinlock_t name __cacheline_aligned_in_smp = \ ++ RAW_SPIN_LOCK_UNLOCKED(name) ++ ++#define __DEFINE_RAW_SPINLOCK(name) \ ++ raw_spinlock_t name = RAW_SPIN_LOCK_UNLOCKED(name) ++ ++#define DEFINE_RAW_RWLOCK(name) \ ++ raw_rwlock_t name __cacheline_aligned_in_smp = \ ++ RAW_RW_LOCK_UNLOCKED(name) + + #endif /* __LINUX_SPINLOCK_TYPES_H */ +Index: linux-2.6-tip/include/linux/spinlock_types_up.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/spinlock_types_up.h ++++ linux-2.6-tip/include/linux/spinlock_types_up.h +@@ -16,13 +16,13 @@ + + typedef struct { + volatile unsigned int slock; +-} raw_spinlock_t; ++} __raw_spinlock_t; + + #define __RAW_SPIN_LOCK_UNLOCKED { 1 } + + #else + +-typedef struct { } raw_spinlock_t; ++typedef struct { } __raw_spinlock_t; + + #define __RAW_SPIN_LOCK_UNLOCKED { } + +@@ -30,7 +30,7 @@ typedef struct { } raw_spinlock_t; + + typedef struct { + /* no debug version on UP */ +-} raw_rwlock_t; ++} __raw_rwlock_t; + + #define __RAW_RW_LOCK_UNLOCKED { } + +Index: linux-2.6-tip/include/linux/spinlock_up.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/spinlock_up.h ++++ linux-2.6-tip/include/linux/spinlock_up.h +@@ -20,19 +20,19 @@ + #ifdef CONFIG_DEBUG_SPINLOCK + #define __raw_spin_is_locked(x) ((x)->slock == 0) + +-static inline void __raw_spin_lock(raw_spinlock_t *lock) ++static inline void __raw_spin_lock(__raw_spinlock_t *lock) + { + lock->slock = 0; + } + + static inline void +-__raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long flags) ++__raw_spin_lock_flags(__raw_spinlock_t *lock, unsigned long flags) + { + local_irq_save(flags); + lock->slock = 0; + } + +-static inline int __raw_spin_trylock(raw_spinlock_t *lock) ++static inline int __raw_spin_trylock(__raw_spinlock_t *lock) + { + char oldval = lock->slock; + +@@ -41,7 +41,7 @@ static inline int __raw_spin_trylock(raw + return oldval > 0; + } + +-static inline void __raw_spin_unlock(raw_spinlock_t *lock) ++static inline void __raw_spin_unlock(__raw_spinlock_t *lock) + { + lock->slock = 1; + } +Index: linux-2.6-tip/kernel/rt.c +=================================================================== +--- /dev/null ++++ linux-2.6-tip/kernel/rt.c +@@ -0,0 +1,528 @@ ++/* ++ * kernel/rt.c ++ * ++ * Real-Time Preemption Support ++ * ++ * started by Ingo Molnar: ++ * ++ * Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar ++ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner ++ * ++ * historic credit for proving that Linux spinlocks can be implemented via ++ * RT-aware mutexes goes to many people: The Pmutex project (Dirk Grambow ++ * and others) who prototyped it on 2.4 and did lots of comparative ++ * research and analysis; TimeSys, for proving that you can implement a ++ * fully preemptible kernel via the use of IRQ threading and mutexes; ++ * Bill Huey for persuasively arguing on lkml that the mutex model is the ++ * right one; and to MontaVista, who ported pmutexes to 2.6. 
++ * ++ * This code is a from-scratch implementation and is not based on pmutexes, ++ * but the idea of converting spinlocks to mutexes is used here too. ++ * ++ * lock debugging, locking tree, deadlock detection: ++ * ++ * Copyright (C) 2004, LynuxWorks, Inc., Igor Manyilov, Bill Huey ++ * Released under the General Public License (GPL). ++ * ++ * Includes portions of the generic R/W semaphore implementation from: ++ * ++ * Copyright (c) 2001 David Howells (dhowells@redhat.com). ++ * - Derived partially from idea by Andrea Arcangeli ++ * - Derived also from comments by Linus ++ * ++ * Pending ownership of locks and ownership stealing: ++ * ++ * Copyright (C) 2005, Kihon Technologies Inc., Steven Rostedt ++ * ++ * (also by Steven Rostedt) ++ * - Converted single pi_lock to individual task locks. ++ * ++ * By Esben Nielsen: ++ * Doing priority inheritance with help of the scheduler. ++ * ++ * Copyright (C) 2006, Timesys Corp., Thomas Gleixner ++ * - major rework based on Esben Nielsens initial patch ++ * - replaced thread_info references by task_struct refs ++ * - removed task->pending_owner dependency ++ * - BKL drop/reacquire for semaphore style locks to avoid deadlocks ++ * in the scheduler return path as discussed with Steven Rostedt ++ * ++ * Copyright (C) 2006, Kihon Technologies Inc. ++ * Steven Rostedt ++ * - debugged and patched Thomas Gleixner's rework. ++ * - added back the cmpxchg to the rework. ++ * - turned atomic require back on for SMP. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "rtmutex_common.h" ++ ++#ifdef CONFIG_PREEMPT_RT ++/* ++ * Unlock these on crash: ++ */ ++void zap_rt_locks(void) ++{ ++ //trace_lock_init(); ++} ++#endif ++ ++/* ++ * struct mutex functions ++ */ ++void __mutex_init(struct mutex *lock, char *name, struct lock_class_key *key) ++{ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ /* ++ * Make sure we are not reinitializing a held lock: ++ */ ++ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); ++ lockdep_init_map(&lock->dep_map, name, key, 0); ++#endif ++ __rt_mutex_init(&lock->lock, name); ++} ++EXPORT_SYMBOL(__mutex_init); ++ ++void __lockfunc _mutex_lock(struct mutex *lock) ++{ ++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++ rt_mutex_lock(&lock->lock); ++} ++EXPORT_SYMBOL(_mutex_lock); ++ ++int __lockfunc _mutex_lock_interruptible(struct mutex *lock) ++{ ++ int ret; ++ ++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++ ret = rt_mutex_lock_interruptible(&lock->lock, 0); ++ if (ret) ++ mutex_release(&lock->dep_map, 1, _RET_IP_); ++ return ret; ++} ++EXPORT_SYMBOL(_mutex_lock_interruptible); ++ ++int __lockfunc _mutex_lock_killable(struct mutex *lock) ++{ ++ int ret; ++ ++ mutex_acquire(&lock->dep_map, 0, 0, _RET_IP_); ++ ret = rt_mutex_lock_killable(&lock->lock, 0); ++ if (ret) ++ mutex_release(&lock->dep_map, 1, _RET_IP_); ++ return ret; ++} ++EXPORT_SYMBOL(_mutex_lock_killable); ++ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++void __lockfunc _mutex_lock_nested(struct mutex *lock, int subclass) ++{ ++ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); ++ rt_mutex_lock(&lock->lock); ++} ++EXPORT_SYMBOL(_mutex_lock_nested); ++ ++int __lockfunc _mutex_lock_interruptible_nested(struct mutex *lock, int subclass) ++{ ++ int ret; ++ ++ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); ++ ret = rt_mutex_lock_interruptible(&lock->lock, 0); ++ if (ret) ++ mutex_release(&lock->dep_map, 1, _RET_IP_); ++ return ret; ++} 
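The struct mutex wrappers above only re-route the blocking into rt_mutex_lock() and friends; callers keep the stock mutex API and error conventions. A caller-side sketch, assuming the usual mutex_lock_interruptible()/mutex_unlock() entry points map onto the _mutex_*() functions here (the names below are hypothetical):

        static DEFINE_MUTEX(cfg_mutex);                 /* hypothetical mutex */
        static int cfg_value;

        static int cfg_store(int val)
        {
                if (mutex_lock_interruptible(&cfg_mutex))
                        return -ERESTARTSYS;    /* signal while sleeping on the rt-mutex */
                cfg_value = val;
                mutex_unlock(&cfg_mutex);
                return 0;
        }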
++EXPORT_SYMBOL(_mutex_lock_interruptible_nested); ++ ++int __lockfunc _mutex_lock_killable_nested(struct mutex *lock, int subclass) ++{ ++ int ret; ++ ++ mutex_acquire(&lock->dep_map, subclass, 0, _RET_IP_); ++ ret = rt_mutex_lock_killable(&lock->lock, 0); ++ if (ret) ++ mutex_release(&lock->dep_map, 1, _RET_IP_); ++ return ret; ++} ++EXPORT_SYMBOL(_mutex_lock_killable_nested); ++#endif ++ ++int __lockfunc _mutex_trylock(struct mutex *lock) ++{ ++ int ret = rt_mutex_trylock(&lock->lock); ++ ++ if (ret) ++ mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ ++ return ret; ++} ++EXPORT_SYMBOL(_mutex_trylock); ++ ++void __lockfunc _mutex_unlock(struct mutex *lock) ++{ ++ mutex_release(&lock->dep_map, 1, _RET_IP_); ++ rt_mutex_unlock(&lock->lock); ++} ++EXPORT_SYMBOL(_mutex_unlock); ++ ++/* ++ * rwlock_t functions ++ */ ++int __lockfunc rt_write_trylock(rwlock_t *rwlock) ++{ ++ int ret = rt_mutex_trylock(&rwlock->lock); ++ ++ if (ret) ++ rwlock_acquire(&rwlock->dep_map, 0, 1, _RET_IP_); ++ ++ return ret; ++} ++EXPORT_SYMBOL(rt_write_trylock); ++ ++int __lockfunc rt_write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags) ++{ ++ *flags = 0; ++ return rt_write_trylock(rwlock); ++} ++EXPORT_SYMBOL(rt_write_trylock_irqsave); ++ ++int __lockfunc rt_read_trylock(rwlock_t *rwlock) ++{ ++ struct rt_mutex *lock = &rwlock->lock; ++ int ret = 1; ++ ++ /* ++ * recursive read locks succeed when current owns the lock ++ */ ++ if (rt_mutex_real_owner(lock) != current || !rwlock->read_depth) ++ ret = rt_mutex_trylock(lock); ++ ++ if (ret) { ++ rwlock->read_depth++; ++ rwlock_acquire_read(&rwlock->dep_map, 0, 1, _RET_IP_); ++ } ++ ++ return ret; ++} ++EXPORT_SYMBOL(rt_read_trylock); ++ ++void __lockfunc rt_write_lock(rwlock_t *rwlock) ++{ ++ rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); ++ __rt_spin_lock(&rwlock->lock); ++} ++EXPORT_SYMBOL(rt_write_lock); ++ ++void __lockfunc rt_read_lock(rwlock_t *rwlock) ++{ ++ struct rt_mutex *lock = &rwlock->lock; ++ ++ rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); ++ ++ /* ++ * recursive read locks succeed when current owns the lock ++ */ ++ if (rt_mutex_real_owner(lock) != current || !rwlock->read_depth) ++ __rt_spin_lock(lock); ++ rwlock->read_depth++; ++} ++ ++EXPORT_SYMBOL(rt_read_lock); ++ ++void __lockfunc rt_write_unlock(rwlock_t *rwlock) ++{ ++ /* NOTE: we always pass in '1' for nested, for simplicity */ ++ rwlock_release(&rwlock->dep_map, 1, _RET_IP_); ++ __rt_spin_unlock(&rwlock->lock); ++} ++EXPORT_SYMBOL(rt_write_unlock); ++ ++void __lockfunc rt_read_unlock(rwlock_t *rwlock) ++{ ++ rwlock_release(&rwlock->dep_map, 1, _RET_IP_); ++ ++ BUG_ON(rwlock->read_depth <= 0); ++ ++ /* Release the lock only when read_depth is down to 0 */ ++ if (--rwlock->read_depth == 0) ++ __rt_spin_unlock(&rwlock->lock); ++} ++EXPORT_SYMBOL(rt_read_unlock); ++ ++unsigned long __lockfunc rt_write_lock_irqsave(rwlock_t *rwlock) ++{ ++ rt_write_lock(rwlock); ++ ++ return 0; ++} ++EXPORT_SYMBOL(rt_write_lock_irqsave); ++ ++unsigned long __lockfunc rt_read_lock_irqsave(rwlock_t *rwlock) ++{ ++ rt_read_lock(rwlock); ++ ++ return 0; ++} ++EXPORT_SYMBOL(rt_read_lock_irqsave); ++ ++void __rt_rwlock_init(rwlock_t *rwlock, char *name, struct lock_class_key *key) ++{ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ /* ++ * Make sure we are not reinitializing a held lock: ++ */ ++ debug_check_no_locks_freed((void *)rwlock, sizeof(*rwlock)); ++ lockdep_init_map(&rwlock->dep_map, name, key, 0); ++#endif ++ __rt_mutex_init(&rwlock->lock, name); ++ rwlock->read_depth = 0; ++} 
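rt_read_lock()/rt_read_unlock() above make the rwlock_t read side recursive for its owner: read_depth counts nested acquisitions and the underlying rt-mutex is only released when it drops back to zero. In sketch form (lock and function names invented; the rwlock is assumed to have been set up with rwlock_init()):

        static rwlock_t table_lock;     /* assumed initialised with rwlock_init(&table_lock) */

        static int table_lookup_nested(int key)
        {
                int hit;

                read_lock(&table_lock);         /* read_depth 0 -> 1, takes the rt-mutex */
                read_lock(&table_lock);         /* same owner: only read_depth 1 -> 2    */
                hit = (key == 0);               /* stand-in for the real lookup          */
                read_unlock(&table_lock);       /* read_depth 2 -> 1, lock still held    */
                read_unlock(&table_lock);       /* read_depth 1 -> 0, rt-mutex released  */
                return hit;
        }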
++EXPORT_SYMBOL(__rt_rwlock_init); ++ ++/* ++ * rw_semaphores ++ */ ++ ++void rt_up_write(struct rw_semaphore *rwsem) ++{ ++ rwsem_release(&rwsem->dep_map, 1, _RET_IP_); ++ rt_mutex_unlock(&rwsem->lock); ++} ++EXPORT_SYMBOL(rt_up_write); ++ ++void rt_up_read(struct rw_semaphore *rwsem) ++{ ++ rwsem_release(&rwsem->dep_map, 1, _RET_IP_); ++ rt_mutex_unlock(&rwsem->lock); ++} ++EXPORT_SYMBOL(rt_up_read); ++ ++/* ++ * downgrade a write lock into a read lock ++ * - just wake up any readers at the front of the queue ++ */ ++void rt_downgrade_write(struct rw_semaphore *rwsem) ++{ ++ BUG_ON(rt_mutex_real_owner(&rwsem->lock) != current); ++} ++EXPORT_SYMBOL(rt_downgrade_write); ++ ++int rt_down_write_trylock(struct rw_semaphore *rwsem) ++{ ++ int ret = rt_mutex_trylock(&rwsem->lock); ++ ++ if (ret) ++ rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); ++ return ret; ++} ++EXPORT_SYMBOL(rt_down_write_trylock); ++ ++void rt_down_write(struct rw_semaphore *rwsem) ++{ ++ rwsem_acquire(&rwsem->dep_map, 0, 0, _RET_IP_); ++ rt_mutex_lock(&rwsem->lock); ++} ++EXPORT_SYMBOL(rt_down_write); ++ ++void rt_down_write_nested(struct rw_semaphore *rwsem, int subclass) ++{ ++ rwsem_acquire(&rwsem->dep_map, subclass, 0, _RET_IP_); ++ rt_mutex_lock(&rwsem->lock); ++} ++EXPORT_SYMBOL(rt_down_write_nested); ++ ++int rt_down_read_trylock(struct rw_semaphore *rwsem) ++{ ++ int ret = rt_mutex_trylock(&rwsem->lock); ++ ++ if (ret) ++ rwsem_acquire(&rwsem->dep_map, 0, 1, _RET_IP_); ++ return ret; ++} ++EXPORT_SYMBOL(rt_down_read_trylock); ++ ++static void __rt_down_read(struct rw_semaphore *rwsem, int subclass) ++{ ++ rwsem_acquire_read(&rwsem->dep_map, subclass, 0, _RET_IP_); ++ rt_mutex_lock(&rwsem->lock); ++} ++ ++void rt_down_read(struct rw_semaphore *rwsem) ++{ ++ __rt_down_read(rwsem, 0); ++} ++EXPORT_SYMBOL(rt_down_read); ++ ++void rt_down_read_nested(struct rw_semaphore *rwsem, int subclass) ++{ ++ __rt_down_read(rwsem, subclass); ++} ++EXPORT_SYMBOL(rt_down_read_nested); ++ ++void __rt_rwsem_init(struct rw_semaphore *rwsem, char *name, ++ struct lock_class_key *key) ++{ ++#ifdef CONFIG_DEBUG_LOCK_ALLOC ++ /* ++ * Make sure we are not reinitializing a held lock: ++ */ ++ debug_check_no_locks_freed((void *)rwsem, sizeof(*rwsem)); ++ lockdep_init_map(&rwsem->dep_map, name, key, 0); ++#endif ++ __rt_mutex_init(&rwsem->lock, name); ++} ++EXPORT_SYMBOL(__rt_rwsem_init); ++ ++/* ++ * Semaphores ++ */ ++/* ++ * Linux Semaphores implemented via RT-mutexes. ++ * ++ * In the down() variants we use the mutex as the semaphore blocking ++ * object: we always acquire it, decrease the counter and keep the lock ++ * locked if we did the 1->0 transition. The next down() will then block. ++ * ++ * In the up() path we atomically increase the counter and do the ++ * unlock if we were the one doing the 0->1 transition. 
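The counting scheme described in this comment is implemented just below by __down_complete(), rt_down() and rt_up(): only the task that takes the counter from 1 to 0 keeps the embedded rt-mutex, and only the up() that moves it from 0 back to 1 releases it. A small trace, assuming a hypothetical semaphore initialised with a count of 2:

        static struct semaphore pool_sem;       /* assumed: sema_init(&pool_sem, 2) */

        static void pool_sem_trace(void)
        {
                rt_down(&pool_sem);     /* count 2 -> 1: rt-mutex taken, then dropped again */
                rt_down(&pool_sem);     /* count 1 -> 0: this task keeps the rt-mutex held  */
                /* a third rt_down() here would now block on the rt-mutex */
                rt_up(&pool_sem);       /* count 0 -> 1: we did that transition, so we
                                           are the one to unlock the rt-mutex */
                rt_up(&pool_sem);       /* count 1 -> 2: counter bump only */
        }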
++ */ ++ ++static inline void __down_complete(struct semaphore *sem) ++{ ++ int count = atomic_dec_return(&sem->count); ++ ++ if (unlikely(count > 0)) ++ rt_mutex_unlock(&sem->lock); ++} ++ ++void rt_down(struct semaphore *sem) ++{ ++ rt_mutex_lock(&sem->lock); ++ __down_complete(sem); ++} ++EXPORT_SYMBOL(rt_down); ++ ++int rt_down_interruptible(struct semaphore *sem) ++{ ++ int ret; ++ ++ ret = rt_mutex_lock_interruptible(&sem->lock, 0); ++ if (ret) ++ return ret; ++ __down_complete(sem); ++ return 0; ++} ++EXPORT_SYMBOL(rt_down_interruptible); ++ ++int rt_down_timeout(struct semaphore *sem, long jiff) ++{ ++ struct hrtimer_sleeper t; ++ struct timespec ts; ++ unsigned long expires = jiffies + jiff + 1; ++ int ret; ++ ++ /* ++ * rt_mutex_slowlock can use an interruptible, but this needs to ++ * be TASK_INTERRUPTIBLE. The down_timeout uses TASK_UNINTERRUPTIBLE. ++ * To handle this we loop if a signal caused the timeout and the ++ * we recalculate the new timeout. ++ * Yes Thomas, this is a hack! But we can fix it right later. ++ */ ++ do { ++ jiffies_to_timespec(jiff, &ts); ++ hrtimer_init_on_stack(&t.timer, HRTIMER_MODE_REL, CLOCK_MONOTONIC); ++ t.timer._expires = timespec_to_ktime(ts); ++ ++ ret = rt_mutex_timed_lock(&sem->lock, &t, 0); ++ if (ret != -EINTR) ++ break; ++ ++ /* signal occured, but the down_timeout doesn't handle them */ ++ jiff = expires - jiffies; ++ ++ } while (jiff > 0); ++ ++ if (!ret) ++ __down_complete(sem); ++ else ++ ret = -ETIME; ++ ++ return ret; ++} ++EXPORT_SYMBOL(rt_down_timeout); ++ ++/* ++ * try to down the semaphore, 0 on success and 1 on failure. (inverted) ++ */ ++int rt_down_trylock(struct semaphore *sem) ++{ ++ /* ++ * Here we are a tiny bit different from ordinary Linux semaphores, ++ * because we can get 'transient' locking-failures when say a ++ * process decreases the count from 9 to 8 and locks/releases the ++ * embedded mutex internally. 
It would be quite complex to remove ++ * these transient failures so lets try it the simple way first: ++ */ ++ if (rt_mutex_trylock(&sem->lock)) { ++ __down_complete(sem); ++ return 0; ++ } ++ return 1; ++} ++EXPORT_SYMBOL(rt_down_trylock); ++ ++void rt_up(struct semaphore *sem) ++{ ++ int count; ++ ++ /* ++ * Disable preemption to make sure a highprio trylock-er cannot ++ * preempt us here and get into an infinite loop: ++ */ ++ preempt_disable(); ++ count = atomic_inc_return(&sem->count); ++ /* ++ * If we did the 0 -> 1 transition then we are the ones to unlock it: ++ */ ++ if (likely(count == 1)) ++ rt_mutex_unlock(&sem->lock); ++ preempt_enable(); ++} ++EXPORT_SYMBOL(rt_up); ++ ++void __sema_init(struct semaphore *sem, int val, ++ char *name, char *file, int line) ++{ ++ atomic_set(&sem->count, val); ++ switch (val) { ++ case 0: ++ __rt_mutex_init(&sem->lock, name); ++ rt_mutex_lock(&sem->lock); ++ break; ++ default: ++ __rt_mutex_init(&sem->lock, name); ++ break; ++ } ++} ++EXPORT_SYMBOL(__sema_init); ++ ++void __init_MUTEX(struct semaphore *sem, char *name, char *file, ++ int line) ++{ ++ __sema_init(sem, 1, name, file, line); ++} ++EXPORT_SYMBOL(__init_MUTEX); ++ +Index: linux-2.6-tip/kernel/rtmutex-debug.c +=================================================================== +--- linux-2.6-tip.orig/kernel/rtmutex-debug.c ++++ linux-2.6-tip/kernel/rtmutex-debug.c +@@ -16,6 +16,7 @@ + * + * See rt.c in preempt-rt for proper credits and further information + */ ++#include + #include + #include + #include +@@ -29,61 +30,6 @@ + + #include "rtmutex_common.h" + +-# define TRACE_WARN_ON(x) WARN_ON(x) +-# define TRACE_BUG_ON(x) BUG_ON(x) +- +-# define TRACE_OFF() \ +-do { \ +- if (rt_trace_on) { \ +- rt_trace_on = 0; \ +- console_verbose(); \ +- if (spin_is_locked(¤t->pi_lock)) \ +- spin_unlock(¤t->pi_lock); \ +- } \ +-} while (0) +- +-# define TRACE_OFF_NOLOCK() \ +-do { \ +- if (rt_trace_on) { \ +- rt_trace_on = 0; \ +- console_verbose(); \ +- } \ +-} while (0) +- +-# define TRACE_BUG_LOCKED() \ +-do { \ +- TRACE_OFF(); \ +- BUG(); \ +-} while (0) +- +-# define TRACE_WARN_ON_LOCKED(c) \ +-do { \ +- if (unlikely(c)) { \ +- TRACE_OFF(); \ +- WARN_ON(1); \ +- } \ +-} while (0) +- +-# define TRACE_BUG_ON_LOCKED(c) \ +-do { \ +- if (unlikely(c)) \ +- TRACE_BUG_LOCKED(); \ +-} while (0) +- +-#ifdef CONFIG_SMP +-# define SMP_TRACE_BUG_ON_LOCKED(c) TRACE_BUG_ON_LOCKED(c) +-#else +-# define SMP_TRACE_BUG_ON_LOCKED(c) do { } while (0) +-#endif +- +-/* +- * deadlock detection flag. 
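The rt.c semaphore code above hangs the whole counting semantics off one embedded rt_mutex plus an atomic counter: down() keeps the mutex locked when it performs the 1->0 transition, and up() releases it again from whichever task performs the 0->1 transition. As a rough illustration only (not kernel code and not part of this patch), the same protocol can be modelled in user space; all names below are invented, and a POSIX binary semaphore stands in for the embedded lock so the model does not trip over POSIX mutex ownership rules:

/* illustrative user-space model of the rt.c counting protocol */
#include <semaphore.h>
#include <stdatomic.h>

struct model_sem {
	atomic_int count;
	sem_t      lock;	/* binary; plays the role of sem->lock */
};

static void model_sem_init(struct model_sem *s, int val)
{
	atomic_init(&s->count, val);
	/* mirror __sema_init(): a zero-count semaphore starts "locked" */
	sem_init(&s->lock, 0, val > 0 ? 1 : 0);
}

static void model_down(struct model_sem *s)
{
	sem_wait(&s->lock);
	/* keep the lock held only if this was the 1 -> 0 transition */
	if (atomic_fetch_sub(&s->count, 1) > 1)
		sem_post(&s->lock);
}

static void model_up(struct model_sem *s)
{
	/*
	 * Whoever performs the 0 -> 1 transition releases the lock; the
	 * kernel additionally disables preemption here so a high-priority
	 * trylock loop cannot starve the waker.
	 */
	if (atomic_fetch_add(&s->count, 1) == 0)
		sem_post(&s->lock);
}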
We turn it off when we detect +- * the first problem because we dont want to recurse back +- * into the tracing code when doing error printk or +- * executing a BUG(): +- */ +-static int rt_trace_on = 1; +- + static void printk_task(struct task_struct *p) + { + if (p) +@@ -111,8 +57,8 @@ static void printk_lock(struct rt_mutex + + void rt_mutex_debug_task_free(struct task_struct *task) + { +- WARN_ON(!plist_head_empty(&task->pi_waiters)); +- WARN_ON(task->pi_blocked_on); ++ DEBUG_LOCKS_WARN_ON(!plist_head_empty(&task->pi_waiters)); ++ DEBUG_LOCKS_WARN_ON(task->pi_blocked_on); + } + + /* +@@ -125,7 +71,7 @@ void debug_rt_mutex_deadlock(int detect, + { + struct task_struct *task; + +- if (!rt_trace_on || detect || !act_waiter) ++ if (!debug_locks || detect || !act_waiter) + return; + + task = rt_mutex_owner(act_waiter->lock); +@@ -139,7 +85,7 @@ void debug_rt_mutex_print_deadlock(struc + { + struct task_struct *task; + +- if (!waiter->deadlock_lock || !rt_trace_on) ++ if (!waiter->deadlock_lock || !debug_locks) + return; + + rcu_read_lock(); +@@ -149,7 +95,8 @@ void debug_rt_mutex_print_deadlock(struc + return; + } + +- TRACE_OFF_NOLOCK(); ++ if (!debug_locks_off()) ++ return; + + printk("\n============================================\n"); + printk( "[ BUG: circular locking deadlock detected! ]\n"); +@@ -180,7 +127,6 @@ void debug_rt_mutex_print_deadlock(struc + + printk("[ turning off deadlock detection." + "Please report this trace. ]\n\n"); +- local_irq_disable(); + } + + void debug_rt_mutex_lock(struct rt_mutex *lock) +@@ -189,7 +135,8 @@ void debug_rt_mutex_lock(struct rt_mutex + + void debug_rt_mutex_unlock(struct rt_mutex *lock) + { +- TRACE_WARN_ON_LOCKED(rt_mutex_owner(lock) != current); ++ if (debug_locks) ++ DEBUG_LOCKS_WARN_ON(rt_mutex_owner(lock) != current); + } + + void +@@ -199,7 +146,7 @@ debug_rt_mutex_proxy_lock(struct rt_mute + + void debug_rt_mutex_proxy_unlock(struct rt_mutex *lock) + { +- TRACE_WARN_ON_LOCKED(!rt_mutex_owner(lock)); ++ DEBUG_LOCKS_WARN_ON(!rt_mutex_owner(lock)); + } + + void debug_rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) +@@ -213,9 +160,9 @@ void debug_rt_mutex_init_waiter(struct r + void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) + { + put_pid(waiter->deadlock_task_pid); +- TRACE_WARN_ON(!plist_node_empty(&waiter->list_entry)); +- TRACE_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); +- TRACE_WARN_ON(waiter->task); ++ DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->list_entry)); ++ DEBUG_LOCKS_WARN_ON(!plist_node_empty(&waiter->pi_list_entry)); ++ DEBUG_LOCKS_WARN_ON(waiter->task); + memset(waiter, 0x22, sizeof(*waiter)); + } + +@@ -231,9 +178,36 @@ void debug_rt_mutex_init(struct rt_mutex + void + rt_mutex_deadlock_account_lock(struct rt_mutex *lock, struct task_struct *task) + { ++#ifdef CONFIG_DEBUG_PREEMPT ++ if (atomic_read(&task->lock_count) >= MAX_LOCK_STACK) { ++ if (!debug_locks_off()) ++ return; ++ printk("BUG: %s/%d: lock count overflow!\n", ++ task->comm, task->pid); ++ dump_stack(); ++ return; ++ } ++#ifdef CONFIG_PREEMPT_RT ++ task->owned_lock[atomic_read(&task->lock_count)] = lock; ++#endif ++ atomic_inc(&task->lock_count); ++#endif + } + + void rt_mutex_deadlock_account_unlock(struct task_struct *task) + { ++#ifdef CONFIG_DEBUG_PREEMPT ++ if (!atomic_read(&task->lock_count)) { ++ if (!debug_locks_off()) ++ return; ++ printk("BUG: %s/%d: lock count underflow!\n", ++ task->comm, task->pid); ++ dump_stack(); ++ return; ++ } ++ atomic_dec(&task->lock_count); ++#ifdef CONFIG_PREEMPT_RT ++ 
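The deadlock-accounting hooks above (under CONFIG_DEBUG_PREEMPT) keep a bounded per-task count of held rt-mutexes, remember which locks are held when PREEMPT_RT is enabled, and warn on overflow or underflow. A loose user-space analogue is sketched below; MODEL_MAX_LOCK_STACK and every name are invented, and the kernel uses an atomic counter where this sketch gets away with a plain thread-local one:

/* illustrative per-thread lock bookkeeping with bounds checks */
#include <stdio.h>

#define MODEL_MAX_LOCK_STACK 48

static _Thread_local void *owned_lock[MODEL_MAX_LOCK_STACK];
static _Thread_local int lock_count;

static void account_lock(void *lock)
{
	if (lock_count >= MODEL_MAX_LOCK_STACK) {
		fprintf(stderr, "BUG: lock count overflow!\n");
		return;
	}
	owned_lock[lock_count++] = lock;	/* remember what we hold */
}

static void account_unlock(void)
{
	if (lock_count == 0) {
		fprintf(stderr, "BUG: lock count underflow!\n");
		return;
	}
	owned_lock[--lock_count] = NULL;	/* forget the most recent lock */
}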
task->owned_lock[atomic_read(&task->lock_count)] = NULL; ++#endif ++#endif + } +- +Index: linux-2.6-tip/kernel/rwsem.c +=================================================================== +--- linux-2.6-tip.orig/kernel/rwsem.c ++++ linux-2.6-tip/kernel/rwsem.c +@@ -16,7 +16,7 @@ + /* + * lock for reading + */ +-void __sched down_read(struct rw_semaphore *sem) ++void __sched compat_down_read(struct compat_rw_semaphore *sem) + { + might_sleep(); + rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); +@@ -24,12 +24,12 @@ void __sched down_read(struct rw_semapho + LOCK_CONTENDED(sem, __down_read_trylock, __down_read); + } + +-EXPORT_SYMBOL(down_read); ++EXPORT_SYMBOL(compat_down_read); + + /* + * trylock for reading -- returns 1 if successful, 0 if contention + */ +-int down_read_trylock(struct rw_semaphore *sem) ++int compat_down_read_trylock(struct compat_rw_semaphore *sem) + { + int ret = __down_read_trylock(sem); + +@@ -38,12 +38,12 @@ int down_read_trylock(struct rw_semaphor + return ret; + } + +-EXPORT_SYMBOL(down_read_trylock); ++EXPORT_SYMBOL(compat_down_read_trylock); + + /* + * lock for writing + */ +-void __sched down_write(struct rw_semaphore *sem) ++void __sched compat_down_write(struct compat_rw_semaphore *sem) + { + might_sleep(); + rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); +@@ -51,12 +51,12 @@ void __sched down_write(struct rw_semaph + LOCK_CONTENDED(sem, __down_write_trylock, __down_write); + } + +-EXPORT_SYMBOL(down_write); ++EXPORT_SYMBOL(compat_down_write); + + /* + * trylock for writing -- returns 1 if successful, 0 if contention + */ +-int down_write_trylock(struct rw_semaphore *sem) ++int compat_down_write_trylock(struct compat_rw_semaphore *sem) + { + int ret = __down_write_trylock(sem); + +@@ -65,36 +65,36 @@ int down_write_trylock(struct rw_semapho + return ret; + } + +-EXPORT_SYMBOL(down_write_trylock); ++EXPORT_SYMBOL(compat_down_write_trylock); + + /* + * release a read lock + */ +-void up_read(struct rw_semaphore *sem) ++void compat_up_read(struct compat_rw_semaphore *sem) + { + rwsem_release(&sem->dep_map, 1, _RET_IP_); + + __up_read(sem); + } + +-EXPORT_SYMBOL(up_read); ++EXPORT_SYMBOL(compat_up_read); + + /* + * release a write lock + */ +-void up_write(struct rw_semaphore *sem) ++void compat_up_write(struct compat_rw_semaphore *sem) + { + rwsem_release(&sem->dep_map, 1, _RET_IP_); + + __up_write(sem); + } + +-EXPORT_SYMBOL(up_write); ++EXPORT_SYMBOL(compat_up_write); + + /* + * downgrade write lock to read lock + */ +-void downgrade_write(struct rw_semaphore *sem) ++void compat_downgrade_write(struct compat_rw_semaphore *sem) + { + /* + * lockdep: a downgraded write will live on as a write +@@ -103,11 +103,11 @@ void downgrade_write(struct rw_semaphore + __downgrade_write(sem); + } + +-EXPORT_SYMBOL(downgrade_write); ++EXPORT_SYMBOL(compat_downgrade_write); + + #ifdef CONFIG_DEBUG_LOCK_ALLOC + +-void down_read_nested(struct rw_semaphore *sem, int subclass) ++void compat_down_read_nested(struct compat_rw_semaphore *sem, int subclass) + { + might_sleep(); + rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); +@@ -115,18 +115,18 @@ void down_read_nested(struct rw_semaphor + LOCK_CONTENDED(sem, __down_read_trylock, __down_read); + } + +-EXPORT_SYMBOL(down_read_nested); ++EXPORT_SYMBOL(compat_down_read_nested); + +-void down_read_non_owner(struct rw_semaphore *sem) ++void compat_down_read_non_owner(struct compat_rw_semaphore *sem) + { + might_sleep(); + + __down_read(sem); + } + +-EXPORT_SYMBOL(down_read_non_owner); 
++EXPORT_SYMBOL(compat_down_read_non_owner); + +-void down_write_nested(struct rw_semaphore *sem, int subclass) ++void compat_down_write_nested(struct compat_rw_semaphore *sem, int subclass) + { + might_sleep(); + rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); +@@ -134,14 +134,14 @@ void down_write_nested(struct rw_semapho + LOCK_CONTENDED(sem, __down_write_trylock, __down_write); + } + +-EXPORT_SYMBOL(down_write_nested); ++EXPORT_SYMBOL(compat_down_write_nested); + +-void up_read_non_owner(struct rw_semaphore *sem) ++void compat_up_read_non_owner(struct compat_rw_semaphore *sem) + { + __up_read(sem); + } + +-EXPORT_SYMBOL(up_read_non_owner); ++EXPORT_SYMBOL(compat_up_read_non_owner); + + #endif + +Index: linux-2.6-tip/kernel/semaphore.c +=================================================================== +--- linux-2.6-tip.orig/kernel/semaphore.c ++++ linux-2.6-tip/kernel/semaphore.c +@@ -33,11 +33,11 @@ + #include + #include + +-static noinline void __down(struct semaphore *sem); +-static noinline int __down_interruptible(struct semaphore *sem); +-static noinline int __down_killable(struct semaphore *sem); +-static noinline int __down_timeout(struct semaphore *sem, long jiffies); +-static noinline void __up(struct semaphore *sem); ++static noinline void __down(struct compat_semaphore *sem); ++static noinline int __down_interruptible(struct compat_semaphore *sem); ++static noinline int __down_killable(struct compat_semaphore *sem); ++static noinline int __down_timeout(struct compat_semaphore *sem, long jiffies); ++static noinline void __up(struct compat_semaphore *sem); + + /** + * down - acquire the semaphore +@@ -50,7 +50,7 @@ static noinline void __up(struct semapho + * Use of this function is deprecated, please use down_interruptible() or + * down_killable() instead. + */ +-void down(struct semaphore *sem) ++void compat_down(struct compat_semaphore *sem) + { + unsigned long flags; + +@@ -61,7 +61,7 @@ void down(struct semaphore *sem) + __down(sem); + spin_unlock_irqrestore(&sem->lock, flags); + } +-EXPORT_SYMBOL(down); ++EXPORT_SYMBOL(compat_down); + + /** + * down_interruptible - acquire the semaphore unless interrupted +@@ -72,7 +72,7 @@ EXPORT_SYMBOL(down); + * If the sleep is interrupted by a signal, this function will return -EINTR. + * If the semaphore is successfully acquired, this function returns 0. + */ +-int down_interruptible(struct semaphore *sem) ++int compat_down_interruptible(struct compat_semaphore *sem) + { + unsigned long flags; + int result = 0; +@@ -86,7 +86,7 @@ int down_interruptible(struct semaphore + + return result; + } +-EXPORT_SYMBOL(down_interruptible); ++EXPORT_SYMBOL(compat_down_interruptible); + + /** + * down_killable - acquire the semaphore unless killed +@@ -98,7 +98,7 @@ EXPORT_SYMBOL(down_interruptible); + * -EINTR. If the semaphore is successfully acquired, this function returns + * 0. + */ +-int down_killable(struct semaphore *sem) ++int compat_down_killable(struct compat_semaphore *sem) + { + unsigned long flags; + int result = 0; +@@ -112,7 +112,7 @@ int down_killable(struct semaphore *sem) + + return result; + } +-EXPORT_SYMBOL(down_killable); ++EXPORT_SYMBOL(compat_down_killable); + + /** + * down_trylock - try to acquire the semaphore, without waiting +@@ -127,7 +127,7 @@ EXPORT_SYMBOL(down_killable); + * Unlike mutex_trylock, this function can be used from interrupt context, + * and the semaphore can be released by any task or interrupt. 
+ */ +-int down_trylock(struct semaphore *sem) ++int compat_down_trylock(struct compat_semaphore *sem) + { + unsigned long flags; + int count; +@@ -140,7 +140,7 @@ int down_trylock(struct semaphore *sem) + + return (count < 0); + } +-EXPORT_SYMBOL(down_trylock); ++EXPORT_SYMBOL(compat_down_trylock); + + /** + * down_timeout - acquire the semaphore within a specified time +@@ -152,7 +152,7 @@ EXPORT_SYMBOL(down_trylock); + * If the semaphore is not released within the specified number of jiffies, + * this function returns -ETIME. It returns 0 if the semaphore was acquired. + */ +-int down_timeout(struct semaphore *sem, long jiffies) ++int compat_down_timeout(struct compat_semaphore *sem, long jiffies) + { + unsigned long flags; + int result = 0; +@@ -166,7 +166,7 @@ int down_timeout(struct semaphore *sem, + + return result; + } +-EXPORT_SYMBOL(down_timeout); ++EXPORT_SYMBOL(compat_down_timeout); + + /** + * up - release the semaphore +@@ -175,7 +175,7 @@ EXPORT_SYMBOL(down_timeout); + * Release the semaphore. Unlike mutexes, up() may be called from any + * context and even by tasks which have never called down(). + */ +-void up(struct semaphore *sem) ++void compat_up(struct compat_semaphore *sem) + { + unsigned long flags; + +@@ -186,7 +186,7 @@ void up(struct semaphore *sem) + __up(sem); + spin_unlock_irqrestore(&sem->lock, flags); + } +-EXPORT_SYMBOL(up); ++EXPORT_SYMBOL(compat_up); + + /* Functions for the contended case */ + +@@ -201,7 +201,7 @@ struct semaphore_waiter { + * constant, and thus optimised away by the compiler. Likewise the + * 'timeout' parameter for the cases without timeouts. + */ +-static inline int __sched __down_common(struct semaphore *sem, long state, ++static inline int __sched __down_common(struct compat_semaphore *sem, long state, + long timeout) + { + struct task_struct *task = current; +@@ -233,27 +233,27 @@ static inline int __sched __down_common( + return -EINTR; + } + +-static noinline void __sched __down(struct semaphore *sem) ++static noinline void __sched __down(struct compat_semaphore *sem) + { + __down_common(sem, TASK_UNINTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); + } + +-static noinline int __sched __down_interruptible(struct semaphore *sem) ++static noinline int __sched __down_interruptible(struct compat_semaphore *sem) + { + return __down_common(sem, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); + } + +-static noinline int __sched __down_killable(struct semaphore *sem) ++static noinline int __sched __down_killable(struct compat_semaphore *sem) + { + return __down_common(sem, TASK_KILLABLE, MAX_SCHEDULE_TIMEOUT); + } + +-static noinline int __sched __down_timeout(struct semaphore *sem, long jiffies) ++static noinline int __sched __down_timeout(struct compat_semaphore *sem, long jiffies) + { + return __down_common(sem, TASK_UNINTERRUPTIBLE, jiffies); + } + +-static noinline void __sched __up(struct semaphore *sem) ++static noinline void __sched __up(struct compat_semaphore *sem) + { + struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list, + struct semaphore_waiter, list); +Index: linux-2.6-tip/kernel/spinlock.c +=================================================================== +--- linux-2.6-tip.orig/kernel/spinlock.c ++++ linux-2.6-tip/kernel/spinlock.c +@@ -21,7 +21,7 @@ + #include + #include + +-int __lockfunc _spin_trylock(spinlock_t *lock) ++int __lockfunc __spin_trylock(raw_spinlock_t *lock) + { + preempt_disable(); + if (_raw_spin_trylock(lock)) { +@@ -32,9 +32,46 @@ int __lockfunc _spin_trylock(spinlock_t + preempt_enable(); + return 0; 
+ } +-EXPORT_SYMBOL(_spin_trylock); ++EXPORT_SYMBOL(__spin_trylock); + +-int __lockfunc _read_trylock(rwlock_t *lock) ++int __lockfunc __spin_trylock_irq(raw_spinlock_t *lock) ++{ ++ local_irq_disable(); ++ preempt_disable(); ++ ++ if (_raw_spin_trylock(lock)) { ++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ return 1; ++ } ++ ++ __preempt_enable_no_resched(); ++ local_irq_enable(); ++ preempt_check_resched(); ++ ++ return 0; ++} ++EXPORT_SYMBOL(__spin_trylock_irq); ++ ++int __lockfunc __spin_trylock_irqsave(raw_spinlock_t *lock, ++ unsigned long *flags) ++{ ++ local_irq_save(*flags); ++ preempt_disable(); ++ ++ if (_raw_spin_trylock(lock)) { ++ spin_acquire(&lock->dep_map, 0, 1, _RET_IP_); ++ return 1; ++ } ++ ++ __preempt_enable_no_resched(); ++ local_irq_restore(*flags); ++ preempt_check_resched(); ++ ++ return 0; ++} ++EXPORT_SYMBOL(__spin_trylock_irqsave); ++ ++int __lockfunc __read_trylock(raw_rwlock_t *lock) + { + preempt_disable(); + if (_raw_read_trylock(lock)) { +@@ -45,9 +82,9 @@ int __lockfunc _read_trylock(rwlock_t *l + preempt_enable(); + return 0; + } +-EXPORT_SYMBOL(_read_trylock); ++EXPORT_SYMBOL(__read_trylock); + +-int __lockfunc _write_trylock(rwlock_t *lock) ++int __lockfunc __write_trylock(raw_rwlock_t *lock) + { + preempt_disable(); + if (_raw_write_trylock(lock)) { +@@ -58,7 +95,21 @@ int __lockfunc _write_trylock(rwlock_t * + preempt_enable(); + return 0; + } +-EXPORT_SYMBOL(_write_trylock); ++EXPORT_SYMBOL(__write_trylock); ++ ++int __lockfunc __write_trylock_irqsave(raw_rwlock_t *lock, unsigned long *flags) ++{ ++ int ret; ++ ++ local_irq_save(*flags); ++ ret = __write_trylock(lock); ++ if (ret) ++ return ret; ++ ++ local_irq_restore(*flags); ++ return 0; ++} ++EXPORT_SYMBOL(__write_trylock_irqsave); + + /* + * If lockdep is enabled then we use the non-preemption spin-ops +@@ -67,15 +118,15 @@ EXPORT_SYMBOL(_write_trylock); + */ + #if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) + +-void __lockfunc _read_lock(rwlock_t *lock) ++void __lockfunc __read_lock(raw_rwlock_t *lock) + { + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); + } +-EXPORT_SYMBOL(_read_lock); ++EXPORT_SYMBOL(__read_lock); + +-unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock) ++unsigned long __lockfunc __spin_lock_irqsave(raw_spinlock_t *lock) + { + unsigned long flags; + +@@ -94,27 +145,27 @@ unsigned long __lockfunc _spin_lock_irqs + #endif + return flags; + } +-EXPORT_SYMBOL(_spin_lock_irqsave); ++EXPORT_SYMBOL(__spin_lock_irqsave); + +-void __lockfunc _spin_lock_irq(spinlock_t *lock) ++void __lockfunc __spin_lock_irq(raw_spinlock_t *lock) + { + local_irq_disable(); + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); + } +-EXPORT_SYMBOL(_spin_lock_irq); ++EXPORT_SYMBOL(__spin_lock_irq); + +-void __lockfunc _spin_lock_bh(spinlock_t *lock) ++void __lockfunc __spin_lock_bh(raw_spinlock_t *lock) + { + local_bh_disable(); + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); + } +-EXPORT_SYMBOL(_spin_lock_bh); ++EXPORT_SYMBOL(__spin_lock_bh); + +-unsigned long __lockfunc _read_lock_irqsave(rwlock_t *lock) ++unsigned long __lockfunc __read_lock_irqsave(raw_rwlock_t *lock) + { + unsigned long flags; + +@@ -124,27 +175,27 @@ unsigned long __lockfunc _read_lock_irqs + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); + 
return flags; + } +-EXPORT_SYMBOL(_read_lock_irqsave); ++EXPORT_SYMBOL(__read_lock_irqsave); + +-void __lockfunc _read_lock_irq(rwlock_t *lock) ++void __lockfunc __read_lock_irq(raw_rwlock_t *lock) + { + local_irq_disable(); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); + } +-EXPORT_SYMBOL(_read_lock_irq); ++EXPORT_SYMBOL(__read_lock_irq); + +-void __lockfunc _read_lock_bh(rwlock_t *lock) ++void __lockfunc __read_lock_bh(raw_rwlock_t *lock) + { + local_bh_disable(); + preempt_disable(); + rwlock_acquire_read(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_read_trylock, _raw_read_lock); + } +-EXPORT_SYMBOL(_read_lock_bh); ++EXPORT_SYMBOL(__read_lock_bh); + +-unsigned long __lockfunc _write_lock_irqsave(rwlock_t *lock) ++unsigned long __lockfunc __write_lock_irqsave(raw_rwlock_t *lock) + { + unsigned long flags; + +@@ -154,43 +205,43 @@ unsigned long __lockfunc _write_lock_irq + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); + return flags; + } +-EXPORT_SYMBOL(_write_lock_irqsave); ++EXPORT_SYMBOL(__write_lock_irqsave); + +-void __lockfunc _write_lock_irq(rwlock_t *lock) ++void __lockfunc __write_lock_irq(raw_rwlock_t *lock) + { + local_irq_disable(); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); + } +-EXPORT_SYMBOL(_write_lock_irq); ++EXPORT_SYMBOL(__write_lock_irq); + +-void __lockfunc _write_lock_bh(rwlock_t *lock) ++void __lockfunc __write_lock_bh(raw_rwlock_t *lock) + { + local_bh_disable(); + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); + } +-EXPORT_SYMBOL(_write_lock_bh); ++EXPORT_SYMBOL(__write_lock_bh); + +-void __lockfunc _spin_lock(spinlock_t *lock) ++void __lockfunc __spin_lock(raw_spinlock_t *lock) + { + preempt_disable(); + spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); + } + +-EXPORT_SYMBOL(_spin_lock); ++EXPORT_SYMBOL(__spin_lock); + +-void __lockfunc _write_lock(rwlock_t *lock) ++void __lockfunc __write_lock(raw_rwlock_t *lock) + { + preempt_disable(); + rwlock_acquire(&lock->dep_map, 0, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_write_trylock, _raw_write_lock); + } + +-EXPORT_SYMBOL(_write_lock); ++EXPORT_SYMBOL(__write_lock); + + #else /* CONFIG_PREEMPT: */ + +@@ -203,7 +254,7 @@ EXPORT_SYMBOL(_write_lock); + */ + + #define BUILD_LOCK_OPS(op, locktype) \ +-void __lockfunc _##op##_lock(locktype##_t *lock) \ ++void __lockfunc __##op##_lock(locktype##_t *lock) \ + { \ + for (;;) { \ + preempt_disable(); \ +@@ -213,15 +264,16 @@ void __lockfunc _##op##_lock(locktype##_ + \ + if (!(lock)->break_lock) \ + (lock)->break_lock = 1; \ +- while (!op##_can_lock(lock) && (lock)->break_lock) \ +- _raw_##op##_relax(&lock->raw_lock); \ ++ while (!__raw_##op##_can_lock(&(lock)->raw_lock) && \ ++ (lock)->break_lock) \ ++ __raw_##op##_relax(&lock->raw_lock); \ + } \ + (lock)->break_lock = 0; \ + } \ + \ +-EXPORT_SYMBOL(_##op##_lock); \ ++EXPORT_SYMBOL(__##op##_lock); \ + \ +-unsigned long __lockfunc _##op##_lock_irqsave(locktype##_t *lock) \ ++unsigned long __lockfunc __##op##_lock_irqsave(locktype##_t *lock) \ + { \ + unsigned long flags; \ + \ +@@ -235,23 +287,24 @@ unsigned long __lockfunc _##op##_lock_ir + \ + if (!(lock)->break_lock) \ + (lock)->break_lock = 1; \ +- while (!op##_can_lock(lock) && (lock)->break_lock) \ +- 
_raw_##op##_relax(&lock->raw_lock); \ ++ while (!__raw_##op##_can_lock(&(lock)->raw_lock) && \ ++ (lock)->break_lock) \ ++ __raw_##op##_relax(&lock->raw_lock); \ + } \ + (lock)->break_lock = 0; \ + return flags; \ + } \ + \ +-EXPORT_SYMBOL(_##op##_lock_irqsave); \ ++EXPORT_SYMBOL(__##op##_lock_irqsave); \ + \ +-void __lockfunc _##op##_lock_irq(locktype##_t *lock) \ ++void __lockfunc __##op##_lock_irq(locktype##_t *lock) \ + { \ +- _##op##_lock_irqsave(lock); \ ++ __##op##_lock_irqsave(lock); \ + } \ + \ +-EXPORT_SYMBOL(_##op##_lock_irq); \ ++EXPORT_SYMBOL(__##op##_lock_irq); \ + \ +-void __lockfunc _##op##_lock_bh(locktype##_t *lock) \ ++void __lockfunc __##op##_lock_bh(locktype##_t *lock) \ + { \ + unsigned long flags; \ + \ +@@ -260,39 +313,48 @@ void __lockfunc _##op##_lock_bh(locktype + /* irq-disabling. We use the generic preemption-aware */ \ + /* function: */ \ + /**/ \ +- flags = _##op##_lock_irqsave(lock); \ ++ flags = __##op##_lock_irqsave(lock); \ + local_bh_disable(); \ + local_irq_restore(flags); \ + } \ + \ +-EXPORT_SYMBOL(_##op##_lock_bh) ++EXPORT_SYMBOL(__##op##_lock_bh) + + /* + * Build preemption-friendly versions of the following + * lock-spinning functions: + * +- * _[spin|read|write]_lock() +- * _[spin|read|write]_lock_irq() +- * _[spin|read|write]_lock_irqsave() +- * _[spin|read|write]_lock_bh() ++ * __[spin|read|write]_lock() ++ * __[spin|read|write]_lock_irq() ++ * __[spin|read|write]_lock_irqsave() ++ * __[spin|read|write]_lock_bh() + */ +-BUILD_LOCK_OPS(spin, spinlock); +-BUILD_LOCK_OPS(read, rwlock); +-BUILD_LOCK_OPS(write, rwlock); ++BUILD_LOCK_OPS(spin, raw_spinlock); ++BUILD_LOCK_OPS(read, raw_rwlock); ++BUILD_LOCK_OPS(write, raw_rwlock); + + #endif /* CONFIG_PREEMPT */ + + #ifdef CONFIG_DEBUG_LOCK_ALLOC + +-void __lockfunc _spin_lock_nested(spinlock_t *lock, int subclass) ++void __lockfunc __spin_lock_nested(raw_spinlock_t *lock, int subclass) + { + preempt_disable(); + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); + } +-EXPORT_SYMBOL(_spin_lock_nested); ++EXPORT_SYMBOL(__spin_lock_nested); ++ ++void __lockfunc __spin_lock_nest_lock(raw_spinlock_t *lock, ++ struct lockdep_map *nest_lock) ++{ ++ preempt_disable(); ++ spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); ++ LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); ++} ++EXPORT_SYMBOL(__spin_lock_nest_lock); + +-unsigned long __lockfunc _spin_lock_irqsave_nested(spinlock_t *lock, int subclass) ++unsigned long __lockfunc __spin_lock_irqsave_nested(raw_spinlock_t *lock, int subclass) + { + unsigned long flags; + +@@ -311,125 +373,130 @@ unsigned long __lockfunc _spin_lock_irqs + #endif + return flags; + } +-EXPORT_SYMBOL(_spin_lock_irqsave_nested); +- +-void __lockfunc _spin_lock_nest_lock(spinlock_t *lock, +- struct lockdep_map *nest_lock) +-{ +- preempt_disable(); +- spin_acquire_nest(&lock->dep_map, 0, 0, nest_lock, _RET_IP_); +- LOCK_CONTENDED(lock, _raw_spin_trylock, _raw_spin_lock); +-} +-EXPORT_SYMBOL(_spin_lock_nest_lock); ++EXPORT_SYMBOL(__spin_lock_irqsave_nested); + + #endif + +-void __lockfunc _spin_unlock(spinlock_t *lock) ++void __lockfunc __spin_unlock(raw_spinlock_t *lock) + { + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); + preempt_enable(); + } +-EXPORT_SYMBOL(_spin_unlock); ++EXPORT_SYMBOL(__spin_unlock); + +-void __lockfunc _write_unlock(rwlock_t *lock) ++void __lockfunc __spin_unlock_no_resched(raw_spinlock_t *lock) ++{ ++ spin_release(&lock->dep_map, 1, _RET_IP_); ++ 
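BUILD_LOCK_OPS above uses preprocessor token pasting to stamp out one lock-spinning implementation per operation (spin, read, write) and per lock type. A minimal standalone illustration of that technique, with invented names, is:

/* generate a family of functions from one template via ## pasting */
#include <stdio.h>

#define BUILD_REPORT_OP(op)					\
static void report_##op(const char *name)			\
{								\
	printf("%s: generated %s operation\n", name, #op);	\
}

BUILD_REPORT_OP(read)
BUILD_REPORT_OP(write)

int main(void)
{
	report_read("inode");	/* expanded from BUILD_REPORT_OP(read) */
	report_write("inode");
	return 0;
}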
_raw_spin_unlock(lock); ++ __preempt_enable_no_resched(); ++} ++/* not exported */ ++ ++void __lockfunc __write_unlock(raw_rwlock_t *lock) + { + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); + preempt_enable(); + } +-EXPORT_SYMBOL(_write_unlock); ++EXPORT_SYMBOL(__write_unlock); + +-void __lockfunc _read_unlock(rwlock_t *lock) ++void __lockfunc __read_unlock(raw_rwlock_t *lock) + { + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); + preempt_enable(); + } +-EXPORT_SYMBOL(_read_unlock); ++EXPORT_SYMBOL(__read_unlock); + +-void __lockfunc _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags) ++void __lockfunc __spin_unlock_irqrestore(raw_spinlock_t *lock, unsigned long flags) + { + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); ++ __preempt_enable_no_resched(); + local_irq_restore(flags); +- preempt_enable(); ++ preempt_check_resched(); + } +-EXPORT_SYMBOL(_spin_unlock_irqrestore); ++EXPORT_SYMBOL(__spin_unlock_irqrestore); + +-void __lockfunc _spin_unlock_irq(spinlock_t *lock) ++void __lockfunc __spin_unlock_irq(raw_spinlock_t *lock) + { + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); ++ __preempt_enable_no_resched(); + local_irq_enable(); +- preempt_enable(); ++ preempt_check_resched(); + } +-EXPORT_SYMBOL(_spin_unlock_irq); ++EXPORT_SYMBOL(__spin_unlock_irq); + +-void __lockfunc _spin_unlock_bh(spinlock_t *lock) ++void __lockfunc __spin_unlock_bh(raw_spinlock_t *lock) + { + spin_release(&lock->dep_map, 1, _RET_IP_); + _raw_spin_unlock(lock); +- preempt_enable_no_resched(); ++ __preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + } +-EXPORT_SYMBOL(_spin_unlock_bh); ++EXPORT_SYMBOL(__spin_unlock_bh); + +-void __lockfunc _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags) ++void __lockfunc __read_unlock_irqrestore(raw_rwlock_t *lock, unsigned long flags) + { + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); ++ __preempt_enable_no_resched(); + local_irq_restore(flags); +- preempt_enable(); ++ preempt_check_resched(); + } +-EXPORT_SYMBOL(_read_unlock_irqrestore); ++EXPORT_SYMBOL(__read_unlock_irqrestore); + +-void __lockfunc _read_unlock_irq(rwlock_t *lock) ++void __lockfunc __read_unlock_irq(raw_rwlock_t *lock) + { + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); ++ __preempt_enable_no_resched(); + local_irq_enable(); +- preempt_enable(); ++ preempt_check_resched(); + } +-EXPORT_SYMBOL(_read_unlock_irq); ++EXPORT_SYMBOL(__read_unlock_irq); + +-void __lockfunc _read_unlock_bh(rwlock_t *lock) ++void __lockfunc __read_unlock_bh(raw_rwlock_t *lock) + { + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_read_unlock(lock); +- preempt_enable_no_resched(); ++ __preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + } +-EXPORT_SYMBOL(_read_unlock_bh); ++EXPORT_SYMBOL(__read_unlock_bh); + +-void __lockfunc _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) ++void __lockfunc __write_unlock_irqrestore(raw_rwlock_t *lock, unsigned long flags) + { + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); ++ __preempt_enable_no_resched(); + local_irq_restore(flags); +- preempt_enable(); ++ preempt_check_resched(); + } +-EXPORT_SYMBOL(_write_unlock_irqrestore); ++EXPORT_SYMBOL(__write_unlock_irqrestore); + +-void __lockfunc _write_unlock_irq(rwlock_t *lock) ++void __lockfunc __write_unlock_irq(raw_rwlock_t *lock) + { + 
rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); ++ __preempt_enable_no_resched(); + local_irq_enable(); +- preempt_enable(); ++ preempt_check_resched(); + } +-EXPORT_SYMBOL(_write_unlock_irq); ++EXPORT_SYMBOL(__write_unlock_irq); + +-void __lockfunc _write_unlock_bh(rwlock_t *lock) ++void __lockfunc __write_unlock_bh(raw_rwlock_t *lock) + { + rwlock_release(&lock->dep_map, 1, _RET_IP_); + _raw_write_unlock(lock); +- preempt_enable_no_resched(); ++ __preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + } +-EXPORT_SYMBOL(_write_unlock_bh); ++EXPORT_SYMBOL(__write_unlock_bh); + +-int __lockfunc _spin_trylock_bh(spinlock_t *lock) ++int __lockfunc __spin_trylock_bh(raw_spinlock_t *lock) + { + local_bh_disable(); + preempt_disable(); +@@ -438,11 +505,11 @@ int __lockfunc _spin_trylock_bh(spinlock + return 1; + } + +- preempt_enable_no_resched(); ++ __preempt_enable_no_resched(); + local_bh_enable_ip((unsigned long)__builtin_return_address(0)); + return 0; + } +-EXPORT_SYMBOL(_spin_trylock_bh); ++EXPORT_SYMBOL(__spin_trylock_bh); + + notrace int in_lock_functions(unsigned long addr) + { +@@ -450,6 +517,17 @@ notrace int in_lock_functions(unsigned l + extern char __lock_text_start[], __lock_text_end[]; + + return addr >= (unsigned long)__lock_text_start +- && addr < (unsigned long)__lock_text_end; ++ && addr < (unsigned long)__lock_text_end; + } + EXPORT_SYMBOL(in_lock_functions); ++ ++void notrace __debug_atomic_dec_and_test(atomic_t *v) ++{ ++ static int warn_once = 1; ++ ++ if (!atomic_read(v) && warn_once) { ++ warn_once = 0; ++ printk("BUG: atomic counter underflow!\n"); ++ WARN_ON(1); ++ } ++} +Index: linux-2.6-tip/lib/dec_and_lock.c +=================================================================== +--- linux-2.6-tip.orig/lib/dec_and_lock.c ++++ linux-2.6-tip/lib/dec_and_lock.c +@@ -17,7 +17,7 @@ + * because the spin-lock and the decrement must be + * "atomic". + */ +-int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) ++int __atomic_dec_and_spin_lock(raw_spinlock_t *lock, atomic_t *atomic) + { + #ifdef CONFIG_SMP + /* Subtract 1 from counter unless that drops it to 0 (ie. 
it was 1) */ +@@ -32,4 +32,4 @@ int _atomic_dec_and_lock(atomic_t *atomi + return 0; + } + +-EXPORT_SYMBOL(_atomic_dec_and_lock); ++EXPORT_SYMBOL(__atomic_dec_and_spin_lock); +Index: linux-2.6-tip/lib/plist.c +=================================================================== +--- linux-2.6-tip.orig/lib/plist.c ++++ linux-2.6-tip/lib/plist.c +@@ -54,7 +54,9 @@ static void plist_check_list(struct list + + static void plist_check_head(struct plist_head *head) + { ++#ifndef CONFIG_PREEMPT_RT + WARN_ON(!head->lock); ++#endif + if (head->lock) + WARN_ON_SMP(!spin_is_locked(head->lock)); + plist_check_list(&head->prio_list); +Index: linux-2.6-tip/lib/rwsem-spinlock.c +=================================================================== +--- linux-2.6-tip.orig/lib/rwsem-spinlock.c ++++ linux-2.6-tip/lib/rwsem-spinlock.c +@@ -20,7 +20,7 @@ struct rwsem_waiter { + /* + * initialise the semaphore + */ +-void __init_rwsem(struct rw_semaphore *sem, const char *name, ++void __compat_init_rwsem(struct compat_rw_semaphore *sem, const char *name, + struct lock_class_key *key) + { + #ifdef CONFIG_DEBUG_LOCK_ALLOC +@@ -44,8 +44,8 @@ void __init_rwsem(struct rw_semaphore *s + * - woken process blocks are discarded from the list after having task zeroed + * - writers are only woken if wakewrite is non-zero + */ +-static inline struct rw_semaphore * +-__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) ++static inline struct compat_rw_semaphore * ++__rwsem_do_wake(struct compat_rw_semaphore *sem, int wakewrite) + { + struct rwsem_waiter *waiter; + struct task_struct *tsk; +@@ -103,8 +103,8 @@ __rwsem_do_wake(struct rw_semaphore *sem + /* + * wake a single writer + */ +-static inline struct rw_semaphore * +-__rwsem_wake_one_writer(struct rw_semaphore *sem) ++static inline struct compat_rw_semaphore * ++__rwsem_wake_one_writer(struct compat_rw_semaphore *sem) + { + struct rwsem_waiter *waiter; + struct task_struct *tsk; +@@ -125,7 +125,7 @@ __rwsem_wake_one_writer(struct rw_semaph + /* + * get a read lock on the semaphore + */ +-void __sched __down_read(struct rw_semaphore *sem) ++void __sched __down_read(struct compat_rw_semaphore *sem) + { + struct rwsem_waiter waiter; + struct task_struct *tsk; +@@ -168,7 +168,7 @@ void __sched __down_read(struct rw_semap + /* + * trylock for reading -- returns 1 if successful, 0 if contention + */ +-int __down_read_trylock(struct rw_semaphore *sem) ++int __down_read_trylock(struct compat_rw_semaphore *sem) + { + unsigned long flags; + int ret = 0; +@@ -191,7 +191,8 @@ int __down_read_trylock(struct rw_semaph + * get a write lock on the semaphore + * - we increment the waiting count anyway to indicate an exclusive lock + */ +-void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) ++void __sched ++__down_write_nested(struct compat_rw_semaphore *sem, int subclass) + { + struct rwsem_waiter waiter; + struct task_struct *tsk; +@@ -231,7 +232,7 @@ void __sched __down_write_nested(struct + ; + } + +-void __sched __down_write(struct rw_semaphore *sem) ++void __sched __down_write(struct compat_rw_semaphore *sem) + { + __down_write_nested(sem, 0); + } +@@ -239,7 +240,7 @@ void __sched __down_write(struct rw_sema + /* + * trylock for writing -- returns 1 if successful, 0 if contention + */ +-int __down_write_trylock(struct rw_semaphore *sem) ++int __down_write_trylock(struct compat_rw_semaphore *sem) + { + unsigned long flags; + int ret = 0; +@@ -260,7 +261,7 @@ int __down_write_trylock(struct rw_semap + /* + * release a read lock on the semaphore + */ +-void 
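The lib/dec_and_lock.c hunk above only renames the function; the dec-and-lock idea itself is unchanged: decrement without the lock whenever the counter cannot reach zero, and take the lock only to serialise the final 1 -> 0 drop. An illustrative user-space sketch of that pattern (invented names, not part of the patch):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/* decrement *count unless it currently equals "unless"; true if decremented */
static bool dec_unless(atomic_int *count, int unless)
{
	int old = atomic_load(count);

	while (old != unless) {
		/* on failure, old is refreshed and we retry */
		if (atomic_compare_exchange_weak(count, &old, old - 1))
			return true;
	}
	return false;
}

/* returns true with the lock held iff the count dropped to zero */
static bool dec_and_lock(atomic_int *count, pthread_mutex_t *lock)
{
	if (dec_unless(count, 1))	/* fast path: zero is not reachable */
		return false;

	pthread_mutex_lock(lock);	/* slow path: serialise the 1 -> 0 drop */
	if (atomic_fetch_sub(count, 1) == 1)
		return true;
	pthread_mutex_unlock(lock);
	return false;
}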
__up_read(struct rw_semaphore *sem) ++void __up_read(struct compat_rw_semaphore *sem) + { + unsigned long flags; + +@@ -275,7 +276,7 @@ void __up_read(struct rw_semaphore *sem) + /* + * release a write lock on the semaphore + */ +-void __up_write(struct rw_semaphore *sem) ++void __up_write(struct compat_rw_semaphore *sem) + { + unsigned long flags; + +@@ -292,7 +293,7 @@ void __up_write(struct rw_semaphore *sem + * downgrade a write lock into a read lock + * - just wake up any readers at the front of the queue + */ +-void __downgrade_write(struct rw_semaphore *sem) ++void __downgrade_write(struct compat_rw_semaphore *sem) + { + unsigned long flags; + +@@ -305,7 +306,7 @@ void __downgrade_write(struct rw_semapho + spin_unlock_irqrestore(&sem->wait_lock, flags); + } + +-EXPORT_SYMBOL(__init_rwsem); ++EXPORT_SYMBOL(__compat_init_rwsem); + EXPORT_SYMBOL(__down_read); + EXPORT_SYMBOL(__down_read_trylock); + EXPORT_SYMBOL(__down_write_nested); +Index: linux-2.6-tip/lib/rwsem.c +=================================================================== +--- linux-2.6-tip.orig/lib/rwsem.c ++++ linux-2.6-tip/lib/rwsem.c +@@ -11,8 +11,8 @@ + /* + * Initialize an rwsem: + */ +-void __init_rwsem(struct rw_semaphore *sem, const char *name, +- struct lock_class_key *key) ++void __compat_init_rwsem(struct rw_semaphore *sem, const char *name, ++ struct lock_class_key *key) + { + #ifdef CONFIG_DEBUG_LOCK_ALLOC + /* +@@ -26,7 +26,7 @@ void __init_rwsem(struct rw_semaphore *s + INIT_LIST_HEAD(&sem->wait_list); + } + +-EXPORT_SYMBOL(__init_rwsem); ++EXPORT_SYMBOL(__compat_init_rwsem); + + struct rwsem_waiter { + struct list_head list; +Index: linux-2.6-tip/lib/spinlock_debug.c +=================================================================== +--- linux-2.6-tip.orig/lib/spinlock_debug.c ++++ linux-2.6-tip/lib/spinlock_debug.c +@@ -13,8 +13,8 @@ + #include + #include + +-void __spin_lock_init(spinlock_t *lock, const char *name, +- struct lock_class_key *key) ++void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name, ++ struct lock_class_key *key) + { + #ifdef CONFIG_DEBUG_LOCK_ALLOC + /* +@@ -23,16 +23,16 @@ void __spin_lock_init(spinlock_t *lock, + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lockdep_init_map(&lock->dep_map, name, key, 0); + #endif +- lock->raw_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; ++ lock->raw_lock = (__raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; + lock->magic = SPINLOCK_MAGIC; + lock->owner = SPINLOCK_OWNER_INIT; + lock->owner_cpu = -1; + } + +-EXPORT_SYMBOL(__spin_lock_init); ++EXPORT_SYMBOL(__raw_spin_lock_init); + +-void __rwlock_init(rwlock_t *lock, const char *name, +- struct lock_class_key *key) ++void __raw_rwlock_init(raw_rwlock_t *lock, const char *name, ++ struct lock_class_key *key) + { + #ifdef CONFIG_DEBUG_LOCK_ALLOC + /* +@@ -41,15 +41,15 @@ void __rwlock_init(rwlock_t *lock, const + debug_check_no_locks_freed((void *)lock, sizeof(*lock)); + lockdep_init_map(&lock->dep_map, name, key, 0); + #endif +- lock->raw_lock = (raw_rwlock_t) __RAW_RW_LOCK_UNLOCKED; ++ lock->raw_lock = (__raw_rwlock_t) __RAW_RW_LOCK_UNLOCKED; + lock->magic = RWLOCK_MAGIC; + lock->owner = SPINLOCK_OWNER_INIT; + lock->owner_cpu = -1; + } + +-EXPORT_SYMBOL(__rwlock_init); ++EXPORT_SYMBOL(__raw_rwlock_init); + +-static void spin_bug(spinlock_t *lock, const char *msg) ++static void spin_bug(raw_spinlock_t *lock, const char *msg) + { + struct task_struct *owner = NULL; + +@@ -73,7 +73,7 @@ static void spin_bug(spinlock_t *lock, c + #define SPIN_BUG_ON(cond, lock, msg) if 
(unlikely(cond)) spin_bug(lock, msg) + + static inline void +-debug_spin_lock_before(spinlock_t *lock) ++debug_spin_lock_before(raw_spinlock_t *lock) + { + SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); + SPIN_BUG_ON(lock->owner == current, lock, "recursion"); +@@ -81,13 +81,13 @@ debug_spin_lock_before(spinlock_t *lock) + lock, "cpu recursion"); + } + +-static inline void debug_spin_lock_after(spinlock_t *lock) ++static inline void debug_spin_lock_after(raw_spinlock_t *lock) + { + lock->owner_cpu = raw_smp_processor_id(); + lock->owner = current; + } + +-static inline void debug_spin_unlock(spinlock_t *lock) ++static inline void debug_spin_unlock(raw_spinlock_t *lock) + { + SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic"); + SPIN_BUG_ON(!spin_is_locked(lock), lock, "already unlocked"); +@@ -98,7 +98,7 @@ static inline void debug_spin_unlock(spi + lock->owner_cpu = -1; + } + +-static void __spin_lock_debug(spinlock_t *lock) ++static void __spin_lock_debug(raw_spinlock_t *lock) + { + u64 i; + u64 loops = loops_per_jiffy * HZ; +@@ -125,7 +125,7 @@ static void __spin_lock_debug(spinlock_t + } + } + +-void _raw_spin_lock(spinlock_t *lock) ++void __lockfunc _raw_spin_lock(raw_spinlock_t *lock) + { + debug_spin_lock_before(lock); + if (unlikely(!__raw_spin_trylock(&lock->raw_lock))) +@@ -133,7 +133,7 @@ void _raw_spin_lock(spinlock_t *lock) + debug_spin_lock_after(lock); + } + +-int _raw_spin_trylock(spinlock_t *lock) ++int __lockfunc _raw_spin_trylock(raw_spinlock_t *lock) + { + int ret = __raw_spin_trylock(&lock->raw_lock); + +@@ -148,13 +148,13 @@ int _raw_spin_trylock(spinlock_t *lock) + return ret; + } + +-void _raw_spin_unlock(spinlock_t *lock) ++void __lockfunc _raw_spin_unlock(raw_spinlock_t *lock) + { + debug_spin_unlock(lock); + __raw_spin_unlock(&lock->raw_lock); + } + +-static void rwlock_bug(rwlock_t *lock, const char *msg) ++static void rwlock_bug(raw_rwlock_t *lock, const char *msg) + { + if (!debug_locks_off()) + return; +@@ -167,8 +167,8 @@ static void rwlock_bug(rwlock_t *lock, c + + #define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg) + +-#if 0 /* __write_lock_debug() can lock up - maybe this can too? */ +-static void __read_lock_debug(rwlock_t *lock) ++#if 1 /* __write_lock_debug() can lock up - maybe this can too? 
*/ ++static void __raw_read_lock_debug(raw_rwlock_t *lock) + { + u64 i; + u64 loops = loops_per_jiffy * HZ; +@@ -193,13 +193,13 @@ static void __read_lock_debug(rwlock_t * + } + #endif + +-void _raw_read_lock(rwlock_t *lock) ++void __lockfunc _raw_read_lock(raw_rwlock_t *lock) + { + RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); +- __raw_read_lock(&lock->raw_lock); ++ __raw_read_lock_debug(lock); + } + +-int _raw_read_trylock(rwlock_t *lock) ++int __lockfunc _raw_read_trylock(raw_rwlock_t *lock) + { + int ret = __raw_read_trylock(&lock->raw_lock); + +@@ -212,13 +212,13 @@ int _raw_read_trylock(rwlock_t *lock) + return ret; + } + +-void _raw_read_unlock(rwlock_t *lock) ++void __lockfunc _raw_read_unlock(raw_rwlock_t *lock) + { + RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); + __raw_read_unlock(&lock->raw_lock); + } + +-static inline void debug_write_lock_before(rwlock_t *lock) ++static inline void debug_write_lock_before(raw_rwlock_t *lock) + { + RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); + RWLOCK_BUG_ON(lock->owner == current, lock, "recursion"); +@@ -226,13 +226,13 @@ static inline void debug_write_lock_befo + lock, "cpu recursion"); + } + +-static inline void debug_write_lock_after(rwlock_t *lock) ++static inline void debug_write_lock_after(raw_rwlock_t *lock) + { + lock->owner_cpu = raw_smp_processor_id(); + lock->owner = current; + } + +-static inline void debug_write_unlock(rwlock_t *lock) ++static inline void debug_write_unlock(raw_rwlock_t *lock) + { + RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic"); + RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner"); +@@ -242,8 +242,8 @@ static inline void debug_write_unlock(rw + lock->owner_cpu = -1; + } + +-#if 0 /* This can cause lockups */ +-static void __write_lock_debug(rwlock_t *lock) ++#if 1 /* This can cause lockups */ ++static void __raw_write_lock_debug(raw_rwlock_t *lock) + { + u64 i; + u64 loops = loops_per_jiffy * HZ; +@@ -268,14 +268,14 @@ static void __write_lock_debug(rwlock_t + } + #endif + +-void _raw_write_lock(rwlock_t *lock) ++void __lockfunc _raw_write_lock(raw_rwlock_t *lock) + { + debug_write_lock_before(lock); +- __raw_write_lock(&lock->raw_lock); ++ __raw_write_lock_debug(lock); + debug_write_lock_after(lock); + } + +-int _raw_write_trylock(rwlock_t *lock) ++int __lockfunc _raw_write_trylock(raw_rwlock_t *lock) + { + int ret = __raw_write_trylock(&lock->raw_lock); + +@@ -290,7 +290,7 @@ int _raw_write_trylock(rwlock_t *lock) + return ret; + } + +-void _raw_write_unlock(rwlock_t *lock) ++void __lockfunc _raw_write_unlock(raw_rwlock_t *lock) + { + debug_write_unlock(lock); + __raw_write_unlock(&lock->raw_lock); +Index: linux-2.6-tip/drivers/media/dvb/dvb-core/dvb_frontend.c +=================================================================== +--- linux-2.6-tip.orig/drivers/media/dvb/dvb-core/dvb_frontend.c ++++ linux-2.6-tip/drivers/media/dvb/dvb-core/dvb_frontend.c +@@ -101,7 +101,7 @@ struct dvb_frontend_private { + struct dvb_device *dvbdev; + struct dvb_frontend_parameters parameters; + struct dvb_fe_events events; +- struct semaphore sem; ++ struct compat_semaphore sem; + struct list_head list_head; + wait_queue_head_t wait_queue; + struct task_struct *thread; +Index: linux-2.6-tip/drivers/net/3c527.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/3c527.c ++++ linux-2.6-tip/drivers/net/3c527.c +@@ -181,7 +181,7 @@ struct mc32_local + + u16 rx_ring_tail; /* index to rx de-queue end */ + 
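The debug lock paths above (now compiled in via #if 1) spin on the raw trylock and complain when the lock does not come free within a bounded number of iterations. A rough user-space rendition of that loop, with an arbitrary bound standing in for loops_per_jiffy * HZ and invented names:

#include <pthread.h>
#include <sched.h>
#include <stdio.h>

#define MODEL_LOCKUP_LOOPS (1UL << 24)	/* arbitrary; not loops_per_jiffy * HZ */

static void debug_lock_with_warning(pthread_mutex_t *lock)
{
	unsigned long i;
	int printed = 0;

	for (;;) {
		for (i = 0; i < MODEL_LOCKUP_LOOPS; i++) {
			if (pthread_mutex_trylock(lock) == 0)
				return;		/* got it */
			sched_yield();		/* rough cpu_relax() stand-in */
		}
		if (!printed) {
			fprintf(stderr, "BUG: lock held for too long, still waiting\n");
			printed = 1;
		}
	}
}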
+- struct semaphore cmd_mutex; /* Serialises issuing of execute commands */ ++ struct compat_semaphore cmd_mutex; /* Serialises issuing of execute commands */ + struct completion execution_cmd; /* Card has completed an execute command */ + struct completion xceiver_cmd; /* Card has completed a tx or rx command */ + }; +Index: linux-2.6-tip/drivers/net/hamradio/6pack.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/hamradio/6pack.c ++++ linux-2.6-tip/drivers/net/hamradio/6pack.c +@@ -120,7 +120,7 @@ struct sixpack { + struct timer_list tx_t; + struct timer_list resync_t; + atomic_t refcnt; +- struct semaphore dead_sem; ++ struct compat_semaphore dead_sem; + spinlock_t lock; + }; + +Index: linux-2.6-tip/drivers/net/hamradio/mkiss.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/hamradio/mkiss.c ++++ linux-2.6-tip/drivers/net/hamradio/mkiss.c +@@ -84,7 +84,7 @@ struct mkiss { + #define CRC_MODE_SMACK_TEST 4 + + atomic_t refcnt; +- struct semaphore dead_sem; ++ struct compat_semaphore dead_sem; + }; + + /*---------------------------------------------------------------------------*/ +Index: linux-2.6-tip/drivers/net/ppp_async.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/ppp_async.c ++++ linux-2.6-tip/drivers/net/ppp_async.c +@@ -67,7 +67,7 @@ struct asyncppp { + struct tasklet_struct tsk; + + atomic_t refcnt; +- struct semaphore dead_sem; ++ struct compat_semaphore dead_sem; + struct ppp_channel chan; /* interface to generic ppp layer */ + unsigned char obuf[OBUFSIZE]; + }; +Index: linux-2.6-tip/drivers/pci/hotplug/ibmphp_hpc.c +=================================================================== +--- linux-2.6-tip.orig/drivers/pci/hotplug/ibmphp_hpc.c ++++ linux-2.6-tip/drivers/pci/hotplug/ibmphp_hpc.c +@@ -104,7 +104,7 @@ static int to_debug = 0; + static struct mutex sem_hpcaccess; // lock access to HPC + static struct semaphore semOperations; // lock all operations and + // access to data structures +-static struct semaphore sem_exit; // make sure polling thread goes away ++static struct compat_semaphore sem_exit; // make sure polling thread goes away + static struct task_struct *ibmphp_poll_thread; + //---------------------------------------------------------------------------- + // local function prototypes +Index: linux-2.6-tip/drivers/scsi/aacraid/aacraid.h +=================================================================== +--- linux-2.6-tip.orig/drivers/scsi/aacraid/aacraid.h ++++ linux-2.6-tip/drivers/scsi/aacraid/aacraid.h +@@ -719,7 +719,7 @@ struct aac_fib_context { + u32 unique; // unique value representing this context + ulong jiffies; // used for cleanup - dmb changed to ulong + struct list_head next; // used to link context's into a linked list +- struct semaphore wait_sem; // this is used to wait for the next fib to arrive. ++ struct compat_semaphore wait_sem; // this is used to wait for the next fib to arrive. + int wait; // Set to true when thread is in WaitForSingleObject + unsigned long count; // total number of FIBs on FibList + struct list_head fib_list; // this holds fibs and their attachd hw_fibs +@@ -789,7 +789,7 @@ struct fib { + * This is the event the sendfib routine will wait on if the + * caller did not pass one and this is synch io. 
+ */ +- struct semaphore event_wait; ++ struct compat_semaphore event_wait; + spinlock_t event_lock; + + u32 done; /* gets set to 1 when fib is complete */ +Index: linux-2.6-tip/include/linux/parport.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/parport.h ++++ linux-2.6-tip/include/linux/parport.h +@@ -264,7 +264,7 @@ enum ieee1284_phase { + struct ieee1284_info { + int mode; + volatile enum ieee1284_phase phase; +- struct semaphore irq; ++ struct compat_semaphore irq; + }; + + /* A parallel port */ +Index: linux-2.6-tip/include/asm-generic/tlb.h +=================================================================== +--- linux-2.6-tip.orig/include/asm-generic/tlb.h ++++ linux-2.6-tip/include/asm-generic/tlb.h +@@ -22,14 +22,8 @@ + * and page free order so much.. + */ + #ifdef CONFIG_SMP +- #ifdef ARCH_FREE_PTR_NR +- #define FREE_PTR_NR ARCH_FREE_PTR_NR +- #else +- #define FREE_PTE_NR 506 +- #endif + #define tlb_fast_mode(tlb) ((tlb)->nr == ~0U) + #else +- #define FREE_PTE_NR 1 + #define tlb_fast_mode(tlb) 1 + #endif + +@@ -39,30 +33,48 @@ + struct mmu_gather { + struct mm_struct *mm; + unsigned int nr; /* set to ~0U means fast mode */ ++ unsigned int max; /* nr < max */ + unsigned int need_flush;/* Really unmapped some ptes? */ + unsigned int fullmm; /* non-zero means full mm flush */ +- struct page * pages[FREE_PTE_NR]; ++#ifdef HAVE_ARCH_MMU_GATHER ++ struct arch_mmu_gather arch; ++#endif ++ struct page ** pages; ++ struct page * local[8]; + }; + +-/* Users of the generic TLB shootdown code must declare this storage space. */ +-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); ++static inline void __tlb_alloc_pages(struct mmu_gather *tlb) ++{ ++ unsigned long addr = __get_free_pages(GFP_ATOMIC, 0); ++ ++ if (addr) { ++ tlb->pages = (void *)addr; ++ tlb->max = PAGE_SIZE / sizeof(struct page *); ++ } ++} + + /* tlb_gather_mmu + * Return a pointer to an initialized struct mmu_gather. + */ +-static inline struct mmu_gather * +-tlb_gather_mmu(struct mm_struct *mm, unsigned int full_mm_flush) ++static inline void ++tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned int full_mm_flush) + { +- struct mmu_gather *tlb = &get_cpu_var(mmu_gathers); +- + tlb->mm = mm; + +- /* Use fast mode if only one CPU is online */ +- tlb->nr = num_online_cpus() > 1 ? 
0U : ~0U; ++ tlb->max = ARRAY_SIZE(tlb->local); ++ tlb->pages = tlb->local; ++ ++ if (num_online_cpus() > 1) { ++ tlb->nr = 0; ++ __tlb_alloc_pages(tlb); ++ } else /* Use fast mode if only one CPU is online */ ++ tlb->nr = ~0U; + + tlb->fullmm = full_mm_flush; + +- return tlb; ++#ifdef HAVE_ARCH_MMU_GATHER ++ tlb->arch = ARCH_MMU_GATHER_INIT; ++#endif + } + + static inline void +@@ -75,6 +87,8 @@ tlb_flush_mmu(struct mmu_gather *tlb, un + if (!tlb_fast_mode(tlb)) { + free_pages_and_swap_cache(tlb->pages, tlb->nr); + tlb->nr = 0; ++ if (tlb->pages == tlb->local) ++ __tlb_alloc_pages(tlb); + } + } + +@@ -90,7 +104,8 @@ tlb_finish_mmu(struct mmu_gather *tlb, u + /* keep the page table cache within bounds */ + check_pgt_cache(); + +- put_cpu_var(mmu_gathers); ++ if (tlb->pages != tlb->local) ++ free_pages((unsigned long)tlb->pages, 0); + } + + /* tlb_remove_page +@@ -106,7 +121,7 @@ static inline void tlb_remove_page(struc + return; + } + tlb->pages[tlb->nr++] = page; +- if (tlb->nr >= FREE_PTE_NR) ++ if (tlb->nr >= tlb->max) + tlb_flush_mmu(tlb, 0, 0); + } + +Index: linux-2.6-tip/mm/swap.c +=================================================================== +--- linux-2.6-tip.orig/mm/swap.c ++++ linux-2.6-tip/mm/swap.c +@@ -30,14 +30,49 @@ + #include + #include + #include ++#include + + #include "internal.h" + + /* How many pages do we try to swap or page in/out together? */ + int page_cluster; + +-static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs); +-static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs); ++/* ++ * On PREEMPT_RT we don't want to disable preemption for cpu variables. ++ * We grab a cpu and then use that cpu to lock the variables accordingly. ++ * ++ * (On !PREEMPT_RT this turns into normal preempt-off sections, as before.) 
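The mmu_gather rework above drops the fixed FREE_PTE_NR per-CPU array in favour of a descriptor that tries to allocate a page's worth of page pointers and falls back to a small local[] array when that allocation fails. The same buffering idea, reduced to an illustrative user-space sketch with invented names:

#include <stdlib.h>

struct gather {
	void **items;		/* points at local[] or at a heap block */
	unsigned int nr;	/* filled slots */
	unsigned int max;	/* capacity of items[] */
	void *local[8];		/* always-available fallback */
};

static void gather_init(struct gather *g)
{
	g->nr = 0;
	g->items = g->local;
	g->max = sizeof(g->local) / sizeof(g->local[0]);

	/* best effort: a bigger buffer only means fewer flushes */
	void **big = malloc(4096);
	if (big) {
		g->items = big;
		g->max = 4096 / sizeof(void *);
	}
}

/* returns non-zero when the caller must flush and reset nr */
static int gather_add(struct gather *g, void *item)
{
	g->items[g->nr++] = item;
	return g->nr == g->max;
}

static void gather_finish(struct gather *g)
{
	if (g->items != g->local)
		free(g->items);
}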
++ */ ++static DEFINE_PER_CPU_LOCKED(struct pagevec[NR_LRU_LISTS], lru_add_pvecs); ++static DEFINE_PER_CPU_LOCKED(struct pagevec, lru_rotate_pvecs); ++ ++#define swap_get_cpu_var_irq_save(var, flags, cpu) \ ++ ({ \ ++ (void)flags; \ ++ &get_cpu_var_locked(var, &cpu); \ ++ }) ++ ++#define swap_put_cpu_var_irq_restore(var, flags, cpu) \ ++ put_cpu_var_locked(var, cpu) ++ ++#define swap_get_cpu_var(var, cpu) \ ++ &get_cpu_var_locked(var, &cpu) ++ ++#define swap_put_cpu_var(var, cpu) \ ++ put_cpu_var_locked(var, cpu) ++ ++#define swap_per_cpu_lock(var, cpu) \ ++ ({ \ ++ spin_lock(&__get_cpu_lock(var, cpu)); \ ++ &__get_cpu_var_locked(var, cpu); \ ++ }) ++ ++#define swap_per_cpu_unlock(var, cpu) \ ++ spin_unlock(&__get_cpu_lock(var, cpu)); ++ ++#define swap_get_cpu() raw_smp_processor_id() ++ ++#define swap_put_cpu() + + /* + * This path almost never happens for VM activity - pages are normally +@@ -141,13 +176,13 @@ void rotate_reclaimable_page(struct pag + !PageUnevictable(page) && PageLRU(page)) { + struct pagevec *pvec; + unsigned long flags; ++ int cpu; + + page_cache_get(page); +- local_irq_save(flags); +- pvec = &__get_cpu_var(lru_rotate_pvecs); ++ pvec = swap_get_cpu_var_irq_save(lru_rotate_pvecs, flags, cpu); + if (!pagevec_add(pvec, page)) + pagevec_move_tail(pvec); +- local_irq_restore(flags); ++ swap_put_cpu_var_irq_restore(lru_rotate_pvecs, flags, cpu); + } + } + +@@ -216,12 +251,14 @@ EXPORT_SYMBOL(mark_page_accessed); + + void __lru_cache_add(struct page *page, enum lru_list lru) + { +- struct pagevec *pvec = &get_cpu_var(lru_add_pvecs)[lru]; ++ struct pagevec *pvec; ++ int cpu; + ++ pvec = swap_get_cpu_var(lru_add_pvecs, cpu)[lru]; + page_cache_get(page); + if (!pagevec_add(pvec, page)) + ____pagevec_lru_add(pvec, lru); +- put_cpu_var(lru_add_pvecs); ++ swap_put_cpu_var(lru_add_pvecs, cpu); + } + + /** +@@ -271,31 +308,36 @@ void add_page_to_unevictable_list(struct + */ + static void drain_cpu_pagevecs(int cpu) + { +- struct pagevec *pvecs = per_cpu(lru_add_pvecs, cpu); +- struct pagevec *pvec; ++ struct pagevec *pvecs, *pvec; + int lru; + ++ pvecs = swap_per_cpu_lock(lru_add_pvecs, cpu)[0]; + for_each_lru(lru) { + pvec = &pvecs[lru - LRU_BASE]; + if (pagevec_count(pvec)) + ____pagevec_lru_add(pvec, lru); + } ++ swap_per_cpu_unlock(lru_add_pvecs, cpu); + +- pvec = &per_cpu(lru_rotate_pvecs, cpu); ++ pvec = swap_per_cpu_lock(lru_rotate_pvecs, cpu); + if (pagevec_count(pvec)) { + unsigned long flags; + + /* No harm done if a racing interrupt already did this */ +- local_irq_save(flags); ++ local_irq_save_nort(flags); + pagevec_move_tail(pvec); +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + } ++ swap_per_cpu_unlock(lru_rotate_pvecs, cpu); + } + + void lru_add_drain(void) + { +- drain_cpu_pagevecs(get_cpu()); +- put_cpu(); ++ int cpu; ++ ++ cpu = swap_get_cpu(); ++ drain_cpu_pagevecs(cpu); ++ swap_put_cpu(); + } + + static void lru_add_drain_per_cpu(struct work_struct *dummy) +@@ -369,7 +411,7 @@ void release_pages(struct page **pages, + } + __pagevec_free(&pages_to_free); + pagevec_reinit(&pages_to_free); +- } ++ } + } + if (zone) + spin_unlock_irqrestore(&zone->lru_lock, flags); +Index: linux-2.6-tip/net/ipv4/netfilter/arp_tables.c +=================================================================== +--- linux-2.6-tip.orig/net/ipv4/netfilter/arp_tables.c ++++ linux-2.6-tip/net/ipv4/netfilter/arp_tables.c +@@ -239,7 +239,7 @@ unsigned int arpt_do_table(struct sk_buf + + read_lock_bh(&table->lock); + private = table->private; +- table_base = (void 
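The mm/swap.c conversion above swaps preempt-disable protection of the per-CPU pagevecs for per-CPU locks (the get_cpu_var_locked()/put_cpu_var_locked() wrappers), so a holder that gets preempted cannot corrupt the data and another CPU can drain a remote slot safely. An illustrative user-space analogue of that locked-slot idea, with an invented slot count and names:

#include <pthread.h>

#define NR_SLOTS 4

struct pagevec_slot {
	pthread_mutex_t lock;	/* one lock per slot, instead of preempt-disable */
	int nr_pages;		/* stands in for the real pagevec */
};

static struct pagevec_slot slots[NR_SLOTS];

static void slots_init(void)
{
	for (int i = 0; i < NR_SLOTS; i++)
		pthread_mutex_init(&slots[i].lock, NULL);
}

/* get_cpu_var_locked() analogue: lock slot "cpu" and hand it out */
static struct pagevec_slot *slot_get(int cpu)
{
	pthread_mutex_lock(&slots[cpu].lock);
	return &slots[cpu];
}

/* put_cpu_var_locked() analogue */
static void slot_put(int cpu)
{
	pthread_mutex_unlock(&slots[cpu].lock);
}

static void add_page(int cpu)
{
	struct pagevec_slot *s = slot_get(cpu);
	s->nr_pages++;		/* safe even if "preempted" while holding the lock */
	slot_put(cpu);
}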
*)private->entries[smp_processor_id()]; ++ table_base = (void *)private->entries[raw_smp_processor_id()]; + e = get_entry(table_base, private->hook_entry[hook]); + back = get_entry(table_base, private->underflow[hook]); + +@@ -1159,7 +1159,7 @@ static int do_add_counters(struct net *n + + i = 0; + /* Choose the copy that is on our node */ +- loc_cpu_entry = private->entries[smp_processor_id()]; ++ loc_cpu_entry = private->entries[raw_smp_processor_id()]; + ARPT_ENTRY_ITERATE(loc_cpu_entry, + private->size, + add_counter_to_entry, +Index: linux-2.6-tip/net/ipv4/netfilter/ip_tables.c +=================================================================== +--- linux-2.6-tip.orig/net/ipv4/netfilter/ip_tables.c ++++ linux-2.6-tip/net/ipv4/netfilter/ip_tables.c +@@ -350,7 +350,7 @@ ipt_do_table(struct sk_buff *skb, + read_lock_bh(&table->lock); + IP_NF_ASSERT(table->valid_hooks & (1 << hook)); + private = table->private; +- table_base = (void *)private->entries[smp_processor_id()]; ++ table_base = (void *)private->entries[raw_smp_processor_id()]; + e = get_entry(table_base, private->hook_entry[hook]); + + /* For return from builtin chain */ +Index: linux-2.6-tip/net/core/dev.c +=================================================================== +--- linux-2.6-tip.orig/net/core/dev.c ++++ linux-2.6-tip/net/core/dev.c +@@ -1878,42 +1878,52 @@ gso: + Check this and shot the lock. It is not prone from deadlocks. + Either shot noqueue qdisc, it is even simpler 8) + */ +- if (dev->flags & IFF_UP) { +- int cpu = smp_processor_id(); /* ok because BHs are off */ ++ if (!(dev->flags & IFF_UP)) ++ goto err; + +- if (txq->xmit_lock_owner != cpu) { ++ /* Recursion is detected! It is possible, unfortunately: */ ++ if (netif_tx_lock_recursion(txq)) ++ goto err_recursion; + +- HARD_TX_LOCK(dev, txq, cpu); ++ HARD_TX_LOCK(dev, txq); + +- if (!netif_tx_queue_stopped(txq)) { +- rc = 0; +- if (!dev_hard_start_xmit(skb, dev, txq)) { +- HARD_TX_UNLOCK(dev, txq); +- goto out; +- } +- } +- HARD_TX_UNLOCK(dev, txq); +- if (net_ratelimit()) +- printk(KERN_CRIT "Virtual device %s asks to " +- "queue packet!\n", dev->name); +- } else { +- /* Recursion is detected! 
It is possible, +- * unfortunately */ +- if (net_ratelimit()) +- printk(KERN_CRIT "Dead loop on virtual device " +- "%s, fix it urgently!\n", dev->name); +- } ++ if (netif_tx_queue_stopped(txq)) ++ goto err_tx_unlock; ++ ++ if (dev_hard_start_xmit(skb, dev, txq)) ++ goto err_tx_unlock; ++ ++ rc = 0; ++ HARD_TX_UNLOCK(dev, txq); ++ ++out: ++ rcu_read_unlock_bh(); ++ return rc; ++ ++err_recursion: ++ if (net_ratelimit()) { ++ printk(KERN_CRIT ++ "Dead loop on virtual device %s, fix it urgently!\n", ++ dev->name); ++ } ++ goto err; ++ ++err_tx_unlock: ++ HARD_TX_UNLOCK(dev, txq); ++ ++ if (net_ratelimit()) { ++ printk(KERN_CRIT "Virtual device %s asks to queue packet!\n", ++ dev->name); + } ++ /* Fall through: */ + ++err: + rc = -ENETDOWN; + rcu_read_unlock_bh(); + + out_kfree_skb: + kfree_skb(skb); + return rc; +-out: +- rcu_read_unlock_bh(); +- return rc; + } + + +@@ -1986,8 +1996,8 @@ int netif_rx_ni(struct sk_buff *skb) + { + int err; + +- preempt_disable(); + err = netif_rx(skb); ++ preempt_disable(); + if (local_softirq_pending()) + do_softirq(); + preempt_enable(); +@@ -1999,7 +2009,8 @@ EXPORT_SYMBOL(netif_rx_ni); + + static void net_tx_action(struct softirq_action *h) + { +- struct softnet_data *sd = &__get_cpu_var(softnet_data); ++ struct softnet_data *sd = &per_cpu(softnet_data, ++ raw_smp_processor_id()); + + if (sd->completion_queue) { + struct sk_buff *clist; +@@ -2015,6 +2026,11 @@ static void net_tx_action(struct softirq + + WARN_ON(atomic_read(&skb->users)); + __kfree_skb(skb); ++ /* ++ * Safe to reschedule - the list is private ++ * at this point. ++ */ ++ cond_resched_softirq_context(); + } + } + +@@ -2033,6 +2049,22 @@ static void net_tx_action(struct softirq + head = head->next_sched; + + root_lock = qdisc_lock(q); ++ /* ++ * We are executing in softirq context here, and ++ * if softirqs are preemptible, we must avoid ++ * infinite reactivation of the softirq by ++ * either the tx handler, or by netif_schedule(). ++ * (it would result in an infinitely looping ++ * softirq context) ++ * So we take the spinlock unconditionally. 
++ */ ++#ifdef CONFIG_PREEMPT_SOFTIRQS ++ spin_lock(root_lock); ++ smp_mb__before_clear_bit(); ++ clear_bit(__QDISC_STATE_SCHED, &q->state); ++ qdisc_run(q); ++ spin_unlock(root_lock); ++#else + if (spin_trylock(root_lock)) { + smp_mb__before_clear_bit(); + clear_bit(__QDISC_STATE_SCHED, +@@ -2049,6 +2081,7 @@ static void net_tx_action(struct softirq + &q->state); + } + } ++#endif + } + } + } +@@ -2257,7 +2290,7 @@ int netif_receive_skb(struct sk_buff *sk + skb->dev = orig_dev->master; + } + +- __get_cpu_var(netdev_rx_stat).total++; ++ per_cpu(netdev_rx_stat, raw_smp_processor_id()).total++; + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); +@@ -2578,9 +2611,10 @@ EXPORT_SYMBOL(napi_gro_frags); + static int process_backlog(struct napi_struct *napi, int quota) + { + int work = 0; +- struct softnet_data *queue = &__get_cpu_var(softnet_data); ++ struct softnet_data *queue; + unsigned long start_time = jiffies; + ++ queue = &per_cpu(softnet_data, raw_smp_processor_id()); + napi->weight = weight_p; + do { + struct sk_buff *skb; +@@ -2612,7 +2646,7 @@ void __napi_schedule(struct napi_struct + + local_irq_save(flags); + list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list); +- __raise_softirq_irqoff(NET_RX_SOFTIRQ); ++ raise_softirq_irqoff(NET_RX_SOFTIRQ); + local_irq_restore(flags); + } + EXPORT_SYMBOL(__napi_schedule); +@@ -2760,7 +2794,7 @@ out: + + softnet_break: + __get_cpu_var(netdev_rx_stat).time_squeeze++; +- __raise_softirq_irqoff(NET_RX_SOFTIRQ); ++ raise_softirq_irqoff(NET_RX_SOFTIRQ); + goto out; + } + +@@ -4231,7 +4265,7 @@ static void __netdev_init_queue_locks_on + { + spin_lock_init(&dev_queue->_xmit_lock); + netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type); +- dev_queue->xmit_lock_owner = -1; ++ dev_queue->xmit_lock_owner = (void *)-1; + } + + static void netdev_init_queue_locks(struct net_device *dev) +Index: linux-2.6-tip/fs/buffer.c +=================================================================== +--- linux-2.6-tip.orig/fs/buffer.c ++++ linux-2.6-tip/fs/buffer.c +@@ -40,7 +40,6 @@ + #include + #include + #include +-#include + + static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); + +@@ -469,8 +468,7 @@ static void end_buffer_async_read(struct + * decide that the page is now completely done. 
+ */ + first = page_buffers(page); +- local_irq_save(flags); +- bit_spin_lock(BH_Uptodate_Lock, &first->b_state); ++ spin_lock_irqsave(&first->b_uptodate_lock, flags); + clear_buffer_async_read(bh); + unlock_buffer(bh); + tmp = bh; +@@ -483,8 +481,7 @@ static void end_buffer_async_read(struct + } + tmp = tmp->b_this_page; + } while (tmp != bh); +- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); +- local_irq_restore(flags); ++ spin_unlock_irqrestore(&first->b_uptodate_lock, flags); + + /* + * If none of the buffers had errors and they are all +@@ -496,8 +493,7 @@ static void end_buffer_async_read(struct + return; + + still_busy: +- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); +- local_irq_restore(flags); ++ spin_unlock_irqrestore(&first->b_uptodate_lock, flags); + return; + } + +@@ -532,8 +528,7 @@ static void end_buffer_async_write(struc + } + + first = page_buffers(page); +- local_irq_save(flags); +- bit_spin_lock(BH_Uptodate_Lock, &first->b_state); ++ spin_lock_irqsave(&first->b_uptodate_lock, flags); + + clear_buffer_async_write(bh); + unlock_buffer(bh); +@@ -545,14 +540,12 @@ static void end_buffer_async_write(struc + } + tmp = tmp->b_this_page; + } +- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); +- local_irq_restore(flags); ++ spin_unlock_irqrestore(&first->b_uptodate_lock, flags); + end_page_writeback(page); + return; + + still_busy: +- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); +- local_irq_restore(flags); ++ spin_unlock_irqrestore(&first->b_uptodate_lock, flags); + return; + } + +@@ -3311,6 +3304,8 @@ struct buffer_head *alloc_buffer_head(gf + struct buffer_head *ret = kmem_cache_alloc(bh_cachep, gfp_flags); + if (ret) { + INIT_LIST_HEAD(&ret->b_assoc_buffers); ++ spin_lock_init(&ret->b_uptodate_lock); ++ spin_lock_init(&ret->b_state_lock); + get_cpu_var(bh_accounting).nr++; + recalc_bh_state(); + put_cpu_var(bh_accounting); +@@ -3322,6 +3317,8 @@ EXPORT_SYMBOL(alloc_buffer_head); + void free_buffer_head(struct buffer_head *bh) + { + BUG_ON(!list_empty(&bh->b_assoc_buffers)); ++ BUG_ON(spin_is_locked(&bh->b_uptodate_lock)); ++ BUG_ON(spin_is_locked(&bh->b_state_lock)); + kmem_cache_free(bh_cachep, bh); + get_cpu_var(bh_accounting).nr--; + recalc_bh_state(); +Index: linux-2.6-tip/fs/ntfs/aops.c +=================================================================== +--- linux-2.6-tip.orig/fs/ntfs/aops.c ++++ linux-2.6-tip/fs/ntfs/aops.c +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include + + #include "aops.h" + #include "attrib.h" +@@ -107,8 +108,7 @@ static void ntfs_end_buffer_async_read(s + "0x%llx.", (unsigned long long)bh->b_blocknr); + } + first = page_buffers(page); +- local_irq_save(flags); +- bit_spin_lock(BH_Uptodate_Lock, &first->b_state); ++ spin_lock_irqsave(&first->b_uptodate_lock, flags); + clear_buffer_async_read(bh); + unlock_buffer(bh); + tmp = bh; +@@ -123,8 +123,7 @@ static void ntfs_end_buffer_async_read(s + } + tmp = tmp->b_this_page; + } while (tmp != bh); +- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); +- local_irq_restore(flags); ++ spin_unlock_irqrestore(&first->b_uptodate_lock, flags); + /* + * If none of the buffers had errors then we can set the page uptodate, + * but we first have to perform the post read mst fixups, if the +@@ -145,13 +144,13 @@ static void ntfs_end_buffer_async_read(s + recs = PAGE_CACHE_SIZE / rec_size; + /* Should have been verified before we got here... 
*/ + BUG_ON(!recs); +- local_irq_save(flags); ++ local_irq_save_nort(flags); + kaddr = kmap_atomic(page, KM_BIO_SRC_IRQ); + for (i = 0; i < recs; i++) + post_read_mst_fixup((NTFS_RECORD*)(kaddr + + i * rec_size), rec_size); + kunmap_atomic(kaddr, KM_BIO_SRC_IRQ); +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + flush_dcache_page(page); + if (likely(page_uptodate && !PageError(page))) + SetPageUptodate(page); +@@ -159,8 +158,7 @@ static void ntfs_end_buffer_async_read(s + unlock_page(page); + return; + still_busy: +- bit_spin_unlock(BH_Uptodate_Lock, &first->b_state); +- local_irq_restore(flags); ++ spin_unlock_irqrestore(&first->b_uptodate_lock, flags); + return; + } + +Index: linux-2.6-tip/include/linux/buffer_head.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/buffer_head.h ++++ linux-2.6-tip/include/linux/buffer_head.h +@@ -21,10 +21,6 @@ enum bh_state_bits { + BH_Dirty, /* Is dirty */ + BH_Lock, /* Is locked */ + BH_Req, /* Has been submitted for I/O */ +- BH_Uptodate_Lock,/* Used by the first bh in a page, to serialise +- * IO completion of other buffers in the page +- */ +- + BH_Mapped, /* Has a disk mapping */ + BH_New, /* Disk mapping was newly created by get_block */ + BH_Async_Read, /* Is under end_buffer_async_read I/O */ +@@ -74,6 +70,8 @@ struct buffer_head { + struct address_space *b_assoc_map; /* mapping this buffer is + associated with */ + atomic_t b_count; /* users using this buffer_head */ ++ spinlock_t b_uptodate_lock; ++ spinlock_t b_state_lock; + }; + + /* +Index: linux-2.6-tip/include/linux/jbd.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/jbd.h ++++ linux-2.6-tip/include/linux/jbd.h +@@ -260,6 +260,15 @@ void buffer_assertion_failure(struct buf + #define J_ASSERT_JH(jh, expr) J_ASSERT(expr) + #endif + ++/* ++ * For assertions that are only valid on SMP (e.g. spin_is_locked()): ++ */ ++#ifdef CONFIG_SMP ++# define J_ASSERT_JH_SMP(jh, expr) J_ASSERT_JH(jh, expr) ++#else ++# define J_ASSERT_JH_SMP(jh, assert) do { } while (0) ++#endif ++ + #if defined(JBD_PARANOID_IOFAIL) + #define J_EXPECT(expr, why...) J_ASSERT(expr) + #define J_EXPECT_BH(bh, expr, why...) 
J_ASSERT_BH(bh, expr) +@@ -315,32 +324,32 @@ static inline struct journal_head *bh2jh + + static inline void jbd_lock_bh_state(struct buffer_head *bh) + { +- bit_spin_lock(BH_State, &bh->b_state); ++ spin_lock(&bh->b_state_lock); + } + + static inline int jbd_trylock_bh_state(struct buffer_head *bh) + { +- return bit_spin_trylock(BH_State, &bh->b_state); ++ return spin_trylock(&bh->b_state_lock); + } + + static inline int jbd_is_locked_bh_state(struct buffer_head *bh) + { +- return bit_spin_is_locked(BH_State, &bh->b_state); ++ return spin_is_locked(&bh->b_state_lock); + } + + static inline void jbd_unlock_bh_state(struct buffer_head *bh) + { +- bit_spin_unlock(BH_State, &bh->b_state); ++ spin_unlock(&bh->b_state_lock); + } + + static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) + { +- bit_spin_lock(BH_JournalHead, &bh->b_state); ++ spin_lock_irq(&bh->b_uptodate_lock); + } + + static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) + { +- bit_spin_unlock(BH_JournalHead, &bh->b_state); ++ spin_unlock_irq(&bh->b_uptodate_lock); + } + + struct jbd_revoke_table_s; +Index: linux-2.6-tip/fs/jbd/transaction.c +=================================================================== +--- linux-2.6-tip.orig/fs/jbd/transaction.c ++++ linux-2.6-tip/fs/jbd/transaction.c +@@ -1582,7 +1582,7 @@ static void __journal_temp_unlink_buffer + transaction_t *transaction; + struct buffer_head *bh = jh2bh(jh); + +- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); ++ J_ASSERT_JH_SMP(jh, jbd_is_locked_bh_state(bh)); + transaction = jh->b_transaction; + if (transaction) + assert_spin_locked(&transaction->t_journal->j_list_lock); +@@ -2077,7 +2077,7 @@ void __journal_file_buffer(struct journa + int was_dirty = 0; + struct buffer_head *bh = jh2bh(jh); + +- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); ++ J_ASSERT_JH_SMP(jh, jbd_is_locked_bh_state(bh)); + assert_spin_locked(&transaction->t_journal->j_list_lock); + + J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); +@@ -2166,7 +2166,7 @@ void __journal_refile_buffer(struct jour + int was_dirty; + struct buffer_head *bh = jh2bh(jh); + +- J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); ++ J_ASSERT_JH_SMP(jh, jbd_is_locked_bh_state(bh)); + if (jh->b_transaction) + assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock); + +Index: linux-2.6-tip/fs/proc/stat.c +=================================================================== +--- linux-2.6-tip.orig/fs/proc/stat.c ++++ linux-2.6-tip/fs/proc/stat.c +@@ -23,13 +23,14 @@ static int show_stat(struct seq_file *p, + { + int i, j; + unsigned long jif; +- cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; ++ cputime64_t user_rt, user, nice, system_rt, system, idle, ++ iowait, irq, softirq, steal; + cputime64_t guest; + u64 sum = 0; + struct timespec boottime; + unsigned int per_irq_sum; + +- user = nice = system = idle = iowait = ++ user_rt = user = nice = system_rt = system = idle = iowait = + irq = softirq = steal = cputime64_zero; + guest = cputime64_zero; + getboottime(&boottime); +@@ -44,6 +45,8 @@ static int show_stat(struct seq_file *p, + irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); + softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); + steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); ++ user_rt = cputime64_add(user_rt, kstat_cpu(i).cpustat.user_rt); ++ system_rt = cputime64_add(system_rt, kstat_cpu(i).cpustat.system_rt); + guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); + for_each_irq_nr(j) { + sum += kstat_irqs_cpu(j, i); +@@ -52,7 +55,10 @@ static 
int show_stat(struct seq_file *p, + } + sum += arch_irq_stat(); + +- seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", ++ user = cputime64_add(user_rt, user); ++ system = cputime64_add(system_rt, system); ++ ++ seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + (unsigned long long)cputime64_to_clock_t(user), + (unsigned long long)cputime64_to_clock_t(nice), + (unsigned long long)cputime64_to_clock_t(system), +@@ -61,13 +67,17 @@ static int show_stat(struct seq_file *p, + (unsigned long long)cputime64_to_clock_t(irq), + (unsigned long long)cputime64_to_clock_t(softirq), + (unsigned long long)cputime64_to_clock_t(steal), ++ (unsigned long long)cputime64_to_clock_t(user_rt), ++ (unsigned long long)cputime64_to_clock_t(system_rt), + (unsigned long long)cputime64_to_clock_t(guest)); + for_each_online_cpu(i) { + + /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ +- user = kstat_cpu(i).cpustat.user; ++ user_rt = kstat_cpu(i).cpustat.user_rt; ++ system_rt = kstat_cpu(i).cpustat.system_rt; ++ user = cputime64_add(user_rt, kstat_cpu(i).cpustat.user); + nice = kstat_cpu(i).cpustat.nice; +- system = kstat_cpu(i).cpustat.system; ++ system = cputime64_add(system_rt, kstat_cpu(i).cpustat.system); + idle = kstat_cpu(i).cpustat.idle; + iowait = kstat_cpu(i).cpustat.iowait; + irq = kstat_cpu(i).cpustat.irq; +@@ -75,7 +85,7 @@ static int show_stat(struct seq_file *p, + steal = kstat_cpu(i).cpustat.steal; + guest = kstat_cpu(i).cpustat.guest; + seq_printf(p, +- "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", ++ "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + i, + (unsigned long long)cputime64_to_clock_t(user), + (unsigned long long)cputime64_to_clock_t(nice), +@@ -85,6 +95,8 @@ static int show_stat(struct seq_file *p, + (unsigned long long)cputime64_to_clock_t(irq), + (unsigned long long)cputime64_to_clock_t(softirq), + (unsigned long long)cputime64_to_clock_t(steal), ++ (unsigned long long)cputime64_to_clock_t(user_rt), ++ (unsigned long long)cputime64_to_clock_t(system_rt), + (unsigned long long)cputime64_to_clock_t(guest)); + } + seq_printf(p, "intr %llu", (unsigned long long)sum); +Index: linux-2.6-tip/drivers/net/3c59x.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/3c59x.c ++++ linux-2.6-tip/drivers/net/3c59x.c +@@ -791,9 +791,9 @@ static void poll_vortex(struct net_devic + { + struct vortex_private *vp = netdev_priv(dev); + unsigned long flags; +- local_irq_save(flags); ++ local_irq_save_nort(flags); + (vp->full_bus_master_rx ? 
boomerang_interrupt:vortex_interrupt)(dev->irq,dev); +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + } + #endif + +@@ -1739,6 +1739,7 @@ vortex_timer(unsigned long data) + int next_tick = 60*HZ; + int ok = 0; + int media_status, old_window; ++ unsigned long flags; + + if (vortex_debug > 2) { + printk(KERN_DEBUG "%s: Media selection timer tick happened, %s.\n", +@@ -1746,7 +1747,7 @@ vortex_timer(unsigned long data) + printk(KERN_DEBUG "dev->watchdog_timeo=%d\n", dev->watchdog_timeo); + } + +- disable_irq_lockdep(dev->irq); ++ spin_lock_irqsave(&vp->lock, flags); + old_window = ioread16(ioaddr + EL3_CMD) >> 13; + EL3WINDOW(4); + media_status = ioread16(ioaddr + Wn4_Media); +@@ -1769,10 +1770,7 @@ vortex_timer(unsigned long data) + case XCVR_MII: case XCVR_NWAY: + { + ok = 1; +- /* Interrupts are already disabled */ +- spin_lock(&vp->lock); + vortex_check_media(dev, 0); +- spin_unlock(&vp->lock); + } + break; + default: /* Other media types handled by Tx timeouts. */ +@@ -1828,7 +1826,7 @@ leave_media_alone: + dev->name, media_tbl[dev->if_port].name); + + EL3WINDOW(old_window); +- enable_irq_lockdep(dev->irq); ++ spin_unlock_irqrestore(&vp->lock, flags); + mod_timer(&vp->timer, RUN_AT(next_tick)); + if (vp->deferred) + iowrite16(FakeIntr, ioaddr + EL3_CMD); +@@ -1862,12 +1860,12 @@ static void vortex_tx_timeout(struct net + * Block interrupts because vortex_interrupt does a bare spin_lock() + */ + unsigned long flags; +- local_irq_save(flags); ++ local_irq_save_nort(flags); + if (vp->full_bus_master_tx) + boomerang_interrupt(dev->irq, dev); + else + vortex_interrupt(dev->irq, dev); +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + } + } + +Index: linux-2.6-tip/drivers/serial/8250.c +=================================================================== +--- linux-2.6-tip.orig/drivers/serial/8250.c ++++ linux-2.6-tip/drivers/serial/8250.c +@@ -30,6 +30,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -1546,7 +1547,10 @@ static irqreturn_t serial8250_interrupt( + { + struct irq_info *i = dev_id; + struct list_head *l, *end = NULL; +- int pass_counter = 0, handled = 0; ++#ifndef CONFIG_PREEMPT_RT ++ int pass_counter = 0; ++#endif ++ int handled = 0; + + DEBUG_INTR("serial8250_interrupt(%d)...", irq); + +@@ -1584,12 +1588,18 @@ static irqreturn_t serial8250_interrupt( + + l = l->next; + ++ /* ++ * On preempt-rt we can be preempted and run in our ++ * own thread. ++ */ ++#ifndef CONFIG_PREEMPT_RT + if (l == i->head && pass_counter++ > PASS_LIMIT) { + /* If we hit this, we're dead. 
*/ + printk(KERN_ERR "serial8250: too much work for " + "irq%d\n", irq); + break; + } ++#endif + } while (l != end); + + spin_unlock(&i->lock); +@@ -2707,14 +2717,10 @@ serial8250_console_write(struct console + + touch_nmi_watchdog(); + +- local_irq_save(flags); +- if (up->port.sysrq) { +- /* serial8250_handle_port() already took the lock */ +- locked = 0; +- } else if (oops_in_progress) { +- locked = spin_trylock(&up->port.lock); +- } else +- spin_lock(&up->port.lock); ++ if (up->port.sysrq || oops_in_progress || preempt_rt) ++ locked = spin_trylock_irqsave(&up->port.lock, flags); ++ else ++ spin_lock_irqsave(&up->port.lock, flags); + + /* + * First save the IER then disable the interrupts +@@ -2746,8 +2752,7 @@ serial8250_console_write(struct console + check_modem_status(up); + + if (locked) +- spin_unlock(&up->port.lock); +- local_irq_restore(flags); ++ spin_unlock_irqrestore(&up->port.lock, flags); + } + + static int __init serial8250_console_setup(struct console *co, char *options) +Index: linux-2.6-tip/drivers/char/tty_buffer.c +=================================================================== +--- linux-2.6-tip.orig/drivers/char/tty_buffer.c ++++ linux-2.6-tip/drivers/char/tty_buffer.c +@@ -482,10 +482,14 @@ void tty_flip_buffer_push(struct tty_str + tty->buf.tail->commit = tty->buf.tail->used; + spin_unlock_irqrestore(&tty->buf.lock, flags); + ++#ifndef CONFIG_PREEMPT_RT + if (tty->low_latency) + flush_to_ldisc(&tty->buf.work.work); + else + schedule_delayed_work(&tty->buf.work, 1); ++#else ++ flush_to_ldisc(&tty->buf.work.work); ++#endif + } + EXPORT_SYMBOL(tty_flip_buffer_push); + +Index: linux-2.6-tip/arch/x86/include/asm/vgtod.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/vgtod.h ++++ linux-2.6-tip/arch/x86/include/asm/vgtod.h +@@ -5,7 +5,7 @@ + #include + + struct vsyscall_gtod_data { +- seqlock_t lock; ++ raw_seqlock_t lock; + + /* open coded 'struct timespec' */ + time_t wall_time_sec; +Index: linux-2.6-tip/arch/x86/include/asm/i8253.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/i8253.h ++++ linux-2.6-tip/arch/x86/include/asm/i8253.h +@@ -6,7 +6,7 @@ + #define PIT_CH0 0x40 + #define PIT_CH2 0x42 + +-extern spinlock_t i8253_lock; ++extern raw_spinlock_t i8253_lock; + + extern struct clock_event_device *global_clock_event; + +Index: linux-2.6-tip/arch/x86/include/asm/pci_x86.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/pci_x86.h ++++ linux-2.6-tip/arch/x86/include/asm/pci_x86.h +@@ -83,7 +83,7 @@ struct irq_routing_table { + extern unsigned int pcibios_irq_mask; + + extern int pcibios_scanned; +-extern spinlock_t pci_config_lock; ++extern raw_spinlock_t pci_config_lock; + + extern int (*pcibios_enable_irq)(struct pci_dev *dev); + extern void (*pcibios_disable_irq)(struct pci_dev *dev); +Index: linux-2.6-tip/arch/x86/include/asm/xor_32.h +=================================================================== +--- linux-2.6-tip.orig/arch/x86/include/asm/xor_32.h ++++ linux-2.6-tip/arch/x86/include/asm/xor_32.h +@@ -865,7 +865,21 @@ static struct xor_block_template xor_blo + #include + + #undef XOR_TRY_TEMPLATES +-#define XOR_TRY_TEMPLATES \ ++/* ++ * MMX/SSE ops disable preemption for long periods of time, ++ * so on PREEMPT_RT use the register-based ops only: ++ */ ++#ifdef CONFIG_PREEMPT_RT ++# define XOR_TRY_TEMPLATES \ ++ do { \ ++ xor_speed(&xor_block_8regs); \ ++ 
xor_speed(&xor_block_8regs_p); \ ++ xor_speed(&xor_block_32regs); \ ++ xor_speed(&xor_block_32regs_p); \ ++ } while (0) ++# define XOR_SELECT_TEMPLATE(FASTEST) (FASTEST) ++#else ++# define XOR_TRY_TEMPLATES \ + do { \ + xor_speed(&xor_block_8regs); \ + xor_speed(&xor_block_8regs_p); \ +@@ -882,7 +896,8 @@ do { \ + /* We force the use of the SSE xor block because it can write around L2. + We may also be able to load into the L1 only depending on how the cpu + deals with a load to a line that is being prefetched. */ +-#define XOR_SELECT_TEMPLATE(FASTEST) \ ++# define XOR_SELECT_TEMPLATE(FASTEST) \ + (cpu_has_xmm ? &xor_block_pIII_sse : FASTEST) ++#endif /* CONFIG_PREEMPT_RT */ + + #endif /* _ASM_X86_XOR_32_H */ +Index: linux-2.6-tip/arch/x86/kernel/dumpstack_32.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/kernel/dumpstack_32.c ++++ linux-2.6-tip/arch/x86/kernel/dumpstack_32.c +@@ -93,6 +93,12 @@ show_stack_log_lvl(struct task_struct *t + } + + ++#if defined(CONFIG_DEBUG_STACKOVERFLOW) && defined(CONFIG_EVENT_TRACE) ++extern unsigned long worst_stack_left; ++#else ++# define worst_stack_left -1L ++#endif ++ + void show_registers(struct pt_regs *regs) + { + int i; +Index: linux-2.6-tip/arch/x86/pci/direct.c +=================================================================== +--- linux-2.6-tip.orig/arch/x86/pci/direct.c ++++ linux-2.6-tip/arch/x86/pci/direct.c +@@ -223,16 +223,23 @@ static int __init pci_check_type1(void) + unsigned int tmp; + int works = 0; + +- local_irq_save(flags); ++ spin_lock_irqsave(&pci_config_lock, flags); + + outb(0x01, 0xCFB); + tmp = inl(0xCF8); + outl(0x80000000, 0xCF8); +- if (inl(0xCF8) == 0x80000000 && pci_sanity_check(&pci_direct_conf1)) { +- works = 1; ++ ++ if (inl(0xCF8) == 0x80000000) { ++ spin_unlock_irqrestore(&pci_config_lock, flags); ++ ++ if (pci_sanity_check(&pci_direct_conf1)) ++ works = 1; ++ ++ spin_lock_irqsave(&pci_config_lock, flags); + } + outl(tmp, 0xCF8); +- local_irq_restore(flags); ++ ++ spin_unlock_irqrestore(&pci_config_lock, flags); + + return works; + } +@@ -242,17 +249,19 @@ static int __init pci_check_type2(void) + unsigned long flags; + int works = 0; + +- local_irq_save(flags); ++ spin_lock_irqsave(&pci_config_lock, flags); + + outb(0x00, 0xCFB); + outb(0x00, 0xCF8); + outb(0x00, 0xCFA); +- if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00 && +- pci_sanity_check(&pci_direct_conf2)) { +- works = 1; +- } + +- local_irq_restore(flags); ++ if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00) { ++ spin_unlock_irqrestore(&pci_config_lock, flags); ++ ++ if (pci_sanity_check(&pci_direct_conf2)) ++ works = 1; ++ } else ++ spin_unlock_irqrestore(&pci_config_lock, flags); + + return works; + } +Index: linux-2.6-tip/kernel/sched_cpupri.h +=================================================================== +--- linux-2.6-tip.orig/kernel/sched_cpupri.h ++++ linux-2.6-tip/kernel/sched_cpupri.h +@@ -12,7 +12,7 @@ + /* values 2-101 are RT priorities 0-99 */ + + struct cpupri_vec { +- spinlock_t lock; ++ raw_spinlock_t lock; + int count; + cpumask_var_t mask; + }; +Index: linux-2.6-tip/include/linux/profile.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/profile.h ++++ linux-2.6-tip/include/linux/profile.h +@@ -4,14 +4,16 @@ + #include + #include + #include ++#include + #include + + #include + +-#define CPU_PROFILING 1 +-#define SCHED_PROFILING 2 +-#define SLEEP_PROFILING 3 +-#define KVM_PROFILING 4 ++#define CPU_PROFILING 1 ++#define 
SCHED_PROFILING 2 ++#define SLEEP_PROFILING 3 ++#define KVM_PROFILING 4 ++#define PREEMPT_PROFILING 5 + + struct proc_dir_entry; + struct pt_regs; +@@ -36,6 +38,8 @@ enum profile_type { + PROFILE_MUNMAP + }; + ++extern int prof_pid; ++ + #ifdef CONFIG_PROFILING + + extern int prof_on __read_mostly; +Index: linux-2.6-tip/include/linux/radix-tree.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/radix-tree.h ++++ linux-2.6-tip/include/linux/radix-tree.h +@@ -167,7 +167,18 @@ radix_tree_gang_lookup_slot(struct radix + unsigned long first_index, unsigned int max_items); + unsigned long radix_tree_next_hole(struct radix_tree_root *root, + unsigned long index, unsigned long max_scan); ++/* ++ * On a mutex based kernel we can freely schedule within the radix code: ++ */ ++#ifdef CONFIG_PREEMPT_RT ++static inline int radix_tree_preload(gfp_t gfp_mask) ++{ ++ return 0; ++} ++#else + int radix_tree_preload(gfp_t gfp_mask); ++#endif ++ + void radix_tree_init(void); + void *radix_tree_tag_set(struct radix_tree_root *root, + unsigned long index, unsigned int tag); +@@ -187,7 +198,9 @@ int radix_tree_tagged(struct radix_tree_ + + static inline void radix_tree_preload_end(void) + { ++#ifndef CONFIG_PREEMPT_RT + preempt_enable(); ++#endif + } + + #endif /* _LINUX_RADIX_TREE_H */ +Index: linux-2.6-tip/include/linux/smp_lock.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/smp_lock.h ++++ linux-2.6-tip/include/linux/smp_lock.h +@@ -45,7 +45,7 @@ static inline void cycle_kernel_lock(voi + #define unlock_kernel() do { } while(0) + #define release_kernel_lock(task) do { } while(0) + #define cycle_kernel_lock() do { } while(0) +-#define reacquire_kernel_lock(task) 0 ++#define reacquire_kernel_lock(task) do { } while(0) + #define kernel_locked() 1 + + #endif /* CONFIG_LOCK_KERNEL */ +Index: linux-2.6-tip/include/linux/workqueue.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/workqueue.h ++++ linux-2.6-tip/include/linux/workqueue.h +@@ -190,6 +190,9 @@ __create_workqueue_key(const char *name, + #define create_freezeable_workqueue(name) __create_workqueue((name), 1, 1, 0) + #define create_singlethread_workqueue(name) __create_workqueue((name), 1, 0, 0) + ++extern void set_workqueue_prio(struct workqueue_struct *wq, int policy, ++ int rt_priority, int nice); ++ + extern void destroy_workqueue(struct workqueue_struct *wq); + + extern int queue_work(struct workqueue_struct *wq, struct work_struct *work); +Index: linux-2.6-tip/kernel/notifier.c +=================================================================== +--- linux-2.6-tip.orig/kernel/notifier.c ++++ linux-2.6-tip/kernel/notifier.c +@@ -71,7 +71,7 @@ static int notifier_chain_unregister(str + * @returns: notifier_call_chain returns the value returned by the + * last notifier function called. + */ +-static int __kprobes notifier_call_chain(struct notifier_block **nl, ++static int __kprobes notrace notifier_call_chain(struct notifier_block **nl, + unsigned long val, void *v, + int nr_to_call, int *nr_calls) + { +@@ -217,7 +217,7 @@ int blocking_notifier_chain_register(str + * not yet working and interrupts must remain disabled. At + * such times we must not call down_write(). 
+ */ +- if (unlikely(system_state == SYSTEM_BOOTING)) ++ if (unlikely(system_state < SYSTEM_RUNNING)) + return notifier_chain_register(&nh->head, n); + + down_write(&nh->rwsem); +Index: linux-2.6-tip/kernel/user.c +=================================================================== +--- linux-2.6-tip.orig/kernel/user.c ++++ linux-2.6-tip/kernel/user.c +@@ -297,14 +297,14 @@ static void cleanup_user_struct(struct w + */ + uids_mutex_lock(); + +- local_irq_save(flags); ++ local_irq_save_nort(flags); + + if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) { + uid_hash_remove(up); + remove_user = 1; + spin_unlock_irqrestore(&uidhash_lock, flags); + } else { +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + } + + if (!remove_user) +@@ -405,11 +405,11 @@ void free_uid(struct user_struct *up) + if (!up) + return; + +- local_irq_save(flags); ++ local_irq_save_nort(flags); + if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) + free_user(up, flags); + else +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + } + + struct user_struct *alloc_uid(struct user_namespace *ns, uid_t uid) +Index: linux-2.6-tip/lib/radix-tree.c +=================================================================== +--- linux-2.6-tip.orig/lib/radix-tree.c ++++ linux-2.6-tip/lib/radix-tree.c +@@ -157,12 +157,14 @@ radix_tree_node_alloc(struct radix_tree_ + * succeed in getting a node here (and never reach + * kmem_cache_alloc) + */ ++ rtp = &get_cpu_var(radix_tree_preloads); + rtp = &__get_cpu_var(radix_tree_preloads); + if (rtp->nr) { + ret = rtp->nodes[rtp->nr - 1]; + rtp->nodes[rtp->nr - 1] = NULL; + rtp->nr--; + } ++ put_cpu_var(radix_tree_preloads); + } + if (ret == NULL) + ret = kmem_cache_alloc(radix_tree_node_cachep, gfp_mask); +@@ -195,6 +197,8 @@ radix_tree_node_free(struct radix_tree_n + call_rcu(&node->rcu_head, radix_tree_node_rcu_free); + } + ++#ifndef CONFIG_PREEMPT_RT ++ + /* + * Load up this CPU's radix_tree_node buffer with sufficient objects to + * ensure that the addition of a single element in the tree cannot fail. On +@@ -227,6 +231,8 @@ out: + } + EXPORT_SYMBOL(radix_tree_preload); + ++#endif ++ + /* + * Return the maximum key which can be store into a + * radix tree with height HEIGHT. 
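/*
 * A minimal caller-side sketch (illustrative only, not part of the patch
 * above) of the pattern the radix_tree_preload() changes assume. On a
 * stock kernel, radix_tree_preload() fills a per-CPU node buffer and, on
 * success, returns with preemption disabled until radix_tree_preload_end();
 * the hunks above turn both calls into no-ops under CONFIG_PREEMPT_RT,
 * since (as the patch comment notes) a mutex-based kernel may freely
 * schedule inside the radix-tree code. The function, lock and tree names
 * below are placeholders and the error handling is simplified.
 */
#include <linux/radix-tree.h>
#include <linux/spinlock.h>
#include <linux/gfp.h>

static int example_radix_insert(struct radix_tree_root *tree,
				spinlock_t *tree_lock,
				unsigned long index, void *item)
{
	int err;

	/* Preallocate nodes; returns 0 immediately on PREEMPT_RT. */
	err = radix_tree_preload(GFP_KERNEL);
	if (err)
		return err;

	spin_lock(tree_lock);		/* a sleeping lock on PREEMPT_RT */
	err = radix_tree_insert(tree, index, item);
	spin_unlock(tree_lock);

	/* Re-enables preemption on !PREEMPT_RT; a no-op on PREEMPT_RT. */
	radix_tree_preload_end();
	return err;
}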
+Index: linux-2.6-tip/net/ipv4/proc.c +=================================================================== +--- linux-2.6-tip.orig/net/ipv4/proc.c ++++ linux-2.6-tip/net/ipv4/proc.c +@@ -54,8 +54,8 @@ static int sockstat_seq_show(struct seq_ + int orphans, sockets; + + local_bh_disable(); +- orphans = percpu_counter_sum_positive(&tcp_orphan_count), +- sockets = percpu_counter_sum_positive(&tcp_sockets_allocated), ++ orphans = percpu_counter_sum_positive(&tcp_orphan_count); ++ sockets = percpu_counter_sum_positive(&tcp_sockets_allocated); + local_bh_enable(); + + socket_seq_show(seq); +Index: linux-2.6-tip/include/linux/netdevice.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/netdevice.h ++++ linux-2.6-tip/include/linux/netdevice.h +@@ -439,7 +439,7 @@ struct netdev_queue { + struct Qdisc *qdisc; + unsigned long state; + spinlock_t _xmit_lock; +- int xmit_lock_owner; ++ void *xmit_lock_owner; + struct Qdisc *qdisc_sleeping; + } ____cacheline_aligned_in_smp; + +@@ -1625,35 +1625,43 @@ static inline void netif_rx_complete(str + napi_complete(napi); + } + +-static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu) ++static inline void __netif_tx_lock(struct netdev_queue *txq) + { + spin_lock(&txq->_xmit_lock); +- txq->xmit_lock_owner = cpu; ++ txq->xmit_lock_owner = (void *)current; ++} ++ ++/* ++ * Do we hold the xmit_lock already? ++ */ ++static inline int netif_tx_lock_recursion(struct netdev_queue *txq) ++{ ++ return txq->xmit_lock_owner == (void *)current; + } + + static inline void __netif_tx_lock_bh(struct netdev_queue *txq) + { + spin_lock_bh(&txq->_xmit_lock); +- txq->xmit_lock_owner = smp_processor_id(); ++ txq->xmit_lock_owner = (void *)current; + } + + static inline int __netif_tx_trylock(struct netdev_queue *txq) + { + int ok = spin_trylock(&txq->_xmit_lock); + if (likely(ok)) +- txq->xmit_lock_owner = smp_processor_id(); ++ txq->xmit_lock_owner = (void *)current; + return ok; + } + + static inline void __netif_tx_unlock(struct netdev_queue *txq) + { +- txq->xmit_lock_owner = -1; ++ txq->xmit_lock_owner = (void *)-1; + spin_unlock(&txq->_xmit_lock); + } + + static inline void __netif_tx_unlock_bh(struct netdev_queue *txq) + { +- txq->xmit_lock_owner = -1; ++ txq->xmit_lock_owner = (void *)-1; + spin_unlock_bh(&txq->_xmit_lock); + } + +@@ -1666,10 +1674,8 @@ static inline void __netif_tx_unlock_bh( + static inline void netif_tx_lock(struct net_device *dev) + { + unsigned int i; +- int cpu; + + spin_lock(&dev->tx_global_lock); +- cpu = smp_processor_id(); + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + +@@ -1679,7 +1685,7 @@ static inline void netif_tx_lock(struct + * the ->hard_start_xmit() handler and already + * checked the frozen bit. 
+ */ +- __netif_tx_lock(txq, cpu); ++ __netif_tx_lock(txq); + set_bit(__QUEUE_STATE_FROZEN, &txq->state); + __netif_tx_unlock(txq); + } +@@ -1715,9 +1721,9 @@ static inline void netif_tx_unlock_bh(st + local_bh_enable(); + } + +-#define HARD_TX_LOCK(dev, txq, cpu) { \ ++#define HARD_TX_LOCK(dev, txq) { \ + if ((dev->features & NETIF_F_LLTX) == 0) { \ +- __netif_tx_lock(txq, cpu); \ ++ __netif_tx_lock(txq); \ + } \ + } + +@@ -1730,14 +1736,12 @@ static inline void netif_tx_unlock_bh(st + static inline void netif_tx_disable(struct net_device *dev) + { + unsigned int i; +- int cpu; + + local_bh_disable(); +- cpu = smp_processor_id(); + for (i = 0; i < dev->num_tx_queues; i++) { + struct netdev_queue *txq = netdev_get_tx_queue(dev, i); + +- __netif_tx_lock(txq, cpu); ++ __netif_tx_lock(txq); + netif_tx_stop_queue(txq); + __netif_tx_unlock(txq); + } +Index: linux-2.6-tip/include/net/dn_dev.h +=================================================================== +--- linux-2.6-tip.orig/include/net/dn_dev.h ++++ linux-2.6-tip/include/net/dn_dev.h +@@ -76,9 +76,9 @@ struct dn_dev_parms { + int priority; /* Priority to be a router */ + char *name; /* Name for sysctl */ + int ctl_name; /* Index for sysctl */ +- int (*up)(struct net_device *); +- void (*down)(struct net_device *); +- void (*timer3)(struct net_device *, struct dn_ifaddr *ifa); ++ int (*dn_up)(struct net_device *); ++ void (*dn_down)(struct net_device *); ++ void (*dn_timer3)(struct net_device *, struct dn_ifaddr *ifa); + void *sysctl; + }; + +Index: linux-2.6-tip/net/core/netpoll.c +=================================================================== +--- linux-2.6-tip.orig/net/core/netpoll.c ++++ linux-2.6-tip/net/core/netpoll.c +@@ -68,20 +68,20 @@ static void queue_process(struct work_st + + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + +- local_irq_save(flags); +- __netif_tx_lock(txq, smp_processor_id()); ++ local_irq_save_nort(flags); ++ __netif_tx_lock(txq); + if (netif_tx_queue_stopped(txq) || + netif_tx_queue_frozen(txq) || + ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { + skb_queue_head(&npinfo->txq, skb); + __netif_tx_unlock(txq); +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + + schedule_delayed_work(&npinfo->tx_work, HZ/10); + return; + } + __netif_tx_unlock(txq); +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + } + } + +@@ -151,7 +151,7 @@ static void poll_napi(struct net_device + int budget = 16; + + list_for_each_entry(napi, &dev->napi_list, dev_list) { +- if (napi->poll_owner != smp_processor_id() && ++ if (napi->poll_owner != raw_smp_processor_id() && + spin_trylock(&napi->poll_lock)) { + budget = poll_one_napi(dev->npinfo, napi, budget); + spin_unlock(&napi->poll_lock); +@@ -208,30 +208,35 @@ static void refill_skbs(void) + + static void zap_completion_queue(void) + { +- unsigned long flags; + struct softnet_data *sd = &get_cpu_var(softnet_data); ++ struct sk_buff *clist = NULL; ++ unsigned long flags; + + if (sd->completion_queue) { +- struct sk_buff *clist; + + local_irq_save(flags); + clist = sd->completion_queue; + sd->completion_queue = NULL; + local_irq_restore(flags); +- +- while (clist != NULL) { +- struct sk_buff *skb = clist; +- clist = clist->next; +- if (skb->destructor) { +- atomic_inc(&skb->users); +- dev_kfree_skb_any(skb); /* put this one back */ +- } else { +- __kfree_skb(skb); +- } +- } + } + ++ ++ /* ++ * Took the list private, can drop our softnet ++ * reference: ++ */ + put_cpu_var(softnet_data); ++ ++ while (clist != NULL) { ++ struct sk_buff *skb 
= clist; ++ clist = clist->next; ++ if (skb->destructor) { ++ atomic_inc(&skb->users); ++ dev_kfree_skb_any(skb); /* put this one back */ ++ } else { ++ __kfree_skb(skb); ++ } ++ } + } + + static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) +@@ -239,13 +244,26 @@ static struct sk_buff *find_skb(struct n + int count = 0; + struct sk_buff *skb; + ++#ifdef CONFIG_PREEMPT_RT ++ /* ++ * On -rt skb_pool.lock is schedulable, so if we are ++ * in an atomic context we just try to dequeue from the ++ * pool and fail if we cannot get one. ++ */ ++ if (in_atomic() || irqs_disabled()) ++ goto pick_atomic; ++#endif + zap_completion_queue(); + refill_skbs(); + repeat: + + skb = alloc_skb(len, GFP_ATOMIC); +- if (!skb) ++ if (!skb) { ++#ifdef CONFIG_PREEMPT_RT ++pick_atomic: ++#endif + skb = skb_dequeue(&skb_pool); ++ } + + if (!skb) { + if (++count < 10) { +@@ -265,7 +283,7 @@ static int netpoll_owner_active(struct n + struct napi_struct *napi; + + list_for_each_entry(napi, &dev->napi_list, dev_list) { +- if (napi->poll_owner == smp_processor_id()) ++ if (napi->poll_owner == raw_smp_processor_id()) + return 1; + } + return 0; +@@ -291,7 +309,7 @@ static void netpoll_send_skb(struct netp + + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + +- local_irq_save(flags); ++ local_irq_save_nort(flags); + /* try until next clock tick */ + for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; + tries > 0; --tries) { +@@ -310,7 +328,7 @@ static void netpoll_send_skb(struct netp + + udelay(USEC_PER_POLL); + } +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + } + + if (status != NETDEV_TX_OK) { +@@ -731,7 +749,7 @@ int netpoll_setup(struct netpoll *np) + np->name); + break; + } +- cond_resched(); ++ schedule_timeout_uninterruptible(1); + } + + /* If carrier appears to come up instantly, we don't +Index: linux-2.6-tip/net/decnet/dn_dev.c +=================================================================== +--- linux-2.6-tip.orig/net/decnet/dn_dev.c ++++ linux-2.6-tip/net/decnet/dn_dev.c +@@ -90,9 +90,9 @@ static struct dn_dev_parms dn_dev_list[] + .t3 = 10, + .name = "ethernet", + .ctl_name = NET_DECNET_CONF_ETHER, +- .up = dn_eth_up, +- .down = dn_eth_down, +- .timer3 = dn_send_brd_hello, ++ .dn_up = dn_eth_up, ++ .dn_down = dn_eth_down, ++ .dn_timer3 = dn_send_brd_hello, + }, + { + .type = ARPHRD_IPGRE, /* DECnet tunneled over GRE in IP */ +@@ -102,7 +102,7 @@ static struct dn_dev_parms dn_dev_list[] + .t3 = 10, + .name = "ipgre", + .ctl_name = NET_DECNET_CONF_GRE, +- .timer3 = dn_send_brd_hello, ++ .dn_timer3 = dn_send_brd_hello, + }, + #if 0 + { +@@ -113,7 +113,7 @@ static struct dn_dev_parms dn_dev_list[] + .t3 = 120, + .name = "x25", + .ctl_name = NET_DECNET_CONF_X25, +- .timer3 = dn_send_ptp_hello, ++ .dn_timer3 = dn_send_ptp_hello, + }, + #endif + #if 0 +@@ -125,7 +125,7 @@ static struct dn_dev_parms dn_dev_list[] + .t3 = 10, + .name = "ppp", + .ctl_name = NET_DECNET_CONF_PPP, +- .timer3 = dn_send_brd_hello, ++ .dn_timer3 = dn_send_brd_hello, + }, + #endif + { +@@ -136,7 +136,7 @@ static struct dn_dev_parms dn_dev_list[] + .t3 = 120, + .name = "ddcmp", + .ctl_name = NET_DECNET_CONF_DDCMP, +- .timer3 = dn_send_ptp_hello, ++ .dn_timer3 = dn_send_ptp_hello, + }, + { + .type = ARPHRD_LOOPBACK, /* Loopback interface - always last */ +@@ -146,7 +146,7 @@ static struct dn_dev_parms dn_dev_list[] + .t3 = 10, + .name = "loopback", + .ctl_name = NET_DECNET_CONF_LOOPBACK, +- .timer3 = dn_send_brd_hello, ++ .dn_timer3 = dn_send_brd_hello, + } + }; + +@@ -305,11 +305,11 @@ static 
int dn_forwarding_proc(ctl_table + */ + tmp = dn_db->parms.forwarding; + dn_db->parms.forwarding = old; +- if (dn_db->parms.down) +- dn_db->parms.down(dev); ++ if (dn_db->parms.dn_down) ++ dn_db->parms.dn_down(dev); + dn_db->parms.forwarding = tmp; +- if (dn_db->parms.up) +- dn_db->parms.up(dev); ++ if (dn_db->parms.dn_up) ++ dn_db->parms.dn_up(dev); + } + + return err; +@@ -343,11 +343,11 @@ static int dn_forwarding_sysctl(ctl_tabl + if (value > 2) + return -EINVAL; + +- if (dn_db->parms.down) +- dn_db->parms.down(dev); ++ if (dn_db->parms.dn_down) ++ dn_db->parms.dn_down(dev); + dn_db->parms.forwarding = value; +- if (dn_db->parms.up) +- dn_db->parms.up(dev); ++ if (dn_db->parms.dn_up) ++ dn_db->parms.dn_up(dev); + } + + return 0; +@@ -1078,10 +1078,10 @@ static void dn_dev_timer_func(unsigned l + struct dn_ifaddr *ifa; + + if (dn_db->t3 <= dn_db->parms.t2) { +- if (dn_db->parms.timer3) { ++ if (dn_db->parms.dn_timer3) { + for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) { + if (!(ifa->ifa_flags & IFA_F_SECONDARY)) +- dn_db->parms.timer3(dev, ifa); ++ dn_db->parms.dn_timer3(dev, ifa); + } + } + dn_db->t3 = dn_db->parms.t3; +@@ -1140,8 +1140,8 @@ static struct dn_dev *dn_dev_create(stru + return NULL; + } + +- if (dn_db->parms.up) { +- if (dn_db->parms.up(dev) < 0) { ++ if (dn_db->parms.dn_up) { ++ if (dn_db->parms.dn_up(dev) < 0) { + neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms); + dev->dn_ptr = NULL; + kfree(dn_db); +@@ -1235,8 +1235,8 @@ static void dn_dev_delete(struct net_dev + dn_dev_check_default(dev); + neigh_ifdown(&dn_neigh_table, dev); + +- if (dn_db->parms.down) +- dn_db->parms.down(dev); ++ if (dn_db->parms.dn_down) ++ dn_db->parms.dn_down(dev); + + dev->dn_ptr = NULL; + +Index: linux-2.6-tip/net/ipv4/icmp.c +=================================================================== +--- linux-2.6-tip.orig/net/ipv4/icmp.c ++++ linux-2.6-tip/net/ipv4/icmp.c +@@ -201,7 +201,10 @@ static const struct icmp_control icmp_po + */ + static struct sock *icmp_sk(struct net *net) + { +- return net->ipv4.icmp_sk[smp_processor_id()]; ++ /* ++ * Should be safe on PREEMPT_SOFTIRQS/HARDIRQS to use raw-smp-processor-id: ++ */ ++ return net->ipv4.icmp_sk[raw_smp_processor_id()]; + } + + static inline struct sock *icmp_xmit_lock(struct net *net) +Index: linux-2.6-tip/net/ipv6/netfilter/ip6_tables.c +=================================================================== +--- linux-2.6-tip.orig/net/ipv6/netfilter/ip6_tables.c ++++ linux-2.6-tip/net/ipv6/netfilter/ip6_tables.c +@@ -376,7 +376,7 @@ ip6t_do_table(struct sk_buff *skb, + read_lock_bh(&table->lock); + IP_NF_ASSERT(table->valid_hooks & (1 << hook)); + private = table->private; +- table_base = (void *)private->entries[smp_processor_id()]; ++ table_base = (void *)private->entries[raw_smp_processor_id()]; + e = get_entry(table_base, private->hook_entry[hook]); + + /* For return from builtin chain */ +Index: linux-2.6-tip/net/sched/sch_generic.c +=================================================================== +--- linux-2.6-tip.orig/net/sched/sch_generic.c ++++ linux-2.6-tip/net/sched/sch_generic.c +@@ -12,6 +12,7 @@ + */ + + #include ++#include + #include + #include + #include +@@ -24,6 +25,7 @@ + #include + #include + #include ++#include + #include + + /* Main transmission queue. */ +@@ -78,7 +80,7 @@ static inline int handle_dev_cpu_collisi + { + int ret; + +- if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) { ++ if (unlikely(netif_tx_lock_recursion(dev_queue))) { + /* + * Same CPU holding the lock. 
It may be a transient + * configuration error, when hard_start_xmit() recurses. We +@@ -95,7 +97,9 @@ static inline int handle_dev_cpu_collisi + * Another cpu is holding lock, requeue & delay xmits for + * some time. + */ ++ preempt_disable(); /* FIXME: we need an _rt version of this */ + __get_cpu_var(netdev_rx_stat).cpu_collision++; ++ preempt_enable(); + ret = dev_requeue_skb(skb, q); + } + +@@ -141,7 +145,7 @@ static inline int qdisc_restart(struct Q + dev = qdisc_dev(q); + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + +- HARD_TX_LOCK(dev, txq, smp_processor_id()); ++ HARD_TX_LOCK(dev, txq); + if (!netif_tx_queue_stopped(txq) && + !netif_tx_queue_frozen(txq)) + ret = dev_hard_start_xmit(skb, dev, txq); +@@ -691,9 +695,12 @@ void dev_deactivate(struct net_device *d + /* Wait for outstanding qdisc-less dev_queue_xmit calls. */ + synchronize_rcu(); + +- /* Wait for outstanding qdisc_run calls. */ ++ /* ++ * Wait for outstanding qdisc_run calls. ++ * TODO: shouldnt this be wakeup-based, instead of polling it? ++ */ + while (some_qdisc_is_busy(dev)) +- yield(); ++ msleep(1); + } + + static void dev_init_scheduler_queue(struct net_device *dev, +Index: linux-2.6-tip/drivers/net/bnx2.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/bnx2.c ++++ linux-2.6-tip/drivers/net/bnx2.c +@@ -2662,7 +2662,7 @@ bnx2_tx_int(struct bnx2 *bp, struct bnx2 + + if (unlikely(netif_tx_queue_stopped(txq)) && + (bnx2_tx_avail(bp, txr) > bp->tx_wake_thresh)) { +- __netif_tx_lock(txq, smp_processor_id()); ++ __netif_tx_lock(txq); + if ((netif_tx_queue_stopped(txq)) && + (bnx2_tx_avail(bp, txr) > bp->tx_wake_thresh)) + netif_tx_wake_queue(txq); +Index: linux-2.6-tip/drivers/net/mv643xx_eth.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/mv643xx_eth.c ++++ linux-2.6-tip/drivers/net/mv643xx_eth.c +@@ -484,7 +484,7 @@ static void txq_maybe_wake(struct tx_que + struct netdev_queue *nq = netdev_get_tx_queue(mp->dev, txq->index); + + if (netif_tx_queue_stopped(nq)) { +- __netif_tx_lock(nq, smp_processor_id()); ++ __netif_tx_lock(nq); + if (txq->tx_ring_size - txq->tx_desc_count >= MAX_SKB_FRAGS + 1) + netif_tx_wake_queue(nq); + __netif_tx_unlock(nq); +@@ -838,7 +838,7 @@ static void txq_kick(struct tx_queue *tx + u32 hw_desc_ptr; + u32 expected_ptr; + +- __netif_tx_lock(nq, smp_processor_id()); ++ __netif_tx_lock(nq); + + if (rdlp(mp, TXQ_COMMAND) & (1 << txq->index)) + goto out; +@@ -862,7 +862,7 @@ static int txq_reclaim(struct tx_queue * + struct netdev_queue *nq = netdev_get_tx_queue(mp->dev, txq->index); + int reclaimed; + +- __netif_tx_lock(nq, smp_processor_id()); ++ __netif_tx_lock(nq); + + reclaimed = 0; + while (reclaimed < budget && txq->tx_desc_count > 0) { +Index: linux-2.6-tip/drivers/net/niu.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/niu.c ++++ linux-2.6-tip/drivers/net/niu.c +@@ -3519,7 +3519,7 @@ static void niu_tx_work(struct niu *np, + out: + if (unlikely(netif_tx_queue_stopped(txq) && + (niu_tx_avail(rp) > NIU_TX_WAKEUP_THRESH(rp)))) { +- __netif_tx_lock(txq, smp_processor_id()); ++ __netif_tx_lock(txq); + if (netif_tx_queue_stopped(txq) && + (niu_tx_avail(rp) > NIU_TX_WAKEUP_THRESH(rp))) + netif_tx_wake_queue(txq); +Index: linux-2.6-tip/block/blk-core.c +=================================================================== +--- linux-2.6-tip.orig/block/blk-core.c ++++ linux-2.6-tip/block/blk-core.c +@@ -212,7 
+212,7 @@ EXPORT_SYMBOL(blk_dump_rq_flags); + */ + void blk_plug_device(struct request_queue *q) + { +- WARN_ON(!irqs_disabled()); ++ WARN_ON_NONRT(!irqs_disabled()); + + /* + * don't plug a stopped queue, it must be paired with blk_start_queue() +@@ -252,7 +252,7 @@ EXPORT_SYMBOL(blk_plug_device_unlocked); + */ + int blk_remove_plug(struct request_queue *q) + { +- WARN_ON(!irqs_disabled()); ++ WARN_ON_NONRT(!irqs_disabled()); + + if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q)) + return 0; +@@ -362,7 +362,7 @@ static void blk_invoke_request_fn(struct + **/ + void blk_start_queue(struct request_queue *q) + { +- WARN_ON(!irqs_disabled()); ++ WARN_ON_NONRT(!irqs_disabled()); + + queue_flag_clear(QUEUE_FLAG_STOPPED, q); + blk_invoke_request_fn(q); +Index: linux-2.6-tip/fs/aio.c +=================================================================== +--- linux-2.6-tip.orig/fs/aio.c ++++ linux-2.6-tip/fs/aio.c +@@ -622,9 +622,11 @@ static void use_mm(struct mm_struct *mm) + task_lock(tsk); + active_mm = tsk->active_mm; + atomic_inc(&mm->mm_count); ++ local_irq_disable(); // FIXME ++ switch_mm(active_mm, mm, tsk); + tsk->mm = mm; + tsk->active_mm = mm; +- switch_mm(active_mm, mm, tsk); ++ local_irq_enable(); + task_unlock(tsk); + + mmdrop(active_mm); +Index: linux-2.6-tip/fs/file.c +=================================================================== +--- linux-2.6-tip.orig/fs/file.c ++++ linux-2.6-tip/fs/file.c +@@ -102,14 +102,15 @@ void free_fdtable_rcu(struct rcu_head *r + kfree(fdt->open_fds); + kfree(fdt); + } else { +- fddef = &get_cpu_var(fdtable_defer_list); ++ ++ fddef = &per_cpu(fdtable_defer_list, raw_smp_processor_id()); ++ + spin_lock(&fddef->lock); + fdt->next = fddef->next; + fddef->next = fdt; + /* vmallocs are handled from the workqueue context */ + schedule_work(&fddef->wq); + spin_unlock(&fddef->lock); +- put_cpu_var(fdtable_defer_list); + } + } + +Index: linux-2.6-tip/fs/notify/dnotify/dnotify.c +=================================================================== +--- linux-2.6-tip.orig/fs/notify/dnotify/dnotify.c ++++ linux-2.6-tip/fs/notify/dnotify/dnotify.c +@@ -170,7 +170,7 @@ void dnotify_parent(struct dentry *dentr + + spin_lock(&dentry->d_lock); + parent = dentry->d_parent; +- if (parent->d_inode->i_dnotify_mask & event) { ++ if (unlikely(parent->d_inode->i_dnotify_mask & event)) { + dget(parent); + spin_unlock(&dentry->d_lock); + __inode_dir_notify(parent->d_inode, event); +Index: linux-2.6-tip/fs/pipe.c +=================================================================== +--- linux-2.6-tip.orig/fs/pipe.c ++++ linux-2.6-tip/fs/pipe.c +@@ -386,8 +386,14 @@ redo: + wake_up_interruptible_sync(&pipe->wait); + kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); + } ++ /* ++ * Hack: we turn off atime updates for -RT kernels. ++ * Who uses them on pipes anyway? ++ */ ++#ifndef CONFIG_PREEMPT_RT + if (ret > 0) + file_accessed(filp); ++#endif + return ret; + } + +@@ -559,8 +565,14 @@ out: + wake_up_interruptible_sync(&pipe->wait); + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + } ++ /* ++ * Hack: we turn off atime updates for -RT kernels. ++ * Who uses them on pipes anyway? 
++ */ ++#ifndef CONFIG_PREEMPT_RT + if (ret > 0) + file_update_time(filp); ++#endif + return ret; + } + +Index: linux-2.6-tip/fs/proc/task_mmu.c +=================================================================== +--- linux-2.6-tip.orig/fs/proc/task_mmu.c ++++ linux-2.6-tip/fs/proc/task_mmu.c +@@ -137,8 +137,10 @@ static void *m_start(struct seq_file *m, + vma = NULL; + if ((unsigned long)l < mm->map_count) { + vma = mm->mmap; +- while (l-- && vma) ++ while (l-- && vma) { + vma = vma->vm_next; ++ cond_resched(); ++ } + goto out; + } + +Index: linux-2.6-tip/fs/xfs/linux-2.6/mrlock.h +=================================================================== +--- linux-2.6-tip.orig/fs/xfs/linux-2.6/mrlock.h ++++ linux-2.6-tip/fs/xfs/linux-2.6/mrlock.h +@@ -21,7 +21,7 @@ + #include + + typedef struct { +- struct rw_semaphore mr_lock; ++ struct compat_rw_semaphore mr_lock; + #ifdef DEBUG + int mr_writer; + #endif +Index: linux-2.6-tip/fs/xfs/xfs_mount.h +=================================================================== +--- linux-2.6-tip.orig/fs/xfs/xfs_mount.h ++++ linux-2.6-tip/fs/xfs/xfs_mount.h +@@ -275,7 +275,7 @@ typedef struct xfs_mount { + uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ + uint m_in_maxlevels; /* XFS_IN_MAXLEVELS */ + struct xfs_perag *m_perag; /* per-ag accounting info */ +- struct rw_semaphore m_peraglock; /* lock for m_perag (pointer) */ ++ struct compat_rw_semaphore m_peraglock; /* lock for m_perag (pointer) */ + struct mutex m_growlock; /* growfs mutex */ + int m_fixedfsid[2]; /* unchanged for life of FS */ + uint m_dmevmask; /* DMI events for this FS */ +Index: linux-2.6-tip/drivers/acpi/acpica/acglobal.h +=================================================================== +--- linux-2.6-tip.orig/drivers/acpi/acpica/acglobal.h ++++ linux-2.6-tip/drivers/acpi/acpica/acglobal.h +@@ -190,7 +190,12 @@ ACPI_EXTERN u8 acpi_gbl_global_lock_pres + * interrupt level + */ + ACPI_EXTERN spinlock_t _acpi_gbl_gpe_lock; /* For GPE data structs and registers */ +-ACPI_EXTERN spinlock_t _acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ ++ ++/* ++ * Need to be raw because it might be used in acpi_processor_idle(): ++ */ ++ACPI_EXTERN raw_spinlock_t _acpi_gbl_hardware_lock; /* For ACPI H/W except GPE registers */ ++ + #define acpi_gbl_gpe_lock &_acpi_gbl_gpe_lock + #define acpi_gbl_hardware_lock &_acpi_gbl_hardware_lock + +Index: linux-2.6-tip/drivers/acpi/acpica/hwregs.c +=================================================================== +--- linux-2.6-tip.orig/drivers/acpi/acpica/hwregs.c ++++ linux-2.6-tip/drivers/acpi/acpica/hwregs.c +@@ -74,7 +74,7 @@ acpi_status acpi_hw_clear_acpi_status(vo + ACPI_BITMASK_ALL_FIXED_STATUS, + (u16) acpi_gbl_FADT.xpm1a_event_block.address)); + +- lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); ++ spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); + + status = acpi_hw_register_write(ACPI_REGISTER_PM1_STATUS, + ACPI_BITMASK_ALL_FIXED_STATUS); +@@ -97,7 +97,7 @@ acpi_status acpi_hw_clear_acpi_status(vo + status = acpi_ev_walk_gpe_list(acpi_hw_clear_gpe_block, NULL); + + unlock_and_exit: +- acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); ++ spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); + return_ACPI_STATUS(status); + } + +Index: linux-2.6-tip/drivers/acpi/acpica/hwxface.c +=================================================================== +--- linux-2.6-tip.orig/drivers/acpi/acpica/hwxface.c ++++ linux-2.6-tip/drivers/acpi/acpica/hwxface.c +@@ -313,9 +313,9 @@ acpi_status acpi_get_register(u32 
regist + acpi_status status; + acpi_cpu_flags flags; + +- flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); ++ spin_lock_irqsave(acpi_gbl_hardware_lock, flags); + status = acpi_get_register_unlocked(register_id, return_value); +- acpi_os_release_lock(acpi_gbl_hardware_lock, flags); ++ spin_unlock_irqrestore(acpi_gbl_hardware_lock, flags); + + return (status); + } +@@ -353,7 +353,7 @@ acpi_status acpi_set_register(u32 regist + return_ACPI_STATUS(AE_BAD_PARAMETER); + } + +- lock_flags = acpi_os_acquire_lock(acpi_gbl_hardware_lock); ++ spin_lock_irqsave(acpi_gbl_hardware_lock, lock_flags); + + /* Always do a register read first so we can insert the new bits */ + +@@ -458,7 +458,7 @@ acpi_status acpi_set_register(u32 regist + + unlock_and_exit: + +- acpi_os_release_lock(acpi_gbl_hardware_lock, lock_flags); ++ spin_unlock_irqrestore(acpi_gbl_hardware_lock, lock_flags); + + /* Normalize the value that was read */ + +Index: linux-2.6-tip/drivers/acpi/acpica/utmutex.c +=================================================================== +--- linux-2.6-tip.orig/drivers/acpi/acpica/utmutex.c ++++ linux-2.6-tip/drivers/acpi/acpica/utmutex.c +@@ -117,7 +117,7 @@ void acpi_ut_mutex_terminate(void) + /* Delete the spinlocks */ + + acpi_os_delete_lock(acpi_gbl_gpe_lock); +- acpi_os_delete_lock(acpi_gbl_hardware_lock); ++// acpi_os_delete_lock(acpi_gbl_hardware_lock); + return_VOID; + } + +Index: linux-2.6-tip/drivers/acpi/ec.c +=================================================================== +--- linux-2.6-tip.orig/drivers/acpi/ec.c ++++ linux-2.6-tip/drivers/acpi/ec.c +@@ -563,8 +563,21 @@ static u32 acpi_ec_gpe_handler(void *dat + if (test_bit(EC_FLAGS_GPE_MODE, &ec->flags)) { + gpe_transaction(ec, status); + if (ec_transaction_done(ec) && +- (status & ACPI_EC_FLAG_IBF) == 0) ++ (status & ACPI_EC_FLAG_IBF) == 0) { ++#if 0 + wake_up(&ec->wait); ++#else ++ // hack ... ++ if (waitqueue_active(&ec->wait)) { ++ struct task_struct *task; ++ ++ task = list_entry(ec->wait.task_list.next, ++ wait_queue_t, task_list)->private; ++ if (task) ++ wake_up_process(task); ++ } ++#endif ++ } + } + + ec_check_sci(ec, status); +Index: linux-2.6-tip/ipc/mqueue.c +=================================================================== +--- linux-2.6-tip.orig/ipc/mqueue.c ++++ linux-2.6-tip/ipc/mqueue.c +@@ -787,12 +787,17 @@ static inline void pipelined_send(struct + struct msg_msg *message, + struct ext_wait_queue *receiver) + { ++ /* ++ * Keep them in one critical section for PREEMPT_RT: ++ */ ++ preempt_disable(); + receiver->msg = message; + list_del(&receiver->list); + receiver->state = STATE_PENDING; + wake_up_process(receiver->task); + smp_wmb(); + receiver->state = STATE_READY; ++ preempt_enable(); + } + + /* pipelined_receive() - if there is task waiting in sys_mq_timedsend() +Index: linux-2.6-tip/ipc/msg.c +=================================================================== +--- linux-2.6-tip.orig/ipc/msg.c ++++ linux-2.6-tip/ipc/msg.c +@@ -259,12 +259,19 @@ static void expunge_all(struct msg_queue + while (tmp != &msq->q_receivers) { + struct msg_receiver *msr; + ++ /* ++ * Make sure that the wakeup doesnt preempt ++ * this CPU prematurely. 
(on PREEMPT_RT) ++ */ ++ preempt_disable(); ++ + msr = list_entry(tmp, struct msg_receiver, r_list); + tmp = tmp->next; + msr->r_msg = NULL; +- wake_up_process(msr->r_tsk); +- smp_mb(); ++ wake_up_process(msr->r_tsk); /* serializes */ + msr->r_msg = ERR_PTR(res); ++ ++ preempt_enable(); + } + } + +@@ -611,22 +618,28 @@ static inline int pipelined_send(struct + !security_msg_queue_msgrcv(msq, msg, msr->r_tsk, + msr->r_msgtype, msr->r_mode)) { + ++ /* ++ * Make sure that the wakeup doesnt preempt ++ * this CPU prematurely. (on PREEMPT_RT) ++ */ ++ preempt_disable(); ++ + list_del(&msr->r_list); + if (msr->r_maxsize < msg->m_ts) { + msr->r_msg = NULL; +- wake_up_process(msr->r_tsk); +- smp_mb(); ++ wake_up_process(msr->r_tsk); /* serializes */ + msr->r_msg = ERR_PTR(-E2BIG); + } else { + msr->r_msg = NULL; + msq->q_lrpid = task_pid_vnr(msr->r_tsk); + msq->q_rtime = get_seconds(); +- wake_up_process(msr->r_tsk); +- smp_mb(); ++ wake_up_process(msr->r_tsk); /* serializes */ + msr->r_msg = msg; ++ preempt_enable(); + + return 1; + } ++ preempt_enable(); + } + } + return 0; +Index: linux-2.6-tip/ipc/sem.c +=================================================================== +--- linux-2.6-tip.orig/ipc/sem.c ++++ linux-2.6-tip/ipc/sem.c +@@ -415,6 +415,11 @@ static void update_queue (struct sem_arr + struct sem_queue *n; + + /* ++ * make sure that the wakeup doesnt preempt ++ * _this_ cpu prematurely. (on preempt_rt) ++ */ ++ preempt_disable(); ++ /* + * Continue scanning. The next operation + * that must be checked depends on the type of the + * completed operation: +@@ -450,6 +455,7 @@ static void update_queue (struct sem_arr + */ + smp_wmb(); + q->status = error; ++ preempt_enable(); + q = n; + } else { + q = list_entry(q->list.next, struct sem_queue, list); +Index: linux-2.6-tip/include/linux/pagevec.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/pagevec.h ++++ linux-2.6-tip/include/linux/pagevec.h +@@ -9,7 +9,7 @@ + #define _LINUX_PAGEVEC_H + + /* 14 pointers + two long's align the pagevec structure to a power of two */ +-#define PAGEVEC_SIZE 14 ++#define PAGEVEC_SIZE 8 + + struct page; + struct address_space; +Index: linux-2.6-tip/include/linux/vmstat.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/vmstat.h ++++ linux-2.6-tip/include/linux/vmstat.h +@@ -75,7 +75,12 @@ DECLARE_PER_CPU(struct vm_event_state, v + + static inline void __count_vm_event(enum vm_event_item item) + { ++#ifdef CONFIG_PREEMPT_RT ++ get_cpu_var(vm_event_states).event[item]++; ++ put_cpu(); ++#else + __get_cpu_var(vm_event_states).event[item]++; ++#endif + } + + static inline void count_vm_event(enum vm_event_item item) +@@ -86,7 +91,12 @@ static inline void count_vm_event(enum v + + static inline void __count_vm_events(enum vm_event_item item, long delta) + { ++#ifdef CONFIG_PREEMPT_RT ++ get_cpu_var(vm_event_states).event[item] += delta; ++ put_cpu(); ++#else + __get_cpu_var(vm_event_states).event[item] += delta; ++#endif + } + + static inline void count_vm_events(enum vm_event_item item, long delta) +Index: linux-2.6-tip/mm/bounce.c +=================================================================== +--- linux-2.6-tip.orig/mm/bounce.c ++++ linux-2.6-tip/mm/bounce.c +@@ -51,11 +51,11 @@ static void bounce_copy_vec(struct bio_v + unsigned long flags; + unsigned char *vto; + +- local_irq_save(flags); ++ local_irq_save_nort(flags); + vto = kmap_atomic(to->bv_page, KM_BOUNCE_READ); + memcpy(vto + 
to->bv_offset, vfrom, to->bv_len); + kunmap_atomic(vto, KM_BOUNCE_READ); +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + } + + #else /* CONFIG_HIGHMEM */ +Index: linux-2.6-tip/mm/mmap.c +=================================================================== +--- linux-2.6-tip.orig/mm/mmap.c ++++ linux-2.6-tip/mm/mmap.c +@@ -1765,17 +1765,17 @@ static void unmap_region(struct mm_struc + unsigned long start, unsigned long end) + { + struct vm_area_struct *next = prev? prev->vm_next: mm->mmap; +- struct mmu_gather *tlb; ++ struct mmu_gather tlb; + unsigned long nr_accounted = 0; + + lru_add_drain(); +- tlb = tlb_gather_mmu(mm, 0); ++ tlb_gather_mmu(&tlb, mm, 0); + update_hiwater_rss(mm); + unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL); + vm_unacct_memory(nr_accounted); +- free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS, ++ free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS, + next? next->vm_start: 0); +- tlb_finish_mmu(tlb, start, end); ++ tlb_finish_mmu(&tlb, start, end); + } + + /* +@@ -1957,10 +1957,16 @@ SYSCALL_DEFINE2(munmap, unsigned long, a + static inline void verify_mm_writelocked(struct mm_struct *mm) + { + #ifdef CONFIG_DEBUG_VM +- if (unlikely(down_read_trylock(&mm->mmap_sem))) { ++# ifdef CONFIG_PREEMPT_RT ++ if (unlikely(!rt_rwsem_is_locked(&mm->mmap_sem))) { + WARN_ON(1); +- up_read(&mm->mmap_sem); + } ++# else ++ if (unlikely(down_read_trylock(&mm->mmap_sem))) { ++ WARN_ON(1); ++ up_read(&mm->mmap_sem); ++ } ++# endif + #endif + } + +@@ -2074,7 +2080,7 @@ EXPORT_SYMBOL(do_brk); + /* Release all mmaps. */ + void exit_mmap(struct mm_struct *mm) + { +- struct mmu_gather *tlb; ++ struct mmu_gather tlb; + struct vm_area_struct *vma; + unsigned long nr_accounted = 0; + unsigned long end; +@@ -2099,13 +2105,13 @@ void exit_mmap(struct mm_struct *mm) + + lru_add_drain(); + flush_cache_mm(mm); +- tlb = tlb_gather_mmu(mm, 1); ++ tlb_gather_mmu(&tlb, mm, 1); + /* update_hiwater_rss(mm) here? 
but nobody should be looking */ + /* Use -1 here to ensure all VMAs in the mm are unmapped */ + end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL); + vm_unacct_memory(nr_accounted); +- free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0); +- tlb_finish_mmu(tlb, 0, end); ++ free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0); ++ tlb_finish_mmu(&tlb, 0, end); + + /* + * Walk the list again, actually closing and freeing it, +Index: linux-2.6-tip/mm/vmstat.c +=================================================================== +--- linux-2.6-tip.orig/mm/vmstat.c ++++ linux-2.6-tip/mm/vmstat.c +@@ -153,10 +153,14 @@ static void refresh_zone_stat_thresholds + void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, + int delta) + { +- struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); +- s8 *p = pcp->vm_stat_diff + item; ++ struct per_cpu_pageset *pcp; ++ int cpu; + long x; ++ s8 *p; + ++ cpu = get_cpu(); ++ pcp = zone_pcp(zone, cpu); ++ p = pcp->vm_stat_diff + item; + x = delta + *p; + + if (unlikely(x > pcp->stat_threshold || x < -pcp->stat_threshold)) { +@@ -164,6 +168,7 @@ void __mod_zone_page_state(struct zone * + x = 0; + } + *p = x; ++ put_cpu(); + } + EXPORT_SYMBOL(__mod_zone_page_state); + +@@ -206,9 +211,13 @@ EXPORT_SYMBOL(mod_zone_page_state); + */ + void __inc_zone_state(struct zone *zone, enum zone_stat_item item) + { +- struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); +- s8 *p = pcp->vm_stat_diff + item; ++ struct per_cpu_pageset *pcp; ++ int cpu; ++ s8 *p; + ++ cpu = get_cpu(); ++ pcp = zone_pcp(zone, cpu); ++ p = pcp->vm_stat_diff + item; + (*p)++; + + if (unlikely(*p > pcp->stat_threshold)) { +@@ -217,18 +226,34 @@ void __inc_zone_state(struct zone *zone, + zone_page_state_add(*p + overstep, zone, item); + *p = -overstep; + } ++ put_cpu(); + } + + void __inc_zone_page_state(struct page *page, enum zone_stat_item item) + { ++#ifdef CONFIG_PREEMPT_RT ++ unsigned long flags; ++ struct zone *zone; ++ ++ zone = page_zone(page); ++ local_irq_save(flags); ++ __inc_zone_state(zone, item); ++ local_irq_restore(flags); ++#else + __inc_zone_state(page_zone(page), item); ++#endif + } + EXPORT_SYMBOL(__inc_zone_page_state); + + void __dec_zone_state(struct zone *zone, enum zone_stat_item item) + { +- struct per_cpu_pageset *pcp = zone_pcp(zone, smp_processor_id()); +- s8 *p = pcp->vm_stat_diff + item; ++ struct per_cpu_pageset *pcp; ++ int cpu; ++ s8 *p; ++ ++ cpu = get_cpu(); ++ pcp = zone_pcp(zone, cpu); ++ p = pcp->vm_stat_diff + item; + + (*p)--; + +@@ -238,6 +263,7 @@ void __dec_zone_state(struct zone *zone, + zone_page_state_add(*p - overstep, zone, item); + *p = overstep; + } ++ put_cpu(); + } + + void __dec_zone_page_state(struct page *page, enum zone_stat_item item) +Index: linux-2.6-tip/drivers/block/paride/pseudo.h +=================================================================== +--- linux-2.6-tip.orig/drivers/block/paride/pseudo.h ++++ linux-2.6-tip/drivers/block/paride/pseudo.h +@@ -43,7 +43,7 @@ static unsigned long ps_timeout; + static int ps_tq_active = 0; + static int ps_nice = 0; + +-static DEFINE_SPINLOCK(ps_spinlock __attribute__((unused))); ++static __attribute__((unused)) DEFINE_SPINLOCK(ps_spinlock); + + static DECLARE_DELAYED_WORK(ps_tq, ps_tq_int); + +Index: linux-2.6-tip/drivers/video/console/fbcon.c +=================================================================== +--- linux-2.6-tip.orig/drivers/video/console/fbcon.c ++++ linux-2.6-tip/drivers/video/console/fbcon.c +@@ -1203,7 +1203,6 @@ static void 
fbcon_clear(struct vc_data * + { + struct fb_info *info = registered_fb[con2fb_map[vc->vc_num]]; + struct fbcon_ops *ops = info->fbcon_par; +- + struct display *p = &fb_display[vc->vc_num]; + u_int y_break; + +@@ -1235,10 +1234,11 @@ static void fbcon_putcs(struct vc_data * + struct display *p = &fb_display[vc->vc_num]; + struct fbcon_ops *ops = info->fbcon_par; + +- if (!fbcon_is_inactive(vc, info)) ++ if (!fbcon_is_inactive(vc, info)) { + ops->putcs(vc, info, s, count, real_y(p, ypos), xpos, + get_color(vc, info, scr_readw(s), 1), + get_color(vc, info, scr_readw(s), 0)); ++ } + } + + static void fbcon_putc(struct vc_data *vc, int c, int ypos, int xpos) +@@ -3225,6 +3225,7 @@ static const struct consw fb_con = { + .con_screen_pos = fbcon_screen_pos, + .con_getxy = fbcon_getxy, + .con_resize = fbcon_resize, ++ .con_preemptible = 1, + }; + + static struct notifier_block fbcon_event_notifier = { +Index: linux-2.6-tip/include/linux/console.h +=================================================================== +--- linux-2.6-tip.orig/include/linux/console.h ++++ linux-2.6-tip/include/linux/console.h +@@ -55,6 +55,7 @@ struct consw { + void (*con_invert_region)(struct vc_data *, u16 *, int); + u16 *(*con_screen_pos)(struct vc_data *, int); + unsigned long (*con_getxy)(struct vc_data *, unsigned long, int *, int *); ++ int con_preemptible; // can it reschedule from within printk? + }; + + extern const struct consw *conswitchp; +@@ -92,6 +93,17 @@ void give_up_console(const struct consw + #define CON_BOOT (8) + #define CON_ANYTIME (16) /* Safe to call when cpu is offline */ + #define CON_BRL (32) /* Used for a braille device */ ++#define CON_ATOMIC (64) /* Safe to call in PREEMPT_RT atomic */ ++ ++#ifdef CONFIG_PREEMPT_RT ++# define console_atomic_safe(con) \ ++ (((con)->flags & CON_ATOMIC) || \ ++ (!in_atomic() && !irqs_disabled()) || \ ++ (system_state != SYSTEM_RUNNING) || \ ++ oops_in_progress) ++#else ++# define console_atomic_safe(con) (1) ++#endif + + struct console { + char name[16]; +Index: linux-2.6-tip/drivers/ide/alim15x3.c +=================================================================== +--- linux-2.6-tip.orig/drivers/ide/alim15x3.c ++++ linux-2.6-tip/drivers/ide/alim15x3.c +@@ -90,7 +90,7 @@ static void ali_set_pio_mode(ide_drive_t + if (r_clc >= 16) + r_clc = 0; + } +- local_irq_save(flags); ++ local_irq_save_nort(flags); + + /* + * PIO mode => ATA FIFO on, ATAPI FIFO off +@@ -112,7 +112,7 @@ static void ali_set_pio_mode(ide_drive_t + + pci_write_config_byte(dev, port, s_clc); + pci_write_config_byte(dev, port + unit + 2, (a_clc << 4) | r_clc); +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + } + + /** +@@ -222,7 +222,7 @@ static unsigned int init_chipset_ali15x3 + + isa_dev = pci_get_device(PCI_VENDOR_ID_AL, PCI_DEVICE_ID_AL_M1533, NULL); + +- local_irq_save(flags); ++ local_irq_save_nort(flags); + + if (m5229_revision < 0xC2) { + /* +@@ -313,7 +313,7 @@ out: + } + pci_dev_put(north); + pci_dev_put(isa_dev); +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + return 0; + } + +@@ -375,7 +375,7 @@ static u8 ali_cable_detect(ide_hwif_t *h + unsigned long flags; + u8 cbl = ATA_CBL_PATA40, tmpbyte; + +- local_irq_save(flags); ++ local_irq_save_nort(flags); + + if (m5229_revision >= 0xC2) { + /* +@@ -396,7 +396,7 @@ static u8 ali_cable_detect(ide_hwif_t *h + } + } + +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + + return cbl; + } +Index: linux-2.6-tip/drivers/ide/hpt366.c 
+=================================================================== +--- linux-2.6-tip.orig/drivers/ide/hpt366.c ++++ linux-2.6-tip/drivers/ide/hpt366.c +@@ -1330,7 +1330,7 @@ static int __devinit init_dma_hpt366(ide + + dma_old = inb(base + 2); + +- local_irq_save(flags); ++ local_irq_save_nort(flags); + + dma_new = dma_old; + pci_read_config_byte(dev, hwif->channel ? 0x4b : 0x43, &masterdma); +@@ -1341,7 +1341,7 @@ static int __devinit init_dma_hpt366(ide + if (dma_new != dma_old) + outb(dma_new, base + 2); + +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + + printk(KERN_INFO " %s: BM-DMA at 0x%04lx-0x%04lx\n", + hwif->name, base, base + 7); +Index: linux-2.6-tip/drivers/ide/ide-io.c +=================================================================== +--- linux-2.6-tip.orig/drivers/ide/ide-io.c ++++ linux-2.6-tip/drivers/ide/ide-io.c +@@ -948,7 +948,7 @@ void ide_timer_expiry (unsigned long dat + /* disable_irq_nosync ?? */ + disable_irq(hwif->irq); + /* local CPU only, as if we were handling an interrupt */ +- local_irq_disable(); ++ local_irq_disable_nort(); + if (hwif->polling) { + startstop = handler(drive); + } else if (drive_is_ready(drive)) { +Index: linux-2.6-tip/drivers/ide/ide-iops.c +=================================================================== +--- linux-2.6-tip.orig/drivers/ide/ide-iops.c ++++ linux-2.6-tip/drivers/ide/ide-iops.c +@@ -275,7 +275,7 @@ void ide_input_data(ide_drive_t *drive, + unsigned long uninitialized_var(flags); + + if ((io_32bit & 2) && !mmio) { +- local_irq_save(flags); ++ local_irq_save_nort(flags); + ata_vlb_sync(io_ports->nsect_addr); + } + +@@ -285,7 +285,7 @@ void ide_input_data(ide_drive_t *drive, + insl(data_addr, buf, len / 4); + + if ((io_32bit & 2) && !mmio) +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + + if ((len & 3) >= 2) { + if (mmio) +@@ -321,7 +321,7 @@ void ide_output_data(ide_drive_t *drive, + unsigned long uninitialized_var(flags); + + if ((io_32bit & 2) && !mmio) { +- local_irq_save(flags); ++ local_irq_save_nort(flags); + ata_vlb_sync(io_ports->nsect_addr); + } + +@@ -331,7 +331,7 @@ void ide_output_data(ide_drive_t *drive, + outsl(data_addr, buf, len / 4); + + if ((io_32bit & 2) && !mmio) +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + + if ((len & 3) >= 2) { + if (mmio) +@@ -509,12 +509,12 @@ static int __ide_wait_stat(ide_drive_t * + if ((stat & ATA_BUSY) == 0) + break; + +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + *rstat = stat; + return -EBUSY; + } + } +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + } + /* + * Allow status to settle, then read it again. 
+@@ -694,17 +694,17 @@ int ide_driveid_update(ide_drive_t *driv + printk("%s: CHECK for good STATUS\n", drive->name); + return 0; + } +- local_irq_save(flags); ++ local_irq_save_nort(flags); + SELECT_MASK(drive, 0); + id = kmalloc(SECTOR_SIZE, GFP_ATOMIC); + if (!id) { +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + return 0; + } + tp_ops->input_data(drive, NULL, id, SECTOR_SIZE); + (void)tp_ops->read_status(hwif); /* clear drive IRQ */ +- local_irq_enable(); +- local_irq_restore(flags); ++ local_irq_enable_nort(); ++ local_irq_restore_nort(flags); + ide_fix_driveid(id); + + drive->id[ATA_ID_UDMA_MODES] = id[ATA_ID_UDMA_MODES]; +Index: linux-2.6-tip/drivers/ide/ide-probe.c +=================================================================== +--- linux-2.6-tip.orig/drivers/ide/ide-probe.c ++++ linux-2.6-tip/drivers/ide/ide-probe.c +@@ -196,10 +196,10 @@ static void do_identify(ide_drive_t *dri + int bswap = 1; + + /* local CPU only; some systems need this */ +- local_irq_save(flags); ++ local_irq_save_nort(flags); + /* read 512 bytes of id info */ + hwif->tp_ops->input_data(drive, NULL, id, SECTOR_SIZE); +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + + drive->dev_flags |= IDE_DFLAG_ID_READ; + #ifdef DEBUG +@@ -813,7 +813,7 @@ static int ide_probe_port(ide_hwif_t *hw + rc = 0; + } + +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + + /* + * Use cached IRQ number. It might be (and is...) changed by probe +Index: linux-2.6-tip/drivers/ide/ide-taskfile.c +=================================================================== +--- linux-2.6-tip.orig/drivers/ide/ide-taskfile.c ++++ linux-2.6-tip/drivers/ide/ide-taskfile.c +@@ -219,7 +219,7 @@ static void ide_pio_sector(ide_drive_t * + offset %= PAGE_SIZE; + + #ifdef CONFIG_HIGHMEM +- local_irq_save(flags); ++ local_irq_save_nort(flags); + #endif + buf = kmap_atomic(page, KM_BIO_SRC_IRQ) + offset; + +@@ -239,7 +239,7 @@ static void ide_pio_sector(ide_drive_t * + + kunmap_atomic(buf, KM_BIO_SRC_IRQ); + #ifdef CONFIG_HIGHMEM +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + #endif + } + +@@ -430,7 +430,7 @@ static ide_startstop_t pre_task_out_intr + } + + if ((drive->dev_flags & IDE_DFLAG_UNMASK) == 0) +- local_irq_disable(); ++ local_irq_disable_nort(); + + ide_set_handler(drive, &task_out_intr, WAIT_WORSTCASE, NULL); + ide_pio_datablock(drive, rq, 1); +Index: linux-2.6-tip/drivers/input/gameport/gameport.c +=================================================================== +--- linux-2.6-tip.orig/drivers/input/gameport/gameport.c ++++ linux-2.6-tip/drivers/input/gameport/gameport.c +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + #include /* HZ */ + #include + #include +@@ -97,12 +98,12 @@ static int gameport_measure_speed(struct + tx = 1 << 30; + + for(i = 0; i < 50; i++) { +- local_irq_save(flags); ++ local_irq_save_nort(flags); + GET_TIME(t1); + for (t = 0; t < 50; t++) gameport_read(gameport); + GET_TIME(t2); + GET_TIME(t3); +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + udelay(i * 10); + if ((t = DELTA(t2,t1) - DELTA(t3,t2)) < tx) tx = t; + } +@@ -121,11 +122,11 @@ static int gameport_measure_speed(struct + tx = 1 << 30; + + for(i = 0; i < 50; i++) { +- local_irq_save(flags); ++ local_irq_save_nort(flags); + rdtscl(t1); + for (t = 0; t < 50; t++) gameport_read(gameport); + rdtscl(t2); +- local_irq_restore(flags); ++ local_irq_restore_nort(flags); + udelay(i * 10); + if (t2 - t1 < tx) tx = t2 - t1; + } +Index: 
linux-2.6-tip/drivers/net/tulip/tulip_core.c +=================================================================== +--- linux-2.6-tip.orig/drivers/net/tulip/tulip_core.c ++++ linux-2.6-tip/drivers/net/tulip/tulip_core.c +@@ -1814,6 +1814,7 @@ static void __devexit tulip_remove_one ( + pci_iounmap(pdev, tp->base_addr); + free_netdev (dev); + pci_release_regions (pdev); ++ pci_disable_device (pdev); + pci_set_drvdata (pdev, NULL); + + /* pci_power_off (pdev, -1); */ +Index: linux-2.6-tip/lib/ratelimit.c +=================================================================== +--- linux-2.6-tip.orig/lib/ratelimit.c ++++ linux-2.6-tip/lib/ratelimit.c +@@ -14,7 +14,7 @@ + #include + #include + +-static DEFINE_SPINLOCK(ratelimit_lock); ++static DEFINE_RAW_SPINLOCK(ratelimit_lock); + + /* + * __ratelimit - rate limiting +Index: linux-2.6-tip/drivers/oprofile/oprofilefs.c +=================================================================== +--- linux-2.6-tip.orig/drivers/oprofile/oprofilefs.c ++++ linux-2.6-tip/drivers/oprofile/oprofilefs.c +@@ -21,7 +21,7 @@ + + #define OPROFILEFS_MAGIC 0x6f70726f + +-DEFINE_SPINLOCK(oprofilefs_lock); ++DEFINE_RAW_SPINLOCK(oprofilefs_lock); + + static struct inode *oprofilefs_get_inode(struct super_block *sb, int mode) + { +Index: linux-2.6-tip/drivers/pci/access.c +=================================================================== +--- linux-2.6-tip.orig/drivers/pci/access.c ++++ linux-2.6-tip/drivers/pci/access.c +@@ -12,7 +12,7 @@ + * configuration space. + */ + +-static DEFINE_SPINLOCK(pci_lock); ++static DEFINE_RAW_SPINLOCK(pci_lock); + + /* + * Wrappers for all PCI configuration access functions. They just check +Index: linux-2.6-tip/drivers/video/console/vgacon.c +=================================================================== +--- linux-2.6-tip.orig/drivers/video/console/vgacon.c ++++ linux-2.6-tip/drivers/video/console/vgacon.c +@@ -51,7 +51,7 @@ + #include